In [55]:
import pandas as pd
from sklearn.model_selection import train_test_split


def getActionDf(raw_df, action):
    """Return the rows of ``raw_df`` for one action type, without the type column.

    Args:
        raw_df: DataFrame that contains an ``actionType`` column.
        action: Value of ``actionType`` to keep (compared with ``==``).

    Returns:
        A new DataFrame holding only the matching rows, with the
        ``actionType`` column removed. ``raw_df`` itself is not modified.
    """
    matching_rows = raw_df.loc[raw_df["actionType"] == action]
    # Use the `columns=` keyword: passing the axis positionally (`drop(label, 1)`)
    # was deprecated in pandas 1.3 and raises TypeError from pandas 2.0 on.
    return matching_rows.drop(columns="actionType")

def split_data(df, steamid, test_size=0.3):
    """Build a binary "is this player" target and split into train/test sets.

    Args:
        df: DataFrame with a ``steamid`` column; every other column is
            treated as a feature.
        steamid: The player id that becomes the positive class (label 1).
            All other ids get label 0.
        test_size: Forwarded to ``train_test_split`` (default 0.3).

    Returns:
        The result of ``train_test_split``: ``X_train, X_test, y_train, y_test``.
        The split is stratified on the label and uses a fixed
        ``random_state`` of 99 so repeated runs give the same partition.
    """
    # Vectorised label construction: 1 where the row belongs to `steamid`, else 0.
    y = (df["steamid"] == steamid).astype(int)

    # `columns=` keyword instead of a positional axis, which pandas >= 2.0 rejects.
    features = df.drop(columns="steamid")

    return train_test_split(features, y,
                            stratify=y, test_size=test_size, random_state=99)

def isFilterPlayer(steamID, filterID):
    """Return 1 when ``steamID`` equals ``filterID``, otherwise 0."""
    return 1 if steamID == filterID else 0

In [56]:
# Load the combined mouse-action log and discard every row that has at least
# one missing value (DataFrame.dropna() default).
# NOTE(review): absolute, machine-specific path — consider a configurable data directory.
raw_df = pd.read_csv("/cs/scratch/sy35/dota-data/mouseaction.csv").dropna()

In [57]:
# Partition the raw log into one frame per action type.
attack_df, move_df, cast_df = (
    getActionDf(raw_df, action_name) for action_name in ("ATTACK", "MOVE", "CAST")
)

# Report how many rows each action type contributed.
print("Got {} attacks, {} moves, {} casts".format(len(attack_df), len(move_df), len(cast_df)))

Got 52904 attacks, 252964 moves, 5571 casts


In [58]:
# Select a player from the data: this Steam ID is handed to split_data() as the
# positive class (label 1); rows of every other player are labelled 0.
filterID = 76561198051158462

In [152]:
# Split the ATTACK rows in half (test_size=0.5) into train and test sets,
# with labels derived from filterID by split_data().
X_train, X_test, y_train, y_test = split_data(attack_df, filterID, test_size=0.5)

In [159]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score

def logistic_regression(X, y):
    """Fit a default-configured ``LogisticRegression`` on ``X``/``y`` and return it."""
    # Estimator.fit() hands back the fitted estimator itself, so it can be returned directly.
    return LogisticRegression().fit(X, y)

def get_scores(lr, X, y):
    """Score a fitted classifier on a labelled dataset.

    Args:
        lr: Fitted model exposing ``predict(X)``.
        X: Feature rows to predict on.
        y: True labels for ``X``.

    Returns:
        Tuple ``(accuracy, precision, recall)`` computed from ``y`` and the
        model's predictions for ``X``.
    """
    predictions = lr.predict(X)

    # Compare against the `y` argument, never a notebook-level variable, so the
    # function scores whichever dataset the caller actually passed in.
    accuracy = accuracy_score(y, predictions)
    precision = precision_score(y, predictions)
    recall = recall_score(y, predictions)

    return accuracy, precision, recall
    
# Fit on the training half, then compute accuracy/precision/recall on the held-out half.
lr = logistic_regression(X_train, y_train)
accuracy, precision, recall = get_scores(lr, X_test, y_test)

In [160]:
accuracy

0.9351277786178739

In [161]:
precision

0.8916429408809244

In [162]:
recall

0.8327858595338652

In [139]:
# Load the per-player mouse-action file of a second Steam ID (different from filterID).
# NOTE(review): absolute, machine-specific path and an inline magic ID — consider
# hoisting both into named configuration values.
player_df = pd.read_csv("/cs/scratch/sy35/dota-data/{}-mouseaction.csv".format(76561198064336398))

In [140]:
# Reduce the second player's CAST rows to plain feature columns: the action
# type is removed by getActionDf(), then the id column and any incomplete rows.
# `columns=` keyword is used because pandas >= 2.0 rejects a positional axis.
# NOTE(review): the classifier `lr` in this notebook is fitted on ATTACK rows,
# while this frame holds CAST rows — confirm that evaluating across action
# types is intended.
test_df = (
    getActionDf(player_df, "CAST")
    .drop(columns="steamid")
    .dropna()
)

In [141]:
# Classify each row of the second player's data with the fitted model.
predictions = lr.predict(test_df)
# Expected labels: one 0 per row of test_df.
y = [0] * len(test_df)

In [142]:
# Every entry of y is 0, so this accuracy equals the fraction of rows the model
# predicted as 0.
# NOTE(review): this cell's execution count (142) is lower than that of the
# training cell (159); the stored output may come from a different `lr` — re-run to confirm.
accuracy_score(y, predictions)

0.9580973952434881