In [1]:
import pandas as pd

def get_action_df(raw_df, action):
    """Return the rows of ``raw_df`` for one action type, minus the label column.

    Parameters
    ----------
    raw_df : pandas.DataFrame
        Frame containing an ``"actionType"`` column.
    action : object
        Value that ``"actionType"`` must equal for a row to be kept.

    Returns
    -------
    pandas.DataFrame
        A new frame holding the matching rows (original index preserved)
        without the ``"actionType"`` column. ``raw_df`` is not modified.
    """
    matches_action = raw_df["actionType"] == action
    # Use the ``columns=`` keyword: passing the axis positionally
    # (``drop("actionType", 1)``) is rejected by pandas 2.x.
    return raw_df.loc[matches_action].drop(columns="actionType")


def is_filter_player(steam_id, filter_id):
    """Return 1 when ``steam_id`` equals ``filter_id``, otherwise 0."""
    return 1 if steam_id == filter_id else 0
    
def get_dfs(csvpath, dropna=True):
    """Load a CSV and split it into per-action frames.

    Parameters
    ----------
    csvpath : path or file-like
        Passed straight to ``pandas.read_csv``.
    dropna : bool, default True
        When true, rows containing any missing value are discarded before
        the split.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(attack_df, move_df, cast_df)`` -- the rows whose action type is
        ``"ATTACK"``, ``"MOVE"`` and ``"CAST"`` respectively, as produced by
        ``get_action_df``.
    """
    loaded = pd.read_csv(csvpath)
    frame = loaded.dropna() if dropna else loaded

    # Same order as the documented return value.
    return tuple(
        get_action_df(frame, action_name)
        for action_name in ("ATTACK", "MOVE", "CAST")
    )

In [7]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.model_selection import train_test_split


def split_data(df, filterID, test_size=0.3):
    """Build a binary target from ``steamid`` and split into train/test sets.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a ``"steamid"`` column; every other column is used
        as a feature.
    filterID : object
        Steam ID of the player to recognise. Rows whose ``"steamid"`` equals
        this value are labelled 1, all others 0.
    test_size : float, default 0.3
        Forwarded to ``train_test_split``.

    Returns
    -------
    list
        ``[X_train, X_test, y_train, y_test]`` as returned by
        ``train_test_split`` (stratified on the label, ``random_state=99``).
    """
    # Vectorised labelling: 1 where the row belongs to the filter player.
    y = (df["steamid"] == filterID).astype(int)

    # ``columns=`` keyword instead of a positional axis, which pandas 2.x
    # no longer accepts.
    features = df.drop(columns="steamid")

    return train_test_split(features, y,
                            stratify=y, test_size=test_size, random_state=99)

def logistic_regression(X, y):
    """Fit a default-configured ``LogisticRegression`` on ``(X, y)``.

    Returns the fitted estimator.
    """
    # ``fit`` returns the estimator itself, so the call can be chained.
    return LogisticRegression().fit(X, y)

def get_scores(lr, X, y):
    """Score a fitted classifier on ``(X, y)``.

    Parameters
    ----------
    lr : estimator
        Object exposing ``predict(X)``.
    X : array-like
        Feature rows to predict on.
    y : array-like
        True labels for ``X``.

    Returns
    -------
    tuple
        ``(accuracy, precision, recall)`` computed from ``y`` and the
        predictions of ``lr``.
    """
    predicted = lr.predict(X)

    # Evaluate the metrics in the order they are returned.
    return (
        accuracy_score(y, predicted),
        precision_score(y, predicted),
        recall_score(y, predicted),
    )


In [8]:
def ml(csvpath, filter_id, test_size=0.5):
    """Train and report one classifier per action type for a given player.

    Loads the CSV via ``get_dfs``, then for each of the ATTACK, MOVE and CAST
    frames: splits with ``split_data``, fits with ``logistic_regression`` and
    prints the accuracy, precision and recall from ``get_scores`` on the
    held-out part.

    Parameters
    ----------
    csvpath : path or file-like
        Forwarded to ``get_dfs``.
    filter_id : object
        Steam ID treated as the positive class.
    test_size : float, default 0.5
        Fraction of each action frame held out for scoring; forwarded to
        ``split_data``. The default matches the previously hard-coded value.

    Returns
    -------
    None
        Results are printed, not returned.
    """
    # Create dataframes
    attack_df, move_df, cast_df = get_dfs(csvpath)
    print("Got {} attacks, {} moves, {} casts".format(len(attack_df.index), len(move_df.index), len(cast_df.index)))

    # Run each action type through the same split / fit / score pipeline
    frames_by_action = (("ATTACK", attack_df), ("MOVE", move_df), ("CAST", cast_df))
    for name, df in frames_by_action:
        X_train, X_test, y_train, y_test = split_data(df, filter_id, test_size=test_size)

        lr = logistic_regression(X_train, y_train)
        accuracy, precision, recall = get_scores(lr, X_test, y_test)
        print("{} - Accuracy: {}, Precision: {}, Recall: {}".format(name, accuracy, precision, recall))

In [9]:
# Spectre (original author's label for this run -- presumably the hero played; TODO confirm).
# Fit and score the ATTACK/MOVE/CAST classifiers for this Steam ID on the
# 67-mouseaction.csv log; results are printed by ml().
filterID = 76561198051158462
ml("/cs/scratch/sy35/dota-data/67/67-mouseaction.csv", filterID)

Got 52904 attacks, 252964 moves, 5571 casts
ATTACK - Accuracy: 0.9351277786178739, Precision: 0.8916429408809244, Recall: 0.8327858595338652
MOVE - Accuracy: 0.9090937840957607, Precision: 0.8682067158847344, Recall: 0.8594059405940594
CAST - Accuracy: 0.9300071787508973, Precision: 0.8125, Recall: 0.7878787878787878


In [10]:
# Pudge (original author's label for this run -- presumably the hero played; TODO confirm).
# Fit and score the ATTACK/MOVE/CAST classifiers for this Steam ID on the
# 14-mouseaction.csv log; results are printed by ml().
filterID = 76561198119286646
ml("/cs/scratch/sy35/dota-data/14/14-mouseaction.csv", filterID)

Got 19553 attacks, 268340 moves, 3481 casts
ATTACK - Accuracy: 0.8073028536360847, Precision: 0.7539370078740157, Recall: 0.6033396345305608
MOVE - Accuracy: 0.7851904300514273, Precision: 0.7846318551244741, Recall: 0.7816264607804849
CAST - Accuracy: 0.8012636415852958, Precision: 0.7787742899850523, Recall: 0.7246175243393602
