In [28]:
import pandas as pd

def get_action_df(raw_df, action):
    return raw_df.loc[raw_df["actionType"] == action].drop("actionType", 1)


def is_filter_player(steam_id, filter_id):
    if steam_id == filter_id:
        return 1
    else:
        return 0

def get_dfs(dfs):
    raw_df = pd.concat(dfs, ignore_index=True)
    raw_df = raw_df.dropna()
    
    attack_df = get_action_df(raw_df, "ATTACK")
    move_df = get_action_df(raw_df, "MOVE")
    cast_df = get_action_df(raw_df, "CAST")
    
    return attack_df, move_df, cast_df    

In [42]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score

    
def fit_model(model, df, filter_id):
    y = df["steamid"].map(lambda steamid: is_filter_player(steamid, filter_id))
    X = df.drop("steamid", 1)
    
    model.fit(X, y)
    
    return model


def cross_validate(dfs, ys, splits, filter_id):
    skf = StratifiedKFold(n_splits=splits)
    
    for train, test in skf.split(dfs, ys):
        training_dfs = [dfs[i] for i in train]
        testing_dfs = [dfs[i] for i in test]
        
        training_attack_df, training_move_df, training_cast_df = get_dfs(training_dfs)
        testing_attack_df, testing_move_df, testing_cast_df = get_dfs(testing_dfs)
        
        print("Training on {} attacks, {} moves and {} casts".format(len(training_attack_df.index), len(training_move_df.index), len(training_cast_df.index)))
        print("Testing on {} attacks, {} moves and {} casts".format(len(testing_attack_df.index), len(testing_move_df.index), len(testing_cast_df.index)))
        for name, train_df, test_df in [("ATTACK", training_attack_df, testing_attack_df), 
                                        ("MOVE", training_move_df, testing_move_df), 
                                        ("CAST", training_cast_df, testing_cast_df)]:

            lr = fit_model(LogisticRegression(class_weight="balanced"), train_df, filter_id)
            
            acc, pre, rec = test_model(lr, test_df, filter_id)
            print_scores(name, acc, pre, rec)
        print("")
    
def test_model(model, df, filter_id):
    y = df["steamid"].map(lambda steamid: is_filter_player(steamid, filter_id))
    X = df.drop("steamid", 1)
    
    predictions = model.predict(X)
    
    accuracy = accuracy_score(y, predictions)
    precision = precision_score(y, predictions)
    recall = recall_score(y, predictions)
    
    return accuracy, precision, recall

def print_scores(name, accuracy, precision, recall):
    print("{} - Accuracy: {}, Precision: {}, Recall: {}".format(name, accuracy, precision, recall)) 

    

In [43]:
import ntpath
import os
import random

def ml(path, splits):
    csvs = ["{}/{}".format(path,file) for file in os.listdir(path)]
    filter_id = 76561198119286646
    
    dfs = [pd.read_csv(csv_file) for csv_file in csvs]
    ys = [is_filter_player(filter_id, ntpath.basename(csv_file)[:17]) for csv_file in csvs]
    
    cross_validate(dfs, ys, splits, filter_id)
    

In [16]:
path = "/cs/scratch/sy35/dota-data/14-2/data/mouseaction"

In [44]:
ml(path, 5)

Training on 47851 attacks, 722209 moves and 11755 casts
Testing on 11967 attacks, 170655 moves and 2857 casts
ATTACK - Accuracy: 0.765104036099273, Precision: 0.36394736842105263, Recall: 0.7782779966235228
MOVE - Accuracy: 0.8039143300811579, Precision: 0.540292756764195, Recall: 0.8233983432945371
CAST - Accuracy: 0.8246412320616031, Precision: 0.42876165113182424, Recall: 0.817258883248731

Training on 48131 attacks, 705118 moves and 11475 casts
Testing on 11687 attacks, 187746 moves and 3137 casts
ATTACK - Accuracy: 0.7164370668263883, Precision: 0.4953785644051131, Recall: 0.7710437710437711
MOVE - Accuracy: 0.7334377296986354, Precision: 0.5983670209225445, Recall: 0.7957804662142225
CAST - Accuracy: 0.7590054191903092, Precision: 0.49594155844155846, Recall: 0.8190348525469169

Training on 47778 attacks, 708918 moves and 11535 casts
Testing on 12040 attacks, 183946 moves and 3077 casts
ATTACK - Accuracy: 0.7641196013289037, Precision: 0.636969350847135, Recall: 0.781958401495676