In [28]:
import pandas as pd

def get_action_df(raw_df, action):
    return raw_df.loc[raw_df["actionType"] == action].drop("actionType", 1)


def is_filter_player(steam_id, filter_id):
    if steam_id == filter_id:
        return 1
    else:
        return 0

def get_dfs(dfs):
    raw_df = pd.concat(dfs, ignore_index=True)
    raw_df = raw_df.dropna()
    
    attack_df = get_action_df(raw_df, "ATTACK")
    move_df = get_action_df(raw_df, "MOVE")
    cast_df = get_action_df(raw_df, "CAST")
    
    return attack_df, move_df, cast_df    

In [48]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score

    
def fit_model(model, df, filter_id):
    y = df["steamid"].map(lambda steamid: is_filter_player(steamid, filter_id))
    X = df.drop("steamid", 1)
    
    model.fit(X, y)
    
    return model


def cross_validate(dfs, ys, splits, filter_id, class_weight):
    skf = StratifiedKFold(n_splits=splits)
    
    for train, test in skf.split(dfs, ys):
        training_dfs = [dfs[i] for i in train]
        testing_dfs = [dfs[i] for i in test]
        
        training_attack_df, training_move_df, training_cast_df = get_dfs(training_dfs)
        testing_attack_df, testing_move_df, testing_cast_df = get_dfs(testing_dfs)
        
        print("Training on {} attacks, {} moves and {} casts".format(len(training_attack_df.index), len(training_move_df.index), len(training_cast_df.index)))
        print("Testing on {} attacks, {} moves and {} casts".format(len(testing_attack_df.index), len(testing_move_df.index), len(testing_cast_df.index)))
        for name, train_df, test_df in [("ATTACK", training_attack_df, testing_attack_df), 
                                        ("MOVE", training_move_df, testing_move_df), 
                                        ("CAST", training_cast_df, testing_cast_df)]:

            lr = fit_model(LogisticRegression(class_weight=class_weight), train_df, filter_id)
            
            acc, pre, rec = test_model(lr, test_df, filter_id)
            print_scores(name, acc, pre, rec)
        print("")
    
def test_model(model, df, filter_id):
    y = df["steamid"].map(lambda steamid: is_filter_player(steamid, filter_id))
    X = df.drop("steamid", 1)
    
    predictions = model.predict(X)
    
    accuracy = accuracy_score(y, predictions)
    precision = precision_score(y, predictions)
    recall = recall_score(y, predictions)
    
    return accuracy, precision, recall

def print_scores(name, accuracy, precision, recall):
    print("{} - Accuracy: {}, Precision: {}, Recall: {}".format(name, accuracy, precision, recall)) 

    

In [49]:
import ntpath
import os
import random

def ml(path, splits, class_weight):
    csvs = ["{}/{}".format(path,file) for file in os.listdir(path)]
    filter_id = 76561198119286646
    
    dfs = [pd.read_csv(csv_file) for csv_file in csvs]
    ys = [is_filter_player(filter_id, ntpath.basename(csv_file)[:17]) for csv_file in csvs]
    
    cross_validate(dfs, ys, splits, filter_id, class_weight)
    

In [50]:
path = "/cs/scratch/sy35/dota-data/14-2/data/mouseaction"

In [51]:
ml(path, 7, "balanced")

Training on 51789 attacks, 761860 moves and 12463 casts
Testing on 8029 attacks, 131004 moves and 2149 casts
ATTACK - Accuracy: 0.796114086436667, Precision: 0.526778882938026, Recall: 0.7749015194147439
MOVE - Accuracy: 0.8315776617507863, Precision: 0.6730429917803503, Recall: 0.8199381356820803
CAST - Accuracy: 0.8641228478362029, Precision: 0.5944444444444444, Recall: 0.8147208121827412

Training on 51271 attacks, 773101 moves and 12619 casts
Testing on 8547 attacks, 119763 moves and 1993 casts
ATTACK - Accuracy: 0.7031707031707032, Precision: 0.3577537058152794, Recall: 0.815464587394412
MOVE - Accuracy: 0.7361539039603217, Precision: 0.5125122811073224, Recall: 0.8086813788072224
CAST - Accuracy: 0.7596588058203713, Precision: 0.4005805515239477, Recall: 0.8070175438596491

Training on 51123 attacks, 751060 moves and 12272 casts
Testing on 8695 attacks, 141804 moves and 2340 casts
ATTACK - Accuracy: 0.7313398504887867, Precision: 0.5596590909090909, Recall: 0.774481772694782
MOVE

In [52]:
ml(path, 7, None)

Training on 51789 attacks, 761860 moves and 12463 casts
Testing on 8029 attacks, 131004 moves and 2149 casts
ATTACK - Accuracy: 0.8527836592352721, Precision: 0.771689497716895, Recall: 0.47552054023635343
MOVE - Accuracy: 0.852989221703154, Precision: 0.7622088342263805, Recall: 0.719670756002936
CAST - Accuracy: 0.896696137738483, Precision: 0.8006993006993007, Recall: 0.5812182741116751

Training on 51271 attacks, 773101 moves and 12619 casts
Testing on 8547 attacks, 119763 moves and 1993 casts
ATTACK - Accuracy: 0.8262548262548263, Precision: 0.5170454545454546, Recall: 0.5321637426900585
MOVE - Accuracy: 0.7739285088048896, Precision: 0.5710381847017202, Recall: 0.7114110280260199
CAST - Accuracy: 0.8489713998996488, Precision: 0.554089709762533, Recall: 0.6140350877192983

Training on 51123 attacks, 751060 moves and 12272 casts
Testing on 8695 attacks, 141804 moves and 2340 casts
ATTACK - Accuracy: 0.7781483611270845, Precision: 0.7434173669467787, Recall: 0.4742673338098642
MOVE