In [14]:
import seaborn

In [15]:
import pandas as pd

def get_action_df(raw_df, action):
    """Return the rows of ``raw_df`` whose ``actionType`` equals ``action``.

    Parameters
    ----------
    raw_df : pandas.DataFrame
        Frame that contains an ``actionType`` column.
    action : object
        Value compared against ``actionType`` with ``==``
        (callers in this file pass "ATTACK", "MOVE" or "CAST").

    Returns
    -------
    pandas.DataFrame
        A new frame holding the matching rows (original index labels kept)
        with the ``actionType`` column removed. ``raw_df`` is not modified.
    """
    matching_rows = raw_df.loc[raw_df["actionType"] == action]
    # Name the axis via `columns=`: passing the axis positionally
    # (`drop("actionType", 1)`) is rejected by pandas >= 2.0.
    return matching_rows.drop(columns="actionType")


def is_filter_player(steam_id, filter_id):
    """Binary label: 1 when ``steam_id`` equals ``filter_id``, otherwise 0."""
    return 1 if steam_id == filter_id else 0
    
def get_dfs(csvpath, dropna=True):
    """Read a CSV and split it into one frame per action type.

    Parameters
    ----------
    csvpath : path passed straight to ``pandas.read_csv``.
    dropna : bool, default True
        When true, rows containing any missing value are discarded
        before the split.

    Returns
    -------
    tuple
        ``(attack_df, move_df, cast_df)`` - the results of ``get_action_df``
        for "ATTACK", "MOVE" and "CAST", in that order.
    """
    loaded = pd.read_csv(csvpath)
    cleaned = loaded.dropna() if dropna else loaded

    attack_df, move_df, cast_df = (
        get_action_df(cleaned, action_name)
        for action_name in ("ATTACK", "MOVE", "CAST")
    )
    return attack_df, move_df, cast_df

In [16]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.model_selection import train_test_split


def split_data(df, filterID, test_size=0.01):
    """Build binary labels from ``steamid`` and make a stratified split.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``steamid`` column; every other column is used as a feature.
    filterID : object
        Steam id of the target player. Rows whose ``steamid`` equals it are
        labelled 1, all other rows 0 (via ``is_filter_player``).
    test_size : float, default 0.01
        Forwarded to ``train_test_split``.

    Returns
    -------
    list
        ``[X_train, X_test, y_train, y_test]`` as returned by
        ``train_test_split``. The split is stratified on the label and uses a
        fixed ``random_state`` of 99, so repeated calls give the same partition.
    """
    y = df["steamid"].map(lambda steamid: is_filter_player(steamid, filterID))

    # Name the axis via `columns=`: passing the axis positionally
    # (`drop("steamid", 1)`) is rejected by pandas >= 2.0.
    features = df.drop(columns="steamid")

    return train_test_split(features, y,
                            stratify=y, test_size=test_size, random_state=99)

def fit_model(model, X, y):
    """Fit ``model`` on ``(X, y)`` and hand the same model object back.

    The return value is always the ``model`` argument itself, regardless of
    what ``model.fit`` returns.
    """
    model.fit(X, y)
    return model

def get_scores(model, X, y):
    """Score ``model`` on ``X`` against the true labels ``y``.

    Returns
    -------
    tuple
        ``(accuracy, precision, recall)`` computed from a single call to
        ``model.predict(X)``.
    """
    predicted = model.predict(X)
    metric_functions = (accuracy_score, precision_score, recall_score)
    return tuple(metric(y, predicted) for metric in metric_functions)

In [17]:
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier

def print_scores(model, X_test, y_test, name):
    """Print one line with the accuracy, precision and recall of ``model``.

    ``name`` is used as the line's prefix; the scores come from ``get_scores``.
    """
    template = "{} - Accuracy: {}, Precision: {}, Recall: {}"
    scores = get_scores(model, X_test, y_test)
    print(template.format(name, *scores))

    
def ml(csvpath, filter_id):
    """Train one logistic-regression classifier per action type.

    The CSV at ``csvpath`` is split into ATTACK / MOVE / CAST frames. For each
    frame, half of the rows are used for fitting and the other half for the
    scores that are printed.

    Parameters
    ----------
    csvpath : path of the CSV file, forwarded to ``get_dfs``.
    filter_id : steam id treated as the positive class by ``split_data``.

    Returns
    -------
    dict
        Maps "ATTACK", "MOVE" and "CAST" to the fitted ``LogisticRegression``.
    """
    # Pair every action label with its dataframe, preserving the fixed order.
    named_frames = list(zip(("ATTACK", "MOVE", "CAST"), get_dfs(csvpath)))
    row_counts = [len(frame.index) for _, frame in named_frames]
    print("Got {} attacks, {} moves, {} casts\n".format(*row_counts))

    fitted = {}
    for action, frame in named_frames:
        X_train, X_test, y_train, y_test = split_data(frame, filter_id, test_size=0.5)

        # Class 0 (rows from other players) is weighted 0.7 against 1 for class 1.
        classifier = LogisticRegression(class_weight={0: .7, 1: 1})
        fitted[action] = fit_model(classifier, X_train, y_train)

        print_scores(fitted[action], X_test, y_test, "LR {}".format(action))
        print("----")

    return fitted


In [18]:
# Evaluation
from sklearn.metrics import confusion_matrix
import seaborn as sn
import matplotlib.pyplot as plt

def plot_confusion_matrix(cm, title):
    """Draw ``cm`` as an annotated blue heatmap on a new 10x7 figure.

    Parameters
    ----------
    cm : array-like accepted by ``pandas.DataFrame`` (e.g. a confusion matrix).
    title : str used as the figure's suptitle.
    """
    figure, axes = plt.subplots(figsize=(10, 7))
    figure.suptitle(title)
    matrix_frame = pd.DataFrame(cm)
    sn.heatmap(matrix_frame, cmap="Blues", annot=True, fmt="g", ax=axes)

def evaluate(csvpath, filter_id=None, fitted_models=None):
    """Score previously fitted models on a CSV and plot confusion matrices.

    Parameters
    ----------
    csvpath : path of the evaluation CSV, forwarded to ``get_dfs``.
    filter_id : optional
        Steam id treated as the positive class. When omitted, the notebook
        global ``filterID`` is used.
    fitted_models : dict, optional
        Mapping with the keys "ATTACK", "MOVE" and "CAST" whose values expose
        ``predict``. When omitted, the notebook global ``models`` is used.

    Raises
    ------
    NameError
        If an argument is omitted and the corresponding global has not been
        defined yet (i.e. the training cell has not been run).
    """
    # Explicit arguments win; the globals are only a fallback so that existing
    # `evaluate(path)` calls keep working.
    if filter_id is None:
        filter_id = filterID
    if fitted_models is None:
        fitted_models = models

    attack_df, move_df, cast_df = get_dfs(csvpath)

    for name, eval_df in [("ATTACK", attack_df), ("MOVE", move_df), ("CAST", cast_df)]:
        # Only the test partition is used; test_size=0.99 places 99% of the
        # rows in it, and the remaining rows are discarded.
        _, X_test, _, y_test = split_data(eval_df, filter_id, test_size=0.99)

        model = fitted_models[name]
        print_scores(model, X_test, y_test, name)
        pred = model.predict(X_test)
        cm = confusion_matrix(y_test, pred)
        plot_confusion_matrix(cm, "{} confusion matrix".format(name))

In [19]:
# Spectre
#filterID = 76561198051158462
#ml("/cs/scratch/sy35/dota-data/67-mouseaction.csv", filterID)

In [20]:
# Pudge  (label for this run - presumably the hero of the target player; confirm)
# Output recorded from this run: Got 19553 attacks, 268340 moves, 3481 casts
# NOTE: evaluate() reads `filterID` and `models` as module-level globals,
# so this cell has to run before evaluate() is called.
filterID = 76561198119286646
models = ml("/cs/scratch/sy35/dota-data/14-mouseaction.csv", filterID)

Got 19553 attacks, 268340 moves, 3481 casts

LR ATTACK - Accuracy: 0.7899151068835021, Precision: 0.6745635910224439, Recall: 0.6817895400126024
----
LR MOVE - Accuracy: 0.7678765744950437, Precision: 0.7310129883678472, Recall: 0.8420254153273049
----
LR CAST - Accuracy: 0.7886272257323378, Precision: 0.7158671586715867, Recall: 0.8094575799721836
----


In [None]:
# Needs the `filterID` and `models` globals from the training cell to exist.
evaluate("/cs/scratch/sy35/dota-data/14/evaluation.csv")