In [1]:
import os
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from sklearn.metrics import confusion_matrix

# Global plot styling for the notebook: "notebook" scaling context, dark grid
# background and the "muted" palette. color_codes=True remaps matplotlib's
# single-letter colour shorthands ("b", "g", "r", ...) to that palette.
sns.set(context="notebook", style="darkgrid", palette="muted", color_codes=True)

In [2]:
def sensitivity(tp, tn, fp, fn):
    """Return the true positive rate, tp / (tp + fn), as a float.

    All metric helpers take the same four confusion-matrix counts so they can
    be called interchangeably; ``tn`` and ``fp`` are not used here.
    A zero denominator (no actual positives) is not handled by this function.
    """
    actual_positives = tp + fn
    # Multiply by 1.0 to force floating-point (not integer) division.
    return (1.0 * tp) / actual_positives

def specificity(tp, tn, fp, fn):
    """Return the true negative rate, tn / (tn + fp), as a float.

    ``tp`` and ``fn`` are accepted only to keep the signature uniform with the
    other metric helpers; they are not used here.
    A zero denominator (no actual negatives) is not handled by this function.
    """
    actual_negatives = tn + fp
    # Multiply by 1.0 to force floating-point (not integer) division.
    return (1.0 * tn) / actual_negatives

def accuracy(tp, tn, fp, fn):
    """Return the fraction of correct predictions, (tp + tn) / total, as a float.

    ``total`` is the sum of all four confusion-matrix counts.
    An all-zero input (empty confusion matrix) is not handled by this function.
    """
    correct = tp + tn
    total = tp + tn + fp + fn
    # Multiply by 1.0 to force floating-point (not integer) division.
    return (1.0 * correct) / total

def precision(tp, tn, fp, fn):
    """Return the positive predictive value, tp / (tp + fp), as a float.

    ``tn`` and ``fn`` are accepted only to keep the signature uniform with the
    other metric helpers; they are not used here.
    A zero denominator (no predicted positives) is not handled by this function.
    """
    predicted_positives = tp + fp
    # Multiply by 1.0 to force floating-point (not integer) division.
    return (1.0 * tp) / predicted_positives

def recall(tp, tn, fp, fn):
    """Return recall, tp / (tp + fn), as a float.

    This is the same ratio that ``sensitivity`` computes; ``tn`` and ``fp`` are
    accepted only to keep the signature uniform and are not used here.
    A zero denominator (no actual positives) is not handled by this function.
    """
    actual_positives = tp + fn
    # Multiply by 1.0 to force floating-point (not integer) division.
    return (1.0 * tp) / actual_positives

In [3]:
# dfs = [pd.read_csv("/home/mejia/Downloads/"+file) for root, subs, files in os.walk("/home/mejia/Downloads/") for file in files if file.startswith("ML SP500") and file.endswith(".csv")]

# kws = "AP BPM BDT DF DJ LR NN SVM".split()
# data = pd.DataFrame(data={"Class": dfs[0]["class"]})
# for i, df in enumerate(dfs):
#     data["Labels_{}".format(kws[i])] = df["Scored Labels"]
#     data["Probabilities_{}".format(kws[i])] = df["Scored Probabilities"]

# data.to_csv("../data/processed/sp500-mi-results.csv")

In [4]:
# Load the combined per-model results. The evaluation cell below reads the
# "Class" column and one "Labels_<abbreviation>" column per model from it.
# The path is relative, so it resolves against the kernel's working directory.
ml_results = pd.read_csv("../data/processed/sp500-mi-results.csv")

In [5]:
# Abbreviation (suffix of the "Labels_<abbr>" columns in ml_results) -> display
# name used as the row label of the evaluation table.
# NOTE(review): "Descision" is misspelled; the label is left untouched because
# it is the row label that appears in the saved output.
models = {
    "AP": "Averaged Perceptron",
    "BPM": "Bayes Point Machine",
    "BDT": "Boosted Descision Tree",
    "DF": "Decision Forest",
    "DJ": "Decision Jungle",
    "LR": "Logistic Regression",
    "NN": "Neural Network",
    "SVM": "Support Vector Machine"
}

# (table column, metric function) pairs, in the order the columns are filled.
metric_functions = [
    ("Sensitivity", sensitivity),
    ("Specificity", specificity),
    ("Accuracy", accuracy),
    ("Precision", precision),
]

# One row per model and one column per metric, pre-filled with NaN.
model_evaluation = pd.DataFrame(
    data=np.nan,
    index=models.values(),
    columns=[column for column, metric in metric_functions],
)

for kw in models:
    predicted = ml_results["Labels_{}".format(kw)]
    # For a binary problem sklearn's flattened confusion matrix unpacks as
    # tn, fp, fn, tp. Assumes "Class" holds exactly two classes -- TODO confirm.
    tn, fp, fn, tp = confusion_matrix(ml_results["Class"], predicted).ravel()

    for column, metric in metric_functions:
        model_evaluation.loc[models[kw], column] = metric(tp, tn, fp, fn)

In [6]:
# Show the per-model metrics table (rows: models, columns: metrics).
model_evaluation

Unnamed: 0,Sensitivity,Specificity,Accuracy,Precision
Averaged Perceptron,0.947507,0.080838,0.542657,0.540419
Support Vector Machine,0.863517,0.176647,0.542657,0.544702
Decision Jungle,0.664042,0.368263,0.525874,0.545259
Neural Network,0.897638,0.158683,0.552448,0.548957
Boosted Descision Tree,0.979003,0.017964,0.53007,0.532097
Decision Forest,0.889764,0.116766,0.528671,0.5347
Logistic Regression,0.748031,0.251497,0.516084,0.53271
Bayes Point Machine,0.950131,0.02994,0.52028,0.527697
