In [1]:
import pandas as pd
import numpy as np
import warnings
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib import rcParams
%matplotlib inline
rcParams['figure.figsize'] = 15, 5
sns.set_style('darkgrid')
from sklearn.model_selection import train_test_split,cross_val_score
from sklearn.metrics import classification_report, confusion_matrix, r2_score
from sklearn.preprocessing import StandardScaler,LabelEncoder,RobustScaler
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from IPython.display import display_markdown
warnings.filterwarnings("ignore")

In [2]:
def display(classification_list,scores):
    display_markdown(f'\n|            |{classification_list[pd.Series(scores).idxmax()][14:23].capitalize()}|{classification_list[pd.Series(scores).idxmax()][27:33].capitalize()}|{classification_list[pd.Series(scores).idxmax()][35:43].capitalize()}|{classification_list[pd.Series(scores).idxmax()][46:53].capitalize()}|\n|:---|-----|-----|-----|---:|\n| 0  |{classification_list[pd.Series(scores).idxmax()].split("support")[1][21:][:4]}|{classification_list[pd.Series(scores).idxmax()].split("support")[1][21:][10:14]}|{classification_list[pd.Series(scores).idxmax()].split("support")[1][21:][20:24]}|{classification_list[pd.Series(scores).idxmax()].split("support")[1][21:].split("           1")[0][31:-1]}|\n| 1  |{classification_list[pd.Series(scores).idxmax()].split("support")[1][21:].split("           1       ")[1][:4]}|{classification_list[pd.Series(scores).idxmax()].split("support")[1][21:].split("           1       ")[1][10:14]}|{classification_list[pd.Series(scores).idxmax()].split("support")[1][21:].split("           1       ")[1][20:24]}|{classification_list[pd.Series(scores).idxmax()].split("support")[1][21:].split("           1       ")[1].split("    accuracy")[0][31:-2]}|\n|Accuracy|||{classification_list[pd.Series(scores).idxmax()].split("accuracy                           ")[1][0:4]}|{classification_list[pd.Series(scores).idxmax()].split("accuracy                           ")[1].split("   macro avg")[0][11:-1]}|\n|Macro Avg|{classification_list[pd.Series(scores).idxmax()].split("macro avg       ")[1][:4]}|{classification_list[pd.Series(scores).idxmax()].split("macro avg       ")[1][10:14]}|{classification_list[pd.Series(scores).idxmax()].split("macro avg       ")[1][20:24]}|{classification_list[pd.Series(scores).idxmax()].split("macro avg       ")[1].split("weighted avg")[0][31:-1]}|\n|Weighted Avg|{classification_list[pd.Series(scores).idxmax()].split("weighted avg")[1][7:11]}|{classification_list[pd.Series(scores).idxmax()].split("weighted avg")[1][17:21]}|{classification_list[pd.Series(scores).idxmax()].split("weighted avg")[1][27:31]}|{classification_list[pd.Series(scores).idxmax()].split("weighted avg")[1][38:-1]}|\n', raw=True)
    return

In [35]:
window_list = ['10']
dataframe_list = []
for window in window_list:
    print("Starting " + window + " Day Model")
    concat_window = pd.read_csv(window + "_Concatenated.csv")
    concat_window.drop(columns = 'Unnamed: 0',inplace=True)
    X = concat_window.drop(['Home_Win','Visitor','Home'], axis=1)
    y = concat_window[['Home_Win','Date']]

    X_train = X[X['Date'] < X.Date.unique()[-1]]
    X_test = X[X['Date'] == X.Date.unique()[-1]]
    y_train = y[y['Date'] < y.Date.unique()[-1]]
    y_test = y[y['Date'] == y.Date.unique()[-1]]

    y_train.drop(columns='Date',inplace=True)
    y_test.drop(columns='Date',inplace=True)
    X_train.drop(columns='Date',inplace=True)
    X_test.drop(columns='Date',inplace=True)

    scores = []
    matricies_scaled = []
    classification_list = []
    prediction_list = []
    for x in range(50):
        mlp = MLPClassifier(max_iter=500, activation='relu',hidden_layer_sizes=(32,16))
        mlp.fit(X_train,y_train)
        predictions = mlp.predict(X_test)
        prediction_list.append(predictions)
        scores.append(r2_score(y_test,predictions))
        cm_scale = confusion_matrix(y_test,predictions)
        classif = classification_report(y_test,predictions)
        matricies_scaled.append(cm_scale)
        classification_list.append(classif)
    temp_df = concat_window[concat_window['Date'] == concat_window.Date.unique()[-1]][['Visitor','Home']]
    total_list = []
    for y in range(len(temp_df)):
        team_wl = []
        for x in range(50):
            team_wl.append(prediction_list[x][y])
        total_list.append(round((sum(team_wl)/len(team_wl))*100,2))
    temp_df['Home_Win'] = predictions
    temp_df['Home_Win_Percent'] = total_list
    dataframe_list.append(temp_df)
    print("Finished " + window + " Day Model")

Starting 10 Day Model
Finished 10 Day Model


In [36]:
dataframe_list[0]

Unnamed: 0,Visitor,Home,Home_Win,Home_Win_Percent
3193,TEX,WSN,1,84.0
3194,OAK,BOS,1,84.0
3195,CHN,NYA,1,62.0
3196,KCR,CLE,1,82.0
3197,TOR,DET,1,28.0
3198,PHI,MIA,1,36.0
3199,ATL,TBA,1,26.0
3200,SLN,CHA,1,68.0
3201,SEA,HOU,1,72.0
3202,CIN,MIL,1,74.0
