In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, BaggingClassifier, ExtraTreesClassifier, GradientBoostingClassifier, AdaBoostClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import OneHotEncoder
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import MultinomialNB


In [2]:
#Set viewing options
pd.set_option('display.max_columns', None)  
pd.set_option('display.expand_frame_repr', False)
pd.set_option("display.max_columns", 2000)
pd.set_option('display.max_colwidth', -1)

  """


In [4]:
#read in data
qwins = pd.read_csv('Quarterly_Wins-for_Modeling.csv')

In [5]:
qwins.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5324 entries, 0 to 5323
Data columns (total 29 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   x0_H    5324 non-null   float64
 1   x0_HH   5324 non-null   float64
 2   x0_HHH  5324 non-null   float64
 3   x0_T    5324 non-null   float64
 4   x0_V    5324 non-null   float64
 5   x0_VV   5324 non-null   float64
 6   x0_VVV  5324 non-null   float64
 7   x1_H    5324 non-null   float64
 8   x1_HH   5324 non-null   float64
 9   x1_HHH  5324 non-null   float64
 10  x1_T    5324 non-null   float64
 11  x1_V    5324 non-null   float64
 12  x1_VV   5324 non-null   float64
 13  x1_VVV  5324 non-null   float64
 14  x2_H    5324 non-null   float64
 15  x2_HH   5324 non-null   float64
 16  x2_HHH  5324 non-null   float64
 17  x2_T    5324 non-null   float64
 18  x2_V    5324 non-null   float64
 19  x2_VV   5324 non-null   float64
 20  x2_VVV  5324 non-null   float64
 21  x3_H    5324 non-null   float64
 22  

In [6]:
from sklearn.pipeline import make_pipeline

In [7]:
## Define 'clumps' (combinations) of quarters to model on
# Clump 0
Clump_Q1 = qwins[['x0_H', 'x0_HH', 'x0_HHH', 'x0_T', 'x0_V', 'x0_VV', 'x0_VVV']]
# Clump 1
Clump_Q2 = qwins[['x1_H', 'x1_HH', 'x1_HHH', 'x1_T', 'x1_V', 'x1_VV', 'x1_VVV']]
# Clump 2
Clump_Q3 = qwins[['x2_H', 'x2_HH', 'x2_HHH', 'x2_T', 'x2_V', 'x2_VV', 'x2_VVV']]
# Clump 3
Clump_Q4 = qwins[['x3_H', 'x3_HH', 'x3_HHH', 'x3_T', 'x3_V', 'x3_VV', 'x3_VVV']]
# Clump 4
Clump_Q1_Q2=qwins[['x0_H', 'x0_HH', 'x0_HHH', 'x0_T', 'x0_V', 'x0_VV', 'x0_VVV', 'x1_H',
       'x1_HH', 'x1_HHH', 'x1_T', 'x1_V', 'x1_VV', 'x1_VVV']] 
# Clump 5
Clump_Q2_Q3=qwins[['x1_H','x1_HH', 'x1_HHH', 'x1_T', 'x1_V', 'x1_VV', 'x1_VVV', 'x2_H', 
        'x2_HH','x2_HHH', 'x2_T', 'x2_V', 'x2_VV', 'x2_VVV']]
# Clump 6
Clump_Q3_Q4=qwins[['x2_H', 'x2_HH','x2_HHH', 'x2_T', 'x2_V', 'x2_VV', 'x2_VVV', 'x3_H', 
        'x3_HH', 'x3_HHH','x3_T', 'x3_V', 'x3_VV', 'x3_VVV']]
# Clump 7
Clump_Q1_Q3=qwins[['x0_H', 'x0_HH', 'x0_HHH', 'x0_T', 'x0_V', 'x0_VV', 'x0_VVV', 'x1_H',
       'x2_H', 'x2_HH','x2_HHH', 'x2_T', 'x2_V', 'x2_VV', 'x2_VVV']]
# Clump 8
Clump_Q1_Q4=qwins[['x0_H', 'x0_HH', 'x0_HHH', 'x0_T', 'x0_V', 'x0_VV', 'x0_VVV', 'x1_H',
        'x3_H', 'x3_HH', 'x3_HHH','x3_T', 'x3_V', 'x3_VV', 'x3_VVV']]
# Clump 9
Clump_Q2_Q4=qwins[['x1_H','x1_HH', 'x1_HHH', 'x1_T', 'x1_V', 'x1_VV', 'x1_VVV', 'x3_H', 
        'x3_HH', 'x3_HHH','x3_T', 'x3_V', 'x3_VV', 'x3_VVV']]
# Clump 10
Clump_Q1_Q2_Q3=qwins[['x0_H', 'x0_HH', 'x0_HHH', 'x0_T', 'x0_V', 'x0_VV', 'x0_VVV', 'x1_H',
       'x1_HH', 'x1_HHH', 'x1_T', 'x1_V', 'x1_VV', 'x1_VVV', 'x2_H', 'x2_HH',
       'x2_HHH', 'x2_T', 'x2_V', 'x2_VV', 'x2_VVV']]
# Clump 11
Clump_Q1_Q3_Q4=qwins[['x0_H', 'x0_HH', 'x0_HHH', 'x0_T', 'x0_V', 'x0_VV', 'x0_VVV', 'x2_H',
         'x2_HH','x2_HHH', 'x2_T', 'x2_V', 'x2_VV', 'x2_VVV', 'x3_H', 'x3_HH', 'x3_HHH',
       'x3_T', 'x3_V', 'x3_VV', 'x3_VVV']]
# Clump 12
Clump_Q1_Q2_Q4=qwins[['x0_H', 'x0_HH', 'x0_HHH', 'x0_T', 'x0_V', 'x0_VV', 'x0_VVV', 'x1_H',
       'x1_HH', 'x1_HHH', 'x1_T', 'x1_V', 'x1_VV', 'x1_VVV', 'x3_H', 'x3_HH', 'x3_HHH',
       'x3_T', 'x3_V', 'x3_VV', 'x3_VVV']]
# Clump 13
Clump_Q2_Q3_Q4=qwins[['x1_H','x1_HH', 'x1_HHH', 'x1_T', 'x1_V', 'x1_VV', 'x1_VVV', 'x2_H', 
        'x2_HH','x2_HHH', 'x2_T', 'x2_V', 'x2_VV', 'x2_VVV', 'x3_H', 'x3_HH', 'x3_HHH',
       'x3_T', 'x3_V', 'x3_VV', 'x3_VVV']]
# Clump 14
Clump_Q1_Q2_Q3_Q4=qwins[['x0_H', 'x0_HH', 'x0_HHH', 'x0_T', 'x0_V', 'x0_VV', 'x0_VVV',
        'x1_H','x1_HH', 'x1_HHH', 'x1_T', 'x1_V', 'x1_VV', 'x1_VVV', 'x2_H', 'x2_HH',
       'x2_HHH', 'x2_T', 'x2_V', 'x2_VV', 'x2_VVV', 'x3_H', 'x3_HH', 'x3_HHH',
       'x3_T', 'x3_V', 'x3_VV', 'x3_VVV']]

In [8]:
clumps = [Clump_Q1,Clump_Q2,Clump_Q3,Clump_Q4,Clump_Q1_Q2,Clump_Q2_Q3,Clump_Q3_Q4,
          Clump_Q1_Q3,Clump_Q1_Q4,Clump_Q2_Q4,Clump_Q1_Q2_Q3,Clump_Q1_Q3_Q4,Clump_Q1_Q2_Q4,
         Clump_Q2_Q3_Q4,Clump_Q1_Q2_Q3_Q4]

In [None]:
# X clumps: Q1, Q2, Q3, Q4, Q1/Q2, Q2/Q3, Q3/Q4, Q1/Q3, Q1/Q4, Q2/Q4,
#           Q1/Q3/Q4, Q1/Q2/Q4, Q2/Q3/Q4, Q1/Q2/Q3, Q1/Q2/Q3/Q4
record = pd.DataFrame(columns=['Clump', 'Estimator', 'param_grid', 'best','accuracy'])
for index, clump in enumerate(clumps):
    X = clump
    y = qwins['Winner']
    X_train, X_test, y_train, y_test = train_test_split(X,y, random_state = 32221)
    modelname = 'LogisticRegressionPipeline'
    pipe = make_pipeline(LogisticRegression(max_iter = 1000))
    param_grid = {
        'logisticregression__penalty': ['l1', 'l2', 'elasticnet', 'none'],
        'logisticregression__C': [.01, .1, 1, 10,100],
        'logisticregression__random_state' : [32221],
        'logisticregression__solver' : ['newton-cg', 'lbfgs', 'liblinear']
}
    grid = GridSearchCV(pipe, param_grid = param_grid)
    grid.fit(X_train, y_train)
    #pred = model.predict(X_test)
    score = grid.score(X_test,y_test)
    print(score)
    record = record.append({'Clump': index, 'Estimator': modelname, 
                   'param_grid': param_grid, 'best': grid.best_params_,'accuracy': score}, ignore_index=True)

In [None]:
record

In [None]:
# X clumps: Q1, Q2, Q3, Q4, Q1/Q2, Q2/Q3, Q3/Q4, Q1/Q3, Q1/Q4, Q2/Q4,
#           Q1/Q3/Q4, Q1/Q2/Q4, Q2/Q3/Q4, Q1/Q2/Q3, Q1/Q2/Q3/Q4
#record = pd.DataFrame(columns=['Clump', 'Estimator', 'param_grid', 'best','accuracy'])
for index, clump in enumerate(clumps):
    X = clump
    y = qwins['Winner']
    X_train, X_test, y_train, y_test = train_test_split(X,y, random_state = 32221)
    modelname = 'BaggingPipeline'
    pipe = make_pipeline(BaggingClassifier())
    param_grid = {
        'baggingclassifier__n_estimators': [10,20,30,40,50],
        'baggingclassifier__max_samples': [1,4,7,10],
          'baggingclassifier__random_state' : [32221]
          ,'baggingclassifier__max_features' : [4, 8, 12, 16, 20]
}
    grid = GridSearchCV(pipe, param_grid = param_grid)
    grid.fit(X_train, y_train)
    #pred = model.predict(X_test)
    score = grid.score(X_test,y_test)
    print(score)
    record = record.append({'Clump': index, 'Estimator': modelname, 
                   'param_grid': param_grid, 'best': grid.best_params_,'accuracy': score}, ignore_index=True)

In [None]:
record.tail(15)

In [None]:
for index, clump in enumerate(clumps):
    X = clump
    y = qwins['Winner']
    X_train, X_test, y_train, y_test = train_test_split(X,y, random_state = 32221)
    modelname = 'RandomForest'
    pipe = make_pipeline(RandomForestClassifier())
    param_grid = {
        'randomforestclassifier__n_estimators': [20,50, 100, 150, 200],
        'randomforestclassifier__min_samples_split': [1,2,4,7,10],
          'randomforestclassifier__max_features' : [4, 7, 10, 15, 20],
          'randomforestclassifier__max_depth' : [4, 8, 12, 16, 20]
}
    grid = GridSearchCV(pipe, param_grid = param_grid)
    grid.fit(X_train, y_train)
    #pred = model.predict(X_test)
    score = grid.score(X_test,y_test)
    print(score)
    record = record.append({'Clump': index, 'Estimator': modelname, 
                   'param_grid': param_grid, 'best': grid.best_params_,'accuracy': score}, ignore_index=True)

In [None]:
record.tail(15)

In [None]:
#record = pd.DataFrame(columns=['Clump', 'Estimator', 'param_grid', 'best','accuracy'])
for index + 11, clump in enumerate(clumps):
    X = clump
    y = qwins['Winner']
    X_train, X_test, y_train, y_test = train_test_split(X,y, random_state = 32221)
    modelname = 'ExtraTrees'
    pipe = make_pipeline(ExtraTreesClassifier())
    param_grid = {
        'extratreesclassifier__n_estimators': [20,50, 100, 150, 200],
        'extratreesclassifier__min_samples_split': [1,2,4,7,10],
          'extratreesclassifier__max_features' : [4, 7, 10, 15, 20],
          'extratreesclassifier__max_depth' : [4, 8, 12, 16, 20]
}
    grid = GridSearchCV(pipe, param_grid = param_grid)
    grid.fit(X_train, y_train)
    #pred = model.predict(X_test)
    score = grid.score(X_test,y_test)
    print(score)
    record = record.append({'Clump': index + 11, 'Estimator': modelname, 
                   'param_grid': param_grid, 'best': grid.best_params_,'accuracy': score}, ignore_index=True)

In [11]:
record

Unnamed: 0,Clump,Estimator,param_grid,best,accuracy
0,0,ExtraTrees,"{'extratreesclassifier__n_estimators': [20, 50, 100, 150, 200], 'extratreesclassifier__min_samples_split': [1, 2, 4, 7, 10], 'extratreesclassifier__max_features': [4, 7, 10, 15, 20], 'extratreesclassifier__max_depth': [4, 8, 12, 16, 20]}","{'extratreesclassifier__max_depth': 4, 'extratreesclassifier__max_features': 4, 'extratreesclassifier__min_samples_split': 2, 'extratreesclassifier__n_estimators': 50}",0.650639
1,1,ExtraTrees,"{'extratreesclassifier__n_estimators': [20, 50, 100, 150, 200], 'extratreesclassifier__min_samples_split': [1, 2, 4, 7, 10], 'extratreesclassifier__max_features': [4, 7, 10, 15, 20], 'extratreesclassifier__max_depth': [4, 8, 12, 16, 20]}","{'extratreesclassifier__max_depth': 4, 'extratreesclassifier__max_features': 4, 'extratreesclassifier__min_samples_split': 2, 'extratreesclassifier__n_estimators': 20}",0.640872
2,2,ExtraTrees,"{'extratreesclassifier__n_estimators': [20, 50, 100, 150, 200], 'extratreesclassifier__min_samples_split': [1, 2, 4, 7, 10], 'extratreesclassifier__max_features': [4, 7, 10, 15, 20], 'extratreesclassifier__max_depth': [4, 8, 12, 16, 20]}","{'extratreesclassifier__max_depth': 4, 'extratreesclassifier__max_features': 4, 'extratreesclassifier__min_samples_split': 2, 'extratreesclassifier__n_estimators': 20}",0.619835
3,3,ExtraTrees,"{'extratreesclassifier__n_estimators': [20, 50, 100, 150, 200], 'extratreesclassifier__min_samples_split': [1, 2, 4, 7, 10], 'extratreesclassifier__max_features': [4, 7, 10, 15, 20], 'extratreesclassifier__max_depth': [4, 8, 12, 16, 20]}","{'extratreesclassifier__max_depth': 4, 'extratreesclassifier__max_features': 4, 'extratreesclassifier__min_samples_split': 2, 'extratreesclassifier__n_estimators': 20}",0.633358
4,4,ExtraTrees,"{'extratreesclassifier__n_estimators': [20, 50, 100, 150, 200], 'extratreesclassifier__min_samples_split': [1, 2, 4, 7, 10], 'extratreesclassifier__max_features': [4, 7, 10, 15, 20], 'extratreesclassifier__max_depth': [4, 8, 12, 16, 20]}","{'extratreesclassifier__max_depth': 8, 'extratreesclassifier__max_features': 4, 'extratreesclassifier__min_samples_split': 7, 'extratreesclassifier__n_estimators': 150}",0.703982
5,5,ExtraTrees,"{'extratreesclassifier__n_estimators': [20, 50, 100, 150, 200], 'extratreesclassifier__min_samples_split': [1, 2, 4, 7, 10], 'extratreesclassifier__max_features': [4, 7, 10, 15, 20], 'extratreesclassifier__max_depth': [4, 8, 12, 16, 20]}","{'extratreesclassifier__max_depth': 8, 'extratreesclassifier__max_features': 4, 'extratreesclassifier__min_samples_split': 10, 'extratreesclassifier__n_estimators': 20}",0.684448
6,6,ExtraTrees,"{'extratreesclassifier__n_estimators': [20, 50, 100, 150, 200], 'extratreesclassifier__min_samples_split': [1, 2, 4, 7, 10], 'extratreesclassifier__max_features': [4, 7, 10, 15, 20], 'extratreesclassifier__max_depth': [4, 8, 12, 16, 20]}","{'extratreesclassifier__max_depth': 8, 'extratreesclassifier__max_features': 10, 'extratreesclassifier__min_samples_split': 2, 'extratreesclassifier__n_estimators': 20}",0.709992
7,7,ExtraTrees,"{'extratreesclassifier__n_estimators': [20, 50, 100, 150, 200], 'extratreesclassifier__min_samples_split': [1, 2, 4, 7, 10], 'extratreesclassifier__max_features': [4, 7, 10, 15, 20], 'extratreesclassifier__max_depth': [4, 8, 12, 16, 20]}","{'extratreesclassifier__max_depth': 12, 'extratreesclassifier__max_features': 4, 'extratreesclassifier__min_samples_split': 2, 'extratreesclassifier__n_estimators': 50}",0.694215
8,8,ExtraTrees,"{'extratreesclassifier__n_estimators': [20, 50, 100, 150, 200], 'extratreesclassifier__min_samples_split': [1, 2, 4, 7, 10], 'extratreesclassifier__max_features': [4, 7, 10, 15, 20], 'extratreesclassifier__max_depth': [4, 8, 12, 16, 20]}","{'extratreesclassifier__max_depth': 12, 'extratreesclassifier__max_features': 4, 'extratreesclassifier__min_samples_split': 7, 'extratreesclassifier__n_estimators': 50}",0.697971
9,9,ExtraTrees,"{'extratreesclassifier__n_estimators': [20, 50, 100, 150, 200], 'extratreesclassifier__min_samples_split': [1, 2, 4, 7, 10], 'extratreesclassifier__max_features': [4, 7, 10, 15, 20], 'extratreesclassifier__max_depth': [4, 8, 12, 16, 20]}","{'extratreesclassifier__max_depth': 8, 'extratreesclassifier__max_features': 4, 'extratreesclassifier__min_samples_split': 7, 'extratreesclassifier__n_estimators': 50}",0.69121


In [None]:
for index, clump in enumerate(clumps):
    X = clump
    y = qwins['Winner']
    X_train, X_test, y_train, y_test = train_test_split(X,y, random_state = 32221)
    modelname = 'BaggingClassifier'
    model = BaggingClassifier() # change estimator each time
    # and GridSearch here?
    model.fit(X_train, y_train)
    pred = model.predict(X_test)
    score = accuracy_score(y_test, pred)
    print(score)
    record = record.append({'Clump': index, 'Estimator': modelname, 
                   'params': 'NA', 'accuracy': score}, ignore_index=True)

In [None]:
for index, clump in enumerate(clumps):
    X = clump
    y = qwins['Winner']
    X_train, X_test, y_train, y_test = train_test_split(X,y, random_state = 32221)
    modelname = 'DecisionTreeClassifier'
    model = DecisionTreeClassifier() # change estimator each time
    # and GridSearch here?
    model.fit(X_train, y_train)
    pred = model.predict(X_test)
    score = accuracy_score(y_test, pred)
    print(score)
    record = record.append({'Clump': index, 'Estimator': modelname, 
                   'params': 'NA', 'accuracy': score}, ignore_index=True)

In [None]:
for index, clump in enumerate(clumps):
    X = clump
    y = qwins['Winner']
    X_train, X_test, y_train, y_test = train_test_split(X,y, random_state = 32221)
    modelname = 'AdaBoostClassifier'
    model = AdaBoostClassifier() # change estimator each time
    # and GridSearch here?
    model.fit(X_train, y_train)
    pred = model.predict(X_test)
    score = accuracy_score(y_test, pred)
    print(score)
    record = record.append({'Clump': index, 'Estimator': modelname, 
                   'params': 'NA', 'accuracy': score}, ignore_index=True)

In [None]:
for index, clump in enumerate(clumps):
    X = clump
    y = qwins['Winner']
    X_train, X_test, y_train, y_test = train_test_split(X,y, random_state = 32221)
    modelname = 'BayesClassifier'
    model = MultinomialNB() # change estimator each time
    # and GridSearch here?
    model.fit(X_train, y_train)
    pred = model.predict(X_test)
    score = accuracy_score(y_test, pred)
    print(score)
    record = record.append({'Clump': index, 'Estimator': modelname, 
                   'params': 'NA', 'accuracy': score}, ignore_index=True)

In [None]:
for index, clump in enumerate(clumps):
    X = clump
    y = qwins['Winner']
    X_train, X_test, y_train, y_test = train_test_split(X,y, random_state = 32221)
    modelname = 'BayesClassifier'
    model = MultinomialNB() # change estimator each time
    # and GridSearch here?
    model.fit(X_train, y_train)
    pred = model.predict(X_test)
    score = accuracy_score(y_test, pred)
    print(score)
    record = record.append({'Clump': index, 'Estimator': modelname, 
                   'params': 'NA', 'accuracy': score}, ignore_index=True)

In [None]:
record['Estimator'].unique()

array(['BaggingClassifier', 'RandomForestClassifier',
       'LogisticRegression', 'DecisionTreeClassifier',
       'ExtraTreesClassifier', 'AdaBoostClassifier', 'BayesClassifier'],
      dtype=object)

In [None]:
for index, clump in enumerate(clumps):
    X = clump
    y = qwins['Winner']
    X_train, X_test, y_train, y_test = train_test_split(X,y, random_state = 32221)
    modelname = 'GradientBoostingClassifier'
    model = GradientBoostingClassifier() # change estimator each time
    # and GridSearch here?
    model.fit(X_train, y_train)
    pred = model.predict(X_test)
    score = accuracy_score(y_test, pred)
    print(score)
    record = record.append({'Clump': index, 'Estimator': modelname, 
                   'params': 'NA', 'accuracy': score}, ignore_index=True)

0.6506386175807664
0.6408715251690458
0.6198347107438017
0.6333583771600301
0.7054845980465815
0.6867017280240421
0.709992486851991
0.6987227648384673
0.7032306536438768
0.6912096168294516
0.7618332081141999
0.7761081893313299
0.7648384673178061
0.763335837716003
0.8542449286250939


In [None]:
record.tail()

Unnamed: 0,Clump,Estimator,params,accuracy
115,10,GradientBoostingClassifier,,0.761833
116,11,GradientBoostingClassifier,,0.776108
117,12,GradientBoostingClassifier,,0.764838
118,13,GradientBoostingClassifier,,0.763336
119,14,GradientBoostingClassifier,,0.854245


In [None]:
#Neural Networks
from keras.models import Sequential
from keras.layers import Dense

In [None]:
X = Clump_Q1
y = qwins['Winner']
X_train, X_test, y_train, y_test = train_test_split(X,y, random_state = 32221)

In [None]:
model=Sequential()
model.add(Dense(20, activation='relu'))
model.add(Dense(1, activation = 'softmax'))

In [None]:
model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer='adam')

In [None]:
history = model.fit(X_train,y_train,
                   validation_data=(X_test,y_test),
                   epochs=20)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [None]:
model=Sequential()
model.add(Dense(40, activation='relu'))
model.add(Dense(20, activation='relu'))
model.add(Dense(1, activation = 'softmax'))

In [None]:
model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer='adam')

In [None]:
history = model.fit(X_train,y_train,
                   validation_data=(X_test,y_test),
                   epochs=30)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [None]:
model=Sequential()
model.add(Dense(80, activation='relu'))
model.add(Dense(60, activation='relu'))
model.add(Dense(40, activation='relu'))
model.add(Dense(1, activation = 'softmax'))

In [None]:
model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer='adam')

In [None]:
history = model.fit(X_train,y_train,
                   validation_data=(X_test,y_test),
                   epochs=20)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [None]:
from keras.regularizers import l2
from keras.layers import Dropout

In [None]:
model=Sequential()
model.add(Dense(80, activation='relu'))
model.add(Dropout(.5))
model.add(Dense(60, activation='relu'))
model.add(Dropout(.4))
model.add(Dense(40, activation='relu'))
model.add(Dropout(.3))
model.add(Dense(1, activation = 'softmax'))

In [None]:
model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer='adam')

In [None]:
history = model.fit(X_train,y_train,
                   validation_data=(X_test,y_test),
                   epochs=30)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [None]:
x = model.evaluate(X_test,y_test)
x[1]



0.40646129846572876

In [None]:
#Neural Network loop
for index, clump in enumerate(clumps):
    X = clump
    y = qwins['Winner']
    X_train, X_test, y_train, y_test = train_test_split(X,y, random_state = 32221)
    modelname = 'NeuralNetwork'
    model=Sequential()
    model.add(Dense(80, activation='relu'))
    model.add(Dropout(.5))
    model.add(Dense(60, activation='relu'))
    model.add(Dropout(.4))
    model.add(Dense(40, activation='relu'))
    model.add(Dropout(.3))
    model.add(Dense(1, activation = 'softmax'))
    model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer='adam')
    history = model.fit(X_train,y_train,validation_data=(X_test,y_test),epochs=30)
    eval = model.evaluate(X_test,y_test)
    score = eval[1]
    print(score)
    record = record.append({'Clump': index, 'Estimator': modelname, 
                   'params': '3 Dense layers (80,60,60 nodes), 3 dropout layers(.5,.4,.3)', 'accuracy': score}, ignore_index=True)


Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
0.40646129846572876
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
0.40646129846572876
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epo

In [None]:
model=Sequential()
model.add(Dense(80, activation='relu'))
model.add(Dropout(.5))
model.add(Dense(60, activation='relu'))
model.add(Dropout(.4))
model.add(Dense(40, activation='relu'))
model.add(Dropout(.3))
model.add(Dense(1, activation = 'softmax'))

In [None]:
record.tail()

Unnamed: 0,Clump,Estimator,params,accuracy
130,10,NeuralNetwork,"3 Dense layers (80,60,60 nodes), 3 dropout layers(.5,.4,.3)",0.406461
131,11,NeuralNetwork,"3 Dense layers (80,60,60 nodes), 3 dropout layers(.5,.4,.3)",0.406461
132,12,NeuralNetwork,"3 Dense layers (80,60,60 nodes), 3 dropout layers(.5,.4,.3)",0.406461
133,13,NeuralNetwork,"3 Dense layers (80,60,60 nodes), 3 dropout layers(.5,.4,.3)",0.406461
134,14,NeuralNetwork,"3 Dense layers (80,60,60 nodes), 3 dropout layers(.5,.4,.3)",0.406461
