Implementing grid search over Keras hyperparameters, based on this <a href="https://machinelearningmastery.com/grid-search-hyperparameters-deep-learning-models-python-keras/">tutorial</a>.

In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_iris
from sklearn.model_selection import GridSearchCV
from tensorflow import keras
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
import matplotlib.pyplot as plt

In [2]:
# Load the dataset and discard every row that contains a missing value,
# so that nothing built from `df` afterwards has to deal with NaNs.
df = pd.read_csv("test.csv").dropna()

In [3]:
# Binary-encode the "ha" column: rows equal to "h" become 1, everything else 0.
# NOTE(review): presumably "h" marks a home fixture — confirm against the data source.
# Not idempotent: re-running this cell once the column is numeric sets every row to 0.
df["ha"] = (df["ha"] == "h").astype(int)

In [4]:
# Predictor columns, in the order they appear in the feature matrix.
feature_columns = [
    'opp_goal_difference_last',
    'ha',
    'opp_goal_difference_5MA',
    'opp_goal_difference_10MA',
    'since_last_win',
    'goal_difference_5MA',
    'goal_difference_10MA',
    'goal_difference_last',
    'yellow_cards_referee_hist',
    'odds_5MA',
    'odds_10MA',
    'odds_last',
]

# Features as a plain NumPy array; the target stays a pandas Series for now.
X = df[feature_columns].values
y = df["class_var"]

In [5]:
# Inverse-frequency class weights, rescaled so that they sum to the number of
# classes (a perfectly balanced dataset would give every class a weight of 1.0).
#
# Keras expects the keys of `class_weight` to be the integer class indices the
# model is trained on. Those indices are produced further down by LabelEncoder,
# which numbers the classes in sorted label order, so the weights are keyed by
# sorted position rather than by the raw label value. When the labels already
# are 0..n-1 this gives exactly the same mapping as keying by the label itself.
class_counts = y.value_counts().sort_index()
inverse_freq = 1 / class_counts
normalized = inverse_freq / inverse_freq.sum() * len(inverse_freq)
class_weights = dict(enumerate(normalized.tolist()))

In [7]:
# Override the computed weight of class 1 with zero. When this dict is passed to
# `fit(class_weight=...)`, samples of class 1 then add nothing to the training loss.
# NOTE(review): this looks like a leftover experiment — confirm it is intended.
zero_weight_class = 1
class_weights[zero_weight_class] = 0

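Before splitting the dataset into train and test sets (done further below; the examples are shuffled because their order is not important here), we encode the class labels as integers and then as one-hot vectors.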

In [None]:
from sklearn.preprocessing import LabelEncoder

# Encode the target from the untouched dataframe column instead of overwriting
# `y` with a transformation of itself, so this cell can be re-run without
# failing on an already one-hot-encoded (2-D) `y`.
le = LabelEncoder()
y_labels = le.fit_transform(df["class_var"])

# Number of distinct classes; also the width of the softmax output layer.
n_class_var = len(le.classes_)

# One-hot targets, the format used with the categorical cross-entropy loss.
y = keras.utils.to_categorical(y_labels, num_classes=n_class_var)

We scale the features with `StandardScaler`, i.e. each feature is standardized to zero mean and unit variance. Neural networks usually train better on standardized inputs.

In [None]:
# Standardize every feature column: learn the per-column statistics first,
# then apply them to the same matrix.
# NOTE(review): the scaler is fitted on all rows, before the train/test split
# made later in the notebook, so held-out rows influence the scaling.
scaler = StandardScaler()
scaler.fit(X)
X = scaler.transform(X)

The model's architecture: a small fully connected network with dropout and a softmax output layer.

In [None]:
def create_model(optimizer="sgd"):
    """Build and compile the feed-forward classifier.

    Layer stack: Dense(8) -> Dense(12) -> Dropout(0.2) -> Dense(8) ->
    Dropout(0.2) -> Dense(n_class_var, softmax), with ReLU on the hidden
    Dense layers. The input width is read from the global ``X`` and the
    output width from the global ``n_class_var``, so both must be defined
    before this function is called.

    Args:
        optimizer: Optimizer name or Keras optimizer instance, passed
            straight to ``model.compile``.

    Returns:
        The compiled ``keras.Sequential`` model, using categorical
        cross-entropy as loss and reporting accuracy.
    """
    n_features = X.shape[-1]

    model = keras.Sequential()
    model.add(keras.layers.Dense(8, activation="relu", input_shape=(n_features,)))
    model.add(keras.layers.Dense(12, activation="relu"))
    model.add(keras.layers.Dropout(0.2))
    model.add(keras.layers.Dense(8, activation="relu"))
    model.add(keras.layers.Dropout(0.2))
    # One output unit per class; softmax turns them into class probabilities.
    model.add(keras.layers.Dense(n_class_var, activation="softmax"))

    model.compile(
        loss="categorical_crossentropy",
        optimizer=optimizer,
        metrics=['accuracy'],
    )
    return model

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows as a test set, keeping the class proportions.
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8, stratify=y)

# First run: SGD with momentum, trained with the `class_weights` dict.
model = create_model(
    optimizer=keras.optimizers.SGD(
        learning_rate=0.01, momentum=0.8, nesterov=False, name="SGD"
    )
)
# `validation_split` sets aside 10% of the *training* rows for validation.
history = model.fit(
    X_train,
    y_train,
    validation_split=0.1,
    batch_size=8,
    epochs=20,
    class_weight=class_weights,
)

# Learning curves. `val_loss` is measured on the validation split of the
# training data, not on X_test, so the curve is labelled 'validation'.
fig, ax = plt.subplots()
ax.plot(history.history['loss'], label='train')
ax.plot(history.history['val_loss'], label='validation')
ax.set(title='model loss', xlabel='epoch', ylabel='loss')
ax.legend(loc='upper left')
plt.show()

In [None]:
from sklearn.metrics import accuracy_score, classification_report

# Collapse the softmax outputs and the one-hot targets back to class indices.
y_pred = model.predict(X_test).argmax(axis=1)
y_true = y_test.argmax(axis=1)

# Overall accuracy on the held-out test rows.
print(accuracy_score(y_true, y_pred))

# Per-class precision / recall / F1.
print(classification_report(y_true, y_pred))

In [None]:
from sklearn.model_selection import train_test_split

# Fresh 80/20 stratified split for the second run.
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8, stratify=y)

# Second run: Adam with its default settings and no class weighting.
model = create_model(optimizer="adam")
# `validation_split` sets aside 10% of the *training* rows for validation.
history = model.fit(
    X_train,
    y_train,
    validation_split=0.1,
    batch_size=8,
    epochs=10,
)

# Learning curves. `val_loss` is measured on the validation split of the
# training data, not on X_test, so the curve is labelled 'validation'.
fig, ax = plt.subplots()
ax.plot(history.history['loss'], label='train')
ax.plot(history.history['val_loss'], label='validation')
ax.set(title='model loss', xlabel='epoch', ylabel='loss')
ax.legend(loc='upper left')
plt.show()

In [None]:
from sklearn.metrics import accuracy_score, classification_report

# Collapse the softmax outputs and the one-hot targets back to class indices.
y_pred = model.predict(X_test).argmax(axis=1)
y_true = y_test.argmax(axis=1)

# Overall accuracy on the held-out test rows.
print(accuracy_score(y_true, y_pred))

# Per-class precision / recall / F1.
print(classification_report(y_true, y_pred))

In [None]:
# Wrap the model builder in the scikit-learn estimator interface so that
# GridSearchCV can construct and fit a fresh network per parameter combination.
model = KerasClassifier(build_fn=create_model, verbose=0)
# define the grid search parameters
batch_size = [1, 4, 8]
epochs = [10, 20]
optimizers = ['SGD', 'Adam']
param_grid = dict(batch_size=batch_size, epochs=epochs, optimizer=optimizers)
grid = GridSearchCV(estimator=model, param_grid=param_grid, verbose=3)
# Search (and refit the best model) on the training split only. Fitting on the
# full X, y would let the refitted estimator see the X_test rows it is scored
# on afterwards, making that test accuracy optimistically biased.
grid_result = grid.fit(X_train, y_train)
# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
# One line per parameter combination: mean CV score, its std, and the settings.
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

In [None]:
# Accuracy of the refitted best estimator on the X_test rows.
# NOTE(review): this is only a fair held-out estimate if the grid search was not
# fitted on rows that are also in X_test — check the fitting cell.
y_test_labels = np.argmax(y_test, axis=1)  # one-hot -> class index
best_pred = grid.best_estimator_.predict(X_test)
accuracy_score(y_test_labels, best_pred)

In [None]:
# Display the parameter combination that achieved the best mean
# cross-validated score in the grid search.
grid.best_params_