In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

import setuptools

# Keras is a high-level neural network Python API
# Runs on top of lower-level libraries like TensorFlow
from keras.models import Sequential
from keras.layers import Dense
from keras.utils import to_categorical

In [2]:
class MLClass:

    def __init__(self, data=None, predictorVar=None, targetCol=None,
                xTrain=None, xTest=None, yTest=None, yTrain=None):
        self.data = data
        self.predictorVar = predictorVar
        self.targetCol = targetCol
        self.xTrain = xTrain
        self.xTest= xTest
        self.yTest= yTest
        self.yTrain = yTrain

    
    def exploringDataset(self):      
        try:
            print("The shape of the data is: ", self.data.shape)
            print("The dataset has {} columns".format(self.data.columns))
            print("The name of the columns are: {}\n".foramt([x for x in self.data.columns]))

        except Exception as es:
            print("Data type is None, load the dataset first")


    def cleanData(self):
        playType = ['Run','Pass',"Field Goal",'Punt']
        self.data = self.data[self.data["PlayType"].isin(playType)]
        self.data = self.data.dropna(subset=["PlayType", "down"])
        self.data["FieldGoalDistance"] = self.data["FieldGoalDistance"].fillna(0)

        predictorCols = ["GameID", "Drive", "qtr", "down", "time", "yrdline100", "ydstogo", "GoalToGo", "PosTeamScore", "DefTeamScore",\
                 "FieldGoalDistance", "ScoreDiff"]
        self.__setPredictorVar(predictorCols)
        target = "PlayType"
        self.__setTargetCol(target)


    def __setPredictorVar(self, predictorCols):
        df = self.data[predictorCols]
        df["time"] = df["time"].apply(lambda x: int(x.replace(':','')))
        scaler = StandardScaler().set_output(transform='pandas')
        feature_mat = scaler.fit_transform(df)
        self.predictorVar = feature_mat

    def __setTargetCol(self, targetColumn):
        target = self.data[targetColumn]
        target_col = target.replace({'Pass': 0, 'Run': 1, 'Punt':2, "Field Goal":3})
        self.targetCol = target_col

    def getPredictorVar(self):
        return self.predictorVar

    def getTargetCol(self):
        return self.targetCol

    def getTrain(self):
        try:
            return {"X-Train": self.xTrain,
                     "Y-Train": self.yTrain}
        except Exception as es:
            print("The Training dataset has not been created yet")

    def getTest(self):
        try:
            return{"X-Test": self.xTest,
                   "Y-Test": self.yTest}
        except Exception as es:
            print("The Testing dataset has not been created yet")
            
    def loadDataset(self, filePath):
        self.data = pd.read_csv(filePath)
        return self.data

    def trainTestDataset(self, testSize=0.10, randomState=24):
        self.xTrain, self.xTest, self.yTrain, self.yTest = \
        train_test_split(self.predictorVar, self.targetCol, test_size=testSize, random_state=randomState)

        print("X_train:", self.xTrain.shape); print("X_test:", self.xTest.shape); print("y_train:", self.yTrain.shape); print("y_test:", self.yTest.shape)


In [3]:
class CNNClass(MLClass):

    def __init__(self, model=None, data=None, predictorVar=None, targetCol=None, 
                 xTrain=None, xTest=None, yTest=None, yTrain=None):
        
        self.model = model

        super().__init__(data, predictorVar, targetCol,
                xTrain, xTest, yTest, yTrain)
        
        
    def createCnnModel(self, featuresNum=None, outputDim=None):
        self.model = Sequential()
        self.model.add(Dense(128, activation='relu', input_dim=featuresNum)) 
        self.model.add(Dense(64, activation='relu')) 
        self.model.add(Dense(32, activation='relu'))
        self.model.add(Dense(16, activation='relu'))
        self.model.add(Dense(outputDim))

        self.__compileModel()
        self.__getModelSummary()


    def __compileModel(self):
        self.model.compile(optimizer='adam',
                           loss='mean_squared_error',
                           metrics=['accuracy'])

    def __getModelSummary(self):
        self.model.summary()

    
    def trainModel(self, numEpochs=1):
        train = self.getTrain()
        self.model.fit(train["X-Train"], train["Y-Train"], epochs= numEpochs)

    
    def trainDataEvaluate(self):
        train = self.getTrain()
        loss, accuracy = self.model.evaluate(train["X-Train"],
                                             train["Y-Train"])

        print(f"Training Loss: {loss}")
        print(f"Training Accuracy: {accuracy}")

    def testDataEvaluate(self):
        test = self.getTest()
        loss, accuracy = self.model.evaluate(test["X-Test"],
                                             test["Y-Test"])
        
        print(f"Testing Loss: {loss}")
        print(f"Testing Accuracy: {accuracy}")

    def performanceMeasure(self):
        train = self.getTrain()
        test = self.getTest()

        x_train = train["X-Train"]
        y_train = train["Y-Train"]
        x_test = test["X-Test"]
        y_test = test["Y-Test"]

        history = self.model.fit(x_train, y_train, validation_data=(x_test, y_test))
        history_dict = history.history

        plt.plot(history_dict['accuracy'], label='accuracy')
        plt.plot(history_dict['val_accuracy'], label='val_accuracy')
        plt.xlabel('Epoch')
        plt.ylabel('Accuracy')
        plt.ylim([0, 1])
        plt.legend(loc='lower right')

#Testing

In [4]:
cnnModel = CNNClass()
data = cnnModel.loadDataset('/Users/gauravkharel/Documents/BoozAllen/Data Science/Capstone/NFLPlaybyPlay2015.csv')
cnnModel.cleanData()

  self.data = pd.read_csv(filePath)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["time"] = df["time"].apply(lambda x: int(x.replace(':','')))
  target_col = target.replace({'Pass': 0, 'Run': 1, 'Punt':2, "Field Goal":3})


In [5]:
cnnModel.trainTestDataset()
cnnModel.createCnnModel(12, 5)

X_train: (31279, 12)
X_test: (3476, 12)
y_train: (31279,)
y_test: (3476,)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [6]:
cnnModel.trainModel(numEpochs=10)

Epoch 1/10
[1m978/978[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 312us/step - accuracy: 0.1342 - loss: 0.3370
Epoch 2/10
[1m978/978[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 305us/step - accuracy: 0.1776 - loss: 0.2004
Epoch 3/10
[1m978/978[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 309us/step - accuracy: 0.1576 - loss: 0.1977
Epoch 4/10
[1m978/978[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 299us/step - accuracy: 0.1596 - loss: 0.1954
Epoch 5/10
[1m978/978[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 302us/step - accuracy: 0.1538 - loss: 0.1907
Epoch 6/10
[1m978/978[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 304us/step - accuracy: 0.1529 - loss: 0.1890
Epoch 7/10
[1m978/978[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 302us/step - accuracy: 0.1535 - loss: 0.1888
Epoch 8/10
[1m978/978[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 301us/step - accuracy: 0.1529 - loss: 0.1876
Epoch 9/10
[1m978/978[

In [7]:
cnnModel.performanceMeasure()

[1m978/978[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 405us/step - accuracy: 0.1529 - loss: 0.1861 - val_accuracy: 0.1887 - val_loss: 0.1881


TypeError: 'History' object is not subscriptable