In [None]:
import os

import numpy as np
import pandas as pd
import requests
import cfbd
from cfbd.rest import ApiException

In [None]:
# Empty frame that will hold the play-by-play rows pulled from the API.
dataset = pd.DataFrame()

In [None]:
# Configure API key authorization: ApiKeyAuth
# The key is read from the CFBD_API_KEY environment variable so the secret never
# lives in the notebook; a missing variable fails immediately with a KeyError.
# NOTE(review): the key that used to be hardcoded here has been exposed and should be rotated.
configuration = cfbd.Configuration()
configuration.api_key['Authorization'] = os.environ['CFBD_API_KEY']
configuration.api_key_prefix['Authorization'] = 'Bearer'

# One client and one endpoint wrapper are enough for every request.
api_config = cfbd.ApiClient(configuration)
plays_api = cfbd.PlaysApi(api_config)

# Collect one frame per (season, week) and concatenate once at the end instead of
# growing `dataset` inside the loop. Rebuilding from scratch also makes the cell
# safe to re-run without duplicating rows.
weekly_frames = []
for year in range(2021, 2024):
    for week in range(1, 16):
        week_plays = plays_api.get_plays(year=year, week=week, offense='Michigan')
        if week_plays:
            weekly_frames.append(pd.DataFrame.from_records([p.to_dict() for p in week_plays]))

# ignore_index gives every play a unique row label; without it each week's rows
# restart at 0 and later label-based column assignments pair up the wrong rows.
dataset = pd.concat(weekly_frames, ignore_index=True) if weekly_frames else pd.DataFrame()

dataset

In [None]:
# Keep only the columns relevant to predicting the play call; not every field we have is useful.
# .copy() makes `data` an independent frame, so the column assignments in the following
# cells do not operate on a slice of `dataset` (which raises SettingWithCopyWarning).
data = dataset[['home', 'away', 'offense_score', 'defense_score', 'period', 'clock', 'yards_to_goal', 'down', 'distance', 'play_type']].copy()

data

In [None]:
# Flag home games: 1 when Michigan (the team we are predicting for) is the home side, 0 otherwise.
data['is_home'] = (data['home'] == 'Michigan').astype(int)

data.head(1)

In [None]:
# Preview: expand the entries of the `clock` column into a frame of their own.
pd.DataFrame(data['clock'].tolist())

In [None]:
# Split clock into minutes and seconds.
# The helper frame must carry data's own index: column assignment aligns on row labels,
# so a default 0..n-1 index would hand rows the clock of whichever row happens to share
# their label whenever data's index is not a clean 0..n-1 range (e.g. repeated labels
# after concatenating the weekly frames).
data[['minutes','seconds']] = pd.DataFrame(data['clock'].tolist(), index=data.index)

data

In [None]:
# Total seconds left on the clock: minutes converted to seconds, plus the leftover seconds.
data['seconds_remaining'] = data['minutes'].mul(60).add(data['seconds'])

data

In [None]:
# List the distinct play_type values present in the data.
data['play_type'].unique()

In [None]:
# Raw play_type values grouped by the play call they represent.
pass_types = ['Pass Reception', 'Pass Interception Return', 'Pass Incompletion', 'Sack', 'Passing Touchdown', 'Interception Return Touchdown']
rush_types = ['Rush', 'Rushing Touchdown']
punt_types = ['Punt', 'Punt Return Touchdown', 'Blocked Punt', 'Blocked Punt Touchdown']
fg_types = ['Field Goal Good', 'Field Goal Missed', 'Blocked Field Goal']

def getPlayCall(x):
    """Return the play call for a raw play type.

    The result is 'pass', 'rush', 'punt' or 'fg' depending on which of the
    lists above contains ``x``; None when ``x`` is in none of them.
    """
    # Checked in the same order as the groups are listed: pass, rush, punt, fg.
    groups = (
        ('pass', pass_types),
        ('rush', rush_types),
        ('punt', punt_types),
        ('fg', fg_types),
    )
    return next((label for label, members in groups if x in members), None)

# Translate every raw play_type into its play call (None when the type is not one we model).
data['play_call'] = data['play_type'].map(getPlayCall)

data.head()

In [None]:
# Drop the rows whose play type did not map to a play call (play_call is missing).
# Rebinding instead of inplace=True avoids mutating a frame that may be a slice of
# `dataset` (SettingWithCopyWarning) and leaves the previous object untouched.
data = data.dropna(subset=['play_call'])

data.head()

In [None]:
# Model inputs plus the target column (play_call).
model_columns = [
    'offense_score',
    'defense_score',
    'period',
    'yards_to_goal',
    'down',
    'distance',
    'is_home',
    'seconds_remaining',
    'play_call',
]
plays = data.loc[:, model_columns]

plays.head()

In [None]:
#import the train/test split helper and the random forest classifier
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

In [None]:
# Separate the dependent variable (the play call we are trying to predict)
# from the independent variables used to predict it.
play_calls = plays['play_call']
plays = plays.drop(columns='play_call')


In [None]:
# Hold out 20% of the plays for validation; the fixed seed keeps the split reproducible.
plays_train, plays_validation, calls_train, calls_validation = train_test_split(
    plays,
    play_calls,
    test_size=0.2,
    train_size=0.8,
    random_state=0,
)
plays_train.head()

In [None]:
# Encode the play calls (pass, rush, punt, fg) as integer codes for the model;
# y_keys holds the labels so codes can be mapped back later.
y, y_keys = calls_train.factorize()

In [None]:
# Random forest of 100 trees with a fixed seed.
classifier = RandomForestClassifier(n_estimators=100, random_state=0)

# Fit on the training split: features in plays_train, encoded play calls in y.
classifier.fit(X=plays_train, y=y)

In [None]:
# Predicted class codes for the validation plays (the integer codes the classifier was fitted on).
classifier.predict(plays_validation)

In [None]:
# Class probabilities for the first ten validation plays, as plain Python lists.
classifier.predict_proba(plays_validation)[:10].tolist()


In [None]:
# Map the predicted integer codes back to their play-call labels.
predicted_codes = classifier.predict(plays_validation)
predicted_calls = y_keys[predicted_codes]

predicted_calls

In [None]:
# Confusion table: actual calls (rows) against predicted calls (columns).
pd.crosstab(
    index=calls_validation,
    columns=predicted_calls,
    rownames=['Actual Calls'],
    colnames=['Predicted Calls'],
)

In [None]:
# Pair each training column with its importance to see which features carry the most weight.
list(zip(plays_train.columns, classifier.feature_importances_))

In [None]:
# Fold the period into seconds_remaining (the arithmetic treats a game as four
# 15-minute periods), then drop the is_home and period columns.
# NOTE(review): periods above 4 yield a negative offset here — confirm that is intended.
plays = (
    plays
    .assign(seconds_remaining=(4 - plays['period']) * 15 * 60 + plays['seconds_remaining'])
    .drop(columns=['is_home', 'period'])
)

In [None]:
# Inspect the current feature frame.
plays

In [None]:
# Re-split and refit on the current feature set, then compare predictions with the actual calls.
plays_train, plays_validation, calls_train, calls_validation = train_test_split(
    plays,
    play_calls,
    test_size=0.2,
    train_size=0.8,
    random_state=0,
)
y, y_keys = calls_train.factorize()

classifier = RandomForestClassifier(random_state=0, n_estimators=100)
classifier.fit(plays_train, y)

# Predicted integer codes mapped back to play-call labels.
predicted_calls = y_keys[classifier.predict(plays_validation)]

pd.crosstab(
    index=calls_validation,
    columns=predicted_calls,
    rownames=['Actual Calls'],
    colnames=['Predicted Calls'],
)

In [None]:
# Pair each training column with its importance to see which features carry the most weight.
list(zip(plays_train.columns, classifier.feature_importances_))

In [None]:
# Replace the two score columns with a single scoring margin: how far the team we are
# predicting for (the offense) is ahead (positive) or behind (negative).
plays = (
    plays
    .assign(margin=plays['offense_score'] - plays['defense_score'])
    .drop(columns=['offense_score', 'defense_score'])
)
plays


In [None]:
# Re-split and refit on the current feature set, then compare predictions with the actual calls.
plays_train, plays_validation, calls_train, calls_validation = train_test_split(
    plays,
    play_calls,
    test_size=0.2,
    train_size=0.8,
    random_state=0,
)
y, y_keys = calls_train.factorize()

classifier = RandomForestClassifier(random_state=0, n_estimators=100)
classifier.fit(plays_train, y)

# Predicted integer codes mapped back to play-call labels.
predicted_calls = y_keys[classifier.predict(plays_validation)]

pd.crosstab(
    index=calls_validation,
    columns=predicted_calls,
    rownames=['Actual Calls'],
    colnames=['Predicted Calls'],
)

In [None]:
# Pair each training column name with its importance in the most recently fitted classifier.
list(zip(plays_train, classifier.feature_importances_))

In [None]:
from sklearn.metrics import accuracy_score

# Share of validation plays whose predicted call matches the actual call.
accuracy = accuracy_score(y_true=calls_validation, y_pred=predicted_calls)
accuracy

In [None]:
# Class probabilities for the first ten validation plays.
classifier.predict_proba(plays_validation)[0:10]

In [None]:
#create function used to predict plays
def predict_call(yards, down, distance, seconds, margin):
    """Return the predicted probability (in percent) of each play call for one situation.

    Uses the notebook-level ``classifier`` and ``y_keys``.

    Parameters
    ----------
    yards : value for the 'yards_to_goal' feature
    down : value for the 'down' feature
    distance : value for the 'distance' feature
    seconds : value for the 'seconds_remaining' feature
    margin : value for the 'margin' feature

    Returns
    -------
    pandas.DataFrame
        A single row labelled 'PROBA' with one column per play-call label,
        holding that label's probability multiplied by 100.
    """
    # Single-row feature frame; the column names are the ones the model is queried with.
    test_plays = pd.DataFrame({'yards_to_goal': [yards], 'down': [down], 'distance': [distance], 'seconds_remaining': [seconds], 'margin': [margin]})
    probabilities = classifier.predict_proba(test_plays)[0]
    # classes_ holds the integer codes the model was fitted on; y_keys maps them back to labels.
    labels = y_keys[classifier.classes_].to_list()
    return pd.DataFrame([probabilities * 100], index=['PROBA'], columns=labels)

In [None]:
# Example situation: 27 yards to goal, 4th down, 7 to go, 720 seconds remaining, margin of -3.
call = predict_call(yards=27, down=4, distance=7, seconds=720, margin=-3)
call

In [None]:
# Label of the play call with the highest predicted probability.
max_pred_call = call.idxmax(axis='columns')['PROBA']
max_pred_call

In [None]:
# Human-readable name for each play-call label.
call_names = {'fg': 'Field Goal', 'pass': 'Pass', 'rush': 'Rush', 'punt': 'Punt'}

# Direct lookup: an unexpected label raises KeyError right here instead of leaving
# final_pred undefined (or stale from an earlier run), which an if/elif chain
# without an else branch would do.
final_pred = call_names[max_pred_call]

In [None]:
# Show the human-readable prediction.
print(final_pred)

In [None]:
import joblib

In [None]:
# Persist the fitted classifier to 'cfbplaypred.joblib'.
# NOTE(review): joblib.dump presumably returns the written file name(s), not the model,
# so `saved_model` may be a misleading name — confirm against the joblib docs.
saved_model = joblib.dump(classifier, 'cfbplaypred.joblib') 

In [None]:
# Reload the persisted classifier from 'cfbplaypred.joblib'.
# NOTE(review): joblib files are presumably pickle-based; only load files from a trusted source.
loaded_model = joblib.load('cfbplaypred.joblib')

In [None]:
# Check the reloaded model: class probabilities for the first ten validation plays.
loaded_model.predict_proba(plays_validation)[0:10]