In [1]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score

In [117]:
#function to make sure the date is a valid date for sorting data and not one from the future
#this function will add the year onto the date as it currently doesn't have it 
def adjust_date(row):
    
    season_year = row['Season']
    month = row['Date'].split(' ')[0] 
    day = row['Date'].split(' ')[1]
    
    month_numbers = {
        "January": 1, "February": 2, "March": 3, "April": 4,
        "May": 5, "June": 6, "July": 7, "August": 8,
        "September": 9, "October": 10, "November": 11, "December": 12
    }
    month_number = month_numbers[month]

    #adjusting for january and february games as the season rolls into the next year
    year = season_year + 1 if month_number in [1, 2] else season_year

    # Return the adjusted date in MM/DD/YYYY format
    return f"{month_number:02d}/{day}/{year}"

In [118]:
#cleaning data to prepare it for machine learning
#matches.dtypes
matches = pd.read_csv('matches.csv')

#Removing playoff row error due to differing season lengths
#playoffIdx = match_df[ (match_df['Date'] == 'Playoffs')].index
#match_df =  match_df.drop(playoffIdx)
#match_df = match_df.reset_index(drop = 'true')
#match_df.to_csv('matches.csv', index=False, encoding='utf-8')
#match_df


#Removing playoff row error due to differing season lengths
playoffIdx = matches[ (matches['Date'] == 'Playoffs')].index
matches =  matches.drop(playoffIdx)
matches = matches.reset_index(drop = 'true')
matches.to_csv('matches.csv', index=False, encoding='utf-8')

#converting dates to proper dates so that they can be stored as date types
matches['Date'] = matches.apply(adjust_date, axis=1)
matches['Date'] = pd.to_datetime(matches['Date'])


#converting home/away into numeric data
matches['Home/AwayCode'] = matches['Home/Away'].astype('category').cat.codes #0 is away, 1 is home



#converting team into numeric data
#matches['TeamCode'] = matches['Team'].astype('category').cat.codes

#converting opponents into numeric data
#matches['OppCode'] = matches['Opp'].astype('category').cat.codes

#adding hour column 
matches['Hour'] = matches['Time'].str.replace(':.+', '', regex = True).astype('int')


#adding day of the week column
matches['DayCode'] = matches['Date'].dt.dayofweek
#NOTE STILL NEED TO CLEAN UP DATE FIRST


#converting all other stats from objects to ints
for column in matches.columns[10:24]:
    matches[column] = pd.to_numeric(matches[column])

#adding in team year and opp year column to make teams separate by year
matches['TeamYear'] = matches['Team'].astype('str') + '' + matches['Season'].apply(str)
matches['OppYear'] = matches['Opp'].astype('str') + '' + matches['Season'].apply(str)


matches['TeamCode'] = matches['TeamYear'].astype('category').cat.codes

matches['OppCode'] = matches['OppYear'].astype('category').cat.codes

#converting Win/Loss to numbers
matches['Target'] = (matches['Result'] == 'W').astype('int')
display(matches)

Unnamed: 0,Week,Day,Date,Time,Game Link,Result,OT,Rec,Home/Away,Opp,...,Team,Season,Home/AwayCode,Hour,DayCode,TeamYear,OppYear,TeamCode,OppCode,Target
0,1,Sun,2024-09-08,1:00PM ET,https://pro-football-reference.com/boxscores/2...,W,N,1-0,H,Arizona Cardinals,...,Buffalo Bills,2024,1,1,6,Buffalo Bills2024,Arizona Cardinals2024,19,4,1
1,2,Thu,2024-09-12,8:15PM ET,https://pro-football-reference.com/boxscores/2...,W,N,2-0,A,Miami Dolphins,...,Buffalo Bills,2024,0,8,3,Buffalo Bills2024,Miami Dolphins2024,19,99,1
2,3,Mon,2024-09-23,7:30PM ET,https://pro-football-reference.com/boxscores/2...,W,N,3-0,H,Jacksonville Jaguars,...,Buffalo Bills,2024,1,7,0,Buffalo Bills2024,Jacksonville Jaguars2024,19,74,1
3,4,Sun,2024-09-29,8:20PM ET,https://pro-football-reference.com/boxscores/2...,L,N,3-1,A,Baltimore Ravens,...,Buffalo Bills,2024,0,8,6,Buffalo Bills2024,Baltimore Ravens2024,19,14,0
4,5,Sun,2024-10-06,1:00PM ET,https://pro-football-reference.com/boxscores/2...,L,N,3-2,A,Houston Texans,...,Buffalo Bills,2024,0,1,6,Buffalo Bills2024,Houston Texans2024,19,64,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2681,13,Mon,2020-12-07,8:15PM ET,https://pro-football-reference.com/boxscores/2...,L,N,5-7,H,Buffalo Bills,...,San Francisco 49ers,2020,1,8,0,San Francisco 49ers2020,Buffalo Bills2020,135,15,0
2682,14,Sun,2020-12-13,4:25PM ET,https://pro-football-reference.com/boxscores/2...,L,N,5-8,H,Washington Football Team,...,San Francisco 49ers,2020,1,4,6,San Francisco 49ers2020,Washington Football Team2020,135,158,0
2683,15,Sun,2020-12-20,1:00PM ET,https://pro-football-reference.com/boxscores/2...,L,N,5-9,A,Dallas Cowboys,...,San Francisco 49ers,2020,0,1,6,San Francisco 49ers2020,Dallas Cowboys2020,135,40,0
2684,16,Sat,2020-12-26,4:30PM ET,https://pro-football-reference.com/boxscores/2...,W,N,6-9,A,Arizona Cardinals,...,San Francisco 49ers,2020,0,4,5,San Francisco 49ers2020,Arizona Cardinals2020,135,0,1


In [119]:
def make_predictions(data, predVals):
    rf = RandomForestClassifier(n_estimators = 1000, min_samples_split = 20, random_state = 1)
    training = data[data['Date'] < '04-04-2024']
    train = training[training['Date'] > '04-04-2019'] #cuts off the data from only the past two seasons as it tends to make the model more accurate surprisingly
    testing = data[data['Date'] > '04-04-2024']
    test = testing[testing['Date']<'12-24-2024']
    rf.fit(train[predVals], train['Target'])
    preds = rf.predict(test[predVals])
    combined = pd.DataFrame(dict(actual=test['Target'], prediction=preds))
    precision = precision_score(test['Target'],preds)
    accuracy = accuracy_score(test['Target'], preds)
    return combined, precision, accuracy, rf

In [120]:
def rolling_averages(team, cols, new_cols):
    team = team.sort_values('Date')

    # The standard 3-week average for stats to determine "form"
    rolling_stats_3 = team[cols].rolling(3, closed='left').mean()

    # 2-week rolling average for the 3rd week games
    rolling_stats_2 = team[cols].rolling(2, closed='left').mean()

    # 1-week rolling average (previous week) for 2nd week games
    rolling_stats_1 = team[cols].rolling(1, closed='left').mean()

    # Combining them
    rolling_stats = rolling_stats_3.fillna(rolling_stats_2).fillna(rolling_stats_1)

    # Filling first week with 0's as no there are no rolling averages
    rolling_stats = rolling_stats.fillna(0)

    team[new_cols] = rolling_stats
    return team

In [123]:
def rolling_win_pct(team):
    team = team.sort_values('Date')

    rolling_win_pcts = [0.0] #0's for the first week
    week_idx = 1;
    not_played_counts = 0
    while week_idx <= len(team):
        try:
            rec = team[team['Week'] == week_idx]['Rec'].iloc[0]
            wins, losses = rec.split('-')
            wins = int(wins)
            losses = int(losses)
            rolling_win_pcts.append(wins / (wins+losses))
            week_idx += 1
        except IndexError: #Takes into account the bye week
            week_idx += 1

        except AttributeError: #If games haven't been played yet and record values are null
            not_played_counts += 1
            week_idx += 1

        except ValueError: #Takes into account ties in records 
            wins, losses, draws = rec.split('-')
            wins = int(wins)
            draws = int(draws)
            losses = int(losses)
            rolling_win_pcts.append( (wins+0.5) / (wins+losses+0.5) )
            week_idx += 1
            
    while not_played_counts > 0: #fills the remaining games to be played with the current win loss ratio
        rolling_win_pcts = rolling_win_pcts.append( rolling_win_pcts[len(rolling_win_pcts) - 1])
        not_played_counts -= 1
    team['WinPctRolling'] = rolling_win_pcts
    return team

In [124]:
test = matches.set_index('TeamYear').groupby('TeamYear').apply(lambda x: rolling_win_pct(x),include_groups = False)
display(test)

Unnamed: 0_level_0,Unnamed: 1_level_0,Week,Day,Date,Time,Game Link,Result,OT,Rec,Home/Away,Opp,...,Team,Season,Home/AwayCode,Hour,DayCode,OppYear,TeamCode,OppCode,Target,WinPctRolling
TeamYear,TeamYear,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
Arizona Cardinals2020,Arizona Cardinals2020,1,Sun,2020-09-13,4:25PM ET,https://pro-football-reference.com/boxscores/2...,W,N,1-0,A,San Francisco 49ers,...,Arizona Cardinals,2020,0,4,6,San Francisco 49ers2020,0,135,1,0.000000
Arizona Cardinals2020,Arizona Cardinals2020,2,Sun,2020-09-20,4:05PM ET,https://pro-football-reference.com/boxscores/2...,W,N,2-0,H,Washington Football Team,...,Arizona Cardinals,2020,1,4,6,Washington Football Team2020,0,158,1,1.000000
Arizona Cardinals2020,Arizona Cardinals2020,3,Sun,2020-09-27,4:25PM ET,https://pro-football-reference.com/boxscores/2...,L,N,2-1,H,Detroit Lions,...,Arizona Cardinals,2020,1,4,6,Detroit Lions2020,0,50,0,1.000000
Arizona Cardinals2020,Arizona Cardinals2020,4,Sun,2020-10-04,1:00PM ET,https://pro-football-reference.com/boxscores/2...,L,N,2-2,A,Carolina Panthers,...,Arizona Cardinals,2020,0,1,6,Carolina Panthers2020,0,20,0,0.666667
Arizona Cardinals2020,Arizona Cardinals2020,5,Sun,2020-10-11,1:00PM ET,https://pro-football-reference.com/boxscores/2...,W,N,3-2,A,New York Jets,...,Arizona Cardinals,2020,0,1,6,New York Jets2020,0,120,1,0.500000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Washington Football Team2021,Washington Football Team2021,14,Sun,2021-12-12,1:00PM ET,https://pro-football-reference.com/boxscores/2...,L,N,6-7,H,Dallas Cowboys,...,Washington Football Team,2021,1,1,6,Dallas Cowboys2021,159,41,0,0.500000
Washington Football Team2021,Washington Football Team2021,15,Tue,2021-12-21,7:00PM ET,https://pro-football-reference.com/boxscores/2...,L,N,6-8,A,Philadelphia Eagles,...,Washington Football Team,2021,0,7,1,Philadelphia Eagles2021,159,126,0,0.461538
Washington Football Team2021,Washington Football Team2021,16,Sun,2021-12-26,8:20PM ET,https://pro-football-reference.com/boxscores/2...,L,N,6-9,A,Dallas Cowboys,...,Washington Football Team,2021,0,8,6,Dallas Cowboys2021,159,41,0,0.428571
Washington Football Team2021,Washington Football Team2021,17,Sun,2022-01-02,1:00PM ET,https://pro-football-reference.com/boxscores/2...,L,N,6-10,H,Philadelphia Eagles,...,Washington Football Team,2021,1,1,6,Philadelphia Eagles2021,159,126,0,0.400000


In [130]:

#Used to bypass deprecation warning but still have the 'Team' index included in the dataframe
cols = ['TmSc', 'OppSc', 'Off1stD', 'OffTotYd', 'OffPassY',
       'OffRushY', 'OffTO', 'Def1stD', 'DefTotYd', 'DefPassY', 'DefRushY',
       'DefTO', 'EPOff', 'EPDef', 'EPSp.']
new_cols = [f'{col}Rolling' for col in cols]

num_season_games = 3

matches_rolling = matches.set_index('TeamYear').groupby('TeamYear').apply(lambda x: rolling_averages(x, cols, new_cols),include_groups = False)
matches_rolling = matches_rolling.droplevel(0)
matches_rolling = matches_rolling.reset_index()

matches_rolling = matches_rolling.set_index('TeamYear').groupby('TeamYear').apply(lambda x: rolling_win_pct(x),include_groups = False)
matches_rolling = matches_rolling.droplevel(0)
matches_rolling = matches_rolling.reset_index()


#joining rows together so that the rolling averages for both home and away teams are in the same row of data

#match_tester = matches_rolling[matches_rolling['Season'] == 2024].copy(deep = True)

home_match_cols = ['Team', 'Week', 'Day', 'Date', 'Time', 'Game Link', 'OT', 'Opp',
                   'Season', 'Hour', 'DayCode', 'TeamYear', 'OppYear', 'TeamCode', 'OppCode']

away_match_cols = ['Opp', 'Week', 'Day', 'Date', 'Time', 'Game Link', 'OT', 'Team',
                    'Season', 'Hour', 'DayCode', 'OppYear', 'TeamYear', 'OppCode', 'TeamCode']

matches_rolling = matches_rolling.merge(matches_rolling, left_on = home_match_cols, right_on = away_match_cols, suffixes = (None, '_Opp'))

matches_rolling = matches_rolling.drop(['Team_Opp', 'Result_Opp', 'Home/Away_Opp', 'Opp_Opp', 'Home/AwayCode_Opp', 'TeamYear_Opp', 'OppYear_Opp',
       'TeamCode_Opp', 'OppCode_Opp', 'Target_Opp', 'TmSc_Opp', 'OppSc_Opp', 'Off1stD_Opp', 'OffTotYd_Opp', 'OffPassY_Opp', 'OffRushY_Opp', 'OffTO_Opp', 'Def1stD_Opp', 
                       'DefTotYd_Opp', 'DefPassY_Opp', 'DefRushY_Opp', 'DefTO_Opp', 'EPOff_Opp', 'EPDef_Opp', 'EPSp._Opp'], axis=1)



matches_rolling.to_csv('matches_test.csv', index=False, encoding='utf-8') #writes the data to a csv file for visualization

display(matches_rolling)
print(matches_rolling.columns)


predictorTests = []
predictorTests.append(['Home/AwayCode', 'TeamCode', 'OppCode'])
#predictorTests.append(['Off1stD', 'OffTotYd', 'OffPassY', 'OffRushY', 'OffTO', 'Def1stD', 'DefTotYd', 'DefPassY', 'DefRushY', 'DefTO', 'EPOff', 'EPDef', 'EPSp.'])
#predictorTests.append(['Off1stD', 'OffTotYd', 'OffPassY', 'OffRushY', 'OffTO', 'Def1stD', 'DefTotYd', 'DefPassY', 'DefRushY', 'DefTO'])
#predictorTests.append(['OffTotYd', 'DefTotYd', 'OffRushY', 'OffTO'])

#predictorTests.append(['OffTotYd', 'DefTotYd', 'OffRushY', 'OffTO', 'DefTO'])


#predictorTests.append(['TmSc', 'OppSc']) #should always be 100% as TmSc > OppSc = win
#predictorTests.append(['EPOff', 'EPDef', 'EPSp.'])

#predictorTests.append(['Off1stDRolling', 'OffTotYdRolling', 'OffPassYRolling', 'OffRushYRolling', 'OffTORolling',
#                       'Def1stDRolling', 'DefTotYdRolling', 'DefPassYRolling', 'DefRushYRolling', 'DefTORolling'])

#predictorTests.append(['OffTotYdRolling', 'DefTotYdRolling', 'OffRushYRolling', 'OffTORolling'])

predictorTests.append(['Off1stDRolling', 'OffTotYdRolling', 'OffPassYRolling', 'OffRushYRolling', 'OffTORolling',
                       'Def1stDRolling', 'DefTotYdRolling', 'DefPassYRolling', 'DefRushYRolling', 'DefTORolling', 
                       'Off1stDRolling_Opp', 'OffTotYdRolling_Opp', 'OffPassYRolling_Opp','OffRushYRolling_Opp', 'OffTORolling_Opp',
                       'Def1stDRolling_Opp', 'DefTotYdRolling_Opp', 'DefPassYRolling_Opp', 'DefRushYRolling_Opp',
                       'DefTORolling_Opp'])


predictorTests.append(['TmScRolling', 'OppScRolling', 'Off1stDRolling', 'OffTotYdRolling', 'OffPassYRolling', 'OffRushYRolling', 'OffTORolling',
                       'Def1stDRolling', 'DefTotYdRolling', 'DefPassYRolling', 'DefRushYRolling', 'DefTORolling', 

                       'TmScRolling_Opp', 'OppScRolling_Opp',
                       'Off1stDRolling_Opp', 'OffTotYdRolling_Opp', 'OffPassYRolling_Opp','OffRushYRolling_Opp', 'OffTORolling_Opp',
                       'Def1stDRolling_Opp', 'DefTotYdRolling_Opp', 'DefPassYRolling_Opp', 'DefRushYRolling_Opp',
                       'DefTORolling_Opp'])


predictorTests.append(['TmScRolling', 'OppScRolling', 'Off1stDRolling', 'OffTotYdRolling', 'OffPassYRolling', 'OffRushYRolling', 'OffTORolling',
                       'Def1stDRolling', 'DefTotYdRolling', 'DefPassYRolling', 'DefRushYRolling', 'DefTORolling', 

                       'TmScRolling_Opp', 'OppScRolling_Opp',
                       'Off1stDRolling_Opp', 'OffTotYdRolling_Opp', 'OffPassYRolling_Opp','OffRushYRolling_Opp', 'OffTORolling_Opp',
                       'Def1stDRolling_Opp', 'DefTotYdRolling_Opp', 'DefPassYRolling_Opp', 'DefRushYRolling_Opp',
                       'DefTORolling_Opp', 'WinPctRolling', 'WinPctRolling_Opp'])




predictorTests.append(['Off1stD', 'OffTotYd', 'OffPassY', 'OffRushY', 'OffTO', 'Def1stD', 
                       'DefTotYd', 'DefPassY', 'DefRushY', 'DefTO', 'EPOff', 'EPDef', 'EPSp.', 'Off1stDRolling', 'OffTotYdRolling', 'OffPassYRolling', 'OffRushYRolling', 'OffTORolling',
                       'Def1stDRolling', 'DefTotYdRolling', 'DefPassYRolling', 'DefRushYRolling', 'DefTORolling'])

predictorTests.append(['Off1stD', 'OffTotYd', 'OffPassY', 'OffRushY', 'OffTO', 'Def1stD', 
                       'DefTotYd', 'DefPassY', 'DefRushY', 'DefTO', 'Off1stDRolling', 'OffTotYdRolling', 'OffPassYRolling', 'OffRushYRolling', 'OffTORolling',
                       'Def1stDRolling', 'DefTotYdRolling', 'DefPassYRolling', 'DefRushYRolling', 'DefTORolling'])



#predictorTests.append(['Home/AwayCode', 'OppCode', 'TeamCode'])




rf_models = []
predictors = ['Home/AwayCode', 'TeamCode', 'OppCode']
for predictorCols in predictorTests:
    new_predictors = predictorCols
    combined, precision, accuracy, rf = make_predictions(matches_rolling, predictorCols)
    rf_models.append(rf)
    print("Predictors:", new_predictors)
    print(f"Accuracy: {accuracy * 100:.2f}%")
    print(f"Precision: {precision * 100:.2f}%")

    #This shows how many times a correct prediction was made when one team was predicted to win and the other was predicted to lose
    combined = combined.merge(matches_rolling[['Date','Team','Opp','Result']], left_index = True, right_index = True)
    merged = combined.merge(combined, left_on = ['Date', 'Team'], right_on = ['Date', 'Opp'])
    #Prints out the importance of the different features used to predict games
    feature_importances = rf.feature_importances_
    print("Feature Importances")
    for feature, importance in zip(new_predictors, feature_importances):
        print(f"{feature}: {importance}") 

    #remapping 0's and 1's to losses and wins
    mapping_cols = ['actual_x', 'actual_y', 'prediction_x', 'prediction_y']
    for col in mapping_cols:
        merged[col] = merged[col].apply(lambda x : 'W' if x == 1 else 'L')
        

    #prints correct win predictions
    print(merged[(merged['prediction_x'] == 'W') & (merged['prediction_y'] == 'L')]['actual_x'].value_counts())
    
    print("-"*200)
    print("")

Unnamed: 0,TeamYear,Week,Day,Date,Time,Game Link,Result,OT,Rec,Home/Away,...,OffTORolling_Opp,Def1stDRolling_Opp,DefTotYdRolling_Opp,DefPassYRolling_Opp,DefRushYRolling_Opp,DefTORolling_Opp,EPOffRolling_Opp,EPDefRolling_Opp,EPSp.Rolling_Opp,WinPctRolling_Opp
0,Arizona Cardinals2020,1,Sun,2020-09-13,4:25PM ET,https://pro-football-reference.com/boxscores/2...,W,N,1-0,A,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
1,Arizona Cardinals2020,2,Sun,2020-09-20,4:05PM ET,https://pro-football-reference.com/boxscores/2...,W,N,2-0,H,...,0.000000,19.000000,265.000000,208.000000,57.000000,3.000000,-6.280000,16.450000,-1.280000,1.000000
2,Arizona Cardinals2020,3,Sun,2020-09-27,4:25PM ET,https://pro-football-reference.com/boxscores/2...,L,N,2-1,H,...,1.000000,26.000000,425.500000,221.500000,204.000000,0.000000,5.445000,-18.375000,1.370000,0.000000
3,Arizona Cardinals2020,4,Sun,2020-10-04,1:00PM ET,https://pro-football-reference.com/boxscores/2...,L,N,2-2,A,...,1.333333,23.000000,382.333333,258.333333,124.000000,2.000000,5.956667,-7.220000,-1.803333,0.333333
4,Arizona Cardinals2020,5,Sun,2020-10-11,1:00PM ET,https://pro-football-reference.com/boxscores/2...,W,N,3-2,A,...,1.000000,19.666667,357.000000,217.666667,139.333333,1.333333,-11.213333,-10.076667,0.283333,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2681,Washington Football Team2021,14,Sun,2021-12-12,1:00PM ET,https://pro-football-reference.com/boxscores/2...,L,N,6-7,H,...,1.333333,22.333333,428.000000,287.333333,140.666667,2.000000,-3.273333,-2.120000,4.556667,0.666667
2682,Washington Football Team2021,15,Tue,2021-12-21,7:00PM ET,https://pro-football-reference.com/boxscores/2...,L,N,6-8,A,...,1.666667,18.666667,289.333333,206.333333,83.000000,1.333333,4.416667,1.090000,1.870000,0.461538
2683,Washington Football Team2021,16,Sun,2021-12-26,8:20PM ET,https://pro-football-reference.com/boxscores/2...,L,N,6-9,A,...,1.333333,15.333333,310.333333,184.666667,125.666667,4.000000,-3.263333,16.550000,-1.450000,0.714286
2684,Washington Football Team2021,17,Sun,2022-01-02,1:00PM ET,https://pro-football-reference.com/boxscores/2...,L,N,6-10,H,...,0.666667,16.666667,236.666667,164.333333,72.333333,1.000000,13.500000,4.056667,-2.623333,0.533333


Index(['TeamYear', 'Week', 'Day', 'Date', 'Time', 'Game Link', 'Result', 'OT',
       'Rec', 'Home/Away', 'Opp', 'TmSc', 'OppSc', 'Off1stD', 'OffTotYd',
       'OffPassY', 'OffRushY', 'OffTO', 'Def1stD', 'DefTotYd', 'DefPassY',
       'DefRushY', 'DefTO', 'EPOff', 'EPDef', 'EPSp.', 'Team', 'Season',
       'Home/AwayCode', 'Hour', 'DayCode', 'OppYear', 'TeamCode', 'OppCode',
       'Target', 'TmScRolling', 'OppScRolling', 'Off1stDRolling',
       'OffTotYdRolling', 'OffPassYRolling', 'OffRushYRolling', 'OffTORolling',
       'Def1stDRolling', 'DefTotYdRolling', 'DefPassYRolling',
       'DefRushYRolling', 'DefTORolling', 'EPOffRolling', 'EPDefRolling',
       'EPSp.Rolling', 'WinPctRolling', 'Rec_Opp', 'TmScRolling_Opp',
       'OppScRolling_Opp', 'Off1stDRolling_Opp', 'OffTotYdRolling_Opp',
       'OffPassYRolling_Opp', 'OffRushYRolling_Opp', 'OffTORolling_Opp',
       'Def1stDRolling_Opp', 'DefTotYdRolling_Opp', 'DefPassYRolling_Opp',
       'DefRushYRolling_Opp', 'DefTORolling_Opp',

In [131]:
## get week 18 games
week18 = matches_rolling[matches_rolling['Season'] == 2024].copy(deep = True)
week18 = week18[week18['Week'] == 18]
rf_idx = 0
predictors = ['Home/AwayCode', 'TeamCode', 'OppCode']
#predictorCols = ['Off1stD', 'OffTotYd', 'OffPassY', 'OffRushY', 'OffTO', 'Def1stD', 'DefTotYd',
#'DefPassY', 'DefRushY', 'DefTO', 'Off1stDRolling', 'OffTotYdRolling', 'OffPassYRolling', 'OffRushYRolling',
#'OffTORolling', 'Def1stDRolling', 'DefTotYdRolling', 'DefPassYRolling', 'DefRushYRolling', 'DefTORolling']
    
for predictorCols in predictorTests:
    new_predictors = predictorCols
    rf = rf_models[rf_idx]
    rf_idx += 1
    week18_preds = rf.predict_proba(week18[new_predictors])#predict win percentages
    week18_table = pd.DataFrame({'Predicted': week18_preds[:,1]}, index=week18.index) 
    #week18_table['Predicted'] = mapWin(week18_table['Predicted'])
    week18_table = week18_table.merge(week18[['Date', 'Team', 'Opp']], left_index=True, right_index=True)
    #week18_table['Predicted'] = week18_table['Predicted'].apply(lambda x : 'W' if x==1 else 'L')
    
    #week18_table = week18_table[ week18_table['Date'] <= pd.Timestamp.now()]
    print("Predictors:", new_predictors)
    merged18 = week18_table.merge(week18_table, left_on= ['Date', 'Team'], right_on = ['Date','Opp'])

    merged18 = merged18.rename(columns = {'Team_x':'Team', 'Predicted_x':'OrigTeamWinPct', 'Opp_x':'Opp', 'Predicted_y':'OrigOppWinPct'})


    #Normalizing win percentages so they add up to 1
    merged18['TeamWinPct'] = merged18['OrigTeamWinPct'] / (merged18['OrigTeamWinPct'] + merged18['OrigOppWinPct'])
    merged18['OppWinPct'] = merged18['OrigOppWinPct'] / (merged18['OrigTeamWinPct'] + merged18['OrigOppWinPct'])

    merged18['TeamPrediction'] = merged18['TeamWinPct'].apply(lambda x : 'W' if x > 0.5 else ('L' if x < 0.5 else 'D'))
    merged18['OppPrediction'] = merged18['OppWinPct'].apply(lambda x : 'W' if x > 0.5 else ('L' if x < 0.5 else 'D'))

    

    


    #COMMENT THIS OUT TO SEE MORE DETAILS ON THE ORIGINAL WIN PERCENTAGE PREDICTIONS
    merged18 = merged18.reindex(columns = ['Date', 'Team', 'TeamPrediction', 'TeamWinPct', 'Opp', 'OppPrediction', 'OppWinPct'])

    #AND THEN UNCOMMENT TO SEE MORE DETAILS ON THE ORIGINAL WIN PERCENTAGE PREDICTIONS
    #merged18 = merged18.reindex(columns = ['Date', 'Team', 'TeamPredictionLetter', 'TeamPrediction', 'NormalizedTeamPrediction', 'Opp', 'OppPredictionLetter', 'OppPrediction', 'NormalizedOppPrediction'])
    

    
    merged18 = merged18.style.set_properties(**{'text-align': 'center'})

    display(merged18)

        
    #print(merged18[(merged18['Predicted'] == 'W') & (merged18['prediction_y'] == 'L')]['actual_x'].value_counts())
    print("-"*200)
    print("")

Predictors: ['Home/AwayCode', 'TeamCode', 'OppCode']


Unnamed: 0,Date,Team,TeamPrediction,TeamWinPct,Opp,OppPrediction,OppWinPct
0,2025-01-05 00:00:00,Arizona Cardinals,L,0.178618,San Francisco 49ers,W,0.821382
1,2025-01-05 00:00:00,Atlanta Falcons,W,0.82326,Carolina Panthers,L,0.17674
2,2025-01-04 00:00:00,Baltimore Ravens,W,0.702873,Cleveland Browns,L,0.297127
3,2025-01-05 00:00:00,Buffalo Bills,W,0.52262,New England Patriots,L,0.47738
4,2025-01-05 00:00:00,Carolina Panthers,L,0.17674,Atlanta Falcons,W,0.82326
5,2025-01-05 00:00:00,Chicago Bears,L,0.279872,Green Bay Packers,W,0.720128
6,2025-01-04 00:00:00,Cincinnati Bengals,L,0.403899,Pittsburgh Steelers,W,0.596101
7,2025-01-04 00:00:00,Cleveland Browns,L,0.297127,Baltimore Ravens,W,0.702873
8,2025-01-05 00:00:00,Dallas Cowboys,W,0.696799,Washington Commanders,L,0.303201
9,2025-01-05 00:00:00,Denver Broncos,L,0.366404,Kansas City Chiefs,W,0.633596


--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

Predictors: ['Off1stDRolling', 'OffTotYdRolling', 'OffPassYRolling', 'OffRushYRolling', 'OffTORolling', 'Def1stDRolling', 'DefTotYdRolling', 'DefPassYRolling', 'DefRushYRolling', 'DefTORolling', 'Off1stDRolling_Opp', 'OffTotYdRolling_Opp', 'OffPassYRolling_Opp', 'OffRushYRolling_Opp', 'OffTORolling_Opp', 'Def1stDRolling_Opp', 'DefTotYdRolling_Opp', 'DefPassYRolling_Opp', 'DefRushYRolling_Opp', 'DefTORolling_Opp']


Unnamed: 0,Date,Team,TeamPrediction,TeamWinPct,Opp,OppPrediction,OppWinPct
0,2025-01-05 00:00:00,Arizona Cardinals,W,0.533016,San Francisco 49ers,L,0.466984
1,2025-01-05 00:00:00,Atlanta Falcons,W,0.533928,Carolina Panthers,L,0.466072
2,2025-01-04 00:00:00,Baltimore Ravens,W,0.655236,Cleveland Browns,L,0.344764
3,2025-01-05 00:00:00,Buffalo Bills,W,0.730044,New England Patriots,L,0.269956
4,2025-01-05 00:00:00,Carolina Panthers,L,0.466072,Atlanta Falcons,W,0.533928
5,2025-01-05 00:00:00,Chicago Bears,L,0.274708,Green Bay Packers,W,0.725292
6,2025-01-04 00:00:00,Cincinnati Bengals,W,0.778253,Pittsburgh Steelers,L,0.221747
7,2025-01-04 00:00:00,Cleveland Browns,L,0.344764,Baltimore Ravens,W,0.655236
8,2025-01-05 00:00:00,Dallas Cowboys,L,0.439948,Washington Commanders,W,0.560052
9,2025-01-05 00:00:00,Denver Broncos,L,0.433092,Kansas City Chiefs,W,0.566908


--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

Predictors: ['TmScRolling', 'OppScRolling', 'Off1stDRolling', 'OffTotYdRolling', 'OffPassYRolling', 'OffRushYRolling', 'OffTORolling', 'Def1stDRolling', 'DefTotYdRolling', 'DefPassYRolling', 'DefRushYRolling', 'DefTORolling', 'TmScRolling_Opp', 'OppScRolling_Opp', 'Off1stDRolling_Opp', 'OffTotYdRolling_Opp', 'OffPassYRolling_Opp', 'OffRushYRolling_Opp', 'OffTORolling_Opp', 'Def1stDRolling_Opp', 'DefTotYdRolling_Opp', 'DefPassYRolling_Opp', 'DefRushYRolling_Opp', 'DefTORolling_Opp']


Unnamed: 0,Date,Team,TeamPrediction,TeamWinPct,Opp,OppPrediction,OppWinPct
0,2025-01-05 00:00:00,Arizona Cardinals,L,0.493824,San Francisco 49ers,W,0.506176
1,2025-01-05 00:00:00,Atlanta Falcons,W,0.574116,Carolina Panthers,L,0.425884
2,2025-01-04 00:00:00,Baltimore Ravens,W,0.59767,Cleveland Browns,L,0.40233
3,2025-01-05 00:00:00,Buffalo Bills,W,0.751436,New England Patriots,L,0.248564
4,2025-01-05 00:00:00,Carolina Panthers,L,0.425884,Atlanta Falcons,W,0.574116
5,2025-01-05 00:00:00,Chicago Bears,L,0.256861,Green Bay Packers,W,0.743139
6,2025-01-04 00:00:00,Cincinnati Bengals,W,0.763688,Pittsburgh Steelers,L,0.236312
7,2025-01-04 00:00:00,Cleveland Browns,L,0.40233,Baltimore Ravens,W,0.59767
8,2025-01-05 00:00:00,Dallas Cowboys,L,0.374777,Washington Commanders,W,0.625223
9,2025-01-05 00:00:00,Denver Broncos,L,0.438114,Kansas City Chiefs,W,0.561886


--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

Predictors: ['TmScRolling', 'OppScRolling', 'Off1stDRolling', 'OffTotYdRolling', 'OffPassYRolling', 'OffRushYRolling', 'OffTORolling', 'Def1stDRolling', 'DefTotYdRolling', 'DefPassYRolling', 'DefRushYRolling', 'DefTORolling', 'TmScRolling_Opp', 'OppScRolling_Opp', 'Off1stDRolling_Opp', 'OffTotYdRolling_Opp', 'OffPassYRolling_Opp', 'OffRushYRolling_Opp', 'OffTORolling_Opp', 'Def1stDRolling_Opp', 'DefTotYdRolling_Opp', 'DefPassYRolling_Opp', 'DefRushYRolling_Opp', 'DefTORolling_Opp', 'WinPctRolling', 'WinPctRolling_Opp']


Unnamed: 0,Date,Team,TeamPrediction,TeamWinPct,Opp,OppPrediction,OppWinPct
0,2025-01-05 00:00:00,Arizona Cardinals,W,0.510071,San Francisco 49ers,L,0.489929
1,2025-01-05 00:00:00,Atlanta Falcons,W,0.582711,Carolina Panthers,L,0.417289
2,2025-01-04 00:00:00,Baltimore Ravens,W,0.615368,Cleveland Browns,L,0.384632
3,2025-01-05 00:00:00,Buffalo Bills,W,0.78842,New England Patriots,L,0.21158
4,2025-01-05 00:00:00,Carolina Panthers,L,0.417289,Atlanta Falcons,W,0.582711
5,2025-01-05 00:00:00,Chicago Bears,L,0.212731,Green Bay Packers,W,0.787269
6,2025-01-04 00:00:00,Cincinnati Bengals,W,0.68037,Pittsburgh Steelers,L,0.31963
7,2025-01-04 00:00:00,Cleveland Browns,L,0.384632,Baltimore Ravens,W,0.615368
8,2025-01-05 00:00:00,Dallas Cowboys,L,0.358691,Washington Commanders,W,0.641309
9,2025-01-05 00:00:00,Denver Broncos,L,0.442008,Kansas City Chiefs,W,0.557992


--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

Predictors: ['Off1stD', 'OffTotYd', 'OffPassY', 'OffRushY', 'OffTO', 'Def1stD', 'DefTotYd', 'DefPassY', 'DefRushY', 'DefTO', 'EPOff', 'EPDef', 'EPSp.', 'Off1stDRolling', 'OffTotYdRolling', 'OffPassYRolling', 'OffRushYRolling', 'OffTORolling', 'Def1stDRolling', 'DefTotYdRolling', 'DefPassYRolling', 'DefRushYRolling', 'DefTORolling']


Unnamed: 0,Date,Team,TeamPrediction,TeamWinPct,Opp,OppPrediction,OppWinPct
0,2025-01-05 00:00:00,Arizona Cardinals,L,0.498486,San Francisco 49ers,W,0.501514
1,2025-01-05 00:00:00,Atlanta Falcons,L,0.497285,Carolina Panthers,W,0.502715
2,2025-01-04 00:00:00,Baltimore Ravens,W,0.511578,Cleveland Browns,L,0.488422
3,2025-01-05 00:00:00,Buffalo Bills,W,0.504825,New England Patriots,L,0.495175
4,2025-01-05 00:00:00,Carolina Panthers,W,0.502715,Atlanta Falcons,L,0.497285
5,2025-01-05 00:00:00,Chicago Bears,L,0.489276,Green Bay Packers,W,0.510724
6,2025-01-04 00:00:00,Cincinnati Bengals,W,0.503878,Pittsburgh Steelers,L,0.496122
7,2025-01-04 00:00:00,Cleveland Browns,L,0.488422,Baltimore Ravens,W,0.511578
8,2025-01-05 00:00:00,Dallas Cowboys,L,0.499458,Washington Commanders,W,0.500542
9,2025-01-05 00:00:00,Denver Broncos,W,0.502399,Kansas City Chiefs,L,0.497601


--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

Predictors: ['Off1stD', 'OffTotYd', 'OffPassY', 'OffRushY', 'OffTO', 'Def1stD', 'DefTotYd', 'DefPassY', 'DefRushY', 'DefTO', 'Off1stDRolling', 'OffTotYdRolling', 'OffPassYRolling', 'OffRushYRolling', 'OffTORolling', 'Def1stDRolling', 'DefTotYdRolling', 'DefPassYRolling', 'DefRushYRolling', 'DefTORolling']


Unnamed: 0,Date,Team,TeamPrediction,TeamWinPct,Opp,OppPrediction,OppWinPct
0,2025-01-05 00:00:00,Arizona Cardinals,L,0.497,San Francisco 49ers,W,0.503
1,2025-01-05 00:00:00,Atlanta Falcons,L,0.494742,Carolina Panthers,W,0.505258
2,2025-01-04 00:00:00,Baltimore Ravens,W,0.508152,Cleveland Browns,L,0.491848
3,2025-01-05 00:00:00,Buffalo Bills,L,0.497277,New England Patriots,W,0.502723
4,2025-01-05 00:00:00,Carolina Panthers,W,0.505258,Atlanta Falcons,L,0.494742
5,2025-01-05 00:00:00,Chicago Bears,L,0.489926,Green Bay Packers,W,0.510074
6,2025-01-04 00:00:00,Cincinnati Bengals,W,0.501735,Pittsburgh Steelers,L,0.498265
7,2025-01-04 00:00:00,Cleveland Browns,L,0.491848,Baltimore Ravens,W,0.508152
8,2025-01-05 00:00:00,Dallas Cowboys,L,0.498961,Washington Commanders,W,0.501039
9,2025-01-05 00:00:00,Denver Broncos,L,0.499436,Kansas City Chiefs,W,0.500564


--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------



In [215]:
homeTeam = input('Input Home Team')
awayTeam = input('Input Away Team')
#predictors = ['Home/AwayCode', 'TeamCode', 'OppCode']

predictors = ['Home/AwayCode', 'TeamCode', 'OppCode', 'Off1stDRolling', 'OffTotYdRolling', 'OffPassYRolling', 'OffRushYRolling', 'OffTORolling', 'Def1stDRolling', 'DefTotYdRolling', 'DefPassYRolling', 'DefRushYRolling', 'DefTORolling']


print(homeTeam)
print(awayTeam)

rf = rf_models[7]
data = matches_rolling[matches_rolling['Season'] == 2024].copy(deep = True)


homeTeamData = data[data['Team'] == homeTeam]
homeTeamData = homeTeamData.sort_values('Date', ascending = False)
#homeTeamData = homeTeamData.dropna(subset = 'Result')
homeTeamData = homeTeamData.iloc[0]

awayTeamData = data[data['Team'] == awayTeam]
awayTeamData = awayTeamData.sort_values('Date', ascending = False)
#awayTeamData = awayTeamData.dropna(subset = 'Result')
awayTeamData = awayTeamData.iloc[0]



matchData = homeTeamData.copy(deep = True)
matchData['Opp'] = awayTeam
matchData['DayCode'] = 6 #defaults to sunday 
matchData['OppCode'] = awayTeamData['TeamCode']
#display(matchData)

matchData = pd.Series.to_frame(matchData).transpose()

gamePreds = rf.predict_proba(matchData[predictors])

teamWinPct = gamePreds[:,1] / (gamePreds[:,1] + gamePreds[:,0])

oppWinPct = gamePreds[:,0] / (gamePreds[:,1] + gamePreds[:,0])


gameTable = pd.DataFrame({'Team': homeTeam, 'TeamPrediction': '_', 'TeamWinPct': teamWinPct, 'Opp' : awayTeam, 'OppPrediction': '_', 'OppWinPct': oppWinPct})

gameTable['TeamPrediction'] = gameTable['TeamWinPct'].apply(lambda x : 'W' if x > 0.5 else ('L' if x < 0.5 else 'D'))
gameTable['OppPrediction'] = gameTable['OppWinPct'].apply(lambda x : 'W' if x > 0.5 else ('L' if x < 0.5 else 'D'))

display(gameTable)




Input Home Team Tampa Bay Buccaneers
Input Away Team Baltimore Ravens


Tampa Bay Buccaneers
Baltimore Ravens


Unnamed: 0,Team,TeamPrediction,TeamWinPct,Opp,OppPrediction,OppWinPct
0,Tampa Bay Buccaneers,W,0.606056,Baltimore Ravens,L,0.393944
