# Model Prediction Notebook

The fitted models and the lists of statistic columns saved by the other notebook are loaded here from their joblib files.

In [22]:
from joblib import load

# Persisted model objects, read back in the same order they are named.
best_reg_model, best_win_model, best_spread_model = map(
    load,
    ("best_reg_model.joblib", "best_win_model.joblib", "best_spread_model.joblib"),
)
# Persisted collections of statistic column names.
stats_per_col, stats_total = map(load, ("stats_per_col.joblib", "stats_total.joblib"))

The **AddedFeatures** transformer class is redefined here, because it is needed to re-run the saved pipelines. The pipelines themselves are then loaded as well.

In [23]:
from sklearn.base import BaseEstimator, TransformerMixin

class AddedFeatures(BaseEstimator, TransformerMixin):
    """Stateless transformer that appends ratio features to an NFL stats dataframe.

    For the home and the away side it adds the pass completion percentage and
    the share of first downs that came from passing and from rushing.
    """

    # (new column, numerator column, denominator column), in insertion order.
    _RATIO_COLUMNS = (
        ("pass_comp_perc_home", "pass_completions_home", "pass_attempts_home"),
        ("pass_comp_perc_away", "pass_completions_away", "pass_attempts_away"),
        ("first_downs_pass_perc_home", "pass_first_downs_home", "first_downs_home"),
        ("first_downs_pass_perc_away", "pass_first_downs_away", "first_downs_away"),
        ("first_downs_rush_perc_home", "rush_first_downs_home", "first_downs_home"),
        ("first_downs_rush_perc_away", "rush_first_downs_away", "first_downs_away"),
    )

    def fit(self, X, y=None):
        """Learn nothing; return the transformer itself."""
        return self

    def transform(self, X, y=None):
        """Return a copy of ``X`` with the six ratio columns added; ``X`` is not modified."""
        augmented = X.copy()
        for new_column, numerator, denominator in self._RATIO_COLUMNS:
            augmented[new_column] = augmented[numerator] / augmented[denominator]
        return augmented
    
# Loaded only after AddedFeatures is defined in this cell; the notebook text
# says that class is required to re-run these pipelines.
nfl_pipe, win_pipe, spread_pipe = map(
    load, ("nfl_pipe.joblib", "win_pipe.joblib", "spread_pipe.joblib")
)

The function used to make predictions is re-written here.

In [24]:
from sportsreference.nfl.teams import Teams
import pandas as pd

# Columns selected from each team's stats frame in predict_outcome: the
# stats_per_col columns, the stats_total columns (which predict_outcome divides
# by wins + losses), and the team abbreviation (dropped again before prediction).
# Assumes stats_per_col and stats_total are plain lists of column names, so that
# `+` concatenates them — TODO confirm against the notebook that saved them.
chosen_col = stats_per_col + stats_total + ["abbreviation"]

_MODEL_TYPES = ("reg", "win", "spread")


def _format_percent(probability):
    """Render a probability as a percentage string rounded to one decimal."""
    return "{}%".format(round(probability * 100, 1))


def _team_features(teams, abbreviation, suffix):
    """Return ``(team name, feature Series)`` for one team, labels suffixed with ``suffix``."""
    # Work on a copy so the rescaling below never touches the library's object.
    team_df = teams(abbreviation).dataframe.copy()
    team_name = team_df["name"].iloc[0]
    # Season totals are divided by wins + losses; ties are not counted.
    # NOTE(review): presumably this mirrors the training notebook — confirm.
    games_played = team_df["wins"] + team_df["losses"]
    team_df[stats_total] = team_df[stats_total].div(games_played, axis=0)
    return team_name, team_df[chosen_col].add_suffix(suffix).iloc[0]


def predict_outcome(year, games, model, model_type="reg"):
    """
    Predict the outcome of NFL games with the given model.

    Parameters
    ----------
    year : season passed to ``Teams(year=...)`` to fetch team statistics.
    games : iterable of tuples or lists of the form
        ``(away team, home team, home spread)``. A team given with more than
        three characters is treated as a full team name and mapped to its
        abbreviation; otherwise it is used as the abbreviation directly.
        The third element is only read when ``model_type == "spread"``.
    model : fitted estimator; ``predict`` is called for ``"reg"``,
        ``predict_proba`` for ``"win"`` and ``"spread"``.
    model_type : one of ``"reg"``, ``"win"`` or ``"spread"``; selects the
        pipeline (``nfl_pipe``, ``win_pipe``, ``spread_pipe``) and the output
        columns.

    Returns
    -------
    pandas.DataFrame with one row per game, in input order: the away and home
    team names plus ``"Home Spread"`` (reg), ``"Away Probability"`` /
    ``"Home Probability"`` (win), or ``"Away Team Beat Spread"`` /
    ``"Home Team Beat Spread"`` (spread). Empty when ``games`` is empty.

    Raises
    ------
    ValueError : ``model_type`` is not one of the supported values.
    KeyError : a full team name is not found among the season's teams.
    IndexError : ``model_type == "spread"`` and a game has no third element.
    """
    # Fail fast: previously an unknown type surfaced as a NameError mid-loop.
    if model_type not in _MODEL_TYPES:
        raise ValueError(
            "model_type must be one of {}, got {!r}".format(_MODEL_TYPES, model_type)
        )

    games = list(games)
    if not games:
        return pd.DataFrame()

    # Built once per call and reused for every game instead of once per game.
    teams = Teams(year=year)
    team_names = {}
    if any(len(game[0]) > 3 or len(game[1]) > 3 for game in games):
        team_names = {team.name: team.abbreviation for team in teams}

    records = []
    for game in games:
        home_team = team_names[game[1]] if len(game[1]) > 3 else game[1]
        away_team = team_names[game[0]] if len(game[0]) > 3 else game[0]

        home_name, home_features = _team_features(teams, home_team, "_home")
        away_name, away_features = _team_features(teams, away_team, "_away")

        # Single-row frame: home features first, then away, without abbreviations.
        full_series = pd.concat([home_features, away_features]).drop(
            ["abbreviation_home", "abbreviation_away"]
        )
        full_df = full_series.to_frame().T
        # full_df is a fresh local frame, so the in-place fill has no outside effect.
        full_df.fillna(0, inplace=True)

        record = {"Away Team": away_name, "Home Team": home_name}
        if model_type == "reg":
            prediction = model.predict(nfl_pipe.transform(full_df))
            record["Home Spread"] = round(prediction[0], 1)
        elif model_type == "win":
            prob = model.predict_proba(win_pipe.transform(full_df))
            # Column 1 is reported as the home team, column 0 as the away team.
            record["Away Probability"] = _format_percent(prob[0][0])
            record["Home Probability"] = _format_percent(prob[0][1])
        else:  # "spread"
            # The spread pipeline receives home_spread as the first column.
            full_df.insert(0, "home_spread", game[2])
            prob = model.predict_proba(spread_pipe.transform(full_df))
            # NOTE(review): column 1 is reported as the AWAY team beating the
            # spread, the opposite orientation of the "win" branch — confirm
            # against the label definition used in training.
            record["Away Team Beat Spread"] = _format_percent(prob[0][1])
            record["Home Team Beat Spread"] = _format_percent(prob[0][0])
        records.append(record)

    # One construction from records instead of concatenating inside the loop.
    return pd.DataFrame(records)

This example run predicts, for the week 3 games of the 2020 season, how likely each team is to beat the spread.

In [25]:
# Each entry is (away team, home team, home spread), using team abbreviations.
predict_outcome(
    year=2020,
    games=[
        ("MIA", "JAX", -2.5),
        ("CHI", "ATL", -2.5),
        ("RAM", "BUF", -1.5),
        ("WAS", "CLE", -7),
        ("OTI", "MIN", 2.5),
        ("RAI", "NWE", -7),
        ("SFO", "NYG", 3),
        ("CIN", "PHI", -6),
        ("HTX", "PIT", -3.5),
        ("NYJ", "CLT", -12),
        ("CAR", "SDG", -6.5),
        ("TAM", "DEN", 5.5),
        ("DET", "CRD", -5),
        ("DAL", "SEA", -5),
        ("GNB", "NOR", -3.5),
        ("KAN", "RAV", -3.5),
    ],
    model=best_spread_model,
    model_type="spread",
)

Unnamed: 0,Away Team,Home Team,Away Team Beat Spread,Home Team Beat Spread
0,Miami Dolphins,Jacksonville Jaguars,82.6%,17.4%
1,Chicago Bears,Atlanta Falcons,65.8%,34.2%
2,Los Angeles Rams,Buffalo Bills,34.3%,65.7%
3,Washington Football Team,Cleveland Browns,37.9%,62.1%
4,Tennessee Titans,Minnesota Vikings,67.8%,32.2%
5,Las Vegas Raiders,New England Patriots,67.1%,32.9%
6,San Francisco 49ers,New York Giants,70.6%,29.4%
7,Cincinnati Bengals,Philadelphia Eagles,83.1%,16.9%
8,Houston Texans,Pittsburgh Steelers,36.3%,63.7%
9,New York Jets,Indianapolis Colts,39.2%,60.8%
