# Exploratory Analysis - EDA

Dataprep.EDA
* Main Site - https://dataprep.ai/
* Docs - https://docs.dataprep.ai/user_guide/eda/introduction.html

## Table of Contents

[EDA DF](#eda_df)
* [Basic Data Overview](#basic_data_overview)  
* [General Exploration - Dataprep.EDA](#dataprep)  
* [Specific Questions:](#specific_questions)
    * [Average Point Spread Error Per Game Over Time](#apsepgot)  

[Game Records](#game_records)
* [Data Overview](#data_overview)  
* [Specific Questions:](#specific_questions_2)
    * [Model Performance Over Time](#mpot)
    * [Distributions of Model Predictions, Vegas Predictions, and Game Results](#dmpvpgp)
    * [Overall Winning vs. Losing Predictions](#owvlp)
    * [Win Percentage vs. Prediction Margin](#wpvpm)
* [Game Score and Component Score Accuracy](#gscsa)
    * [Overall](#overall)
    * [Game Score](#gamescore)
    * [ML Predictions](#ml)
    * [DL Predictions](#dl)
    * [Covers Consensus](#covers)
    * [FiveThirtyEight Raptor](#raptor)

## Imports and Global Settings

In [1]:
import sys
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import seaborn as sns
from sqlalchemy import create_engine
from datetime import datetime
from dataprep.eda import plot, plot_correlation, plot_missing, create_report
import warnings
warnings.filterwarnings('ignore')

sys.path.append('../')
from passkeys import RDS_ENDPOINT, RDS_PASSWORD

# Pandas settings: show wide/long frames in full and limit displayed float precision.
for option_name, option_value in (
    ('display.max_columns', 1000),
    ('display.max_rows', 1000),
    ('display.max_info_columns', 1000),
    ('display.precision', 5),
):
    pd.set_option(option_name, option_value)

# Graphing settings: apply seaborn's default theme.
sns.set_theme()

## Database Connection

In [2]:
# Connection parameters for the PostgreSQL database; secrets come from passkeys.
username = 'postgres'
password = RDS_PASSWORD
endpoint = RDS_ENDPOINT
database = 'nba_betting'
port = '5432'

# Pass the declared port explicitly instead of relying on the driver default.
# NOTE(review): assumes RDS_ENDPOINT is a bare hostname without a ":port" suffix — confirm.
connection = create_engine(
    f'postgresql+psycopg2://{username}:{password}@{endpoint}:{port}/{database}'
).connect()

### Datasets

In [3]:
# Read each source table in full from the open connection into a DataFrame.
game_records = pd.read_sql_table(table_name='game_records', con=connection)
combined_inbound_data = pd.read_sql_table(table_name='combined_inbound_data', con=connection)
model_training_data = pd.read_sql_table(table_name='model_training_data', con=connection)

<a id='eda_df'></a>

# EDA_DF

In [4]:
# Column dtypes and non-null counts for the raw game_records table.
game_records.info()

In [5]:
# Preview the first rows of game_records.
game_records.head()

<a id='basic_data_overview'></a>

## Basic Data Overview

In [6]:
# Full per-column listing (verbose) with non-null counts for the training data.
model_training_data.info(verbose=True, show_counts=True)

In [7]:
# Summary statistics for the training data columns.
model_training_data.describe()

In [8]:
# Preview the first 100 rows of the training data.
model_training_data.head(100)

In [9]:
# Target columns come first in the selection, followed by the core predictor columns.
targets = ['CLS_TARGET_home_margin_GT_home_spread', 'REG_TARGET_actual_home_margin']
main_features = [
    "home_team_num", "away_team_num",
    "home_spread",
    "league_year_end", "day_of_season",
    "elo1_pre", "elo2_pre",
    "elo_prob1", "elo_prob2",
]
selected_columns = targets + main_features
main_features_df = model_training_data[selected_columns]

In [10]:
# Column dtypes and non-null counts for the selected targets and features.
main_features_df.info()

<a id='dataprep'></a>

# DataPrep Report

In [11]:
# Build the DataPrep EDA report for the selected columns and open it in the browser.
dataprep_report = create_report(main_features_df)
dataprep_report.show_browser()

<a id=specific_questions></a>

## Specific Questions

<a id=apsepgot></a>

### Average Point Spread Error Per Game Over Time

In [12]:
# Work on a copy so columns added to eda_df are not added to game_records.
eda_df = game_records.copy()

In [13]:
# Calendar year of each game date; the .dt accessor requires a datetime-typed 'date' column.
eda_df['year'] = eda_df['date'].dt.year

In [14]:
# Vegas-predicted home margin is the negated home line.
# If negative, the home team is predicted to lose.
eda_df['vegas_predicted_home_margin_of_victory'] = -eda_df['home_line']

In [15]:
# Expose each model's regression prediction under a name parallel to the Vegas column.
for model_prefix in ('ml', 'dl'):
    eda_df[f'{model_prefix}_predicted_home_margin_of_victory'] = eda_df[f'{model_prefix}_reg_prediction']

In [16]:
# game_result is used directly as the actual home margin of victory.
eda_df['actual_home_margin_of_victory'] = eda_df['game_result']

In [17]:
prediction_sources = ('vegas', 'ml', 'dl')

# Signed error per source: predicted home margin minus actual home margin.
for source in prediction_sources:
    eda_df[f'{source}_diff'] = (
        eda_df[f'{source}_predicted_home_margin_of_victory'] - eda_df['actual_home_margin_of_victory']
    )

# Classify each signed error as 'Low' (diff <= 0) or 'High' (diff > 0).
# An exact hit (diff == 0) falls in the 'Low' bin; there is no separate 'exact' label.
for source in prediction_sources:
    eda_df[f'{source}_result'] = pd.cut(
        eda_df[f'{source}_diff'],
        [-float('inf'), 0, float('inf')],
        labels=['Low', 'High'],
        include_lowest=True,
    )

# Absolute error per source.
for source in prediction_sources:
    eda_df[f'{source}_abs_diff'] = eda_df[f'{source}_diff'].abs()

In [18]:
# Preview the first 50 rows, including the derived columns.
eda_df.head(50)

In [19]:
# Overall mean absolute error per source, rounded to two decimals.
vegas_mean_abs_diff, ml_mean_abs_diff, dl_mean_abs_diff = (
    round(eda_df[f'{source}_abs_diff'].mean(), 2)
    for source in ('vegas', 'ml', 'dl')
)

In [20]:
# Report the overall mean absolute error of each source.
print("Mean Absolute Differences:")
for source_label, mean_abs_diff in (
    ("Vegas", vegas_mean_abs_diff),
    ("ML", ml_mean_abs_diff),
    ("DL", dl_mean_abs_diff),
):
    print(f"{source_label}: {mean_abs_diff}")

In [21]:
# Per-year mean of each absolute-error column, rounded to two decimals.
abs_diff_aggregations = {
    'vegas_abs_diff': 'mean',
    'ml_abs_diff': 'mean',
    'dl_abs_diff': 'mean',
}
yearly_mean_abs_diff = eda_df.groupby('year').agg(abs_diff_aggregations)
grouped_df = yearly_mean_abs_diff.abs().round(2)

In [22]:
# Report the per-year table under its heading.
print("Mean Absolute Differences Grouped by Year:", grouped_df, sep="\n")

In [23]:
# create line plot of grouped mean absolute differences

def spread_miss_graph(vegas_only=True, save=False, image_name=None):
    """Plot the per-year average absolute point-spread error.

    Reads the notebook-level ``grouped_df`` (per-year mean absolute errors,
    indexed by year) and the overall ``vegas_mean_abs_diff``,
    ``ml_mean_abs_diff`` and ``dl_mean_abs_diff`` values.

    Args:
        vegas_only: If True, draw only the Vegas line with its overall-average
            reference line and a text label. If False, also draw the ML and DL
            lines plus one overall-average reference line per source.
        save: If True, write the figure to ``../images/{image_name}.png``.
        image_name: File name (without extension) used when ``save`` is True.

    Raises:
        ValueError: If ``save`` is True but ``image_name`` is None or empty.
    """
    # Fail before drawing rather than silently writing '../images/None.png'.
    if save and not image_name:
        raise ValueError("image_name is required when save=True")

    fig, ax = plt.subplots(figsize=(12,8))
    ax.set_title('Average Point Spread Error Per Game', fontsize=24, pad=16, fontweight='bold')
    ax.set_xlabel('Year', fontsize=18, labelpad=8, fontweight='bold')
    ax.set_ylabel('Spread Error (Points)', fontsize=18, labelpad=8, fontweight='bold')
    ax.set_ylim(top=11, bottom=8)

    sns.lineplot(x=grouped_df.index, y='vegas_abs_diff', data=grouped_df, ax=ax,
                 linewidth=4, color='#17408B', label='Vegas')

    if vegas_only:
        # Single reference line, labelled in-plot instead of in the legend.
        # The label's x position (2014.01) is a fixed data coordinate.
        ax.axhline(vegas_mean_abs_diff, color='#C9082A', linestyle='--', linewidth=2)
        ax.text(x=2014.01, y=vegas_mean_abs_diff + 0.05, s=f'Overall Average: {vegas_mean_abs_diff} PPG', color='#C9082A', fontsize=16, fontweight='bold')
    else:
        sns.lineplot(x=grouped_df.index, y='ml_abs_diff', data=grouped_df, ax=ax,
                     linewidth=4, color='#C9082A', label='ML')
        sns.lineplot(x=grouped_df.index, y='dl_abs_diff', data=grouped_df, ax=ax,
                     linewidth=4, color='#00A6D6', label='DL')

        # One dashed overall-average line per source, colored like its yearly line.
        ax.axhline(vegas_mean_abs_diff, color='#17408B', linestyle='--', linewidth=2,
                   label=f'Vegas (Overall Avg: {vegas_mean_abs_diff})')
        ax.axhline(ml_mean_abs_diff, color='#C9082A', linestyle='--', linewidth=2,
                   label=f'Machine Learning (Overall Avg: {ml_mean_abs_diff})')
        ax.axhline(dl_mean_abs_diff, color='#00A6D6', linestyle='--', linewidth=2,
                   label=f'Deep Learning (Overall Avg: {dl_mean_abs_diff})')

    # One tick per year present in the grouped data.
    plt.xticks(grouped_df.index, fontsize=16)
    plt.yticks(fontsize=16)
    plt.tight_layout()
    plt.legend(fontsize=16)

    if save:
        plt.savefig(f'../images/{image_name}.png', dpi=300, bbox_inches='tight')

In [24]:
# Draw the Vegas-only version; save=False, so image_name is not used here.
spread_miss_graph(vegas_only=True, save=False, image_name="spread_miss_graph")

<a id=game_records></a>

# Game Records

## Setting Up Features, Data Manipulation

In [25]:
# Vegas-predicted home margin of victory: the home line with its sign flipped.
game_records['vegas_hv_pred'] = -game_records['home_line']

In [26]:
# Model-predicted home margins are the regression predictions under a shorter name.
for model_prefix in ('ml', 'dl'):
    game_records[f'{model_prefix}_hv_pred'] = game_records[f'{model_prefix}_reg_prediction']

In [27]:
# Absolute miss of the Vegas prediction, computed column-wise instead of row by row.
game_records['vegas_miss'] = (game_records['game_result'] - game_records['vegas_hv_pred']).abs()

In [28]:
# Absolute miss of each model prediction, computed column-wise instead of row by row.
game_records['model_miss_ml'] = (game_records['game_result'] - game_records['ml_hv_pred']).abs()
game_records['model_miss_dl'] = (game_records['game_result'] - game_records['dl_hv_pred']).abs()

In [29]:
# Label which prediction was closer to the actual result for each game.
# 'vegas' only when its miss is strictly smaller; ties, and rows where either
# miss is NaN, are labelled as the model.
game_records['ml_model_v_vegas'] = (
    game_records['vegas_miss'].lt(game_records['model_miss_ml']).map({True: 'vegas', False: 'ml_model'})
)
game_records['dl_model_v_vegas'] = (
    game_records['vegas_miss'].lt(game_records['model_miss_dl']).map({True: 'vegas', False: 'dl_model'})
)

In [30]:
# Signed gap between each model's predicted home margin and the Vegas prediction.
for model_prefix in ('ml', 'dl'):
    game_records[f'{model_prefix}_pred_line_margin'] = (
        game_records[f'{model_prefix}_hv_pred'] - game_records['vegas_hv_pred']
    )

In [31]:
def _direction_from_score(score):
    """Return 'Home' for a score >= 50, 'Away' below 50, and pass nulls through unchanged."""
    if pd.isnull(score):
        return score
    return 'Home' if score >= 50 else 'Away'


# Actual side relative to the Vegas prediction; an exact match counts as 'Home'.
game_records['game_result_direction'] = (
    game_records['game_result'].ge(game_records['vegas_hv_pred']).map({True: 'Home', False: 'Away'})
)

# Predicted side per source. Missing scores stay null for every source, so a
# missing ML/DL score is no longer treated as an 'Away' pick.
for source in ('covers', 'raptor', 'ml', 'dl'):
    game_records[f'{source}_pred_direction'] = game_records[f'{source}_home_score'].apply(_direction_from_score)

<a id=data_overview></a>

## Data Overview

In [32]:
# Column dtypes and non-null counts, now including the derived columns.
game_records.info()

In [33]:
# Summary statistics for game_records.
game_records.describe()

In [34]:
# Preview the first 50 rows of game_records.
game_records.head(50)

In [35]:
# plot(game_records)

<a id=specific_questions_2></a>

## Specific Questions

#### Future Questions
* Does Winning Percentage increase as Game Score increases?

<a id=mpot></a>

### Model Performance Over Time

#### Overall Prediction Miss Average

In [36]:
# Overall mean absolute miss per predictor, rounded to two decimals.
vegas_miss_avg, ml_model_miss_avg, dl_model_miss_avg = (
    round(game_records[miss_column].mean(), 2)
    for miss_column in ('vegas_miss', 'model_miss_ml', 'model_miss_dl')
)

In [37]:
# Report the overall miss average of each predictor.
for predictor_label, miss_average in (
    ('Vegas', vegas_miss_avg),
    ('ML Model', ml_model_miss_avg),
    ('DL Model', dl_model_miss_avg),
):
    print(f'{predictor_label} Miss Average: {miss_average}')

#### Prediction Miss Average by Year

In [38]:
# Per-calendar-year mean of every numeric column. numeric_only=True is required
# because game_records also holds string columns, which cannot be averaged.
date_df = game_records.groupby(game_records.date.dt.year).mean(numeric_only=True)

In [39]:
# Yearly average miss for Vegas and both models.
date_df[['vegas_miss', 'model_miss_ml', 'model_miss_dl']]

In [40]:
# One line per predictor: (column in date_df, line color, legend label).
miss_lines = (
    ('vegas_miss', 'r', 'Vegas'),
    ('model_miss_ml', 'g', 'ML Model'),
    ('model_miss_dl', 'orange', 'DL Model'),
)

fig, ax = plt.subplots(figsize=(12, 8))
for miss_column, line_color, legend_label in miss_lines:
    sns.lineplot(data=date_df, x='date', y=miss_column, ax=ax, color=line_color, linewidth=4)

ax.set_title('Average Prediction Miss Amount Over Time', fontsize=24, pad=16, fontweight='bold')
ax.set_xlabel('Year', fontsize=18, labelpad=8, fontweight='bold')
ax.set_ylabel('Prediction Miss (Points)', fontsize=18, labelpad=8, fontweight='bold')

# The legend is built by hand from color patches matching the lines above.
legend_handles = [
    mpatches.Patch(color=line_color, label=legend_label)
    for miss_column, line_color, legend_label in miss_lines
]
ax.legend(handles=legend_handles, fontsize='large')

plt.xticks(fontsize=16)
plt.yticks(fontsize=16)
plt.tight_layout()

In [41]:
# Un-aggregated groupby object keyed by the calendar year of each game date.
date_df_2 = game_records.groupby([game_records.date.dt.year])

In [42]:
# Share of games per year in which the model was labelled closer than Vegas.
# The result is renamed explicitly because the Series name produced by
# value_counts(normalize=True) differs between pandas versions, and the later
# plot selects these columns by name.
ml_win_pct_by_year = (
    date_df_2['ml_model_v_vegas']
    .value_counts(normalize=True)
    .xs('ml_model', level=1)
    .rename('ml_model_v_vegas')
)
dl_win_pct_by_year = (
    date_df_2['dl_model_v_vegas']
    .value_counts(normalize=True)
    .xs('dl_model', level=1)
    .rename('dl_model_v_vegas')
)

In [43]:
# Place the ML and DL yearly shares side by side and turn the year index into a column.
model_win_pct_by_year = pd.concat([ml_win_pct_by_year, dl_win_pct_by_year], axis=1).reset_index()

In [44]:
# Display the combined yearly table.
model_win_pct_by_year

In [45]:
# One line per model: (column in model_win_pct_by_year, line color, legend label).
win_pct_lines = (
    ('ml_model_v_vegas', 'g', 'ML Model'),
    ('dl_model_v_vegas', 'orange', 'DL Model'),
)

fig, ax = plt.subplots(figsize=(12, 8))
for share_column, line_color, legend_label in win_pct_lines:
    sns.lineplot(data=model_win_pct_by_year, x='date', y=share_column,
                 ax=ax, color=line_color, linewidth=4)

ax.set_title('Average Win Probability Over Time', fontsize=24, pad=16, fontweight='bold')
ax.set_xlabel('Year', fontsize=18, labelpad=8, fontweight='bold')
ax.set_ylabel('Win Probability', fontsize=18, labelpad=8, fontweight='bold')

# Hand-built legend with one color patch per model.
legend_handles = [
    mpatches.Patch(color=line_color, label=legend_label)
    for share_column, line_color, legend_label in win_pct_lines
]
ax.legend(handles=legend_handles, fontsize='large')

plt.xticks(fontsize=16)
plt.yticks(fontsize=16)
plt.tight_layout()

<a id=dmpvpgp></a>

### Distributions of Model Predictions, Vegas Predictions, and Game Results 

In [46]:
# One density curve per series: (column, color, filled?, legend label).
# Only the actual result is drawn filled; the predictions are outlines.
density_curves = (
    ('game_result', 'b', True, 'Actual Result'),
    ('vegas_hv_pred', 'r', False, 'Vegas'),
    ('ml_hv_pred', 'g', False, 'ML Model'),
    ('dl_hv_pred', 'orange', False, 'DL Model'),
)

fig, ax = plt.subplots(figsize=(12,8))
for margin_column, curve_color, is_filled, legend_label in density_curves:
    sns.kdeplot(data=game_records, x=margin_column, ax=ax, color=curve_color, fill=is_filled)

ax.set_title('Distribution of Home Team Win Margin', fontsize=24, pad=16, fontweight='bold')
ax.set_xlabel('Home Win Margin', fontsize=18, labelpad=8, fontweight='bold')
ax.set_ylabel('Density', fontsize=18, labelpad=8, fontweight='bold')

# Hand-built legend with one color patch per curve.
legend_handles = [
    mpatches.Patch(color=curve_color, label=legend_label)
    for margin_column, curve_color, is_filled, legend_label in density_curves
]
ax.legend(handles=legend_handles, fontsize='large')

plt.xticks(fontsize=16)
plt.yticks(fontsize=16)
plt.tight_layout()

<a id='owvlp'></a>

### Overall Winning vs. Losing Predictions
A bettor must win 52.4% of bets to break even against the standard -110 vig.

In [47]:
# Number of games in which each side (ML model vs. Vegas) was labelled closer.
game_records['ml_model_v_vegas'].value_counts(normalize=False)

In [48]:
# Same comparison as a share of all games.
game_records['ml_model_v_vegas'].value_counts(normalize=True)

In [49]:
# Number of games in which each side (DL model vs. Vegas) was labelled closer.
game_records['dl_model_v_vegas'].value_counts(normalize=False)

In [50]:
# Same comparison as a share of all games.
game_records['dl_model_v_vegas'].value_counts(normalize=True)

<a id='wpvpm'></a>

### Win Percentage vs. Prediction Margin

In [51]:
# Fix the color of each hue level so the hand-built legend below always matches
# the bars, regardless of the order in which the labels appear in the data.
ml_hue_colors = {'ml_model': '#17408B', 'vegas': '#C9082A'}

fig, ax = plt.subplots(figsize=(12,8))
# NOTE(review): the bins start at 0, so negative ml_pred_line_margin values fall
# outside every bin — confirm whether the absolute margin was intended.
sns.histplot(data=game_records, x='ml_pred_line_margin',
             hue='ml_model_v_vegas', hue_order=list(ml_hue_colors),
             kde=True, ax=ax, bins=[0,2,4,6,8,10,15,20],
             palette=ml_hue_colors, multiple='dodge')

ax.set_title('Distribution of Bet Wins by Prediction Margin', fontsize=24, pad=16, fontweight='bold')
ax.set_xlabel('Prediction Margin', fontsize=18, labelpad=8, fontweight='bold')
ax.set_ylabel('Count', fontsize=18, labelpad=8, fontweight='bold')

# Legend patches reuse the exact bar colors.
blue_patch = mpatches.Patch(color=ml_hue_colors['ml_model'], label='ML Model')
red_patch = mpatches.Patch(color=ml_hue_colors['vegas'], label='Vegas')
ax.legend(handles=[blue_patch, red_patch], fontsize='large')

plt.xticks(fontsize=16)
plt.yticks(fontsize=16)
plt.tight_layout()

In [52]:
# Fix the color of each hue level so the hand-built legend below always matches
# the bars, regardless of the order in which the labels appear in the data.
dl_hue_colors = {'dl_model': '#17408B', 'vegas': '#C9082A'}

fig, ax = plt.subplots(figsize=(12,8))
# NOTE(review): the bins start at 0, so negative dl_pred_line_margin values fall
# outside every bin — confirm whether the absolute margin was intended.
sns.histplot(data=game_records, x='dl_pred_line_margin',
             hue='dl_model_v_vegas', hue_order=list(dl_hue_colors),
             kde=True, ax=ax, bins=[0,2,4,6,8,10,15,20],
             palette=dl_hue_colors, multiple='dodge')

ax.set_title('Distribution of Bet Wins by Prediction Margin', fontsize=24, pad=16, fontweight='bold')
ax.set_xlabel('Prediction Margin', fontsize=18, labelpad=8, fontweight='bold')
ax.set_ylabel('Count', fontsize=18, labelpad=8, fontweight='bold')

# Legend patches reuse the exact bar colors.
blue_patch = mpatches.Patch(color=dl_hue_colors['dl_model'], label='DL Model')
red_patch = mpatches.Patch(color=dl_hue_colors['vegas'], label='Vegas')
ax.legend(handles=[blue_patch, red_patch], fontsize='large')

plt.xticks(fontsize=16)
plt.yticks(fontsize=16)
plt.tight_layout()

<a id=gscsa></a>

## Game Score and Component Score Accuracy

In [53]:
# Output flag column -> the predicted-direction column it scores.
correctness_columns = {
    'game_score_is_correct': 'game_score_direction',
    'ml_pred_is_correct': 'ml_pred_direction',
    'dl_pred_is_correct': 'dl_pred_direction',
    'covers_is_correct': 'covers_pred_direction',
    'raptor_is_correct': 'raptor_pred_direction',
}

actual_direction = game_records['game_result_direction']
for flag_column, direction_column in correctness_columns.items():
    predicted_direction = game_records[direction_column]
    # 1.0 when the predicted side matches the actual side, else 0.0. Every flag
    # is numeric so that means and grouped means work the same for all sources.
    matches_actual = predicted_direction.eq(actual_direction).astype(float)
    # Missing predictions stay NaN so they are left out of accuracy means.
    game_records[flag_column] = matches_actual.where(predicted_direction.notna())

In [54]:
# Date restrictions: split the games at the 2022-10-01 cutoff.
# The cutoff date itself belongs to the current-year subset, so every row lands
# in exactly one of the two frames.
season_cutoff = datetime.strptime('20221001', "%Y%m%d")
previous_years_only_df = game_records[game_records['date'] < season_cutoff]
this_year_only_df = game_records[game_records['date'] >= season_cutoff]

<a id=overall></a>

### Overall Accuracy

In [55]:
# Correctness flag columns, in the order the accuracy variables are unpacked below.
accuracy_flag_columns = (
    'game_score_is_correct',
    'ml_pred_is_correct',
    'dl_pred_is_correct',
    'covers_is_correct',
    'raptor_is_correct',
)


def _mean_accuracies(frame):
    """Return the mean of each correctness flag column of ``frame``, in column order."""
    return [frame[flag_column].mean() for flag_column in accuracy_flag_columns]


# All games.
(game_score_accuracy, ml_pred_accuracy, dl_pred_accuracy,
 covers_accuracy, raptor_accuracy) = _mean_accuracies(game_records)

# Games before the cutoff date (suffix _py).
(game_score_accuracy_py, ml_pred_accuracy_py, dl_pred_accuracy_py,
 covers_accuracy_py, raptor_accuracy_py) = _mean_accuracies(previous_years_only_df)

# Games in the current-year subset (suffix _ty).
(game_score_accuracy_ty, ml_pred_accuracy_ty, dl_pred_accuracy_ty,
 covers_accuracy_ty, raptor_accuracy_ty) = _mean_accuracies(this_year_only_df)

In [56]:
# One heading and one summary line per source: (heading, overall, previous years, this year).
accuracy_summary = (
    ('Game Score Accuracy', game_score_accuracy, game_score_accuracy_py, game_score_accuracy_ty),
    ('ML Pred Accuracy', ml_pred_accuracy, ml_pred_accuracy_py, ml_pred_accuracy_ty),
    ('DL Pred Accuracy', dl_pred_accuracy, dl_pred_accuracy_py, dl_pred_accuracy_ty),
    ('Covers Accuracy', covers_accuracy, covers_accuracy_py, covers_accuracy_ty),
    ('Raptor Accuracy', raptor_accuracy, raptor_accuracy_py, raptor_accuracy_ty),
)
for heading, overall, previous_years, this_year in accuracy_summary:
    print(heading)
    print(f'Overall: {round(overall * 100, 2)}% - Previous Years: {round(previous_years * 100, 2)}% - This Year: {round(this_year * 100, 2)}%')

In [57]:
# Fixed reference bars, already in percent.
baseline_percentages = {'Random Guess': 50.0, 'Profitable': 52.4}
# Current-year accuracies as fractions; converted to rounded percentages below.
this_year_accuracies = {
    'Game Score': game_score_accuracy_ty,
    'ML Model': ml_pred_accuracy_ty,
    'DL Model': dl_pred_accuracy_ty,
    'Covers': covers_accuracy_ty,
    'Raptor': raptor_accuracy_ty,
}

x_data = [*baseline_percentages, *this_year_accuracies]
y_data = [
    *baseline_percentages.values(),
    *(round(accuracy * 100, 2) for accuracy in this_year_accuracies.values()),
]

In [58]:
# One bar color per entry of x_data, in order.
bar_colors = ('gray', 'gray', 'green', '#17408B', '#C9082A','yellow', 'purple')

fig, ax = plt.subplots(figsize=(12,4))
ax = sns.barplot(x=x_data, y=y_data, palette=bar_colors)

ax.set_title('Prediction Accuracy', fontsize=24, pad=16, fontweight='bold')
ax.set_ylabel('Accuracy %', fontsize=18, labelpad=8, fontweight='bold')

# Write each bar's height inside the bar, 20 points below its top edge.
for bar in ax.patches:
    bar_height = bar.get_height()
    bar_center_x = bar.get_x() + bar.get_width() / 2.
    ax.annotate(f'{bar_height}', (bar_center_x, bar_height),
                ha='center', va='center', fontsize=20, fontweight='bold', color='white',
                xytext=(0, -20), textcoords='offset points')

plt.xticks(fontsize=16)
plt.yticks(fontsize=16)
plt.tight_layout()

# Saving is disabled; uncomment the savefig line to write the image.
image_name = 'prediction_accuracy'
# plt.savefig(f'../images/{image_name}.png', dpi=300, bbox_inches='tight')

<a id=gamescore></a>

### Game Score Accuracy

In [59]:
# Bin edges 0, 5, ..., 100. The upper edge is 100 so that scores in (95, 100]
# are binned too instead of being dropped.
# NOTE(review): assumes game_score is on a 0-100 scale — confirm.
game_score_bins = list(range(0, 101, 5))

# Mean correctness and row count per score bin, for all games, games before the
# cutoff, and current-year games. observed=False keeps empty bins in the tables.
(overall_game_score_accuracy,
 previous_years_game_score_accuracy,
 this_year_game_score_accuracy) = (
    frame.groupby(pd.cut(frame['game_score'], bins=game_score_bins), observed=False)['game_score_is_correct']
         .agg(['mean', 'size'])
         .reset_index()
    for frame in (game_records, previous_years_only_df, this_year_only_df)
)

In [60]:
# Add each bin's upper edge, used as the x coordinate when plotting.
for accuracy_table in (overall_game_score_accuracy,
                       previous_years_game_score_accuracy,
                       this_year_game_score_accuracy):
    accuracy_table['bin_high'] = accuracy_table['game_score'].apply(lambda interval: interval.right)

In [61]:
# Print the three tables separated by blank lines.
print(overall_game_score_accuracy, previous_years_game_score_accuracy, this_year_game_score_accuracy,
      sep=' \n\n ')

In [62]:
# One line per table: (accuracy table, line color, legend label).
accuracy_lines = (
    (overall_game_score_accuracy, 'r', 'Overall'),
    (this_year_game_score_accuracy, 'b', 'Current Year'),
)

fig, ax = plt.subplots(figsize=(12, 8))
for accuracy_table, line_color, legend_label in accuracy_lines:
    sns.lineplot(data=accuracy_table, x='bin_high', y='mean', ax=ax, color=line_color, linewidth=4)

ax.set_title('Overall Win Percentage by Game Score', fontsize=24, pad=16, fontweight='bold')
ax.set_xlabel('Game Score', fontsize=18, labelpad=8, fontweight='bold')
ax.set_ylabel('Win Percentage', fontsize=18, labelpad=8, fontweight='bold')

# Hand-built legend with one color patch per line.
legend_handles = [
    mpatches.Patch(color=line_color, label=legend_label)
    for accuracy_table, line_color, legend_label in accuracy_lines
]
ax.legend(handles=legend_handles, fontsize='large')

plt.xticks(fontsize=16)
plt.yticks(fontsize=16)
plt.tight_layout()

<a id=ml></a>

### ML Prediction Accuracy

In [63]:
# Bin edges 0, 5, ..., 100. The upper edge is 100 so that scores in (95, 100]
# are binned too instead of being dropped.
# NOTE(review): assumes ml_home_score is on a 0-100 scale — confirm.
ml_score_bins = list(range(0, 101, 5))

# Mean correctness and row count per score bin, for all games, games before the
# cutoff, and current-year games. observed=False keeps empty bins in the tables.
(overall_ml_pred_accuracy,
 previous_years_ml_pred_accuracy,
 this_year_ml_pred_accuracy) = (
    frame.groupby(pd.cut(frame['ml_home_score'], bins=ml_score_bins), observed=False)['ml_pred_is_correct']
         .agg(['mean', 'size'])
         .reset_index()
    for frame in (game_records, previous_years_only_df, this_year_only_df)
)

In [64]:
# Add each bin's upper edge, used as the x coordinate when plotting.
for accuracy_table in (overall_ml_pred_accuracy,
                       previous_years_ml_pred_accuracy,
                       this_year_ml_pred_accuracy):
    accuracy_table['bin_high'] = accuracy_table['ml_home_score'].apply(lambda interval: interval.right)

In [65]:
# Print the three tables separated by blank lines.
print(overall_ml_pred_accuracy, previous_years_ml_pred_accuracy, this_year_ml_pred_accuracy,
      sep=' \n\n ')

In [66]:
# One line per table: (accuracy table, line color, legend label).
accuracy_lines = (
    (overall_ml_pred_accuracy, 'r', 'Overall'),
    (this_year_ml_pred_accuracy, 'b', 'Current Year'),
)

fig, ax = plt.subplots(figsize=(12, 8))
for accuracy_table, line_color, legend_label in accuracy_lines:
    sns.lineplot(data=accuracy_table, x='bin_high', y='mean', ax=ax, color=line_color, linewidth=4)

ax.set_title('Overall Win Percentage by ML Home Score', fontsize=24, pad=16, fontweight='bold')
ax.set_xlabel('ML Home Score', fontsize=18, labelpad=8, fontweight='bold')
ax.set_ylabel('Win Percentage', fontsize=18, labelpad=8, fontweight='bold')

# Hand-built legend with one color patch per line.
legend_handles = [
    mpatches.Patch(color=line_color, label=legend_label)
    for accuracy_table, line_color, legend_label in accuracy_lines
]
ax.legend(handles=legend_handles, fontsize='large')

plt.xticks(fontsize=16)
plt.yticks(fontsize=16)
plt.tight_layout()

<a id=dl></a>

### DL Prediction Accuracy

In [67]:
# Bin edges 0, 5, ..., 100. The upper edge is 100 so that scores in (95, 100]
# are binned too instead of being dropped.
# NOTE(review): assumes dl_home_score is on a 0-100 scale — confirm.
dl_score_bins = list(range(0, 101, 5))

# Mean correctness and row count per score bin, for all games, games before the
# cutoff, and current-year games. observed=False keeps empty bins in the tables.
(overall_dl_pred_accuracy,
 previous_years_dl_pred_accuracy,
 this_year_dl_pred_accuracy) = (
    frame.groupby(pd.cut(frame['dl_home_score'], bins=dl_score_bins), observed=False)['dl_pred_is_correct']
         .agg(['mean', 'size'])
         .reset_index()
    for frame in (game_records, previous_years_only_df, this_year_only_df)
)

In [68]:
# Add each bin's upper edge, used as the x coordinate when plotting.
for accuracy_table in (overall_dl_pred_accuracy,
                       previous_years_dl_pred_accuracy,
                       this_year_dl_pred_accuracy):
    accuracy_table['bin_high'] = accuracy_table['dl_home_score'].apply(lambda interval: interval.right)

In [69]:
# Print the three tables separated by blank lines.
print(overall_dl_pred_accuracy, previous_years_dl_pred_accuracy, this_year_dl_pred_accuracy,
      sep=' \n\n ')

In [70]:
# One line per table: (accuracy table, line color, legend label).
accuracy_lines = (
    (overall_dl_pred_accuracy, 'r', 'Overall'),
    (this_year_dl_pred_accuracy, 'b', 'Current Year'),
)

fig, ax = plt.subplots(figsize=(12, 8))
for accuracy_table, line_color, legend_label in accuracy_lines:
    sns.lineplot(data=accuracy_table, x='bin_high', y='mean', ax=ax, color=line_color, linewidth=4)

ax.set_title('Overall Win Percentage by DL Home Score', fontsize=24, pad=16, fontweight='bold')
ax.set_xlabel('DL Home Score', fontsize=18, labelpad=8, fontweight='bold')
ax.set_ylabel('Win Percentage', fontsize=18, labelpad=8, fontweight='bold')

# Hand-built legend with one color patch per line.
legend_handles = [
    mpatches.Patch(color=line_color, label=legend_label)
    for accuracy_table, line_color, legend_label in accuracy_lines
]
ax.legend(handles=legend_handles, fontsize='large')

plt.xticks(fontsize=16)
plt.yticks(fontsize=16)
plt.tight_layout()

<a id=covers></a>

### Covers Accuracy

In [71]:
# Bin edges 0, 5, ..., 100. The upper edge is 100 so that scores in (95, 100]
# are binned too instead of being dropped.
# NOTE(review): assumes covers_home_score is on a 0-100 scale — confirm.
covers_score_bins = list(range(0, 101, 5))

# Mean correctness and row count per score bin, for all games, games before the
# cutoff, and current-year games. observed=False keeps empty bins in the tables.
(overall_covers_accuracy,
 previous_years_covers_accuracy,
 this_year_covers_accuracy) = (
    frame.groupby(pd.cut(frame['covers_home_score'], bins=covers_score_bins), observed=False)['covers_is_correct']
         .agg(['mean', 'size'])
         .reset_index()
    for frame in (game_records, previous_years_only_df, this_year_only_df)
)

In [72]:
# Add each bin's upper edge, used as the x coordinate when plotting.
for accuracy_table in (overall_covers_accuracy,
                       previous_years_covers_accuracy,
                       this_year_covers_accuracy):
    accuracy_table['bin_high'] = accuracy_table['covers_home_score'].apply(lambda interval: interval.right)

In [73]:
# Print the three tables separated by blank lines.
print(overall_covers_accuracy, previous_years_covers_accuracy, this_year_covers_accuracy,
      sep=' \n\n ')

In [74]:
# One line per table: (accuracy table, line color, legend label).
accuracy_lines = (
    (overall_covers_accuracy, 'r', 'Overall'),
    (this_year_covers_accuracy, 'b', 'Current Year'),
)

fig, ax = plt.subplots(figsize=(12, 8))
for accuracy_table, line_color, legend_label in accuracy_lines:
    sns.lineplot(data=accuracy_table, x='bin_high', y='mean', ax=ax, color=line_color, linewidth=4)

ax.set_title('Overall Win Percentage by Covers Home Score', fontsize=24, pad=16, fontweight='bold')
ax.set_xlabel('Covers Home Score', fontsize=18, labelpad=8, fontweight='bold')
ax.set_ylabel('Win Percentage', fontsize=18, labelpad=8, fontweight='bold')

# Hand-built legend with one color patch per line.
legend_handles = [
    mpatches.Patch(color=line_color, label=legend_label)
    for accuracy_table, line_color, legend_label in accuracy_lines
]
ax.legend(handles=legend_handles, fontsize='large')

plt.xticks(fontsize=16)
plt.yticks(fontsize=16)
plt.tight_layout()

<a id=raptor></a>

### Raptor Accuracy

In [75]:
# Bin edges 0, 5, ..., 100. The upper edge is 100 so that scores in (95, 100]
# are binned too instead of being dropped.
# NOTE(review): assumes raptor_home_score is on a 0-100 scale — confirm.
raptor_score_bins = list(range(0, 101, 5))

# Mean correctness and row count per score bin, for all games, games before the
# cutoff, and current-year games. observed=False keeps empty bins in the tables.
(overall_raptor_accuracy,
 previous_years_raptor_accuracy,
 this_year_raptor_accuracy) = (
    frame.groupby(pd.cut(frame['raptor_home_score'], bins=raptor_score_bins), observed=False)['raptor_is_correct']
         .agg(['mean', 'size'])
         .reset_index()
    for frame in (game_records, previous_years_only_df, this_year_only_df)
)

In [76]:
# Add each bin's upper edge, used as the x coordinate when plotting.
for accuracy_table in (overall_raptor_accuracy,
                       previous_years_raptor_accuracy,
                       this_year_raptor_accuracy):
    accuracy_table['bin_high'] = accuracy_table['raptor_home_score'].apply(lambda interval: interval.right)

In [77]:
# Print the three tables separated by blank lines.
print(overall_raptor_accuracy, previous_years_raptor_accuracy, this_year_raptor_accuracy,
      sep=' \n\n ')

In [78]:
# One line per table: (accuracy table, line color, legend label).
accuracy_lines = (
    (overall_raptor_accuracy, 'r', 'Overall'),
    (this_year_raptor_accuracy, 'b', 'Current Year'),
)

fig, ax = plt.subplots(figsize=(12, 8))
for accuracy_table, line_color, legend_label in accuracy_lines:
    sns.lineplot(data=accuracy_table, x='bin_high', y='mean', ax=ax, color=line_color, linewidth=4)

ax.set_title('Overall Win Percentage by Raptor Home Score', fontsize=24, pad=16, fontweight='bold')
ax.set_xlabel('Raptor Home Score', fontsize=18, labelpad=8, fontweight='bold')
ax.set_ylabel('Win Percentage', fontsize=18, labelpad=8, fontweight='bold')

# Hand-built legend with one color patch per line.
legend_handles = [
    mpatches.Patch(color=line_color, label=legend_label)
    for accuracy_table, line_color, legend_label in accuracy_lines
]
ax.legend(handles=legend_handles, fontsize='large')

plt.xticks(fontsize=16)
plt.yticks(fontsize=16)
plt.tight_layout()