In [None]:
# Import Dependencies
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt

import plotly.express as px
import plotly.graph_objects as go

In [None]:
# Load the three CSV files written by the previous script:
# win/loss records, against-the-spread results, and over/under results
record_df, ats_df, ou_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        "resources/record.csv",
        "resources/ats.csv",
        "resources/ou.csv",
    )
)

In [None]:
# Give every source frame a 'UID' key built from the year followed by the team name
for frame in (record_df, ats_df, ou_df):
    frame['UID'] = frame['Year'].astype(str) + frame['Team'].astype(str)

# Join the three frames on that key, one merge at a time
df1 = record_df.merge(ats_df, on='UID')
gambling_df = df1.merge(ou_df, on='UID')

# Remove the suffixed duplicates produced by the merges, plus the helper key
columns_to_drop = [
    'Team_x', 'MOV_x', 'ATS_x', 'Year_x', 'UID',
    'Team_y', 'MOV_y', 'ATS_y', 'Year_y',
]
gambling_df = gambling_df.drop(columns=columns_to_drop)

# Show the first rows of the combined dataset
gambling_df.head(5)

In [None]:
# Put the columns in presentation order: identifiers first, then each
# record column next to its percentage
column_order = [
    'Team', 'Year',
    'Win-Loss Record', 'Win %',
    'ATS Record', 'Cover %',
    'Over Record', 'Over %', 'Under %', 'Total',
]
gambling_df = gambling_df[column_order]

In [None]:
# Inspect the dtype pandas assigned to each column of the merged dataset
gambling_df.dtypes

In [None]:
# Cast Year to string (note: this produces str values, not datetimes)
gambling_df['Year'] = gambling_df['Year'].astype(str)

In [None]:
# Change the percentage columns to floating points by stripping the '%' sign
columns = ['Win %', 'Cover %', 'Over %', 'Under %']

for column in columns:
    # Cast to str first so this cell can be re-run safely: once a column is
    # already numeric, the .str accessor would otherwise raise AttributeError.
    # regex=False treats '%' as a literal character on every pandas version.
    stripped = gambling_df[column].astype(str).str.replace('%', '', regex=False)
    # Values that cannot be parsed (including missing entries) become NaN
    gambling_df[column] = pd.to_numeric(stripped, errors='coerce')

In [None]:
# Break each '-'-separated record string into three new columns
split_plan = {
    'Win-Loss Record': ['Record Wins', 'Record Losses', 'Record Tie'],
    'ATS Record': ['ATS Wins', 'ATS Losses', 'ATS Tie'],
    'Over Record': ['OU Wins', 'OU Losses', 'OU Tie'],
}

for source, targets in split_plan.items():
    gambling_df[targets] = gambling_df[source].str.split('-', expand=True)

# Cast every newly created column from text to integer in one pass
columns = [name for targets in split_plan.values() for name in targets]
gambling_df = gambling_df.astype({name: int for name in columns})

In [None]:
# Re-inspect the dtypes to confirm the data cleaning operations were successful
gambling_df.dtypes

In [None]:
# Preview the first five rows of the cleaned dataset
gambling_df.head(5)

In [None]:
# Summarise per team: percentages and totals are averaged, win/loss/tie counts are summed
mean_columns = ['Win %', 'Cover %', 'Over %', 'Under %', 'Total']
sum_columns = [
    'Record Wins', 'Record Losses', 'Record Tie',
    'ATS Wins', 'ATS Losses', 'ATS Tie',
    'OU Wins', 'OU Losses', 'OU Tie',
]

# Key order is kept (means first, then sums) so the output columns appear in that order
aggregations = {
    **dict.fromkeys(mean_columns, 'mean'),
    **dict.fromkeys(sum_columns, 'sum'),
}

grouped_team = gambling_df.groupby('Team').agg(aggregations).round(2)

grouped_team

In [None]:
# User inputs
team = input('Enter Team Name (ex: Denver, Green Bay, NY Giants): ')
year_i = int(input('Starting Year: '))
year_f = int(input('Ending Year: '))

# Accept the years in either order so a reversed range does not silently
# produce an empty selection
if year_i > year_f:
    year_i, year_f = year_f, year_i

# Convert the Year column to an integer so it compares numerically
gambling_df['Year'] = gambling_df['Year'].astype(int)

# Flag whether the selection covers a single season
single_year = year_i == year_f

# Filter with boolean masks instead of interpolating the raw input into a
# query string: a team name containing a quote would break (or alter) the
# query expression. between() is inclusive on both ends, which also covers
# the single-year case.
team_mask = gambling_df['Team'] == team
year_mask = gambling_df['Year'].between(year_i, year_f)

# set_index returns a new frame, avoiding in-place mutation of a filtered result
result_df = gambling_df[team_mask & year_mask].set_index('Year')

# Tell the user when nothing matched so empty charts are not a surprise
if result_df.empty:
    print(f'No rows found for team "{team}" between {year_i} and {year_f}.')

In [None]:
# Grouped bar chart comparing Win % and Cover % for each selected year
bar_metrics = ['Win %', 'Cover %']
fig = px.bar(
    result_df,
    x=result_df.index,
    y=bar_metrics,
    barmode='group',
    title=f'{team} Gambling Metrics',
)

# Label the axes, then render the figure
fig.update_yaxes(title='Win Percentage')
fig.update_xaxes(title='Year')
fig.show()

In [None]:
####### Generate the graphs for Cover % vs Win %

# Pair the two series and discard rows where either value is missing: the
# percentage columns were parsed with errors='coerce', so NaN is possible,
# and NaN values would break the least-squares fit
pairs = result_df[['Win %', 'Cover %']].dropna()
x_data = pairs['Win %']
y_data = pairs['Cover %']

# Create a scatter plot
fig = px.scatter(x=x_data, y=y_data, title=f'{team} Gambling Metrics: Cover % vs Win %')

# A straight-line fit needs at least two distinct x values. With a single
# season (or no matching rows) np.polyfit either raises or returns a
# degenerate fit, so only the scatter is drawn in that case.
r_squared_text = 'R² = n/a'
if x_data.nunique() >= 2:
    # Calculate the linear regression coefficients
    slope, intercept = np.polyfit(x_data, y_data, 1)

    # Calculate the R-squared value
    predicted_y = slope * np.array(x_data) + intercept
    residuals = np.array(y_data) - predicted_y
    ss_residual = np.sum(residuals**2)
    ss_total = np.sum((y_data - np.mean(y_data))**2)

    # R² is undefined when every y value is identical (zero total variance)
    if ss_total > 0:
        r_squared = 1 - (ss_residual / ss_total)
        r_squared_text = f'R² = {r_squared:.3f}'

    # Create a trace for the regression line and add it to the scatter plot
    regression_line = go.Scatter(x=x_data, y=predicted_y, mode='lines', name='Linear Regression')
    fig.add_trace(regression_line)

# Annotate the plot with the R-squared value (paper coordinates: top-left corner)
annotation = go.layout.Annotation(
    text=r_squared_text,
    xref='paper', yref='paper',
    x=0.05, y=0.95,
    showarrow=False,
    font=dict(size=12)
)

# Add the annotation to the layout
fig.update_layout(annotations=[annotation])
fig.update_xaxes(title='Win %')
fig.update_yaxes(title='Cover %')

# Show the plot
fig.show()

In [None]:
####### Generate the graphs for Over % vs Win %

# Pair the two series and discard rows where either value is missing: the
# percentage columns were parsed with errors='coerce', so NaN is possible,
# and NaN values would break the least-squares fit
pairs = result_df[['Win %', 'Over %']].dropna()
x_data = pairs['Win %']
y_data = pairs['Over %']

# Create a scatter plot
fig = px.scatter(x=x_data, y=y_data, title=f'{team} Gambling Metrics: Over % vs Win %')

# A straight-line fit needs at least two distinct x values. With a single
# season (or no matching rows) np.polyfit either raises or returns a
# degenerate fit, so only the scatter is drawn in that case.
r_squared_text = 'R² = n/a'
if x_data.nunique() >= 2:
    # Calculate the linear regression coefficients
    slope, intercept = np.polyfit(x_data, y_data, 1)

    # Calculate the R-squared value
    predicted_y = slope * np.array(x_data) + intercept
    residuals = np.array(y_data) - predicted_y
    ss_residual = np.sum(residuals**2)
    ss_total = np.sum((y_data - np.mean(y_data))**2)

    # R² is undefined when every y value is identical (zero total variance)
    if ss_total > 0:
        r_squared = 1 - (ss_residual / ss_total)
        r_squared_text = f'R² = {r_squared:.3f}'

    # Create a trace for the regression line and add it to the scatter plot
    regression_line = go.Scatter(x=x_data, y=predicted_y, mode='lines', name='Linear Regression')
    fig.add_trace(regression_line)

# Annotate the plot with the R-squared value (paper coordinates: top-left corner)
annotation = go.layout.Annotation(
    text=r_squared_text,
    xref='paper', yref='paper',
    x=0.05, y=0.95,
    showarrow=False,
    font=dict(size=12)
)

# Add the annotation to the layout
fig.update_layout(annotations=[annotation])
fig.update_xaxes(title='Win %')
fig.update_yaxes(title='Over %')

# Show the plot
fig.show()

In [None]:
####### Generate the graphs for Under % vs Win %

# Pair the two series and discard rows where either value is missing: the
# percentage columns were parsed with errors='coerce', so NaN is possible,
# and NaN values would break the least-squares fit
pairs = result_df[['Win %', 'Under %']].dropna()
x_data = pairs['Win %']
y_data = pairs['Under %']

# Create a scatter plot
fig = px.scatter(x=x_data, y=y_data, title=f'{team} Gambling Metrics: Under % vs Win %')

# A straight-line fit needs at least two distinct x values. With a single
# season (or no matching rows) np.polyfit either raises or returns a
# degenerate fit, so only the scatter is drawn in that case.
r_squared_text = 'R² = n/a'
if x_data.nunique() >= 2:
    # Calculate the linear regression coefficients
    slope, intercept = np.polyfit(x_data, y_data, 1)

    # Calculate the R-squared value
    predicted_y = slope * np.array(x_data) + intercept
    residuals = np.array(y_data) - predicted_y
    ss_residual = np.sum(residuals**2)
    ss_total = np.sum((y_data - np.mean(y_data))**2)

    # R² is undefined when every y value is identical (zero total variance)
    if ss_total > 0:
        r_squared = 1 - (ss_residual / ss_total)
        r_squared_text = f'R² = {r_squared:.3f}'

    # Create a trace for the regression line and add it to the scatter plot
    regression_line = go.Scatter(x=x_data, y=predicted_y, mode='lines', name='Linear Regression')
    fig.add_trace(regression_line)

# Annotate the plot with the R-squared value (paper coordinates: top-left corner)
annotation = go.layout.Annotation(
    text=r_squared_text,
    xref='paper', yref='paper',
    x=0.05, y=0.95,
    showarrow=False,
    font=dict(size=12)
)

# Add the annotation to the layout
fig.update_layout(annotations=[annotation])
fig.update_xaxes(title='Win %')
fig.update_yaxes(title='Under %')

# Show the plot
fig.show()