# NFL Player Data Analysis
### By: Justin Pearson

In [1]:
# Dependencies
import nfl_data_py as nfl
import stat_collection as stats

# Basics / visualizations
import pandas as pd
from matplotlib import pyplot as plt
import numpy as np
import seaborn as sns

# ML
from pycaret.regression import *


# Show every column when a DataFrame is displayed (None removes the column limit)
pd.set_option('display.max_columns', None)
# Disabled CSV export, kept for reference. `start_year` and `final_year` are not
# defined in the cells shown here, so define them before re-enabling this line.
#stats.to_csv('{}-{}playerstats.csv'.format(start_year, final_year)) # csv output format {year}playerstats.csv

ModuleNotFoundError: No module named 'pycaret'

# Set Year values here
### - `year` is the most recent year you want in the dataset
### - `years_back` is the number of years to go back from that year

In [None]:
# Dataset window: `year` is the most recent year to include and `years_back`
# is how many years to go back from it.
year, years_back = 2023, 20

In [None]:
# Fetch the two stat tables from the project's stat_collection helper, passing
# the most recent year (`year`) and how many years to go back (`years_back`).
# The returned pair is unpacked as the passer table and the defense table,
# which the following cell merges into a single frame.
pass_df, def_df = stats.get_stats(year, years_back)

In [None]:
# Build the modeling frame in one chain:
#   1. inner-join passer rows to defense rows on the shared game keys, tagging
#      overlapping column names with `_passer` / `_defense`;
#   2. keep only rows whose passer attempt EWMA is above 5 (outlier clean-up).
df = (
    pd.merge(
        pass_df,
        def_df,
        on=['defteam', 'season', 'week', 'roof', 'surface'],
        how='inner',
        suffixes=('_passer', '_defense'),
    )
    .loc[lambda merged: merged['pass_attempts_ewma_passer'] > 5]
)


# Passing Data Overview

In [None]:
# Overview of the data: the output of DataFrame.describe(), rounded to two
# decimals. Left as a bare expression so the notebook displays the table.
df.describe().round(2)

# Passing Data Heat Map

In [None]:
# Pairwise correlations between the numeric columns of the dataset.
dataset_corr = df.select_dtypes(include=['number']).corr()

# Remove the passing_yards row and column so the heatmap shows only the
# correlations among the remaining columns.
dataset_corr = dataset_corr.drop(index='passing_yards', columns='passing_yards')

# Boolean mask that hides the diagonal and the upper triangle; a correlation
# matrix is symmetric, so only the lower-left corner needs to be drawn.
mask = np.triu(np.ones_like(dataset_corr, dtype=bool))

# Draw the correlation matrix as a heatmap using Seaborn.
with sns.axes_style("whitegrid"):
    f, ax = plt.subplots(figsize=(12, 10))
    ax = sns.heatmap(
        dataset_corr.round(2),
        mask=mask,
        vmin=-1,
        vmax=1,
        center=0,  # keep zero correlation at the midpoint of the diverging palette
        square=True,
        cmap='PuOr',
        linewidths=.5,
        annot=True,
        annot_kws={"size": 12},
        fmt='.1f',
        ax=ax,
    )
    # Label through the Axes object rather than implicit pyplot state.
    ax.set_title('Heatmap (Correlations) of Features in the Dataset', fontsize=15)
    ax.set_xlabel('Features', fontsize=15)
    ax.set_ylabel('Features', fontsize=15)
plt.show()

# Modeling With PyCaret

In [None]:
# Initialize PyCaret with default settings to start.
# The result is bound to its own name on purpose: assigning it to `setup` would
# overwrite the `setup` function provided by `from pycaret.regression import *`,
# so running this cell a second time would try to call the previous result
# instead of the function.
regression_setup = setup(data=df, target='passing_yards', session_id=123)