# NBA Trends

#### Analyzing Relationships Between Quantitative and Categorical Variables

In [None]:
# Load libraries
import numpy as np
import pandas as pd
from scipy.stats import pearsonr, chi2_contingency
import matplotlib.pyplot as plt
import seaborn as sns

# Read the NBA games data from the CSV next to this notebook
nba = pd.read_csv('./nba_games.csv')

# One frame per season of interest: rows whose year_id is 2010 or 2014
nba_2010 = nba.loc[nba['year_id'] == 2010]
nba_2014 = nba.loc[nba['year_id'] == 2014]

# Preview the first rows of each season subset, one after the other
print(nba_2010.head(), nba_2014.head(), sep='\n')

# Compare points scored per game by the Knicks and the Nets in the 2010 season.
# The masks are built from nba_2010 itself rather than the full `nba` frame, so
# the boolean index matches the filtered rows exactly instead of relying on
# pandas aligning a longer mask to the subset's index.
knicks_pts_10 = nba_2010.pts[nba_2010.fran_id == 'Knicks']
nets_pts_10 = nba_2010.pts[nba_2010.fran_id == 'Nets']

# difference between the average points of both teams (Knicks minus Nets)
diff_means_2010 = knicks_pts_10.mean() - nets_pts_10.mean()
print(diff_means_2010)

# overlapping histograms, each normalized to unit area so the two
# distributions are comparable even if the teams have different game counts.
# `density=True` replaces the `normed` keyword, which Matplotlib 3.1 removed.
plt.hist(knicks_pts_10, color='blue', label='Knicks', density=True, alpha=0.8)
plt.hist(nets_pts_10, color='red', label='Nets', density=True, alpha=0.8)
plt.legend()
plt.title('2010 Season')
plt.show()
# clear the current figure so the next plot starts from a blank canvas
plt.clf()

# Compare points scored per game by the Knicks and the Nets in the 2014 season.
# The masks are built from nba_2014 itself rather than the full `nba` frame, so
# the boolean index matches the filtered rows exactly instead of relying on
# pandas aligning a longer mask to the subset's index.
knicks_pts_14 = nba_2014.pts[nba_2014.fran_id == 'Knicks']
nets_pts_14 = nba_2014.pts[nba_2014.fran_id == 'Nets']

# difference between the average points of both teams (Knicks minus Nets)
diff_means_2014 = knicks_pts_14.mean() - nets_pts_14.mean()
print(diff_means_2014)

# overlapping histograms, each normalized to unit area so the two
# distributions are comparable even if the teams have different game counts.
# `density=True` replaces the `normed` keyword, which Matplotlib 3.1 removed.
plt.hist(knicks_pts_14, color='blue', label='Knicks', density=True, alpha=0.8)
plt.hist(nets_pts_14, color='red', label='Nets', density=True, alpha=0.8)
plt.legend()
plt.title('2014 Season')
plt.show()
# clear the current figure so the next plot starts from a blank canvas
plt.clf()

# Side-by-side boxplots of points scored, one box per franchise, for the
# 2010 season subset
sns.boxplot(x='fran_id', y='pts', data=nba_2010)
plt.show()

#### Analyzing Relationships Between Categorical Variables

In [None]:
# contingency table of frequencies: game result (rows) by game location
# (columns) for the 2010 season
location_result_freq = pd.crosstab(nba_2010.game_result, nba_2010.game_location)
print(location_result_freq)

# convert the table above to a table of proportions.
# Divide by the table's own total rather than len(nba_2010): crosstab leaves
# out rows with a missing value in either column, so the row count of the
# frame could exceed the number of games actually tabulated and the
# proportions would then not sum to 1.
print(location_result_freq / location_result_freq.values.sum())

# chi-square test of independence; `expected_result` is the table of counts
# expected if game result and game location were independent
from scipy.stats import chi2_contingency
chi2, pval, dof, expected_result = chi2_contingency(location_result_freq)

print(expected_result)
print(chi2)

#### Analyzing Relationships Between Quantitative Variables

In [None]:
# Covariance of forecast and point_diff for the 2010 season.
# np.cov on two 1-D inputs returns a 2x2 matrix: variances on the diagonal,
# the covariance of the pair off the diagonal.
point_diff_forecast_cov = np.cov(nba_2010['forecast'], nba_2010['point_diff'])
print(point_diff_forecast_cov)

# Pearson correlation of the same two columns (coefficient and p-value)
point_diff_forecast_corr = pearsonr(nba_2010['forecast'], nba_2010['point_diff'])
print(point_diff_forecast_corr)

# Scatter plot of point_diff against forecast, drawn on a cleared figure
plt.clf()
plt.scatter(x='forecast', y='point_diff', data=nba_2010)
plt.xlabel('Forecasted Win Prob.')
plt.ylabel('Point Differential')
plt.show()