# In [None]:
import numpy as np
import pandas as pd
from scipy.stats import pearsonr, chi2_contingency
import matplotlib.pyplot as plt
import seaborn as sns

import codecademylib3
# Print NumPy floats in plain (non-scientific) notation with two decimals.
np.set_printoptions(precision=2, suppress=True)

# Load the full game log from the CSV next to this script.
nba = pd.read_csv('./nba_games.csv')

# One frame per season of interest, selected on the year_id column.
nba_2010 = nba.loc[nba["year_id"] == 2010]
nba_2014 = nba.loc[nba["year_id"] == 2014]

# Preview the first five rows of each season subset.
print(nba_2010.head(5))
print(nba_2014.head(5))

# Analyzing the relationship between a quantitative and a categorical variable:
# points scored per game (pts) versus franchise (fran_id), 2010 season.
#
# The boolean masks are built from nba_2010 itself rather than from the full
# nba frame, so the mask and the filtered Series share the same index and no
# implicit index alignment is relied on.
knicks_pts = nba_2010.pts[nba_2010.fran_id == "Knicks"]
nets_pts = nba_2010.pts[nba_2010.fran_id == "Nets"]

# Difference in mean points per game (Knicks minus Nets) in 2010.
avg_knicks_pts = knicks_pts.mean()
avg_nets_pts = nets_pts.mean()
diff_means_2010 = avg_knicks_pts - avg_nets_pts
print(diff_means_2010)

# Overlapping histograms of the two distributions.
# density=True normalises each histogram to unit area so the two teams can be
# compared even when their game counts differ; it replaces the `normed`
# keyword, which was removed from Matplotlib.
plt.hist(knicks_pts, label="Knicks", density=True, alpha=0.6)
plt.hist(nets_pts, label="Nets", density=True, alpha=0.6)
plt.legend()
plt.show()

# Same Knicks-vs-Nets comparison for the 2014 season.
# Note: knicks_pts / nets_pts and the averages are rebound here and no longer
# hold the 2010 values after this point.
#
# The masks are built from nba_2014 itself (not the full nba frame) so the
# mask and the filtered Series share the same index.
knicks_pts = nba_2014.pts[nba_2014.fran_id == "Knicks"]
nets_pts = nba_2014.pts[nba_2014.fran_id == "Nets"]

# Difference in mean points per game (Knicks minus Nets) in 2014.
avg_knicks_pts = knicks_pts.mean()
avg_nets_pts = nets_pts.mean()
diff_means_2014 = avg_knicks_pts - avg_nets_pts
print(diff_means_2014)

# Overlapping histograms of the two 2014 distributions.
# Clear the current figure first so earlier plots do not bleed through.
plt.clf()
# density=True replaces the `normed` keyword, which was removed from Matplotlib.
plt.hist(knicks_pts, label="Knicks", density=True, alpha=0.6)
plt.hist(nets_pts, label="Nets", density=True, alpha=0.6)
plt.legend()
plt.show()

# Side-by-side boxplots of points per game (pts) for each franchise (fran_id)
# in the 2010 season, drawn on a freshly cleared figure.
plt.clf()
sns.boxplot(x="fran_id", y="pts", data=nba_2010)
plt.show()

# Relationship between two categorical variables in the 2010 season:
# game_result versus game_location.

# Contingency table of raw counts (rows: game_result, columns: game_location).
location_result_freq = pd.crosstab(
    index=nba_2010["game_result"],
    columns=nba_2010["game_location"],
)
print(location_result_freq)

# Same table with each count divided by the number of 2010 rows.
location_result_proportions = location_result_freq.div(len(nba_2010))
print(location_result_proportions)

# Chi-square test on the count table; `expected` holds the frequencies
# expected if the two variables were independent.
chi2, pval, dof, expected = chi2_contingency(location_result_freq)
print(expected)
print(chi2)

# Relationship between two quantitative variables in the 2010 season:
# forecast versus point_diff.

# Covariance matrix of the two columns.
cov_forecast_diff = np.cov(nba_2010["forecast"], nba_2010["point_diff"])
print(cov_forecast_diff)

# Pearson correlation coefficient (the accompanying p-value is kept in `p`).
corr_forecast_diff, p = pearsonr(nba_2010["forecast"], nba_2010["point_diff"])
print(corr_forecast_diff)

# Scatter plot of forecast (x-axis) against point difference (y-axis),
# drawn on a freshly cleared figure.
plt.clf()
plt.scatter(nba_2010["forecast"], nba_2010["point_diff"])
plt.xlabel("Forecast")
plt.ylabel("Point Difference")
plt.show()