# In [None]:
import numpy as np
import pandas as pd
from scipy.stats import pearsonr, chi2_contingency
import matplotlib.pyplot as plt
import seaborn as sns

import codecademylib3
# Print NumPy floats in fixed-point form with two decimals.
np.set_printoptions(precision=2, suppress=True)

# Load the game log from the CSV file next to this script.
nba = pd.read_csv('./nba_games.csv')

# Keep one frame per season of interest (year_id 2010 and 2014).
nba_2010 = nba.loc[nba["year_id"] == 2010]
nba_2014 = nba.loc[nba["year_id"] == 2014]

# Show the first rows of each season's subset.
print(nba_2010.head(), nba_2014.head(), sep="\n")

# Exploring games: points per game for each franchise in the 2010 subset.
# The masks are built from nba_2010 itself so they share the index of the
# Series being filtered instead of relying on reindexing from the full frame.
# NOTE(review): assumes fran_id values are capitalised ("Knicks", like
# "Nets") -- confirm against the CSV.
knicks_pts = nba_2010.pts[nba_2010.fran_id == "Knicks"]
nets_pts = nba_2010.pts[nba_2010.fran_id == "Nets"]

# Exploring differences: gap between the two teams' mean points.
diff_means_2010 = knicks_pts.mean() - nets_pts.mean()
print(diff_means_2010)

# Creating an overlapping histogram.
# `density=True` replaces `normed=True`, which Matplotlib removed in 3.1.
plt.hist(knicks_pts, alpha=0.8, density=True, label="knicks")
plt.hist(nets_pts, alpha=0.8, density=True, label="nets")
plt.legend()
plt.show()

# Checking the relationship between nba_2010 and nba_2014:
# column-wise difference of the season means. `numeric_only=True` restricts
# the mean to numeric columns; without it pandas >= 2.0 raises TypeError
# when the frame contains non-numeric columns such as fran_id.
diff_means_2014 = nba_2014.mean(numeric_only=True) - nba_2010.mean(numeric_only=True)
print(diff_means_2014)


# Boxplots: distribution of pts for every fran_id in the 2010 subset.
plt.clf()  # start from an empty figure
sns.boxplot(x=nba_2010["fran_id"], y=nba_2010["pts"])
plt.show()

# Creating crosstabs: counts of game_result (rows) by game_location (columns).
location_result_freq = pd.crosstab(
    index=nba_2010["game_result"],
    columns=nba_2010["game_location"],
)
print(location_result_freq)

# Same table expressed as a share of all rows in the 2010 subset.
location_result_proportions = location_result_freq.div(len(nba_2010))
print(location_result_proportions)

# Calculating chi-square: test of independence on the frequency table.
chi2, pval, dof, expected = chi2_contingency(observed=location_result_freq)

# Report the expected counts first, then the test statistic.
print(expected, chi2, sep="\n")

# Calculating covariance and correlation between forecast and point_diff.
# The covariance matrix is stored and printed; as a bare expression its
# value was discarded when the file runs as a script.
point_diff_forecast_cov = np.cov(nba_2010.forecast, nba_2010.point_diff)
print(point_diff_forecast_cov)

# pearsonr returns the correlation coefficient and its p-value.
correlation, p = pearsonr(nba_2010.forecast, nba_2010.point_diff)
print(correlation)

# Creating a side-by-side boxplot: one box of pts per fran_id.
plt.clf()  # start from an empty figure
fran_id, pts = nba_2010.fran_id, nba_2010.pts
sns.boxplot(y=pts, x=fran_id)
plt.show()

# Generating a scatter plot: forecast on the x-axis, point_diff on the y-axis.
plt.clf()  # start from an empty figure
plt.scatter(nba_2010["forecast"], nba_2010["point_diff"])
plt.show()