In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from nssstats.plots import std_plot
from nssstats.plots import iqr_plot
from nssstats.plots import quadrant_plot, half_plot
from ipywidgets import interact, FloatSlider

In [None]:
# Read the recruit table from the CSV next to the notebook and preview the first rows.
espn_100 = pd.read_csv(filepath_or_buffer='espn_100_database.csv')
espn_100.head(n=5)

Let's see the number of players by position

In [None]:
# Bar chart of how many rows fall under each `pos` value.
espn_100['pos'].value_counts().plot.bar()
plt.xticks(rotation=0)  # keep the category labels horizontal
plt.title('Number of Players By Position');

Let's see the number of players by Type of High School

In [None]:
# Bar chart of how many rows fall under each `hs_type` value.
espn_100['hs_type'].value_counts().plot.bar()
plt.xticks(rotation=0)  # keep the category labels horizontal
plt.title('Number of Players By Type of High School');

# Player Weight Statistics

In [None]:
espn_100.weight.mean()

In [None]:
espn_100.weight.median()

In [None]:
# Histogram of the `weight` column, drawn on the explicit Axes created here
# (the original created `ax` but then drew through the pyplot state machine).
fig, ax = plt.subplots(figsize = (10,6))

ax.hist(
    x = 'weight',
    data = espn_100,
    edgecolor = 'black',
    linewidth = 2
)
ax.set_xlabel('Weight (lbs.)')
ax.set_ylabel('Players')
# Title previously misspelled "ESPN" as "EPSN".
ax.set_title('Histogram of ESPN 100 Recruit Weight');

Let's look at the range in weight

In [None]:
espn_100.weight.max()

In [None]:
espn_100.nlargest(1,'weight')

In [None]:
espn_100.weight.min()

In [None]:
espn_100.nsmallest(1,'weight')

In [None]:
espn_100.weight.max()- espn_100.weight.min()

Variance and Standard Deviation

In [None]:
# Signed distance of every player's weight from the mean weight.
espn_100['weight_deviation'] = espn_100['weight'].sub(espn_100['weight'].mean())
espn_100.head(n=5)

In [None]:
espn_100.weight.std()

In [None]:
espn_100['weight_deviation'].mean()

In [None]:
# Squared deviations from the mean; their average is the population variance.
espn_100['squared_weight_deviation'] = espn_100['weight_deviation']**2
# Preview the first rows only (as the deviation cell does) instead of rendering the whole frame.
espn_100.head()

Population Standard Deviation

In [None]:
np.sqrt(espn_100['squared_weight_deviation'].mean())

In [None]:
espn_100.weight.var(ddof = 0)

In [None]:
espn_100.weight.std(ddof = 0)

In [None]:
# Plot the weight column with the nssstats `std_plot` helper
# (presumably annotates standard-deviation bands — confirm against nssstats).
plt.figure(figsize=(10, 6))
std_plot(espn_100['weight'], linewidth=2, edgecolor='black')

z-scores

In [None]:
# Standardise weight: subtract the mean, then divide by the population (ddof=0) standard deviation.
espn_100['weight_z-score'] = (
    espn_100['weight']
    .sub(espn_100['weight'].mean())
    .div(espn_100['weight'].std(ddof=0))
)

In [None]:
# Sanity check: the z-scores were scaled by the population std (ddof=0), so measure
# their spread with the same ddof; the result should be 1 up to floating-point error.
espn_100['weight_z-score'].std(ddof = 0)

Let's look at the weight z-scores for Kevin Love

In [None]:
espn_100.loc[(espn_100.player == 'Kevin Love')]

Quartiles and Quantiles/Percentiles

In [None]:
espn_100.weight.quantile(q = 0.25)

In [None]:
espn_100.weight.quantile(q = 0.5)

In [None]:
espn_100.weight.quantile(q = 0.75)

In [None]:
espn_100.weight.describe()

Interquartile Range

In [None]:
espn_100.weight.quantile(q = 0.75) - espn_100.weight.quantile(q = 0.25)

In [None]:
# Plot the weight column with the nssstats `iqr_plot` helper using 25 bins
# (presumably highlights the interquartile range — confirm against nssstats).
plt.figure(figsize=(10, 6))
iqr_plot(espn_100['weight'], bins=25, linewidth=2, edgecolor='black')

Observing Outliers in the Dataset

In [None]:
# Single horizontal box plot of all weights.
plt.figure(figsize=(10, 6))
sns.boxplot(x=espn_100['weight']);

In [None]:
# One weight box per `school`; rows are pre-sorted by `school` before plotting.
plt.figure(figsize=(10, 8))
sns.boxplot(x='weight', y='school', data=espn_100.sort_values(by='school'));

In [None]:
# One weight box per `high_school`; rows are pre-sorted by `high_school` before plotting.
plt.figure(figsize=(10, 8))
sns.boxplot(x='weight', y='high_school', data=espn_100.sort_values(by='high_school'));

In [None]:
# One weight box per `hs_type`; rows are pre-sorted by `hs_type` before plotting.
plt.figure(figsize=(10, 8))
sns.boxplot(x='weight', y='hs_type', data=espn_100.sort_values(by='hs_type'));

In [None]:
# One weight box per `hs_affiliation_type`; rows are pre-sorted by that column before plotting.
plt.figure(figsize=(10, 8))
sns.boxplot(
    x='weight',
    y='hs_affiliation_type',
    data=espn_100.sort_values(by='hs_affiliation_type'),
);

In [None]:
# One weight box per `hometown`; rows are pre-sorted by `hometown` before plotting.
plt.figure(figsize=(10, 8))
sns.boxplot(x='weight', y='hometown', data=espn_100.sort_values(by='hometown'));

In [None]:
# One weight box per `state`; rows are pre-sorted by `state` before plotting.
plt.figure(figsize=(10, 8))
sns.boxplot(x='weight', y='state', data=espn_100.sort_values(by='state'));

In [None]:
# One weight box per `pos`; rows are pre-sorted by `pos` before plotting.
plt.figure(figsize=(10, 8))
sns.boxplot(x='weight', y='pos', data=espn_100.sort_values(by='pos'));

# Player Height Statistics

In [None]:
espn_100.ht_inches.mean()

In [None]:
espn_100.ht_inches.median()

In [None]:
# Histogram of the `ht_inches` column, drawn on the explicit Axes created here.
fig, ax = plt.subplots(figsize = (10,6))

ax.hist(
    x = 'ht_inches',
    data = espn_100,
    edgecolor = 'black',
    linewidth = 2
)
# The x-axis previously read 'weight (lbs.)', copied over from the weight histogram;
# this plot shows height, and the column name indicates inches.
ax.set_xlabel('Height (in.)')
ax.set_ylabel('Players')
# Title previously misspelled "ESPN" as "EPSN".
ax.set_title('Histogram of ESPN 100 Recruit Height');

Let's look at the range in height

In [None]:
espn_100.ht_inches.max()

In [None]:
espn_100.nlargest(1,'ht_inches')

In [None]:
espn_100.ht_inches.min()

In [None]:
espn_100.nsmallest(1,'ht_inches')

In [None]:
espn_100.ht_inches.max()- espn_100.ht_inches.min()

Variance and Standard Deviation

In [None]:
# Signed distance of every player's height from the mean height.
espn_100['height_deviation'] = espn_100['ht_inches'].sub(espn_100['ht_inches'].mean())
espn_100.head(n=5)

In [None]:
espn_100.ht_inches.std()

In [None]:
espn_100['height_deviation'].mean()

In [None]:
# Squared deviations from the mean; their average is the population variance.
espn_100['squared_height_deviation'] = espn_100['height_deviation']**2
# Preview the first rows only (as the deviation cell does) instead of rendering the whole frame.
espn_100.head()

Population Standard Deviation

In [None]:
np.sqrt(espn_100['squared_height_deviation'].mean())

In [None]:
espn_100.ht_inches.var(ddof = 0)

In [None]:
espn_100.ht_inches.std(ddof = 0)

In [None]:
# Plot the height column with the nssstats `std_plot` helper
# (presumably annotates standard-deviation bands — confirm against nssstats).
plt.figure(figsize=(10, 6))
std_plot(espn_100['ht_inches'], linewidth=2, edgecolor='black')

z-scores

In [None]:
# Standardise height: subtract the mean, then divide by the population (ddof=0) standard deviation.
espn_100['height_z-score'] = (
    espn_100['ht_inches']
    .sub(espn_100['ht_inches'].mean())
    .div(espn_100['ht_inches'].std(ddof=0))
)

In [None]:
# Sanity check: the z-scores were scaled by the population std (ddof=0), so measure
# their spread with the same ddof; the result should be 1 up to floating-point error.
espn_100['height_z-score'].std(ddof = 0)

Let's look at the height z-scores for Kevin Love

In [None]:
espn_100.loc[(espn_100.player == 'Kevin Love')]

Quartiles and Quantiles/Percentiles

In [None]:
espn_100.ht_inches.quantile(q = 0.25)

In [None]:
espn_100.ht_inches.quantile(q = 0.5)

In [None]:
espn_100.ht_inches.quantile(q = 0.75)

In [None]:
espn_100.ht_inches.describe()

Interquartile Range

In [None]:
espn_100.ht_inches.quantile(q = 0.75) - espn_100.ht_inches.quantile(q = 0.25)

In [None]:
# Plot the height column with the nssstats `iqr_plot` helper using 25 bins
# (presumably highlights the interquartile range — confirm against nssstats).
plt.figure(figsize=(10, 6))
iqr_plot(espn_100['ht_inches'], bins=25, linewidth=2, edgecolor='black')

Observing Outliers in the Dataset

In [None]:
# Single horizontal box plot of all heights.
plt.figure(figsize=(10, 6))
sns.boxplot(x=espn_100['ht_inches']);

In [None]:
# One height box per `school`; rows are pre-sorted by `school` before plotting.
plt.figure(figsize=(10, 8))
sns.boxplot(x='ht_inches', y='school', data=espn_100.sort_values(by='school'));

In [None]:
# One height box per `high_school`; rows are pre-sorted by `high_school` before plotting.
plt.figure(figsize=(10, 8))
sns.boxplot(x='ht_inches', y='high_school', data=espn_100.sort_values(by='high_school'));

In [None]:
# One height box per `hs_type`; rows are pre-sorted by `hs_type` before plotting.
plt.figure(figsize=(10, 8))
sns.boxplot(x='ht_inches', y='hs_type', data=espn_100.sort_values(by='hs_type'));

In [None]:
# One height box per `hs_affiliation_type`; rows are pre-sorted by that column before plotting.
plt.figure(figsize=(10, 8))
sns.boxplot(
    x='ht_inches',
    y='hs_affiliation_type',
    data=espn_100.sort_values(by='hs_affiliation_type'),
);

In [None]:
# One height box per `hometown`; rows are pre-sorted by `hometown` before plotting.
plt.figure(figsize=(10, 8))
sns.boxplot(x='ht_inches', y='hometown', data=espn_100.sort_values(by='hometown'));

In [None]:
# One height box per `state`; rows are pre-sorted by `state` before plotting.
plt.figure(figsize=(10, 8))
sns.boxplot(x='ht_inches', y='state', data=espn_100.sort_values(by='state'));

In [None]:
# One height box per `pos`; rows are pre-sorted by `pos` before plotting.
plt.figure(figsize=(10, 8))
sns.boxplot(x='ht_inches', y='pos', data=espn_100.sort_values(by='pos'));

# Player Grade Statistics

In [None]:
espn_100.grade.mean()

In [None]:
espn_100.grade.median()

In [None]:
# Histogram of the `grade` column, drawn on the explicit Axes created here
# (the original created `ax` but then drew through the pyplot state machine).
fig, ax = plt.subplots(figsize = (10,6))

ax.hist(
    x = 'grade',
    data = espn_100,
    edgecolor = 'black',
    linewidth = 2
)
ax.set_xlabel('Grade')
ax.set_ylabel('Players')
# Title previously misspelled "ESPN" as "EPSN".
ax.set_title('Histogram of ESPN 100 Recruit Grades');

Let's look at the range in grade

In [None]:
espn_100.grade.max()

In [None]:
espn_100.nlargest(1,'grade')

In [None]:
espn_100.grade.min()

In [None]:
espn_100.nsmallest(1,'grade')

In [None]:
espn_100.grade.max()- espn_100.grade.min()

Variance and Standard Deviation

In [None]:
# Signed distance of every player's grade from the mean grade.
espn_100['grade_deviation'] = espn_100['grade'].sub(espn_100['grade'].mean())
espn_100.head(n=5)

In [None]:
espn_100.grade.std()

In [None]:
espn_100['grade_deviation'].mean()

In [None]:
# Squared deviations from the mean; their average is the population variance.
espn_100['squared_grade_deviation'] = espn_100['grade_deviation']**2
# Preview the first rows only (as the deviation cell does) instead of rendering the whole frame.
espn_100.head()

Population Standard Deviation

In [None]:
np.sqrt(espn_100['squared_grade_deviation'].mean())

In [None]:
espn_100.grade.var(ddof = 0)

In [None]:
espn_100.grade.std(ddof = 0)

In [None]:
# Plot the grade column with the nssstats `std_plot` helper
# (presumably annotates standard-deviation bands — confirm against nssstats).
plt.figure(figsize=(10, 6))
std_plot(espn_100['grade'], linewidth=2, edgecolor='black')

In [None]:
# Standardise grade: subtract the mean, then divide by the population (ddof=0) standard deviation.
espn_100['grade_z-score'] = (
    espn_100['grade']
    .sub(espn_100['grade'].mean())
    .div(espn_100['grade'].std(ddof=0))
)

In [None]:
# Sanity check: the z-scores were scaled by the population std (ddof=0), so measure
# their spread with the same ddof; the result should be 1 up to floating-point error.
espn_100['grade_z-score'].std(ddof = 0)

Let's look at the grade z-scores for Kevin Love

In [None]:
espn_100.loc[(espn_100.player == 'Kevin Love')]

Quartiles and Quantiles/Percentiles

In [None]:
espn_100.grade.quantile(q = 0.25)

In [None]:
espn_100.grade.quantile(q = 0.5)

In [None]:
espn_100.grade.quantile(q = 0.75)

In [None]:
espn_100.grade.describe()

Interquartile Range

In [None]:
espn_100.grade.quantile(q = 0.75) - espn_100.grade.quantile(q = 0.25)

In [None]:
# Plot the grade column with the nssstats `iqr_plot` helper using 25 bins
# (presumably highlights the interquartile range — confirm against nssstats).
plt.figure(figsize=(10, 6))
iqr_plot(espn_100['grade'], bins=25, linewidth=2, edgecolor='black')

Observing Outliers in the Dataset

In [None]:
# Single horizontal box plot of all grades.
plt.figure(figsize=(10, 6))
sns.boxplot(x=espn_100['grade']);

In [None]:
# One grade box per `school`; rows are pre-sorted by `school` before plotting.
plt.figure(figsize=(10, 8))
sns.boxplot(x='grade', y='school', data=espn_100.sort_values(by='school'));

In [None]:
# One grade box per `high_school`; rows are pre-sorted by `high_school` before plotting.
plt.figure(figsize=(10, 8))
sns.boxplot(x='grade', y='high_school', data=espn_100.sort_values(by='high_school'));

In [None]:
# One grade box per `hs_type`; rows are pre-sorted by `hs_type` before plotting.
plt.figure(figsize=(10, 8))
sns.boxplot(x='grade', y='hs_type', data=espn_100.sort_values(by='hs_type'));

In [None]:
# One grade box per `hs_affiliation_type`; rows are pre-sorted by that column before plotting.
plt.figure(figsize=(10, 8))
sns.boxplot(
    x='grade',
    y='hs_affiliation_type',
    data=espn_100.sort_values(by='hs_affiliation_type'),
);

In [None]:
# One grade box per `hometown`; rows are pre-sorted by `hometown` before plotting.
plt.figure(figsize=(10, 8))
sns.boxplot(x='grade', y='hometown', data=espn_100.sort_values(by='hometown'));

In [None]:
# One grade box per `state`; rows are pre-sorted by `state` before plotting.
plt.figure(figsize=(10, 8))
sns.boxplot(x='grade', y='state', data=espn_100.sort_values(by='state'));

In [None]:
# One grade box per `pos`; rows are pre-sorted by `pos` before plotting.
plt.figure(figsize=(10, 8))
sns.boxplot(x='grade', y='pos', data=espn_100.sort_values(by='pos'));

# Height vs. Weight

In [None]:
# Scatter of height (x) against weight (y), one point per row.
espn_100.plot.scatter(x='ht_inches', y='weight', figsize=(12, 8))
plt.title('Height vs. Weight');

In [None]:
espn_100[['ht_inches', 'weight']].cov()

In [None]:
espn_100['ht_inches'].var()

In [None]:
espn_100['weight'].var()

In [None]:
quadrant_plot(espn_100.ht_inches, espn_100.weight, labels = ['Height', 'Weight'], figsize = (12,8))

In [None]:
espn_100[['ht_inches', 'weight']].corr()

# Other Statistics

In [None]:
espn_100.groupby('pos').corr(numeric_only=True)

In [None]:
espn_100.groupby('hs_type').corr(numeric_only=True)

In [None]:
espn_100.groupby('class').corr(numeric_only=True)

In [None]:
espn_100.groupby('hometown').corr(numeric_only=True)

In [None]:
espn_100.groupby('state').corr(numeric_only=True)

In [None]:
espn_100.groupby(['pos','school'])['grade'].describe()

In [None]:
# Stacked bars: for each `grade` value, the row-normalised share of each `pos`.
(
    pd.crosstab(index=espn_100['grade'], columns=espn_100['pos'], normalize='index')
    .plot.bar(stacked=True, width=0.75, edgecolor='black')
)
plt.ylabel('Proportion');

In [None]:
# Stacked bars: for each `stars` value, the row-normalised share of each `pos`.
(
    pd.crosstab(index=espn_100['stars'], columns=espn_100['pos'], normalize='index')
    .plot.bar(stacked=True, width=0.75, edgecolor='black')
)
plt.ylabel('Proportion');

In [None]:
# Stacked bars: for each `pos`, the row-normalised share of each `mcdonalds_aa` value.
(
    pd.crosstab(index=espn_100['pos'], columns=espn_100['mcdonalds_aa'], normalize='index')
    .plot.bar(stacked=True, width=0.75, edgecolor='black')
)
plt.ylabel('Proportion');

In [None]:
# Stacked bars: for each `hs_type`, the row-normalised share of each `mcdonalds_aa` value.
(
    pd.crosstab(index=espn_100['hs_type'], columns=espn_100['mcdonalds_aa'], normalize='index')
    .plot.bar(stacked=True, width=0.75, edgecolor='black')
)
plt.ylabel('Proportion');

In [None]:
# Stacked bars: for each `conference`, the row-normalised share of each `mcdonalds_aa` value.
(
    pd.crosstab(index=espn_100['conference'], columns=espn_100['mcdonalds_aa'], normalize='index')
    .plot.bar(stacked=True, width=0.75, edgecolor='black')
)
plt.ylabel('Proportion');

In [None]:
# Stacked bars: for each `conference_type`, the row-normalised share of each `mcdonalds_aa` value.
(
    pd.crosstab(
        index=espn_100['conference_type'],
        columns=espn_100['mcdonalds_aa'],
        normalize='index',
    )
    .plot.bar(stacked=True, width=0.75, edgecolor='black')
)
plt.ylabel('Proportion');

In [None]:
# Stacked bars: for each `pos`, the row-normalised share of each `jbc` value.
(
    pd.crosstab(index=espn_100['pos'], columns=espn_100['jbc'], normalize='index')
    .plot.bar(stacked=True, width=0.75, edgecolor='black')
)
plt.ylabel('Proportion');

In [None]:
# Stacked bars: for each `hs_type`, the row-normalised share of each `jbc` value.
(
    pd.crosstab(index=espn_100['hs_type'], columns=espn_100['jbc'], normalize='index')
    .plot.bar(stacked=True, width=0.75, edgecolor='black')
)
plt.ylabel('Proportion');

In [None]:
# Stacked bars: for each `conference`, the row-normalised share of each `jbc` value.
(
    pd.crosstab(index=espn_100['conference'], columns=espn_100['jbc'], normalize='index')
    .plot.bar(stacked=True, width=0.75, edgecolor='black')
)
plt.ylabel('Proportion');

In [None]:
# Stacked bars: for each `conference_type`, the row-normalised share of each `jbc` value.
(
    pd.crosstab(index=espn_100['conference_type'], columns=espn_100['jbc'], normalize='index')
    .plot.bar(stacked=True, width=0.75, edgecolor='black')
)
plt.ylabel('Proportion');