In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
import warnings
import plotly.express as px 
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
from autoviz.AutoViz_Class import AutoViz_Class

In [None]:
# Load the ESPN 100 recruit table from a CSV expected in the working directory.
espn_100 = pd.read_csv("espn_100_database.csv")
# Show the loaded frame as the cell output.
espn_100

In [None]:
# Call info() (it was referenced without parentheses, which only echoes the
# bound method) to print column names, non-null counts and dtypes.
espn_100.info()

In [None]:
print(espn_100.dtypes)

In [None]:
# Number of missing values in each column (isna is the same check as isnull).
espn_100.isna().sum()

# General Information

In [None]:
# Tally recruits per position; reset_index() already returns a DataFrame,
# so no extra pd.DataFrame(...) wrapper is needed.
positions = espn_100["pos"].value_counts().reset_index()
positions

In [None]:
positions.columns = ['Position', 'Number of Recruits']
positions

In [None]:
# Histogram with one bar per value of the `pos` column.
fig = px.histogram(espn_100, x="pos")

fig.show()

In [None]:
espn_100.ht_inches.mean()

The average height for an ESPN 100 recruit is 6'5''. Just under 6'6''

In [None]:
espn_100.ht_inches.min()

The shortest recruit is 5'6''

In [None]:
espn_100.ht_inches.max()

The tallest recruit is 7'5''

In [None]:
# 3D scatter: position on x, height in inches on y, the `height` column on z.
# NOTE(review): `height` is presumably the original height text that
# `ht_inches` was derived from — confirm it is meaningful as a third axis.
fig = px.scatter_3d(espn_100, x="pos", y="ht_inches", z="height")
 
fig.show()

In [None]:
espn_100.weight.mean()

The average weight for an ESPN 100 recruit is 200 lbs

In [None]:
espn_100.weight.min()

The lightest recruit is 150 lbs

In [None]:
espn_100.weight.max()

The heaviest recruit is 300 lbs

In [None]:
# Violin plot of the `weight` distribution, one violin per position.
fig = px.violin(espn_100, x="pos", y="weight")
 

fig.show()

In [None]:
espn_100.stars.mean()

The average ESPN 100 recruit is rated 4 stars. Note: Star ratings were not given to the classes of 2007, 2008, and 2009.

In [None]:
espn_100.grade.mean()

The average grade of an ESPN 100 recruit is 90

In [None]:
espn_100.grade.min()

The lowest grade of an ESPN 100 recruit is 80

In [None]:
espn_100.grade.max()

The highest grade of an ESPN 100 recruit is 99

In [None]:
# Correlate numeric columns only: recent pandas raises on text columns
# (player, school, ...) instead of silently dropping them.
espn_100.select_dtypes(include='number').corr()

In [None]:
# Heatmap of the pairwise correlations between the numeric columns; text
# columns are excluded first because recent pandas raises on them in corr().
sns.heatmap(espn_100.select_dtypes(include='number').corr())

How many ESPN 100 Recruits were McDonald's All-Americans?

In [None]:
# Count recruits by the value of their McDonald's All-American flag.
num_mdaa = espn_100["mcdonalds_aa"].value_counts().reset_index()
num_mdaa

In [None]:
num_mdaa.columns = ['McDonalds All-American', 'Number of Recruits']
num_mdaa

In [None]:
fig = px.pie(num_mdaa, values="Number of Recruits", names="McDonalds All-American")
fig.show()

How many ESPN 100 Recruits played in the Jordan Brand Classic?

In [None]:
# Count recruits by the value of their Jordan Brand Classic flag.
num_jbc = espn_100["jbc"].value_counts().reset_index()
num_jbc

In [None]:
num_jbc.columns = ['JBC Participant', 'Number of Recruits']
num_jbc

In [None]:
fig = px.pie(num_jbc, values="Number of Recruits", names="JBC Participant")
fig.show()

# Location of Recruits

Which states produce the most recruits?

In [None]:
# Number of recruits per value of the `state` column.
states = espn_100["state"].value_counts().reset_index()

In [None]:
states.columns = ['State', 'Signees']
states

In [None]:
# DataFrame.nlargest needs the column to rank by; without it the call raises.
states.nlargest(5, 'Signees')

In [None]:
# Take the five states with the most signees straight from the counts rather
# than a hand-typed list, so the selection cannot drift from the data.
top5_states = states.nlargest(5, 'Signees')

In [None]:
# DataFrame.nsmallest needs the column to rank by; without it the call raises.
states.nsmallest(5, 'Signees')

California produced the most recruits.

Which cities produce the most recruits?

In [None]:
# Number of recruits per value of the `hometown` column.
cities = espn_100["hometown"].value_counts().reset_index()

In [None]:
cities.columns = ['City', 'Signees']
cities

In [None]:
# DataFrame.nlargest needs the column to rank by; without it the call raises.
cities.nlargest(5, 'Signees')

In [None]:
# Take the five hometowns with the most signees straight from the counts
# rather than a hand-typed list, so the selection cannot drift from the data.
top5_cities = cities.nlargest(5, 'Signees')

In [None]:
# DataFrame.nsmallest needs the column to rank by; without it the call raises.
cities.nsmallest(5, 'Signees')

Chicago, IL produced the most recruits.

# High School Information

High School Type

In [None]:
# Number of recruits per value of the `hs_type` column.
hs_type = espn_100["hs_type"].value_counts().reset_index()

In [None]:
hs_type.columns = ['High School Type', 'Number of Schools']
hs_type

In [None]:
px.bar(hs_type,x='High School Type',y='Number of Schools', text='Number of Schools',title='Type of High School ESPN 100 Recruits Attend')

Most ESPN 100 recruits attended public schools.

High School Affiliation

In [None]:
# Number of recruits per value of the `hs_affiliation_type` column.
hs_affiliation = espn_100["hs_affiliation_type"].value_counts().reset_index()

In [None]:
hs_affiliation.columns = ['High School Affiliation', 'Number of Schools']
hs_affiliation

In [None]:
px.bar(hs_affiliation,x='High School Affiliation',y='Number of Schools', text='Number of Schools',title='High School Affiliations for ESPN 100 Recruits')

Most ESPN 100 recruits attended high schools with a public-school affiliation.

# College Signings

In [None]:
# Number of recruits signed by each college (`school` column).
college_signings = espn_100["school"].value_counts().reset_index()
college_signings

In [None]:
college_signings.columns = ['School', 'Signees']
college_signings

In [None]:
college_signings.Signees.nlargest(5)

In [None]:
# Pick the five schools with the most signees directly from the counts so the
# "Top 5" chart built from this frame always matches the data.
top5_schools = college_signings.nlargest(5, 'Signees')

In [None]:
college_signings.Signees.nsmallest(5)

In [None]:
# Bar chart of signee counts for every school in `college_signings`.
plt.bar(college_signings.School, college_signings.Signees)
plt.title('Schools with the most Signed Recruits')
plt.xlabel('School')
# Axis label typo ("Recrutis") corrected.
plt.ylabel('Number of Recruits Signed')
# One bar per school: rotate the tick labels so the names do not overlap.
plt.xticks(rotation=90)
plt.show()

In [None]:
# Donut chart (hole=.5) of each school's share of all signees.
fig = px.pie(college_signings, values='Signees', names='School',hole=.5,
                    template="plotly_white")
# Put the percentage and school label inside each slice.
fig.update_traces(textposition='inside',textinfo='percent+label')
fig.update_layout(height=700,
                  title='Number of Recruits Signed Per School')
fig.show()

Kentucky has signed the most ESPN 100 recruits.

In [None]:
px.bar(top5_schools,x='School',y='Signees', text='Signees',title='Top 5 Schools With The Most ESPN 100 Recruits')

# Let's make Dataframes for Each Position

Point Guard

In [None]:
# Keep only the rows whose position is point guard.
pg_df = espn_100.loc[espn_100["pos"] == "PG"]
pg_df

In [None]:
pg_df.ht_inches.mean()

The average point guard is 6'1''

In [None]:
pg_df.ht_inches.min()

The shortest point guard is 5'6''

In [None]:
pg_df.ht_inches.max()

The tallest point guard is 6'7''

In [None]:
pg_df.weight.mean()

Average Point Guard weighs 175 lbs

In [None]:
pg_df.weight.min()

Lightest Point Guard is 150 lbs.

In [None]:
pg_df.weight.max()

Heaviest Point Guard is 220 lbs

Which states produce the most point guards?

In [None]:
# Point guards per state.
pg_states = pg_df["state"].value_counts().reset_index()
pg_states

In [None]:
pg_states.columns = ['State','Signees']
pg_states

In [None]:
pg_states.Signees.nlargest(5)

In [None]:
pg_states.Signees.nsmallest(5)

California produced the most Point Guards

Which cities produce the most point guards?

In [None]:
# Point guards per hometown.
pg_cities = pg_df["hometown"].value_counts().reset_index()
pg_cities

In [None]:
pg_cities.columns = ['City', 'Signees']
pg_cities

In [None]:
pg_cities.Signees.nlargest(5)

In [None]:
pg_cities.Signees.nsmallest(5)

Detroit, MI produced the most Point Guards.

Which schools signed the most point guards?

In [None]:
# Point guards signed per college.
pg_signings = pg_df["school"].value_counts().reset_index()
pg_signings

In [None]:
pg_signings.columns = ['School', 'Signees']
pg_signings

Kentucky signed the most point guards.

Shooting Guard

In [None]:
# Keep only the rows whose position is shooting guard.
sg_df = espn_100.loc[espn_100["pos"] == "SG"]
sg_df

In [None]:
sg_df.ht_inches.mean()

The average shooting guard is 6'3''

In [None]:
sg_df.ht_inches.min()

The shortest shooting guard is 6 ft even (6'0'').

In [None]:
sg_df.ht_inches.max()

The tallest shooting guard is 6'7''

In [None]:
sg_df.weight.mean()

The average shooting guard weighs 187 lbs.

In [None]:
sg_df.weight.min()

The lightest shooting guard weighs 160 lbs.

In [None]:
sg_df.weight.max()

The heaviest shooting guard weighs 225 lbs.

Which states produce the most shooting guards?

In [None]:
# Shooting guards per state.
sg_states = sg_df["state"].value_counts().reset_index()

In [None]:
sg_states.columns = ['State', 'Signees']
sg_states

In [None]:
sg_states.Signees.nlargest(5)

In [None]:
sg_states.Signees.nsmallest(5)

California produced the most Shooting Guards

Which cities produce the most shooting guards?

In [None]:
# Shooting guards per hometown.
sg_cities = sg_df["hometown"].value_counts().reset_index()
sg_cities

In [None]:
sg_cities.columns = ['City', 'Signees']
sg_cities

In [None]:
sg_cities.Signees.nlargest(5)

In [None]:
sg_cities.Signees.nsmallest(5)

Chicago, IL produced the most shooting guards.

Which schools signed the most shooting guards?

In [None]:
# Shooting guards signed per college.
sg_signings = sg_df["school"].value_counts().reset_index()
sg_signings

In [None]:
sg_signings.columns = ['School', 'Signees']
sg_signings

Duke signed the most shooting guards.

Small Forward

In [None]:
# Keep only the rows whose position is small forward.
sf_df = espn_100.loc[espn_100["pos"] == "SF"]
sf_df

In [None]:
sf_df.ht_inches.mean()

The average small forward is 6'6''

In [None]:
sf_df.ht_inches.min()

The shortest small forward is 6'4''

In [None]:
sf_df.ht_inches.max()

The tallest small forward is 6'10''

In [None]:
sf_df.weight.mean()

The average small forward weighs 197 lbs.

In [None]:
sf_df.weight.min()

The lightest small forward weighs 160 lbs.

In [None]:
sf_df.weight.max()

The heaviest small forward weighs 240 lbs.

Which states produce the most small forwards?

In [None]:
# Small forwards per state.
sf_states = sf_df["state"].value_counts().reset_index()

In [None]:
sf_states.columns = ['State', 'Signees']
sf_states

In [None]:
sf_states.Signees.nlargest(5)

In [None]:
sf_states.Signees.nsmallest(5)

California produced the most Small Forwards

Which cities produce the most small forwards?

In [None]:
# Small forwards per hometown.
sf_cities = sf_df["hometown"].value_counts().reset_index()
sf_cities

In [None]:
sf_cities.columns = ['City', 'Signees']
sf_cities

In [None]:
sf_cities.Signees.nlargest(5)

In [None]:
sf_cities.Signees.nsmallest(5)

Chicago, IL produced the most small forwards.

Which schools signed the most small forwards?

In [None]:
# Small forwards signed per college.
sf_signings = sf_df["school"].value_counts().reset_index()
sf_signings

In [None]:
sf_signings.columns = ['School', 'Signees']
sf_signings

Duke signed the most small forwards.

Power Forward

In [None]:
# Keep only the rows whose position is power forward.
pf_df = espn_100.loc[espn_100["pos"] == "PF"]
pf_df

In [None]:
pf_df.ht_inches.mean()

The average power forward is 6'8''

In [None]:
pf_df.ht_inches.min()

The shortest power forward is 6'5''

In [None]:
pf_df.ht_inches.max()

The tallest power forward is 6'11''

In [None]:
pf_df.weight.mean()

The average power forward weighs 214 lbs.

In [None]:
pf_df.weight.min()

The lightest power forward weighs 160 lbs.

In [None]:
pf_df.weight.max()

The heaviest power forward weighs 300 lbs.

Which states produce the most power forwards?

In [None]:
# Power forwards per state.
pf_states = pf_df["state"].value_counts().reset_index()

In [None]:
pf_states.columns = ['State', 'Signees']
pf_states

In [None]:
pf_states.Signees.nlargest(5)

In [None]:
pf_states.Signees.nsmallest(5)

California produced the most power forwards.

Which cities produce the most power forwards?

In [None]:
# Power forwards per hometown.
pf_cities = pf_df["hometown"].value_counts().reset_index()

In [None]:
pf_cities.columns = ['City', 'Signees']
pf_cities

In [None]:
pf_cities.Signees.nlargest(5)

In [None]:
pf_cities.Signees.nsmallest(5)

Chicago, IL produced the most power forwards.

Which schools signed the most power forwards?

In [None]:
# Power forwards signed per college.
pf_signings = pf_df["school"].value_counts().reset_index()
pf_signings

In [None]:
pf_signings.columns = ['School', 'Signees']
pf_signings

Kentucky signed the most power forwards.

Center

In [None]:
# Keep only the rows whose position is center.
center_df = espn_100.loc[espn_100["pos"] == "C"]
center_df

In [None]:
center_df.ht_inches.mean()

The average center is 6'10''

In [None]:
center_df.ht_inches.min()

The shortest center is 6'7''

In [None]:
center_df.ht_inches.max()

The tallest center is 7'5''

In [None]:
center_df.weight.mean()

The average center weighs 232 lbs.

In [None]:
center_df.weight.min()

The lightest center weighs 190 lbs.

In [None]:
center_df.weight.max()

The heaviest center weighs 330 lbs.

Which states produce the most centers?

In [None]:
# Centers per state.
center_states = center_df["state"].value_counts().reset_index()

In [None]:
center_states.columns = ['State', 'Signees']
center_states

In [None]:
center_states.Signees.nlargest(5)

In [None]:
center_states.Signees.nsmallest(5)

Texas produced the most centers

Which cities produce the most centers?

In [None]:
# Centers per hometown.
center_cities = center_df["hometown"].value_counts().reset_index()

In [None]:
center_cities.columns = ['City', 'Signees']
center_cities

In [None]:
center_cities.Signees.nlargest(5)

In [None]:
center_cities.Signees.nsmallest(5)

Philadelphia, PA produced the most centers.

Which schools signed the most centers?

In [None]:
# Centers signed per college.
center_signings = center_df["school"].value_counts().reset_index()
center_signings

In [None]:
center_signings.columns = ['School', 'Signees']
center_signings

Kentucky signed the most centers.

# McDonald's All-American & Jordan Brand Classic Information

McDonald's All-American Info

In [None]:
# Recruits flagged 'YES' in the McDonald's All-American column.
mdaa = espn_100.loc[espn_100["mcdonalds_aa"] == "YES"]
mdaa

In [None]:
mdaa = pd.DataFrame(mdaa)
mdaa

In [None]:
# Count the McDonald's All-Americans. sum() on the player-name column would
# concatenate every name into one long string instead of counting rows.
mdaa['player'].count()

In [None]:
# Select the column with brackets: `mdaa.rank` resolves to the DataFrame.rank
# method, not the `rank` column, so `.mean()` on it raises AttributeError.
mdaa['rank'].mean()

The average rank for a McDonald's All-American is

In [None]:
# Select the column with brackets: `mdaa.rank` resolves to the DataFrame.rank
# method, not the `rank` column, so `.min()` on it raises AttributeError.
mdaa['rank'].min()

The lowest rank for a McDonald's All-American is

In [None]:
# Select the column with brackets: `mdaa.rank` resolves to the DataFrame.rank
# method, not the `rank` column, so `.max()` on it raises AttributeError.
mdaa['rank'].max()

In [None]:
mdaa.grade.mean()

The average grade for a McDonald's All-American is 94.45

In [None]:
mdaa.grade.min()

The lowest grade for a McDonald's All-American is 86

In [None]:
mdaa.grade.max()

The highest grade for a McDonald's All-American is 99

In [None]:
# Correlate numeric columns only: recent pandas raises on text columns
# instead of silently dropping them.
mdaa.select_dtypes(include='number').corr()

In [None]:
# McDonald's All-Americans signed per college.
mdaa_signings = mdaa["school"].value_counts().reset_index()
mdaa_signings

In [None]:
mdaa_signings.columns = ['School', 'Signees']
mdaa_signings

Duke Signed the most McDonald's All-Americans

Jordan Brand Classic Info

In [None]:
# Recruits flagged 'YES' in the Jordan Brand Classic column.
jbc = espn_100.loc[espn_100["jbc"] == "YES"]
jbc

In [None]:
jbc = pd.DataFrame(jbc)
jbc

In [None]:
# Count the Jordan Brand Classic participants. sum() on the player-name
# column would concatenate every name into one string instead of counting.
jbc['player'].count()

In [None]:
# Select the column with brackets: `jbc.rank` resolves to the DataFrame.rank
# method, not the `rank` column, so `.mean()` on it raises AttributeError.
jbc['rank'].mean()

The average rank for a JBC Participant is

In [None]:
# Select the column with brackets: `jbc.rank` resolves to the DataFrame.rank
# method, not the `rank` column, so `.min()` on it raises AttributeError.
jbc['rank'].min()

The lowest rank for a JBC Participant is

In [None]:
# Select the column with brackets: `jbc.rank` resolves to the DataFrame.rank
# method, not the `rank` column, so `.max()` on it raises AttributeError.
jbc['rank'].max()

The highest rank for a JBC Participant is

In [None]:
jbc.grade.mean()

The average grade for a JBC Participant is 93

In [None]:
jbc.grade.min()

The lowest grade for a JBC Participant is 84

In [None]:
jbc.grade.max()

The highest grade for a JBC Participant is 99

In [None]:
# Correlate numeric columns only: recent pandas raises on text columns
# instead of silently dropping them.
jbc.select_dtypes(include='number').corr()

In [None]:
# Jordan Brand Classic participants signed per college.
jbc_signings = jbc["school"].value_counts().reset_index()
jbc_signings

In [None]:
jbc_signings.columns = ['School', 'Signees']
jbc_signings

Kentucky had the most Jordan Brand Classic participants.

# Top Recruit Information

In [None]:
# Select the #1-ranked recruit rows. Comparing against the string '1' alone
# matches nothing when `rank` is stored as a number, so accept both forms.
top_recruit = espn_100[espn_100['rank'].isin([1, '1'])]
top_recruit

In [None]:
top_recruit = pd.DataFrame(top_recruit)
top_recruit

In [None]:
# Select the column with brackets: `top_recruit.rank` resolves to the
# DataFrame.rank method, not the `rank` column.
top_recruit['rank'].mean()

The average rank for the top recruit is

In [None]:
# Select the column with brackets: `top_recruit.rank` resolves to the
# DataFrame.rank method, not the `rank` column.
top_recruit['rank'].min()

The lowest rank for the top recruit is

In [None]:
# Select the column with brackets: `top_recruit.rank` resolves to the
# DataFrame.rank method, not the `rank` column.
top_recruit['rank'].max()

The highest rank for the top recruit is

In [None]:
top_recruit.grade.mean()

The average grade for the top recruit is

In [None]:
top_recruit.grade.min()

The lowest grade for the top recruit is

In [None]:
top_recruit.grade.max()

The highest grade for the top recruit is

In [None]:
# Correlate numeric columns only: recent pandas raises on text columns
# instead of silently dropping them.
top_recruit.select_dtypes(include='number').corr()

In [None]:
# Count how many #1-ranked recruits each college signed, then display that
# tally (the cell previously re-displayed `top_recruit` by mistake).
top_recruit_signings = top_recruit['school'].value_counts().reset_index()
top_recruit_signings

In [None]:
# Label the two columns of the signings tally. This was applied to
# `top_recruit`, whose column count does not match, and would also have
# overwritten the recruit table's real headers.
top_recruit_signings.columns = ['School', 'Signees']
top_recruit_signings

signed the most #1 ranked recruits

# Autovisualizations

In [None]:
# Create the AutoViz object once; later cells reuse `AV` and `filename`.
AV = AutoViz_Class()

# AutoViz is given the CSV path (dfte=None), not the in-memory `espn_100` frame.
filename = 'espn_100_database.csv'
# Generate the automatic charts with `pos` as the dependent variable.
AV.AutoViz(filename, depVar='pos', sep=",", dfte=None, chart_format='svg', max_rows_analyzed=160000, max_cols_analyzed=30)

In [None]:
AV.AutoViz(filename, depVar='ht_inches', sep=",", dfte=None, chart_format='svg', max_rows_analyzed=160000, max_cols_analyzed=30)

In [None]:
AV.AutoViz(filename, depVar='weight', sep=",", dfte=None, chart_format='svg', max_rows_analyzed=160000, max_cols_analyzed=30)

In [None]:
AV.AutoViz(filename, depVar='hometown', sep=",", dfte=None, chart_format='svg', max_rows_analyzed=160000, max_cols_analyzed=30)

In [None]:
AV.AutoViz(filename, depVar='state', sep=",", dfte=None, chart_format='svg', max_rows_analyzed=160000, max_cols_analyzed=30)

In [None]:
AV.AutoViz(filename, depVar='high_school', sep=",", dfte=None, chart_format='svg', max_rows_analyzed=160000, max_cols_analyzed=30)

In [None]:
AV.AutoViz(filename, depVar='hs_type', sep=",", dfte=None, chart_format='svg', max_rows_analyzed=160000, max_cols_analyzed=30)

In [None]:
AV.AutoViz(filename, depVar='hs_affiliation_type', sep=",", dfte=None, chart_format='svg', max_rows_analyzed=160000, max_cols_analyzed=30)

In [None]:
AV.AutoViz(filename, depVar='school', sep=",", dfte=None, chart_format='svg', max_rows_analyzed=160000, max_cols_analyzed=30)

In [None]:
AV.AutoViz(filename, depVar='stars', sep=",", dfte=None, chart_format='svg', max_rows_analyzed=160000, max_cols_analyzed=30)

In [None]:
AV.AutoViz(filename, depVar='grade', sep=",", dfte=None, chart_format='svg', max_rows_analyzed=160000, max_cols_analyzed=30)

In [None]:
AV.AutoViz(filename, depVar='class', sep=",", dfte=None, chart_format='svg', max_rows_analyzed=160000, max_cols_analyzed=30)

In [None]:
AV.AutoViz(filename, depVar='rank', sep=",", dfte=None, chart_format='svg', max_rows_analyzed=160000, max_cols_analyzed=30)

In [None]:
AV.AutoViz(filename, depVar='player', sep=",", dfte=None, chart_format='svg', max_rows_analyzed=160000, max_cols_analyzed=30)

In [None]:
AV.AutoViz(filename, depVar='first_name', sep=",", dfte=None, chart_format='svg', max_rows_analyzed=160000, max_cols_analyzed=30)

In [None]:
AV.AutoViz(filename, depVar='last_name', sep=",", dfte=None, chart_format='svg', max_rows_analyzed=160000, max_cols_analyzed=30)

# Export DataFrames to an Excel Spreadsheet

In [None]:
# Sheet name -> DataFrame, in the order the sheets appear in the workbook.
# Fixes two errors that stopped this cell from running: a doubled dot in
# `positions..to_excel` and the misspelled name `Center_signings`.
excel_sheets = {
    'Positions': positions,
    'College Signings': college_signings,
    'MDAA General Info': num_mdaa,
    'JBC General Info': num_jbc,
    'States': states,
    'Cities': cities,
    'High School Type': hs_type,
    'High School Affiliation': hs_affiliation,
    'Point Guard Signings': pg_signings,
    'Point Guard States': pg_states,
    'Point Guard Cities': pg_cities,
    'Shooting Guard Signings': sg_signings,
    'Shooting Guard States': sg_states,
    'Shooting Guard Cities': sg_cities,
    'Small Forward Signings': sf_signings,
    'Small Forward States': sf_states,
    'Small Forward Cities': sf_cities,
    'Power Forward Signings': pf_signings,
    'Power Forward States': pf_states,
    'Power Forward Cities': pf_cities,
    'Center Signings': center_signings,
    'Center States': center_states,
    'Center Cities': center_cities,
    'McDonald All Americans': mdaa,
    'JBC Participants': jbc,
    'MDAA College Signings': mdaa_signings,
    'JBC College Signings': jbc_signings,
    'Top Recruit': top_recruit,
}

# The context manager saves and closes the workbook even if a write fails,
# so no half-written file handle is left open.
with pd.ExcelWriter('espn100bb_analysis.xlsx') as xlwriter:
    for sheet_name, frame in excel_sheets.items():
        frame.to_excel(xlwriter, sheet_name=sheet_name)