In [1]:
import plotly.plotly as py
import pandas as pd
import numpy as np

# Load the cleaned dataset; `school_data` is read again by the later cells.
school_data = pd.read_csv('clean_data.csv')

# Mean of 'Paid Tuition per Full time student' for each state abbreviation (STABBR).
avg_tuition_state = (
    school_data
    .groupby(['STABBR'])['Paid Tuition per Full time student']
    .mean()
)
df = pd.DataFrame({
    'state': avg_tuition_state.index,
    'tuition': avg_tuition_state.values,
})

# Six-stop colour scale: green at the low end of z, red at the high end.
# Shared by every choropleth in this notebook.
scl = [
    [0.0, 'rgb(26,152,80)'],
    [0.2, 'rgb(145,207,96)'],
    [0.4, 'rgb(217,239,139)'],
    [0.6, 'rgb(254,224,139)'],
    [0.8, 'rgb(252,141,89)'],
    [1.0, 'rgb(215,48,39)'],
]

# Single choropleth trace keyed on state abbreviations, coloured by mean tuition.
data = [{
    'type': 'choropleth',
    'colorscale': scl,
    'autocolorscale': False,
    'locations': df['state'],
    'z': df['tuition'].astype(float),
    'locationmode': 'USA-states',
    # White outlines between states.
    'marker': {'line': {'color': 'rgb(255,255,255)', 'width': 2}},
    'colorbar': {'title': "Tuition"},
}]

# Restrict the map to the USA and draw lakes in white.
layout = {
    'title': 'Average Tuition by State',
    'geo': {
        'scope': 'usa',
        'projection': {'type': 'albers usa'},
        'showlakes': True,
        'lakecolor': 'rgb(255,255,255)',
    },
}

fig = {'data': data, 'layout': layout}
py.iplot(fig)

In [2]:
# Average DTI (debt-to-income) percentage by state:
# mean 'Graduate Median Debt' divided by mean 'Early Career' pay, times 100.
grad_avg_debt = school_data.groupby(['STABBR'])['Graduate Median Debt'].mean()
avg_early_pay = school_data.groupby(['STABBR'])['Early Career'].mean()
DTI = grad_avg_debt.div(avg_early_pay).mul(100)

# One row per state with both inputs and the resulting ratio.
df2 = pd.DataFrame({
    'state': grad_avg_debt.index,
    'Avg Median Debt by State': grad_avg_debt.values,
    'Avg Early Pay by State': avg_early_pay.values,
    'DTI': DTI.values,
})

# Choropleth trace coloured by the DTI percentage.
data2 = [{
    'type': 'choropleth',
    'colorscale': scl,
    'autocolorscale': False,
    'locations': df2['state'],
    'z': df2['DTI'].astype(float),
    'locationmode': 'USA-states',
    # White outlines between states.
    'marker': {'line': {'color': 'rgb(255,255,255)', 'width': 2}},
    'colorbar': {'title': "DTI %"},
}]

# USA-only map with lakes drawn in white.
layout2 = {
    'title': 'DTI % Average by State',
    'geo': {
        'scope': 'usa',
        'projection': {'type': 'albers usa'},
        'showlakes': True,
        'lakecolor': 'rgb(255,255,255)',
    },
}

fig2 = {'data': data2, 'layout': layout2}
py.iplot(fig2)

In [3]:
# Average early-career pay by state: mean of the 'Early Career' column per STABBR.
early_career_state = school_data.groupby(['STABBR'])['Early Career'].mean()
df3 = pd.DataFrame({
    'state': early_career_state.index,
    'Avg Early Career Pay': early_career_state.values,
})

# Choropleth trace coloured by mean early-career pay.
data3 = [dict(
    type='choropleth',
    colorscale=scl,
    autocolorscale=False,
    # Take the state codes from df3, the same frame that supplies z, so that
    # locations and values are guaranteed to line up and this cell does not
    # depend on the DTI cell (df2) having been executed first.
    locations=df3['state'],
    z=df3['Avg Early Career Pay'].astype(float),
    locationmode='USA-states',
    # White outlines between states.
    marker=dict(
        line=dict(
            color='rgb(255,255,255)',
            width=2,
        )),
    colorbar=dict(
        title="Early Career Pay ($)"),
)]

# USA-only map with lakes drawn in white.
layout3 = dict(
    title='Average Early Career Pay by State',
    geo=dict(
        scope='usa',
        projection=dict(type='albers usa'),
        showlakes=True,
        lakecolor='rgb(255,255,255)'),
)
fig3 = dict(data=data3, layout=layout3)
py.iplot(fig3)

In [4]:
# Where the higher-earning degree fields are concentrated, by state.
#
# Each field is aggregated with the per-state MEAN of the school-level values.
# The column labels below ('% avg ...') and the colorbar title ("Avg % by State")
# describe an average; a per-state sum would instead grow with the number of
# schools in the state rather than with the share of degrees awarded.
# NOTE(review): assumes each field column holds a per-school share of degrees
# awarded -- confirm against the dataset's data dictionary.
degrees_by_state = school_data.groupby(['STABBR'])
healthcare_state = degrees_by_state['Health Professions'].mean()
engineering_state = degrees_by_state['Engineering'].mean()
engineeringtech_state = degrees_by_state['Engineering Technologies and Engineering-Related Fields'].mean()
compsci_state = degrees_by_state['Computer and Information Sciences and Support Services'].mean()

# Combined average share across the four fields (index-aligned Series addition).
total_high_degrees = healthcare_state + engineering_state + engineeringtech_state + compsci_state

df5 = pd.DataFrame({
    'state': healthcare_state.index,
    '% avg healthcare degrees': healthcare_state.values,
    '% avg engineering degrees': engineering_state.values,
    '% avg engineering tech degrees': engineeringtech_state.values,
    '%avg comp sci degrees': compsci_state.values,
    'total': total_high_degrees.values,
})

# Choropleth trace coloured by the combined average share.
data5 = [dict(
    type='choropleth',
    colorscale=scl,
    autocolorscale=False,
    locations=df5['state'],
    z=df5['total'].astype(float),
    locationmode='USA-states',
    # White outlines between states.
    marker=dict(
        line=dict(
            color='rgb(255,255,255)',
            width=2,
        )),
    colorbar=dict(
        title="Avg % by State of <br>Degrees Received"),
)]

# USA-only map with lakes drawn in white.
layout5 = dict(
    title='Where the Highest Earning Degrees are being Received <br>(Healthcare, Engineering, Computer Science)',
    geo=dict(
        scope='usa',
        projection=dict(type='albers usa'),
        showlakes=True,
        lakecolor='rgb(255,255,255)'),
)
fig5 = dict(data=data5, layout=layout5)
py.iplot(fig5)
