## Analysis of Tuition, Fees, Costs and Financial Aid

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline
# Plot styling: seaborn's "whitegrid" is set first, then matplotlib's
# "fivethirtyeight" style sheet is applied.
# NOTE(review): both calls write to matplotlib's rcParams, so where they overlap
# the later fivethirtyeight values presumably win -- confirm this order is intended.
sns.set_style("whitegrid")
plt.style.use("fivethirtyeight")

## Read in tuition and financial aid data

In [2]:
# Load the two source tables for this analysis. Per the file names these are the
# IPEDS 2017-18 cost data and part one of the student financial aid data.
# Both frames expose a `school_id` column (see the column listings below).
tuition_fees = pd.read_csv("../data/ipeds_costs_2017_18.csv")
finan_aid_one = pd.read_csv("../data/ipeds_student_finan_aid_part_one.csv")

In [3]:
tuition_fees.shape

(4281, 121)

In [4]:
tuition_fees.columns

Index(['school_id', 'dist_tuit_ft_ug', 'dist_fees_ft_ug', 'dist_hour_pt_ug',
       'is_tuit_ft_ug', 'is_fees_ft_ug', 'is_hour_pt_ug', 'os_tuition_ft_ug',
       'os_fees_ft_ug', 'os_hour_pt_ug',
       ...
       'off_nofam_rmbd_17', 'off_nofam_rmbd_18', 'off_nofam_other_15',
       'off_nofam_other_16', 'off_nofam_other_17', 'off_other_nofam_18',
       'off_other_fam_15', 'off_other_fam_16', 'off_other_fam_17',
       'off_other_fam_18'],
      dtype='object', length=121)

In [5]:
finan_aid_one.shape

(6394, 64)

In [6]:
finan_aid_one.columns

Index(['school_id', 'num_student_fall', 'pct_student_fall',
       'num_stud_dist_fall', 'pct_stud_dist_fall', 'num_stud_is_fall',
       'pct_stud_is_fall', 'num_stud_oos_fall', 'pct_stud_oos_fall',
       'total_ug_fall', 'num_ft_ug_anyaid', 'pct_ft_ug_anyaid',
       'num_ft_ug_fedaid', 'pct_ft_ug_fedaid', 'ft_ug_avg_aid',
       'ft_ug_stateaid', 'pct_ft_ug_stateaid', 'avg_ft_ug_stateaid',
       'num_ft_ug_instgrant', 'pct_ft_ug_instgrant', 'avg_ft_ug_instgrant',
       'num_ft_ug_loans', 'pct_ft_ug_loans', 'avg_ft_ug_loans', 'SCUGFFN',
       'SCUGRAD', 'UAGRNTT', 'UPGRNTN', 'AGRNT_N', 'AGRNT_P', 'AGRNT_A',
       'PGRNT_N', 'PGRNT_P', 'PGRNT_A', 'OFGRT_N', 'OFGRT_P', 'OFGRT_A',
       'FLOAN_N', 'FLOAN_P', 'FLOAN_A', 'OLOAN_N', 'OLOAN_P', 'OLOAN_A',
       'UAGRNTN', 'UAGRNTP', 'UAGRNTA', 'UPGRNTP', 'UPGRNTT', 'UPGRNTA',
       'UFLOANN', 'UFLOANP', 'UFLOANT', 'UFLOANA', 'AGRNT_T', 'FGRNT_T',
       'PGRNT_T', 'OFGRT_T', 'SGRNT_T', 'tot_ft_ug_inst_grant',
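       'tot_ft_ug_loans', 'tot_ft_ug_fedloan', 'tot_ft_ug_other',
       'num_ft_ug_fed_grant', 'pct_ft_ug_fed_grant'],
      dtype='object')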

In [7]:
tuition_fees.head()

Unnamed: 0,school_id,dist_tuit_ft_ug,dist_fees_ft_ug,dist_hour_pt_ug,is_tuit_ft_ug,is_fees_ft_ug,is_hour_pt_ug,os_tuition_ft_ug,os_fees_ft_ug,os_hour_pt_ug,...,off_nofam_rmbd_17,off_nofam_rmbd_18,off_nofam_other_15,off_nofam_other_16,off_nofam_other_17,off_other_nofam_18,off_other_fam_15,off_other_fam_16,off_other_fam_17,off_other_fam_18
0,100654,8379.0,1478.0,274.0,8379.0,1478.0,274.0,16758.0,1478.0,548.0,...,8830.0,8379.0,2748.0,3790.0,3090.0,2580.0,1300.0,3790.0,3090.0,2580.0
1,100663,8328.0,0.0,347.0,8328.0,0.0,347.0,19032.0,0.0,793.0,...,11682.0,11682.0,4886.0,4886.0,4886.0,4886.0,4886.0,4886.0,4886.0,4886.0
2,100690,9000.0,900.0,430.0,9000.0,900.0,430.0,9000.0,900.0,430.0,...,9600.0,9600.0,1600.0,1500.0,1600.0,1600.0,1600.0,1500.0,1600.0,1600.0
3,100706,9356.0,924.0,411.0,9356.0,924.0,411.0,20556.0,924.0,908.0,...,9603.0,9748.0,4302.0,4508.0,3578.0,3697.0,4302.0,4508.0,3578.0,3697.0
4,100724,8328.0,2740.0,347.0,8328.0,2740.0,347.0,16656.0,2740.0,694.0,...,7320.0,7320.0,3172.0,4228.0,4228.0,4228.0,3172.0,4228.0,4228.0,4228.0


In [8]:
finan_aid_one.head()

Unnamed: 0,school_id,num_student_fall,pct_student_fall,num_stud_dist_fall,pct_stud_dist_fall,num_stud_is_fall,pct_stud_is_fall,num_stud_oos_fall,pct_stud_oos_fall,total_ug_fall,...,FGRNT_T,PGRNT_T,OFGRT_T,SGRNT_T,tot_ft_ug_inst_grant,tot_ft_ug_loans,tot_ft_ug_fedloan,tot_ft_ug_other,num_ft_ug_fed_grant,pct_ft_ug_fed_grant
0,100654,1410,29,0,0,858,61,552,39,4851,...,4815142,4683617,131525,751194,4199256,6622483,6412830,209653,1272,90
1,100663,1948,16,0,0,1654,85,294,15,12369,...,3992347,3709480,282867,37357,11599891,7784870,6538553,1246317,1807,93
2,100690,4,1,0,0,0,0,0,0,294,...,10861,10661,200,300,5300,13166,13166,0,2,50
3,100706,1203,18,0,0,930,77,273,23,6507,...,1121742,1040120,81622,11250,8731653,3228422,2660723,567699,1060,88
4,100724,1143,24,0,0,584,51,401,35,4727,...,4249148,4119764,129384,771223,2636196,5776381,5644876,131505,1070,94


## Read in the filtered IPEDS directory data and merge it with costs

In [9]:
# Load the institution directory table (presumably the output of an earlier
# filtering step, going by the file name).
# NOTE(review): after the merge below, `directory_costs` starts with an
# 'Unnamed: 0' column that is not in `tuition_fees`, so it comes from this CSV --
# presumably an index written out by a previous `to_csv`. Confirm, and consider
# `index_col=0` here if that column is not needed downstream.
ipeds_directory = pd.read_csv("../data/directory_filtered.csv")

In [10]:
ipeds_directory.shape

(6993, 58)

In [11]:
# Attach the tuition/fee columns to each directory record, matching on school_id.
# Inner join (the pandas default): only schools present in both tables are kept.
directory_costs = ipeds_directory.merge(tuition_fees, how="inner", on="school_id")

In [12]:
directory_costs.shape

(4170, 178)

In [14]:
directory_costs.columns

Index(['Unnamed: 0', 'school_id', 'institution_name', 'address', 'city',
       'state_abbr', 'zipcode', 'fips_code', 'app_url', 'net_price_url',
       ...
       'off_nofam_rmbd_17', 'off_nofam_rmbd_18', 'off_nofam_other_15',
       'off_nofam_other_16', 'off_nofam_other_17', 'off_other_nofam_18',
       'off_other_fam_15', 'off_other_fam_16', 'off_other_fam_17',
       'off_other_fam_18'],
      dtype='object', length=178)