In [1]:
# Dependencies
import pandas as pd
import numpy as np
import requests
import json
from pprint import pprint

# Import API key
from doe_api import doe_key

In [2]:
#demographics

url = 'https://api.data.gov/ed/collegescorecard/v1/schools.json'
params1 = {
    'api_key': doe_key,
    'per_page': 250,
    'fields': "latest.root.id,latest.root.location.lon,latest.root.location.lat,latest.school.name,"+
    
    "latest.school.zip,latest.student.size,latest.root.id,latest.school.region_id,"+
    "latest.aid.pell_grant_rate,latest.school.faculty_salary,latest.school.ft_faculty_rate,"+
    
    #admissions test requirements
    "latest.admissions.test_requirements,"+
    'latest.admissions.admission_rate.overall,'+
    
    #completion rate demographics
    'latest.completion.completion_rate_4yr_150nt,'+
    "latest.completion.completion_rate_4yr_150_nhpi,"+ 
    "latest.completion.completion_rate_4yr_150_aian,latest.completion.completion_rate_4yr_150_asian,"+
    "latest.completion.completion_rate_4yr_150_hispanic,latest.completion.completion_rate_4yr_150_black,"+
    "latest.completion.completion_rate_4yr_150_white,"+
    
    
    #student demographics
    "latest.student.demographics.race_ethnicity.white,latest.student.demographics.race_ethnicity.hispanic,"+
    "latest.student.demographics.race_ethnicity.black,latest.student.demographics.race_ethnicity.api,"+
    "latest.student.demographics.race_ethnicity.aian,latest.student.demographics.women,latest.student.demographics.men,"+
    
    #median debt per demographic
    "latest.aid.median_debt.income.30001_75000,latest.aid.median_debt.income.0_30000,latest.aid.median_debt.income.greater_than_75000,"+
    "latest.aid.median_debt.noncompleters,latest.aid.median_debt.completers.overall,latest.aid.median_debt.male_students,"+
    "latest.aid.median_debt.independent_students,latest.aid.median_debt.female_students,latest.aid.median_debt.dependent_students,"+
    
    #religious affiliation
    "latest.school.religious_affiliation,"+
    
    #undergrads age 25 and above
    "latest.student.share_25_older"+
    
    #page marker
    'page=0'
    
    
}

In [3]:
# Initialize an empty list to store the data
demographics_data = []
#starting page is 0
params1['page'] = 0

#loop that will loop through the page until we reach page 6
while params1['page'] < 7:
    #api call
    response = requests.get(url, params=params1).json()
    next_page_results = response.get("results", [])
    demographics_data.extend(next_page_results)
    #to go to the next page of results
    params1['page'] += 1

In [4]:
demographics_df = pd.DataFrame(demographics_data)
demographics_df = demographics_df.fillna(0)
demographics_df = demographics_df.drop(columns = ['latest.school.zip','latest.aid.pell_grant_rate',
                                                  'latest.school.faculty_salary',
                                                  'latest.school.ft_faculty_rate',
                                                 'latest.school.religious_affiliation',
                                                 'latest.student.demographics.race_ethnicity.api_2000',
                                                 'latest.school.region_id',
                                                 'latest.aid.median_debt.dependent_students',
                                                'latest.completion.completion_rate_4yr_150_white',
                                                 'latest.completion.completion_rate_4yr_150_nhpi',
                                                 'latest.completion.completion_rate_4yr_150_aian',
                                                 'latest.completion.completion_rate_4yr_150_asian',
                                                 'latest.completion.completion_rate_4yr_150_hispanic',
                                                 'latest.completion.completion_rate_4yr_150_black',
                                                 'latest.aid.median_debt.income.30001_75000',
                                                 'latest.aid.median_debt.income.0_30000',
                                                 'latest.aid.median_debt.income.greater_than_75000',
                                                  'latest.aid.median_debt.noncompleters',
                                                 'latest.aid.median_debt.completers.overall',
                                                  'latest.aid.median_debt.male_students',
                                                  'latest.aid.median_debt.independent_students',
                                                 'latest.aid.median_debt.female_students'])

In [5]:
demographics_clean = demographics_df.rename(columns={
    'latest.school.name':'School Name',
    'latest.student.size':'Student Count',
    'latest.admissions.test_requirements':'Standardized Test required? (1:Required, 2:Recommended, 3:Neither required nor recommended , 4:Do not know, 5: Considered but not required)',
    'latest.admissions.admission_rate.overall': 'Admission Rate',
    'latest.completion.completion_rate_4yr_150nt':'4Yr Uni Completion Rates',
    'latest.student.demographics.race_ethnicity.white': '% White Students',
    'latest.student.demographics.race_ethnicity.hispanic':'% Hispanic Students',
    'latest.student.demographics.race_ethnicity.black': '% Black Students',
    'latest.student.demographics.race_ethnicity.aian': '% AIAN Students',
    'latest.student.demographics.women':'% Female Students',
    'latest.student.demographics.men':'% Male Students'})

In [6]:
demographics_clean['Admission Rate'] = demographics_clean['Admission Rate'] * 100
demographics_clean['4Yr Uni Completion Rates'] = demographics_clean['4Yr Uni Completion Rates'] * 100
demographics_clean['% White Students'] = demographics_clean['% White Students'] * 100
demographics_clean['% Hispanic Students'] = demographics_clean['% Hispanic Students'] * 100
demographics_clean['% Black Students'] = demographics_clean['% Black Students'] * 100
demographics_clean['% AIAN Students'] = demographics_clean['% AIAN Students'] * 100
demographics_clean['% Female Students'] = demographics_clean['% Female Students'] * 100
demographics_clean['% Male Students'] = demographics_clean['% Male Students'] * 100

In [7]:
demographics_clean

Unnamed: 0,School Name,Student Count,"Standardized Test required? (1:Required, 2:Recommended, 3:Neither required nor recommended , 4:Do not know, 5: Considered but not required)",Admission Rate,4Yr Uni Completion Rates,% White Students,% Hispanic Students,% Black Students,% AIAN Students,% Female Students,% Male Students
0,Alabama A & M University,5098.0,5.0,71.60,28.07,1.84,1.14,89.78,0.27,59.40,40.60
1,University of Alabama at Birmingham,13284.0,5.0,88.54,62.45,52.97,6.69,24.58,0.21,62.10,37.90
2,Amridge University,251.0,0.0,0.00,44.44,24.70,4.38,69.32,0.00,68.53,31.47
3,University of Alabama in Huntsville,7358.0,1.0,73.67,60.72,71.96,6.10,8.71,0.80,40.47,59.53
4,Alabama State University,3495.0,5.0,97.99,28.43,1.52,1.29,92.59,0.17,64.95,35.05
...,...,...,...,...,...,...,...,...,...,...,...
695,Robert Morgan Educational Center and Technical...,435.0,0.0,0.00,0.00,10.11,60.00,26.90,0.00,47.59,52.41
696,Rollins College,2501.0,2.0,48.69,70.59,56.74,18.87,5.44,0.12,59.94,40.06
697,First Coast Technical College,379.0,0.0,0.00,0.00,68.07,9.76,17.15,0.00,53.83,46.17
698,Saint Leo University,6456.0,5.0,70.66,46.25,30.59,17.77,22.92,0.51,61.03,38.97


In [45]:
demographics_clean.to_csv('data/demographics.csv', index=False, header=True)

In [3]:
#male/female
url = 'https://api.data.gov/ed/collegescorecard/v1/schools.json'
params3 = {
    'api_key': doe_key,
    'per_page': 250,
    'fields': "latest.root.id,latest.root.location.lon,latest.root.location.lat,latest.school.name,"+
    
    #male 
    #general completion
    "latest.completion.title_iv.male.completed_by.2yrs,latest.completion.title_iv.male.completed_by.3yrs,"+
    "latest.completion.title_iv.male.completed_by.4yrs,latest.completion.title_iv.male.completed_by.6yrs,"+
    "latest.completion.title_iv.male.completed_by.8yrs,"+
    #general withdrew
    "latest.completion.title_iv.male.withdrawn_by.2yrs,latest.completion.title_iv.male.withdrawn_by.3yrs,"+
    "latest.completion.title_iv.male.withdrawn_by.4yrs,latest.completion.title_iv.male.withdrawn_by.6yrs,"+
    "latest.completion.title_iv.male.withdrawn_by.8yrs,"+
    
    
    #female 
    #general completion
    "latest.completion.title_iv.female.completed_by.2yrs,latest.completion.title_iv.female.completed_by.3yrs,"+
    "latest.completion.title_iv.female.completed_by.4yrs,latest.completion.title_iv.female.completed_by.6yrs,"+
    "latest.completion.title_iv.female.completed_by.8yrs,"+
    #general withdrew
    "latest.completion.title_iv.female.withdrawn_by.2yrs,latest.completion.title_iv.female.withdrawn_by.3yrs,"+
    "latest.completion.title_iv.female.withdrawn_by.4yrs,latest.completion.title_iv.female.withdrawn_by.6yrs,"+
    "latest.completion.title_iv.female.withdrawn_by.8yrs,"+
    
    #page marker
    'page=0'
    
}

In [4]:
# Initialize an empty list to store the data
gender_completion = []
#starting page is 0
params3['page'] = 0

#loop that will loop through the page until we reach page 6
while params3['page'] < 7:
    #api call
    response = requests.get(url, params=params3).json()
    next_page_results = response.get("results", [])
    gender_completion.extend(next_page_results)
    #to go to the next page of results
    params3['page'] += 1

In [13]:
#create dataframe
gender_completion_df = pd.DataFrame(gender_completion)
#fill nan values to 0
gender_completion_df = gender_completion_df.fillna(0)
#rename columns
gender_completion_clean = gender_completion_df.rename(columns = {
        'latest.school.name': 'School Name',
       'latest.completion.title_iv.male.completed_by.2yrs':'% Male Students Completed within 2yrs',
       'latest.completion.title_iv.male.completed_by.3yrs':'% Male Students Completed within 3yrs',
       'latest.completion.title_iv.male.completed_by.4yrs':'% Male Students Completed within 4yrs',
       'latest.completion.title_iv.male.completed_by.6yrs':'% Male Students Completed within 6yrs',
       'latest.completion.title_iv.male.completed_by.8yrs':'% Male Students Completed within 8yrs',
       'latest.completion.title_iv.male.withdrawn_by.2yrs':'% Male Students Withdrawn by 2yrs',
       'latest.completion.title_iv.male.withdrawn_by.3yrs':'% Male Students Withdrawn by 3yrs',
       'latest.completion.title_iv.male.withdrawn_by.4yrs':'% Male Students Withdrawn by 4yrs',
       'latest.completion.title_iv.male.withdrawn_by.6yrs':'% Male Students Withdrawn by 6yrs',
       'latest.completion.title_iv.male.withdrawn_by.8yrs':'% Male Students Withdrawn by 8yrs',
       'latest.completion.title_iv.female.completed_by.2yrs':'% Female Students Completed within 2yrs',
       'latest.completion.title_iv.female.completed_by.3yrs':'% Female Students Completed within 3yrs',
       'latest.completion.title_iv.female.completed_by.4yrs':'% Female Students Completed within 4yrs',
       'latest.completion.title_iv.female.completed_by.6yrs':'% Female Students Completed within 6yrs',
       'latest.completion.title_iv.female.completed_by.8yrs':'% Female Students Completed within 8yrs',
       'latest.completion.title_iv.female.withdrawn_by.2yrs':'% Female Students Withdrawn by 2yrs',
       'latest.completion.title_iv.female.withdrawn_by.3yrs':'% Female Students Withdrawn by 3yrs',
       'latest.completion.title_iv.female.withdrawn_by.4yrs':'% Female Students Withdrawn by 4yrs',
       'latest.completion.title_iv.female.withdrawn_by.6yrs':'% Female Students Withdrawn by 6yrs',
       'latest.completion.title_iv.female.withdrawn_by.8yrs':'% Female Students Withdrawn by 8yrs'
    
})

In [14]:
#for loop to convert floats to percentage
#starting i at 1 to skip the school name column
for i in gender_completion_clean.columns[1:]:
    gender_completion_clean[i] = gender_completion_clean[i] * 100

In [15]:
gender_completion_clean

Unnamed: 0,School Name,% Male Students Completed within 2yrs,% Male Students Completed within 3yrs,% Male Students Completed within 4yrs,% Male Students Completed within 6yrs,% Male Students Completed within 8yrs,% Male Students Withdrawn by 2yrs,% Male Students Withdrawn by 3yrs,% Male Students Withdrawn by 4yrs,% Male Students Withdrawn by 6yrs,...,% Female Students Completed within 2yrs,% Female Students Completed within 3yrs,% Female Students Completed within 4yrs,% Female Students Completed within 6yrs,% Female Students Completed within 8yrs,% Female Students Withdrawn by 2yrs,% Female Students Withdrawn by 3yrs,% Female Students Withdrawn by 4yrs,% Female Students Withdrawn by 6yrs,% Female Students Withdrawn by 8yrs
0,Alabama A & M UniversityAlabama A & M Universi...,4.066265,8.126722,17.638484,24.455611,25.568182,37.349398,35.399449,42.419825,41.876047,...,4.714912,17.823834,25.938967,35.748031,31.801471,29.714912,25.181347,28.755869,23.307087,19.669118
1,University of Alabama at BirminghamUniversity ...,13.675958,28.867761,42.529880,50.994764,50.358974,22.386760,26.361279,24.800797,21.256545,...,15.812379,35.194417,47.121662,55.813953,56.910039,18.907157,19.292124,18.635015,15.084852,12.320730
2,Amridge UniversityAmridge UniversityAmridge Un...,0.000000,0.000000,0.000000,0.000000,27.272727,60.000000,50.000000,48.979592,44.642857,...,0.000000,0.000000,0.000000,0.000000,12.389381,50.806452,52.525253,51.898734,49.541284,28.318584
3,University of Alabama in HuntsvilleUniversity ...,6.235012,23.529412,34.672619,48.105437,47.627417,21.942446,26.342711,31.398810,23.558484,...,15.983027,35.007849,39.389068,48.007968,51.016949,19.801980,17.896389,23.151125,14.541833,10.169492
4,Alabama State UniversityAlabama State Universi...,5.450237,8.786611,19.449902,25.000000,22.301136,39.810427,46.443515,38.899804,40.261628,...,3.857143,16.521739,28.167116,36.871508,32.295271,28.428571,31.677019,27.493261,21.694600,19.838524
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
695,Robert Morgan Educational Center and Technical...,36.842105,0.000000,0.000000,18.248175,26.732673,48.421053,63.207547,74.311927,60.583942,...,43.529412,0.000000,0.000000,41.803279,33.333333,42.352941,50.000000,35.555556,26.229508,0.000000
696,Rollins CollegeRollins CollegeRollins CollegeR...,20.779221,58.865248,65.476190,64.210526,66.666667,9.740260,7.092199,11.904762,13.157895,...,16.835017,64.262295,63.076923,74.144487,70.870871,7.744108,7.213115,8.076923,9.885932,9.909910
697,First Coast Technical CollegeFirst Coast Techn...,73.684211,59.090909,0.000000,54.135338,40.206186,0.000000,29.545455,50.000000,28.571429,...,74.117647,58.273381,0.000000,58.823529,49.738220,0.000000,28.776978,25.833333,21.764706,9.424084
698,Saint Leo UniversitySaint Leo UniversitySaint ...,17.211704,26.895307,30.031746,39.784946,44.506816,38.726334,37.093863,35.301587,27.258065,...,17.053529,27.817067,28.210181,36.293077,40.264731,33.538607,28.665724,31.001642,23.998758,19.726729


In [50]:
gender_completion_clean.to_csv('data/gender_completion.csv', index=False, header=True)