# National University Ranking

![](images/2023-07-13-00-48-01.png)

## Introduction

## Research Idea:
### Developing a searchable database to help high school students identify colleges that match their criteria in terms of tuition, graduation rate, location, and rank.

### Import Libraries

In [1]:
import pandas as pd
import plotly.express as px
import plotly.io as pio
pio.templates.default = "ggplot2"
import plotly.offline as po
po.init_notebook_mode(connected=True)
import plotly.graph_objects as go

In [2]:
# Read the raw rankings CSV; the file's first column becomes the row index.
nur = pd.read_csv(
    filepath_or_buffer="dataset/National Universities Rankings.csv",
    index_col=0,
)
nur.shape

(231, 7)

In [3]:
# Preview the first five raw rows.
nur.head(n=5)

Unnamed: 0_level_0,Name,Location,Rank,Description,Tuition and fees,In-state,Undergrad Enrollment
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,Princeton University,"Princeton, NJ",1,"Princeton, the fourth-oldest college in the Un...","$45,320",,5402
1,Harvard University,"Cambridge, MA",2,"Harvard is located in Cambridge, Massachusetts...","$47,074",,6699
2,University of Chicago,"Chicago, IL",3,"The University of Chicago, situated in Chicago...","$52,491",,5844
3,Yale University,"New Haven, CT",3,"Yale University, located in New Haven, Connect...","$49,480",,5532
4,Columbia University,"New York, NY",5,"Columbia University, located in Manhattan's Mo...","$55,056",,6102


In [12]:
# Work on an independent copy so the raw frame `nur` is never modified.
df_nur = nur.copy(deep=True)

In [13]:
# Contributors: Promise, Chinaza, precious, toyosi, Olufemilite, Goodrich
#
# The cleaned frame is rebuilt from the untouched `nur` in one chain, so this cell
# gives the same result every time it is run. (Cleaning `df_nur` in place and then
# renaming its columns made a second run fail with KeyError: 'Tuition and fees'.)

def _money_to_int(column):
    """Turn strings such as '$45,320' into integers; missing values become 0."""
    return (
        column.str.replace('$', '', regex=False)
              .str.replace(',', '', regex=False)
              .fillna('0')
              .astype(int)
    )


df_nur = (
    nur
    # Lower-case the column headers and replace spaces/hyphens with underscores.
    .rename(columns={
        "Name": "name",
        "Location": "location",
        "Rank": "rank",
        "Description": "description",
        "Tuition and fees": "tuition_and_fees",
        "In-state": "in_state",
        "Undergrad Enrollment": "undergrad_enrollment",
    })
    .assign(
        # Strip the dollar sign and thousands separator, then convert to integer.
        tuition_and_fees=lambda d: _money_to_int(d['tuition_and_fees']),
        in_state=lambda d: _money_to_int(d['in_state']),
        # Enrollment only carries a thousands separator.
        undergrad_enrollment=lambda d: (
            d['undergrad_enrollment'].str.replace(',', '', regex=False).astype(int)
        ),
        # The state code is the text after the last comma in "City, ST".
        state=lambda d: d['location'].str.split(',').str[-1].str.strip(),
        # First four-digit number found in the description; 0 when there is none.
        year=lambda d: (
            d['description'].str.extract(r'(\d{4})', expand=False).fillna('0').astype(int)
        ),
    )
)

df_nur.head()

Unnamed: 0_level_0,name,location,rank,description,tuition_and_fees,in_state,undergrad_enrollment,state,year
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0,Princeton University,"Princeton, NJ",1,"Princeton, the fourth-oldest college in the Un...",45320,0,5402,NJ,0
1,Harvard University,"Cambridge, MA",2,"Harvard is located in Cambridge, Massachusetts...",47074,0,6699,MA,0
2,University of Chicago,"Chicago, IL",3,"The University of Chicago, situated in Chicago...",52491,0,5844,IL,0
3,Yale University,"New Haven, CT",3,"Yale University, located in New Haven, Connect...",49480,0,5532,CT,0
4,Columbia University,"New York, NY",5,"Columbia University, located in Manhattan's Mo...",55056,0,6102,NY,0


In [16]:
# grace_oby
# Add the full name of each state (50 states plus the District of Columbia) as its own column.
# Every code in df_nur['state'] must have an entry here; this is checked below.
state_abbrev = {
    'NJ': 'New Jersey',
    'MA': 'Massachusetts',
    'IL': 'Illinois',
    'CT': 'Connecticut',
    'NY': 'New York',
    'CA': 'California',
    'NC': 'North Carolina',
    'PA': 'Pennsylvania',
    'MD': 'Maryland',
    'NH': 'New Hampshire',
    'RI': 'Rhode Island',
    'TX': 'Texas',
    'IN': 'Indiana',
    'TN': 'Tennessee',
    'MO': 'Missouri',
    'GA': 'Georgia',
    'DC': 'District of Columbia',
    'VA': 'Virginia',
    'MI': 'Michigan',
    'OH': 'Ohio',
    'LA': 'Louisiana',
    'FL': 'Florida',
    'WI': 'Wisconsin',
    'WA': 'Washington',
    'SC': 'South Carolina',
    'UT': 'Utah',
    'MN': 'Minnesota',
    'DE': 'Delaware',
    'CO': 'Colorado',
    'IA': 'Iowa',
    'OK': 'Oklahoma',
    'VT': 'Vermont',
    'AL': 'Alabama',
    'OR': 'Oregon',
    'NE': 'Nebraska',
    'KS': 'Kansas',
    'AZ': 'Arizona',
    'KY': 'Kentucky',
    'AR': 'Arkansas',
    'MS': 'Mississippi',
    'HI': 'Hawaii',
    'ID': 'Idaho',
    'WY': 'Wyoming',
    'NM': 'New Mexico',
    'ME': 'Maine',
    'WV': 'West Virginia',
    'ND': 'North Dakota',
    'NV': 'Nevada',
    'SD': 'South Dakota',
    'AK': 'Alaska',
    'MT': 'Montana',
}
df_nur["state_full"] = df_nur["state"].map(state_abbrev)

# Series.map leaves NaN for codes missing from the dictionary; fail loudly instead of
# carrying silent gaps into the analysis.
unmapped_codes = df_nur.loc[df_nur["state_full"].isna(), "state"].unique().tolist()
assert not unmapped_codes, f"No full state name for codes: {unmapped_codes}"

# Johnkennedy
## creating the region column
# Regions are assigned per state code, following the four U.S. Census Bureau regions.
# Keying the lookup by city name was unreliable: city names repeat across states
# (e.g. Newark NJ / Newark DE, Columbia SC / Columbia MO), duplicate keys silently
# overwrite each other, and any city missing from the table produced NaN.
census_regions = {
    'Northeast': ['CT', 'ME', 'MA', 'NH', 'NJ', 'NY', 'PA', 'RI', 'VT'],
    'Midwest': ['IL', 'IN', 'IA', 'KS', 'MI', 'MN', 'MO', 'NE', 'ND', 'OH', 'SD', 'WI'],
    'South': ['AL', 'AR', 'DC', 'DE', 'FL', 'GA', 'KY', 'LA', 'MD', 'MS', 'NC', 'OK',
              'SC', 'TN', 'TX', 'VA', 'WV'],
    'West': ['AK', 'AZ', 'CA', 'CO', 'HI', 'ID', 'MT', 'NV', 'NM', 'OR', 'UT', 'WA', 'WY'],
}

# Dictionary to map state codes to regions
state_to_region = {
    state_code: region
    for region, state_codes in census_regions.items()
    for state_code in state_codes
}

# Create a new column 'region'
df_nur['region'] = df_nur['state'].map(state_to_region)

df_nur.head()

Unnamed: 0_level_0,name,location,rank,description,tuition_and_fees,in_state,undergrad_enrollment,state,year,state_full,region
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
0,Princeton University,"Princeton, NJ",1,"Princeton, the fourth-oldest college in the Un...",45320,0,5402,NJ,0,New Jersey,Northeast
1,Harvard University,"Cambridge, MA",2,"Harvard is located in Cambridge, Massachusetts...",47074,0,6699,MA,0,Massachusetts,Northeast
2,University of Chicago,"Chicago, IL",3,"The University of Chicago, situated in Chicago...",52491,0,5844,IL,0,Illinois,Midwest
3,Yale University,"New Haven, CT",3,"Yale University, located in New Haven, Connect...",49480,0,5532,CT,0,Connecticut,Northeast
4,Columbia University,"New York, NY",5,"Columbia University, located in Manhattan's Mo...",55056,0,6102,NY,0,New York,Northeast


In [7]:
# saving cleaned data
# df_nur.to_csv('dataset/cleaned_nur_data.csv')

In [8]:
# Print the column dtypes and non-null counts of the cleaned frame.
df_nur.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 231 entries, 0 to 230
Data columns (total 11 columns):
 #   Column                Non-Null Count  Dtype 
---  ------                --------------  ----- 
 0   name                  231 non-null    object
 1   location              231 non-null    object
 2   rank                  231 non-null    int64 
 3   description           231 non-null    object
 4   tuition_and_fees      231 non-null    int32 
 5   in_state              231 non-null    int32 
 6   undergrad_enrollment  231 non-null    int32 
 7   state                 231 non-null    object
 8   year                  231 non-null    int32 
 9   state_full            231 non-null    object
 10  region                231 non-null    object
dtypes: int32(4), int64(1), object(6)
memory usage: 18.0+ KB


In [9]:
# Count missing values per column.
df_nur.isnull().sum(axis=0)

name                    0
location                0
rank                    0
description             0
tuition_and_fees        0
in_state                0
undergrad_enrollment    0
state                   0
year                    0
state_full              0
region                  0
dtype: int64

In [10]:
# Distinct state codes, in order of first appearance.
pd.unique(df_nur['state'])

array(['NJ', 'MA', 'IL', 'CT', 'NY', 'CA', 'NC', 'PA', 'MD', 'NH', 'RI',
       'TX', 'IN', 'TN', 'MO', 'GA', 'DC', 'VA', 'MI', 'OH', 'LA', 'FL',
       'WI', 'WA', 'SC', 'UT', 'MN', 'DE', 'CO', 'IA', 'OK', 'VT', 'AL',
       'OR', 'NE', 'KS', 'AZ', 'KY', 'AR', 'MS', 'HI', 'ID', 'WY', 'NM',
       'ME', 'WV', 'ND', 'NV', 'SD', 'AK', 'MT'], dtype=object)

In [11]:
#SERAH
#Filter the dataframe for the following 5 states. NJ, MA, IL, CT, and NY.
#For each of them, there should be a dataframe sorted by tuition fees, in state, and undergrad enrollment.
#E.g. separate the NJ entries: one df sorted by tuition (descending), another sorted by the
#in-state column (descending), and another sorted by enrollment.
#
# Muees Hassan
# Same task for the following 5 states: CA, NC, PA, MD, and NH.
#
# All ten states are built here because later cells plot the MA/IL/CT/NY and
# NC/PA/MD/NH frames; previously only NJ and CA existed, so those cells raised NameError.

def _rank_state(state_code, reset_index=True):
    """Return one state's rows sorted descending by tuition, in-state fees and enrollment.

    The three frames are returned in that order. With ``reset_index=True`` each
    frame gets a fresh 0..n-1 index; otherwise the original row labels are kept.
    """
    state_rows = df_nur[df_nur['state'] == state_code]
    ranked = [
        state_rows.sort_values(column, ascending=False)
        for column in ('tuition_and_fees', 'in_state', 'undergrad_enrollment')
    ]
    if reset_index:
        ranked = [frame.reset_index(drop=True) for frame in ranked]
    return tuple(ranked)


# NJ, MA, IL, CT, NY: sorted frames with a reset index.
nj = df_nur[df_nur['state'] == 'NJ']
nj_fees, nj_instate, nj_grad = _rank_state('NJ')
ma_fees, ma_instate, ma_grad = _rank_state('MA')
il_fees, il_instate, il_grad = _rank_state('IL')
ct_fees, ct_instate, ct_grad = _rank_state('CT')
ny_fees, ny_instate, ny_grad = _rank_state('NY')

# CA, NC, PA, MD, NH: sorted frames that keep the original row labels.
ca_state = df_nur[df_nur['state'] == 'CA']
ca_state_tuition, ca_state_instate, ca_state_enrollment = _rank_state('CA', reset_index=False)
nc_state_tuition, nc_state_instate, nc_state_enrollment = _rank_state('NC', reset_index=False)
pa_state_tuition, pa_state_instate, pa_state_enrollment = _rank_state('PA', reset_index=False)
md_state_tuition, md_state_instate, md_state_enrollment = _rank_state('MD', reset_index=False)
nh_state_tuition, nh_state_instate, nh_state_enrollment = _rank_state('NH', reset_index=False)


In [26]:
# graceoby task16 EDA
# Create a function that does it automatically for any given state.
# Your function should return three dataframes based on the sortings mentioned in the task.
def state_sorted_data(state_code, df=None):
    """Return one state's universities ranked three different ways.

    Parameters
    ----------
    state_code : str
        Value to match against the ``state`` column (e.g. ``'NJ'``).
    df : pandas.DataFrame, optional
        Frame to filter. It must contain the columns ``state``,
        ``tuition_and_fees``, ``in_state`` and ``undergrad_enrollment``.
        Defaults to the notebook-level ``df_nur``.

    Returns
    -------
    tuple of three pandas.DataFrame
        The matching rows sorted in descending order by tuition and fees,
        by in-state fees, and by undergraduate enrollment, in that order.
        Each frame has a fresh 0..n-1 index. A state code with no rows
        yields three empty frames.
    """
    # Resolve the default at call time so the function always sees the current df_nur.
    source = df_nur if df is None else df
    state_rows = source[source['state'] == state_code]
    fs_fees, fs_instate, fs_grad = (
        state_rows.sort_values(column, ascending=False).reset_index(drop=True)
        for column in ('tuition_and_fees', 'in_state', 'undergrad_enrollment')
    )
    return fs_fees, fs_instate, fs_grad

### EDA

In [14]:
# Bar chart of the first three rows of df_nur, titled as the top universities by rank.
first_three = df_nur.iloc[:3].sort_values(by='rank', ascending=False)
fig = px.bar(
    first_three,
    x="name",
    y="rank",
    text_auto=True,
    height=300,
    width=550,
    labels={'name': '', 'rank': ''},
)
fig.update_layout(
    title_text="Top Universities by Rank",
    xaxis={"categoryorder": "total ascending"},
)
fig.update_yaxes(showticklabels=False)
fig.show()

In [15]:
# The three universities with the highest tuition and fees.
top_tf = df_nur.sort_values(by='tuition_and_fees', ascending=False).head(3)
fig = px.bar(
    top_tf,
    x="name",
    y="tuition_and_fees",
    text_auto=True,
    height=300,
    width=550,
    labels={'name': '', 'tuition_and_fees': ''},
)
fig.update_layout(
    title_text="Top Universities by Tuition and fees",
    xaxis={"categoryorder": "total descending"},
)
fig.update_yaxes(showticklabels=False)
fig.show()

In [16]:
# The three universities with the highest in-state fees, drawn as horizontal bars.
top_ins = df_nur.sort_values(by='in_state', ascending=False).head(3)
fig = px.bar(
    top_ins,
    x="in_state",
    y="name",
    text_auto=True,
    height=400,
    width=550,
    labels={'name': '', 'in_state': ''},
    title="Top Universities by In-State fees",
)
fig.update_xaxes(showticklabels=False)
fig.show()

In [17]:
# Use a dedicated name here: reusing `top_ins` overwrote the in-state ranking
# with a frame that has nothing to do with in-state fees.
# Rows with year == 0 are excluded before taking the three smallest years.
oldest = df_nur[df_nur['year'] != 0].sort_values(by='year').head(3)
fig = px.bar(
    oldest,
    x="name",
    y="year",
    text_auto=True,
    height=400,
    width=550,
    labels={'name': '', 'year': ''},
    title="Oldest Universities",
)
fig.update_yaxes(showticklabels=False)
fig.show()

In [18]:
#Visualize results from task 3. Show the top university by tuition fees, another viz showing top university by in state fees,
#then another showing for enrollment
#
# One loop replaces fifteen copy-pasted charts. The frames come from state_sorted_data(),
# so this cell no longer depends on per-state variables that were never created
# (the nc_/pa_/md_/nh_ frames), and every title is built from the same template.
states_to_plot = (
    ('CA', 'California'),
    ('NC', 'North Carolina'),
    ('PA', 'Pennsylvania'),
    ('MD', 'Maryland'),
    ('NH', 'New Hampshire'),
)

for state_code, state_name in states_to_plot:
    by_tuition, by_instate, by_enrollment = state_sorted_data(state_code)
    # (sorted frame, column to plot, label used in the title and y-axis)
    charts = (
        (by_tuition, 'tuition_and_fees', 'Tuition_and_fees'),
        (by_enrollment, 'undergrad_enrollment', 'Undergrad_enrollment'),
        (by_instate, 'in_state', 'In-State'),
    )
    for frame, column, label in charts:
        # Only the top three rows of each ranking are drawn.
        fig = px.bar(frame.head(3), y=column, x="name", color="name", text_auto=True)
        fig.update_layout(
            barmode="stack",
            xaxis={"categoryorder": "total descending"},
            title_text=f"Top Universities by {label} in {state_name}",
            yaxis_title=f"{label} Values",
            xaxis_title=f"Names of Universities in {state_name}",
        )
        fig.show()

In [19]:
#Gbemisola
#Read up on Plotly Library. Visualize results from task 2. Show the top university by tuition fees, another viz showing top university by in state fees, then another showing for enrollment.
#
# The charts are generated in a loop from state_sorted_data(). This fixes three problems
# of the hand-written version: the IL tuition chart was titled "MA ...", the NY in-state
# chart plotted NJ's values, and the MA/IL/CT/NY frames were never defined.
for state_code in ('NJ', 'MA', 'IL', 'CT', 'NY'):
    by_fees, by_instate, by_enrollment = state_sorted_data(state_code)
    # (sorted frame, column to plot, label used in the title and y-axis)
    charts = (
        (by_fees, 'tuition_and_fees', 'Tuition Fees'),
        (by_instate, 'in_state', 'State Fees'),
        (by_enrollment, 'undergrad_enrollment', 'Enrollment'),
    )
    for frame, column, label in charts:
        fig = go.Figure(data=go.Bar(x=frame['name'], y=frame[column]))
        fig.update_layout(
            title=f"{state_code} Top Universities by {label}",
            xaxis_title="University",
            yaxis_title=label,
        )
        fig.show()

# ALL YOUR TASKS SHOULD BE DONE BELOW THIS MARKDOWN CELL FOR EASY MERGING

In [20]:
def plot_bar5_graph(df, x_column, y_column, title, x_axis_title, y_axis_title):
    """Display a single-series Plotly bar chart built from two columns of ``df``.

    Parameters
    ----------
    df : frame-like
        Object indexable by column name; ``df[x_column]`` supplies the bar
        positions and ``df[y_column]`` the bar heights.
    x_column, y_column : str
        Names of the columns to plot.
    title : str
        Figure title.
    x_axis_title, y_axis_title : str
        Axis captions.

    Returns
    -------
    None
        The figure is shown with ``fig.show()`` and not returned.
    """
    bars = go.Bar(x=df[x_column], y=df[y_column])
    layout_options = {
        'title': title,
        'xaxis_title': x_axis_title,
        'yaxis_title': y_axis_title,
    }
    fig = go.Figure(data=bars)
    fig.update_layout(**layout_options)
    fig.show()

In [21]:
# Draw the tuition, in-state fee and enrollment rankings for each of the five states.
# The frames are taken from state_sorted_data() so the charts do not depend on
# per-state variables (ma_*, il_*, ct_*, ny_*) that were never created.
for state_code in ('NJ', 'MA', 'IL', 'CT', 'NY'):
    by_fees, by_instate, by_enrollment = state_sorted_data(state_code)
    plot_bar5_graph(by_fees, 'name', 'tuition_and_fees',
                    f"{state_code} Top Universities by Tuition Fees", "University", "Tuition Fees")
    plot_bar5_graph(by_instate, 'name', 'in_state',
                    f"{state_code} Top Universities by State Fees", "University", "State Fees")
    plot_bar5_graph(by_enrollment, 'name', 'undergrad_enrollment',
                    f"{state_code} Top Universities by Enrollment", "University", "Enrollment")