 <b>Opening File</b>

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Load the "Top 100 Private Colleges" CSV into a DataFrame and preview the
# record stored under index label 0.
college = pd.read_csv("Top 100 Private Colleges.2003.csv")
first_record = college.loc[0]
print(first_record)

Overall Rank                                                      1
School                           California Institute of Technology
State                                                            CA
Undergrad. Enrollment                                           939
Admission Rate                                                  21%
*SAT or ACT                                                 99/100%
Student/faculty Ratio                                             3
4-year Grad. Rate                                               71%
6-year Grad. Rate                                               85%
Quality Rank                                                     10
Total Costs                                                $32,682 
Cost After Need-based Aid                                  $10,981 
Need Met                                                       100%
Aid From Grants                                                 93%
Cost After Non-Need-Based Aid                   

In [3]:
# Discard rows that have no 'Overall Rank'.
# NOTE(review): presumably these are blank/footnote rows at the end of the CSV -- confirm.
# dropna(subset=...) states the intent directly instead of building an index
# from isin([np.nan]) and passing it to drop().
college = college.dropna(subset=['Overall Rank'])

In [4]:
# Display the column labels of the `college` DataFrame.
college.columns

Index(['Overall Rank', 'School', 'State', 'Undergrad. Enrollment',
       'Admission Rate', '*SAT or ACT', 'Student/faculty Ratio',
       '4-year Grad. Rate', '6-year Grad. Rate', 'Quality Rank', 'Total Costs',
       'Cost After Need-based Aid', 'Need Met', 'Aid From Grants',
       'Cost After Non-Need-Based Aid', 'Non-Need-Based Aid+', 'Average Debt',
       'Cost Rank'],
      dtype='object')

<b> List the top 10 quality ranked colleges</b>

In [6]:
# Order the colleges by 'Quality Rank', smallest rank number first.
qr_data = college.sort_values('Quality Rank', ascending=True)

In [7]:
# Show the first ten rows of the quality-rank ordering, limited to the
# identifying columns.
top10_columns = ['Quality Rank', 'Overall Rank', 'School', 'State']
print(qr_data[top10_columns].head(10))

    Quality Rank  Overall Rank                                 School State
2            1.0           3.0                       Williams College    MA
11           2.0          12.0  Massachusetts Institute of Technology    MA
6            3.0           7.0                        Yale University    CT
3            4.0           4.0                     Swarthmore College    PA
4            5.0           5.0                        Amherst College    MA
9            6.0          10.0                    Stanford University    CA
10           7.0          11.0                   Princeton University    NJ
8            8.0           9.0                     Harvard University    MA
22           9.0          23.0             University of Pennsylvania    PA
0           10.0           1.0     California Institute of Technology    CA


<b> List all the colleges in California </b>

In [8]:
# Select the rows whose 'State' is 'CA' and show rank, name, state and cost.
in_california = college['State'] == 'CA'
california_columns = ['Overall Rank', 'School', 'State', 'Total Costs']
print(college.loc[in_california, california_columns])

    Overall Rank                              School State Total Costs
0            1.0  California Institute of Technology    CA    $32,682 
9           10.0                 Stanford University    CA    $38,875 
12          13.0                      Pomona College    CA    $38,130 
27          28.0           Claremont McKenna College    CA    $37,730 
51          52.0                 Harvey Mudd College    CA    $38,880 
70          70.0   University of Southern California    CA    $37,968 
76          76.0                     Scripps College    CA    $36,500 


<b> Which college has the lowest average debt? </b>

In [9]:
# Helper for turning currency-formatted strings (e.g. the 'Average Debt' and
# 'Total Costs' columns) into floats.
def removesymbolcurrency(x):
    """Convert a currency string such as '$32,682 ' to a float.

    The text after the first '$' (and before any further '$') is taken,
    thousands separators (',') are removed, and the remainder is parsed
    with float().

    x: a string containing a '$' sign; a string without '$' raises IndexError.
    Returns the amount as a float.
    """
    pieces = x.split('$')
    amount_text = pieces[1].replace(",", "")
    return float(amount_text)

In [10]:
# Goal: determine the minimum 'Average Debt' and show every college tied at
# that value.

# Parse the currency strings on a copy; `college` itself keeps the original
# formatted text, which is what gets printed below.
avg_debt_data = college.copy()
parsed_debt = avg_debt_data['Average Debt'].apply(removesymbolcurrency)
avg_debt_data['Average Debt'] = parsed_debt
avg_debt_data = avg_debt_data.sort_values(by='Average Debt')

# After the ascending sort the first row holds the minimum. `.values` yields a
# 2-D array, so leastval[0] is the one-element row containing that minimum.
leastval = avg_debt_data.head(1).loc[:, ['Average Debt']].values
has_least_debt = avg_debt_data['Average Debt'].isin(leastval[0])
least_debt_index = avg_debt_data.index[has_least_debt]
least_debt_columns = ['Overall Rank', 'School', 'State', 'Average Debt']
print(college.loc[least_debt_index, least_debt_columns])


    Overall Rank                            School State Average Debt
49          50.0               Brandeis University    MA          $0 
95          96.0                    Babson College    MA          $0 
89          90.0           University of Rochester    NY          $0 
72          72.0                Trinity University    TX          $0 
53          54.0                 Bryn Mawr College    PA          $0 
47          48.0                Macalester College    MN          $0 
26          27.0                 Dartmouth College    NH          $0 
25          26.0             Washington University    MO          $0 
99         100.0  The George Washington University    DC          $0 


<b>List the bottom 10 colleges by cost rank</b>

In [11]:
# Sort ascending by 'Cost Rank' and show the last ten rows, i.e. the ten
# largest cost-rank numbers.
cost_rank = college.sort_values(by='Cost Rank')
cost_rank_columns = ['Cost Rank', 'Overall Rank', 'School', 'State']
print(cost_rank[cost_rank_columns].tail(10))

    Cost Rank  Overall Rank                            School State
33       91.0          34.0                  Brown University    RI
26       92.0          27.0                 Dartmouth College    NH
22       93.0          23.0        University of Pennsylvania    PA
97       94.0          98.0  Rensselaer Polytechnic Institute    NY
40       95.0          41.0                Cornell University    NY
99       96.0         100.0  The George Washington University    DC
32       97.0          33.0             Georgetown University    DC
36       98.0          37.0                Middlebury College    VT
41       99.0          42.0               Wesleyan University    CT
90      100.0          91.0               New York University    NY


 <b> Among colleges having more than 2000 undergraduates, which has the highest faculty/student ratio?</b>

In [12]:
# Keep the colleges with more than 2000 undergraduates, then order them by
# 'Student/faculty Ratio' ascending, so the smallest ratio comes first and the
# largest comes last.
has_over_2000 = college['Undergrad. Enrollment'] > 2000
more_undergraduates = college.loc[has_over_2000].sort_values(by='Student/faculty Ratio')


In [13]:
# The highest faculty/student ratio corresponds to the lowest student/faculty
# ratio, so the first row of the ascending sort is the "highest" case and the
# last row is the "lowest" case.
ratio_columns = ['School', 'Undergrad. Enrollment', 'Student/faculty Ratio']

print('Highest faculty/student ratio')
print(more_undergraduates[ratio_columns].head(1))

print('\n\nLowest faculty/student ratio')
print(more_undergraduates[ratio_columns].tail(1))

Highest faculty/student ratio
            School  Undergrad. Enrollment  Student/faculty Ratio
1  Rice University                 2787.0                    5.0


Lowest faculty/student ratio
                      School  Undergrad. Enrollment  Student/faculty Ratio
60  Brigham Young University                29379.0                   18.0


<b> Among colleges whose aid from grants exceeds 80%, which has the highest total cost?</b>

In [14]:
# Helper for turning percentage-formatted strings (e.g. the 'Aid From Grants'
# column) into floats.
def removesymbolpercent(x):
    """Convert a percentage string such as '93%' to a float (93.0).

    Everything before the first '%' is parsed with float(); if the string
    has no '%', the whole string is parsed.

    x: a string holding a number, optionally followed by '%'.
    Returns the numeric part as a float.
    """
    number_text, _, _ = x.partition('%')
    return float(number_text)

In [15]:
# Among colleges whose 'Aid From Grants' exceeds 80%, show the one(s) with the
# highest 'Total Costs'.

# Parse the percentage strings on a copy so `college` keeps its formatted text.
aid_data = college.copy()
aid_data['Aid From Grants'] = aid_data['Aid From Grants'].apply(removesymbolpercent)

# Take an explicit copy of the filtered rows: assigning a column on the bare
# result of a .loc selection can trigger pandas' SettingWithCopyWarning,
# because it is ambiguous whether the write should reach `college`.
grant_data = college.loc[aid_data['Aid From Grants'] > 80].copy()
grant_data['Total Costs'] = grant_data['Total Costs'].apply(removesymbolcurrency)
grant_data = grant_data.sort_values(by='Total Costs')

# After the ascending sort the last row holds the maximum. `.values` yields a
# 2-D array, so Maxval[0] is the one-element row used to pick up any ties.
Maxval = grant_data.tail(1).loc[:,['Total Costs']]
Maxval = Maxval.values

# Print from `college` so the costs and percentages appear in their original
# formatted form.
print(college.loc[ grant_data.index[grant_data['Total Costs'].isin(Maxval[0])],
                  ['Overall Rank','School','State','Total Costs','Aid From Grants']])

    Overall Rank            School State Total Costs Aid From Grants
33          34.0  Brown University    RI    $40,248              83%


<b> Among the top 50 colleges by quality rank, which has the lowest total cost?</b>

In [16]:
# Among the first 50 rows of the quality-rank ordering (`qr_data`), show the
# college(s) with the lowest 'Total Costs'.

# .copy() makes `topqr` an independent frame: assigning a column on the slice
# returned by head() can trigger pandas' SettingWithCopyWarning.
topqr = qr_data.head(50).copy()

# Convert the currency strings to floats so the sort is numeric.
topqr['Total Costs'] = topqr['Total Costs'].apply(removesymbolcurrency)
top_data = topqr.sort_values(by='Total Costs')

# After the ascending sort the first row holds the minimum. `.values` yields a
# 2-D array, so Minval[0] is the one-element row used to pick up any ties.
Minval = top_data.head(1).loc[:,['Total Costs']]
Minval = Minval.values
tdata = top_data.loc[ top_data['Total Costs'].isin(Minval[0]),
                     ['Overall Rank','Quality Rank','School','State','Total Costs']]

print(tdata)

   Overall Rank  Quality Rank          School State  Total Costs
5           6.0          39.0  Webb Institute    NY       8079.0
