In [1]:
import numpy as np
import pandas as pd

In [2]:
file_path = 'Top 100 Private Colleges.2003.csv'

In [3]:
data = pd.read_csv(file_path)
data.head()

Unnamed: 0,Overall Rank,School,State,Undergrad. Enrollment,Admission Rate,*SAT or ACT,Student/faculty Ratio,4-year Grad. Rate,6-year Grad. Rate,Quality Rank,Total Costs,Cost After Need-based Aid,Need Met,Aid From Grants,Cost After Non-Need-Based Aid,Non-Need-Based Aid+,Average Debt,Cost Rank
0,1.0,California Institute of Technology,CA,939.0,21%,99/100%,3.0,71%,85%,10.0,"$32,682","$10,981",100%,93%,"$18,553",15%,"$10,244",4.0
1,2.0,Rice University,TX,2787.0,24%,89/92%,5.0,68%,89%,19.0,"$28,350","$14,779",100%,88%,"$22,418",34%,"$12,705",5.0
2,3.0,Williams College,MA,1985.0,23%,93/93%,8.0,89%,94%,1.0,"$36,550","$14,737",100%,89%,"$33,251",,"$12,316",26.0
3,4.0,Swarthmore College,PA,1479.0,24%,94/98%,8.0,86%,92%,4.0,"$38,676","$17,386",100%,85%,"$11,404",2%,"$12,759",21.0
4,5.0,Amherst College,MA,1618.0,18%,94/92%,9.0,84%,94%,5.0,"$38,492","$14,453",100%,92%,"$33,411",,"$11,544",38.0


## Q1. List the top 10 quality ranked colleges

In [4]:
quality_ranks = data['Quality Rank']
quality_ranks.dtypes

dtype('float64')

In [5]:
quality_ranks = quality_ranks.sort_values()
sorted_idxs = quality_ranks.keys()[:10]
colleges = data['School']
top_10_colleges = colleges[sorted_idxs]

In [6]:
print('Rank\t College')
for rank, college in enumerate(top_10_colleges):
    print(rank+1,"\t", college)

Rank	 College
1 	 Williams College
2 	 Massachusetts Institute of Technology
3 	 Yale University
4 	 Swarthmore College
5 	 Amherst College
6 	 Stanford University
7 	 Princeton University
8 	 Harvard University
9 	 University of Pennsylvania
10 	 California Institute of Technology


## Q2. List all the colleges in California

In [7]:
colleges_callifornia = data['School'].loc[data['State'] == "CA"]

In [8]:
for colleges in colleges_callifornia:
    print(colleges)

California Institute of Technology
Stanford University
Pomona College
Claremont McKenna College
Harvey Mudd College
University of Southern California
Scripps College


## Q3. Which college has lowest average debt?

In [9]:
average_debt = data['Average Debt'].dropna()
average_debt.dtypes

dtype('O')

In [10]:
# Removing $ and , signs
average_debt = average_debt.str.replace(',', '')
average_debt = average_debt.str.replace('$', '')
average_debt = average_debt.astype(int)

In [11]:
min_debt = min(average_debt)
idxs = []
for idx, val in enumerate(average_debt):
    if(val == min_debt):
        idxs.append(idx)
print(min_debt)

0


In [12]:
schools_min_debt = data['School'].loc[idxs]

In [13]:
for schools in schools_min_debt:
    print(schools)

Washington University
Dartmouth College
Macalester College
Brandeis University
Bryn Mawr College
Trinity University
University of Rochester
Babson College
The George Washington University


## Q4. List bottom 10 cost rank colleges

In [14]:
cost_ranks = data['Cost Rank'].dropna()
cost_ranks.dtypes

dtype('float64')

In [15]:
cost_ranks = cost_ranks.sort_values(ascending=False)

In [16]:
sorted_idxs = cost_ranks.keys()[:10]

In [17]:
colleges = data[['School', 'Cost Rank']]
bottom_10_colleges = colleges.loc[sorted_idxs]

In [18]:
print(bottom_10_colleges)

                              School  Cost Rank
90               New York University      100.0
41               Wesleyan University       99.0
36                Middlebury College       98.0
32             Georgetown University       97.0
99  The George Washington University       96.0
40                Cornell University       95.0
97  Rensselaer Polytechnic Institute       94.0
22        University of Pennsylvania       93.0
26                 Dartmouth College       92.0
33                  Brown University       91.0


## Q5. Among colleges having more than 2000 undergraduates, which has highest faculty/student ratio.

In [19]:
colleges = data[['School' ,'Student/faculty Ratio']]\
  .loc[data["Undergrad. Enrollment"] > 2000]
colleges.dtypes

School                    object
Student/faculty Ratio    float64
dtype: object

In [20]:
unique_val = colleges['Student/faculty Ratio'].unique()
print(unique_val)

[ 5.  7.  8.  6. 11.  9. 12. 10. 13. 18. 17. 14.]


In [21]:
max_ratio = max(unique_val)
college_having_max_ratio = colleges['School']\
  .loc[colleges['Student/faculty Ratio'] == max_ratio]

In [22]:
for college in college_having_max_ratio:
    print(college)

Brigham Young University


## Q6. Which college has aid in grant more than 80% and costliest in total cost?

In [23]:
colleges = data[['School' ,'Total Costs', "Aid From Grants"]]\
  .loc[data["Aid From Grants"] > "80%"]

In [24]:
colleges['Total Costs'] = colleges['Total Costs'].str.replace(',', '')
colleges['Total Costs'] = colleges['Total Costs'].str.replace('$', '')
colleges['Total Costs'] = colleges['Total Costs'].astype(int)

In [25]:
max_cost = max(colleges['Total Costs'])
print(max_cost)

40248


In [26]:
required_colleges = colleges\
  .loc[colleges['Total Costs'] == max_cost]

In [27]:
print(required_colleges)

              School  Total Costs Aid From Grants
33  Brown University        40248             83%


## Q7. Among top 50 quality rank colleges, which has lowest total cost

In [28]:
colleges = data[['School' ,'Total Costs', "Quality Rank"]]\
  .loc[data["Quality Rank"] <= 50]

In [29]:
# Removing $ and , signs
colleges['Total Costs'] = colleges['Total Costs'].str.replace(',', '')
colleges['Total Costs'] = colleges['Total Costs'].str.replace('$', '')
colleges['Total Costs'] = colleges['Total Costs'].astype(int)

In [30]:
min_cost = min(colleges['Total Costs'])

In [31]:
required_colleges = colleges\
  .loc[colleges['Total Costs'] == min_cost]

In [32]:
print(required_colleges)

           School  Total Costs  Quality Rank
5  Webb Institute         8079          39.0
