In [50]:
import json
import pandas as pd

In [51]:
# Load the raw search payload. The context manager guarantees the file handle
# is closed, and the encoding is pinned so non-ASCII names decode the same way
# on every platform.
with open('nyu-prof-data.json', encoding='utf-8') as json_file:
    data = json.load(json_file)

# Each edge wraps one teacher record under its 'node' key.
professors = data['data']['search']['teachers']['edges']

# Column order of the resulting table; also used so that an empty edge list
# still produces a frame with the expected columns.
PROFESSOR_COLUMNS = [
    'Name',
    'Average Rating',
    'Department',
    'Difficulty Rating',
    'Number of Ratings',
]

# Collect plain dict records and build the DataFrame once at the end.
# Growing a DataFrame with pd.concat inside the loop copies every existing row
# on each iteration (quadratic in the number of professors).
# NOTE(review): department labels are taken verbatim from the payload. The
# grouped outputs further down show variants such as 'Teaching & Learning',
# 'Teaching amp Learning' and 'Teaching Learning', which presumably denote the
# same department -- consider normalizing them here before aggregating.
records = []
for professor in professors:
    node = professor['node']
    records.append({
        'Name': f"{node['firstName']} {node['lastName']}",
        'Average Rating': node['avgRating'],
        'Department': node['department'],
        'Difficulty Rating': node['avgDifficulty'],
        'Number of Ratings': node['numRatings'],
    })

df = pd.DataFrame(records, columns=PROFESSOR_COLUMNS)

df

Unnamed: 0,Name,Average Rating,Department,Difficulty Rating,Number of Ratings
0,Phillip Kain,4.3,English,3.6,19
1,Richard Borowsky,2.6,Biology,3.5,12
2,Meretzky Mark,3.5,Computer Science,2.7,3
3,Susanna Horng,3.3,Writing,3.5,56
4,Robert Diyanni,4.7,Cultural Studies,2.3,39
...,...,...,...,...,...
4995,Niccolo Rossi,3.0,Italian,4.0,1
4996,Tamer Avcilar,2.0,Hospitality,3.0,1
4997,Tim Crouse,4.0,Theatre & Dance,4.0,1
4998,Ethiraj Gabriel Dattatreyan,5.0,Anthropology,1.8,3


In [52]:
# De-duplicate professors: rows repeating an already-seen (Name, Department)
# pair are masked out, so only the first occurrence of each pair survives.
df = df[~df.duplicated(subset=['Name', 'Department'], keep='first')]

df

Unnamed: 0,Name,Average Rating,Department,Difficulty Rating,Number of Ratings
0,Phillip Kain,4.3,English,3.6,19
1,Richard Borowsky,2.6,Biology,3.5,12
2,Meretzky Mark,3.5,Computer Science,2.7,3
3,Susanna Horng,3.3,Writing,3.5,56
4,Robert Diyanni,4.7,Cultural Studies,2.3,39
...,...,...,...,...,...
4995,Niccolo Rossi,3.0,Italian,4.0,1
4996,Tamer Avcilar,2.0,Hospitality,3.0,1
4997,Tim Crouse,4.0,Theatre & Dance,4.0,1
4998,Ethiraj Gabriel Dattatreyan,5.0,Anthropology,1.8,3


In [53]:
# Discard professors whose rating count is exactly zero
# (presumably their averages carry no information -- confirm against the data).
df = df[df['Number of Ratings'].ne(0)]

df

Unnamed: 0,Name,Average Rating,Department,Difficulty Rating,Number of Ratings
0,Phillip Kain,4.3,English,3.6,19
1,Richard Borowsky,2.6,Biology,3.5,12
2,Meretzky Mark,3.5,Computer Science,2.7,3
3,Susanna Horng,3.3,Writing,3.5,56
4,Robert Diyanni,4.7,Cultural Studies,2.3,39
...,...,...,...,...,...
4995,Niccolo Rossi,3.0,Italian,4.0,1
4996,Tamer Avcilar,2.0,Hospitality,3.0,1
4997,Tim Crouse,4.0,Theatre & Dance,4.0,1
4998,Ethiraj Gabriel Dattatreyan,5.0,Anthropology,1.8,3


In [58]:
# Per-department mean of the professors' average ratings, highest first.
# This is an unweighted mean over professors: a department with a single
# professor and a single rating can top the list.
mean_ratings = (
    df.groupby('Department')['Average Rating']
    .mean()
    .reset_index()
    .sort_values(by='Average Rating', ascending=False)
)

mean_ratings

Unnamed: 0,Department,Average Rating
20,Arts amp Sciences,5.000000
124,Publishing & Printing,5.000000
79,Interactive Telecommunications,5.000000
64,Genetics,5.000000
1,Administration,5.000000
...,...,...
137,Sports Management,2.700000
27,Business Law,2.500000
120,Professional Programs,2.333333
140,Teaching Learning,2.000000


In [59]:
# Per-department mean of the professors' difficulty ratings, hardest first.
# As with the rating summary, every professor counts equally regardless of
# how many ratings they received.
mean_difficulty = (
    df.groupby('Department')['Difficulty Rating']
    .mean()
    .reset_index()
    .sort_values(by='Difficulty Rating', ascending=False)
)

mean_difficulty

Unnamed: 0,Department,Difficulty Rating
140,Teaching Learning,4.500000
9,Architecture,4.500000
59,Foundations of Am. Culture/Classics,4.300000
122,Public Administration,4.075000
22,Biochemistry,4.066667
...,...,...
46,English As A Second Language,2.000000
141,Teaching & Learning,1.937500
3,African Studies,1.833333
142,Teaching amp Learning,1.700000


In [60]:
# Total number of ratings submitted per department (summed over its
# professors), most-rated departments first.
num_ratings = (
    df.groupby('Department')['Number of Ratings']
    .sum()
    .reset_index()
    .sort_values(by='Number of Ratings', ascending=False)
)

num_ratings

Unnamed: 0,Department,Number of Ratings
150,Writing,4198
98,Mathematics,2399
41,Economics,1872
45,English,1682
121,Psychology,1663
...,...,...
26,Business Economics,1
1,Administration,1
124,Publishing & Printing,1
20,Arts amp Sciences,1


In [61]:
# Combine the three per-department summaries into one table keyed on
# 'Department': mean rating, mean difficulty, and total number of ratings.
# Row order follows mean_ratings (the left-most frame of the joins).
department_stats = (
    mean_ratings
    .merge(mean_difficulty, on='Department')
    .merge(num_ratings, on='Department')
)

department_stats

Unnamed: 0,Department,Average Rating,Difficulty Rating,Number of Ratings
0,Arts amp Sciences,5.000000,3.0,1
1,Publishing & Printing,5.000000,4.0,1
2,Interactive Telecommunications,5.000000,4.0,2
3,Genetics,5.000000,1.0,1
4,Administration,5.000000,3.0,1
...,...,...,...,...
146,Sports Management,2.700000,4.0,14
147,Business Law,2.500000,2.7,7
148,Professional Programs,2.333333,3.3,24
149,Teaching Learning,2.000000,4.5,6
