In [2]:
import csv
import json
from collections import defaultdict

In [3]:
filename = 'Rate My Professors Reviews.csv'
# Peek at the header row only. The with-block closes the file as soon as the
# header has been read instead of leaving an open handle in the kernel, and
# newline='' is how the csv module expects its input files to be opened.
with open(filename, 'r', newline='') as header_file:
    column_names = next(csv.reader(header_file))
column_names

['Professor_ID',
 'Professor_Name',
 'University',
 'Department',
 'Quality',
 'Difficulty',
 'Class_Name',
 'Comment',
 'Thumbs_Up',
 'Thumbs_Down',
 'Review_Date']

In [4]:
def _blank_professor():
    """Build a fresh, empty professor record (each call gets its own 'reviews' list)."""
    return {
        "name": "",
        "id": 0,
        "department": "",
        "overallQuality": 0,
        "difficulty": 0,
        "reviews": [],
    }


# Looking up an unseen key creates a blank record for it on the fly.
professors = defaultdict(_blank_professor)

In [5]:
# Read every review row and file it under its professor.
# newline='' hands line-ending handling to the csv module, so quoted fields
# that contain line breaks (e.g. multi-line comments) are parsed correctly.
with open(filename, 'r', newline='') as csvfile:
    reader = csv.DictReader(csvfile)
    for row in reader:
        # Convert the numeric columns first so a malformed row raises before
        # any record is created or modified.
        professor_id = int(row['Professor_ID'])
        quality = float(row['Quality'])
        difficulty = int(row['Difficulty'])

        # A single lookup; the defaultdict creates a blank record if needed.
        entry = professors[professor_id]

        # 'id' still holding the default 0 marks a record that has not been
        # filled in yet.
        if entry['id'] == 0:
            entry['name'] = row['Professor_Name']
            entry['id'] = professor_id
            entry['department'] = row['Department']

        entry['reviews'].append({
            "quality": quality,
            "difficulty": difficulty,
            "class": row['Class_Name'],
            "review": row['Comment'],
        })

In [6]:
# Derive each professor's overall scores as the mean over their reviews.
# The loop variable gets its own name so it does not rebind `data`, which an
# earlier cell uses for the opened CSV file.
for professor in professors.values():
    reviews = professor['reviews']
    if not reviews:
        # Nothing to average: leave the existing scores untouched and avoid
        # dividing by zero.
        continue

    review_count = len(reviews)
    mean_quality = sum(review['quality'] for review in reviews) / review_count
    mean_difficulty = sum(review['difficulty'] for review in reviews) / review_count

    # round() without ndigits returns an int, so both scores are stored as
    # whole numbers.
    professor['overallQuality'] = round(mean_quality)
    professor['difficulty'] = round(mean_difficulty)




In [7]:
# Collect the per-professor records into a list under one top-level key.
output = {"professors": [*professors.values()]}

In [14]:
import re

def standardize_class_names(professors):
    """Expand abbreviated class names in each professor's reviews, in place.

    For every professor, the reviews are scanned for class names containing a
    non-digit prefix followed by a number and an optional single uppercase
    letter (e.g. "CS101" or "MATH200A"). The matched text is remembered under
    its bare number ("101") and, when a suffix letter is present, also under
    number + suffix ("200A"). A review whose class name is exactly one of
    those keys is rewritten to the remembered full name. When several full
    names share a number, the one seen last wins.

    A purely alphanumeric name that ends in a letter and is not itself a key
    (e.g. "200b") falls back to the entry for the name minus its last
    character. Digit-only names get no such fallback, so "101" is never
    rewritten through the entry for "10".

    Args:
        professors: iterable of dicts, each with a 'reviews' list whose items
            are dicts holding a string under 'class'.

    Returns:
        The same ``professors`` object; the review dicts are modified in place.
    """
    pattern = re.compile(r'(\D+)(\d+)([A-Z]?)')

    for professor in professors:
        reviews = professor['reviews']

        # Pass 1: learn number -> full name from the fully qualified names.
        # The map is rebuilt per professor, so names never leak between them.
        class_map = {}
        for review in reviews:
            match = pattern.search(review['class'])
            if not match:
                continue
            prefix, number, suffix = match.groups()
            full_class_name = f"{prefix}{number}{suffix}"
            class_map[number] = full_class_name
            if suffix:
                class_map[number + suffix] = full_class_name

        # Pass 2: replace abbreviated names with what pass 1 learned.
        for review in reviews:
            class_name = review['class']
            standardized = class_map.get(class_name)
            # Only a trailing letter may be dropped when retrying; dropping
            # the last digit of a bare number would match a different course.
            if standardized is None and class_name.isalnum() and class_name[-1].isalpha():
                standardized = class_map.get(class_name[:-1])
            if standardized is not None:
                review['class'] = standardized

    return professors

In [15]:
# standardize_class_names rewrites each review's 'class' in place and returns
# the object it was given, so standardized_data is the same list that is held
# in output['professors'].
standardized_data = standardize_class_names(output['professors'])

In [16]:

# Serialise the standardized professor list with 4-space indentation and
# write it to professors.json.
with open('professors.json', 'w') as out_file:
    out_file.write(json.dumps(standardized_data, indent=4))