In [265]:
import re
import numpy as np
import pandas as pd
from pprint import pprint
import requests
from bs4 import BeautifulSoup

In [266]:
grades = pd.read_csv('cs-course-grades.csv', index_col=False)
old_primary_instructors = list(set(grades['primaryInstructor'].to_list()))

In [267]:
def display_name_normally(name):
    parts = name.split(", ")
    normal_order = parts[1:] + [parts[0]]
    return " ".join(normal_order)

def remove_middle_name(name):
    parts = [r for r in re.split(" |\.", name) if r and len(r) > 0]
    if len(parts) > 2:
        return " ".join([parts[0], parts[2]])
    else:
        return name

primary_instructors = [remove_middle_name(display_name_normally(n)) for n in old_primary_instructors]
# pprint(primary_instructors)

raw_to_refined_names = {}
for i in range(len(primary_instructors)):
    raw_to_refined_names[old_primary_instructors[i]] = primary_instructors[i]
    
raw_to_refined_names['Fagen, Wade A'] = 'Wade Fagen-Ulmschneider'
raw_to_refined_names['Fagen-Ulmschnei, Wade A'] = 'Wade Fagen-Ulmschneider'
raw_to_refined_names['Hasegawa-Johnso, Mark A'] = 'Mark Hasegawa-Johnson'
# pprint(raw_to_refined_names)

In [268]:
grades = grades.replace({"primaryInstructor": raw_to_refined_names })
grades.to_csv('cs-course-grades.csv', index=False)

In [269]:
grades = pd.read_csv('cs-course-grades.csv', index_col=False)
primary_instructors = list(set(grades['primaryInstructor'].to_list()))

In [270]:
research_urls = [
    'http://cs.illinois.edu/research/architecture-compilers-and-parallel-computing', 
    'http://cs.illinois.edu/research/artificial-intelligence',
    'http://cs.illinois.edu/research/bioinformatics-and-computational-biology',
    'http://cs.illinois.edu/research/computers-and-education',
    'http://cs.illinois.edu/research/data-and-information-systems',
    'http://cs.illinois.edu/research/interactive-computing',
    'http://cs.illinois.edu/research/programming-languages-formal-methods-and-software-engineering',
    'http://cs.illinois.edu/research/scientific-computing',
    'http://cs.illinois.edu/research/security-and-privacy',
    'http://cs.illinois.edu/research/systems-and-networking',
    'http://cs.illinois.edu/research/theory-and-algorithms',
]

prof_research_dict = {}
for url in research_urls:
    r = requests.get(url)
    page = BeautifulSoup(r.content, 'html.parser')
    table = page.find('table')
    table_entries = table.find_all('tr')
    for te in table_entries:
        try:
            prof_td, research_td = te.find_all('td')
            prof = remove_middle_name(prof_td.find('a').text)
            research = research_td.text
            
            if prof in prof_research_dict:
                prof_research_dict[prof] += research.split(", ")
            else:
                prof_research_dict[prof] = research.split(", ")
        except:
            pass
        
for prof in prof_research_dict:
    prof_research_dict[prof] = ", ".join(prof_research_dict[prof]).strip()

In [271]:
instructors = pd.DataFrame(primary_instructors, columns=['instructorName'])
instructors.index += 1 
instructors['researchInterests'] = instructors['instructorName'].map(prof_research_dict)
instructors.to_csv('cs-course-instructors.csv', index=True, index_label='instructorId')

In [272]:
instructors = pd.read_csv('cs-course-instructors.csv', index_col=False)

name_to_no_dict = dict(zip(instructors.instructorName, instructors.instructorId))
grades = grades.replace({"primaryInstructor": name_to_no_dict })
grades.to_csv('cs-course-grades.csv', index=False)

In [273]:
instructors.to_csv('cs-course-instructors.csv', index=False)
instructors[instructors['instructorName'].str.contains("David")]


Unnamed: 0,instructorId,instructorName,researchInterests
0,1,David Varodayan,
55,56,David Nicol,"Modeling and Simulation, Security, Privacy, an..."
67,68,David Forsyth,"Computer Vision, Object Recognition, Scene Und..."
151,152,David Padua,"Compiler Techniques for Parallel Computing, Co..."


In [274]:
# [grades['primaryInstructor'] == 4]
grades.to_csv('cs-course-grades.csv', index=False)
grades

Unnamed: 0,courseNo,courseName,year,term,primaryInstructor,aPlus,a,aMinus,bPlus,b,bMinus,cPlus,c,cMinus,dPlus,d,dMinus,f
0,100,Freshman Orientation,2010,Fall,151,0,154,0,0,30,0,0,22,0,0,9,0,9
1,100,Freshman Orientation,2011,Fall,151,0,182,0,0,11,0,0,12,0,0,5,0,7
2,100,Freshman Orientation,2012,Fall,151,0,188,0,0,21,0,0,6,0,0,1,0,9
3,100,Freshman Orientation,2013,Fall,151,0,222,0,5,16,0,5,4,0,2,1,0,9
4,100,Freshman Orientation,2014,Fall,151,0,232,11,3,11,3,2,6,1,0,2,1,9
5,100,Freshman Orientation,2015,Fall,151,0,234,0,9,11,4,0,10,0,0,2,2,4
6,100,Freshman Orientation,2016,Fall,151,0,279,0,5,11,0,1,6,0,0,3,0,6
7,100,Freshman Orientation,2017,Fall,151,0,258,5,12,12,4,3,3,2,0,0,1,4
8,100,Freshman Orientation,2018,Fall,162,0,290,10,9,4,2,1,2,2,0,1,0,1
9,101,Intro Computing: Engrg & Sci,2010,Fall,97,49,74,68,67,56,46,39,25,16,9,6,8,2
