# Exploration

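Reformatting Q Report data: load the batched JSON evaluation files, merge them into a single DataFrame, expand the nested per-question fields into hierarchical columns, and save the result as a pickle.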

In [1]:
import pandas as pd
from tqdm import tqdm

In [None]:
# Peek at the frame saved by a previous run of this notebook. The pickle is
# only written by the final cell, so on a fresh checkout it does not exist
# yet; show nothing in that case instead of aborting "Restart & Run All".
try:
    previous_evaluations = pd.read_pickle('data/evaluations/evaluations.pkl').head()
except FileNotFoundError:
    previous_evaluations = None
previous_evaluations

## Reformatting data

In [2]:
# Short camelCase names for the verbose headings found in the report files.
# The two "future students" wordings are kept apart here ("comments" and
# "commentsAlt").
renaming = {
    "Course Response Rate": "courseResponseRate",
    "Course General Questions": "courseGeneralQuestions",
    "General Instructor Questions": "generalInstructorQuestions",
    "On average, how many hours per week did you spend on coursework outside of class? Enter a whole number between 0 and 168.": "hoursPerWeek",
    "How strongly would you recommend this course to your peers?": "recommendation",
    "What was/were your reason(s) for enrolling in this course? (Please check all that apply)": "reasons",
    "What would you like to tell future students about this class? (Your response to this question may be published anonymously.)": "comments",
    "What would you like to tell future students about this class?": "commentsAlt",
}

# Read batch-1.json through batch-15.json, one frame per file, in order.
dfs = [
    pd.read_json(f"data/evaluations/qreports-2023-04-14/batch-{batch}.json")
    for batch in range(1, 16)
]

# Stack the batches under a fresh 0..n-1 row index, then apply the short names.
df = pd.concat(dfs, ignore_index=True).rename(columns=renaming)

In [3]:
# Every row must leave at least one of the two comment columns empty, so
# filling "comments" from "commentsAlt" can never overwrite a value.
assert (df["comments"].isna() | df["commentsAlt"].isna()).all()
df = df.assign(comments=df["comments"].fillna(df["commentsAlt"])).drop(
    columns=["commentsAlt"]
)

In [4]:
# Columns whose cells hold nested records; each is flattened into its own
# set of "<column>_<key>" columns.
dict_columns = [
    "courseResponseRate",
    "courseGeneralQuestions",
    "generalInstructorQuestions",
    "hoursPerWeek",
    "recommendation",
    "reasons",
]

# One flattened frame per nested column; nested keys are joined with "_" and
# the source column name is prepended so names stay unique after the concat.
expanded_dfs = [
    pd.json_normalize(df[column], sep="_").add_prefix(f"{column}_")
    for column in tqdm(dict_columns)
]

# Replace the nested columns with their flattened counterparts, side by side.
df = pd.concat([df.drop(columns=dict_columns)] + expanded_dfs, axis=1)

100%|██████████| 6/6 [00:06<00:00,  1.16s/it]


In [5]:
def get_tuple(column):
    """Turn a flat, underscore-joined column name into a column-index tuple.

    - No underscore: the name is filed under "meta" -> ("meta", column, "").
    - One underscore: the two parts are padded with an empty third entry.
    - Two or more underscores: every part is returned as its own entry.
    """
    pieces = tuple(column.split("_"))
    if len(pieces) >= 3:
        return pieces
    if len(pieces) == 2:
        return pieces + ("",)
    return ("meta", column, "")


# Swap the flat column names for a hierarchical MultiIndex, one tuple per
# column as produced by get_tuple, then display the resulting frame.
df.columns = pd.MultiIndex.from_tuples(list(map(get_tuple, df.columns)))
df

Unnamed: 0_level_0,meta,meta,meta,meta,meta,meta,courseResponseRate,courseResponseRate,courseGeneralQuestions,courseGeneralQuestions,...,recommendation,reasons,reasons,reasons,reasons,reasons,reasons,reasons,reasons,reasons
Unnamed: 0_level_1,url,year,season,courseName,instructorName,comments,responded,invited,Evaluate the course overall.,Evaluate the course overall.,...,stdev,elective,concentration,secondary,gened,expos,language,premed,distribution,qrd
Unnamed: 0_level_2,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,count,votes,...,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
0,https://harvard.bluera.com/harvard/rpv-eng.asp...,2022,Spring,ARTS 27R,Davone Tines,[This is one of the best classes I've ever tak...,5,8,3.0,"[3, 0, 0, 0, 0]",...,0.00,3.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
1,https://harvard.bluera.com/harvard/rpv-eng.asp...,2022,Spring,ARTS 27R,Isaac Winokur,[This is one of the best classes I've ever tak...,5,8,3.0,"[3, 0, 0, 0, 0]",...,0.00,3.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
2,https://harvard.bluera.com/harvard/rpv-eng.asp...,2022,Spring,AFRAMER 10,Henry Gates,[This is a great intro course. The materials i...,48,62,40.0,"[18, 13, 7, 2, 0]",...,0.64,21.0,15.0,8.0,3.0,0.0,0.0,0.0,1.0,0.0
3,https://harvard.bluera.com/harvard/rpv-eng.asp...,2022,Spring,AFRAMER 10,Evelynn Hammonds,[This is a great intro course. The materials i...,48,62,40.0,"[18, 13, 7, 2, 0]",...,0.64,21.0,15.0,8.0,3.0,0.0,0.0,0.0,1.0,0.0
4,https://harvard.bluera.com/harvard/rpv-eng.asp...,2022,Spring,AFRAMER 109Y,Vivek Bald,[It's very interesting and lowkey until the la...,5,12,4.0,"[1, 3, 0, 0, 0]",...,0.96,3.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6914,https://harvard.bluera.com/harvard/rpv-eng.asp...,2019,Fall,WOMGEN 1210FT,Afsaneh Najmabadi,[This class was super interesting and a great ...,8,12,8.0,"[5, 2, 1, 0, 0]",...,0.46,4.0,3.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0
6915,https://harvard.bluera.com/harvard/rpv-eng.asp...,2019,Fall,WOMGEN 1217,Nicole Noll,[TAKE THIS CLASS!!! It is a mindblowing introd...,14,16,14.0,"[13, 1, 0, 0, 0]",...,0.00,8.0,4.0,4.0,0.0,0.0,0.0,0.0,1.0,0.0
6916,https://harvard.bluera.com/harvard/rpv-eng.asp...,2019,Fall,WOMGEN 1247,Michael Bronski,"[if you are really really interested in music,...",18,20,17.0,"[11, 3, 3, 0, 0]",...,0.72,10.0,6.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0
6917,https://harvard.bluera.com/harvard/rpv-eng.asp...,2019,Fall,WOMGEN 1273,Robert Reid,"[there's A LOT of reading, but it's all worthw...",8,10,8.0,"[8, 0, 0, 0, 0]",...,0.00,5.0,3.0,2.0,0.0,0.0,0.0,0.0,1.0,0.0


In [6]:
# Persist the reshaped frame; the preview cell near the top of the notebook
# reads this same file.
pd.to_pickle(df, "data/evaluations/evaluations.pkl")