In [156]:
# Setup
import pandas as pd
import numpy as np

#Parameter Lists
#Set the variable as None to ignore it when searching
#Change the value here and run all the cells to get an output
LOCATION = None
ROOM = None
QUARTER_INDEX = None          # A number, refer to the file or dataframe
QUARTER = None                # e.g. "FA13"/"WI19"/"SP15"
COURSE_ID = "CSE"             # e.g. "COGS", "BILD"
COURSE_NUM = ["8B","8A","30","11"]  # Must be a list of string (double quoted or single quoted)! e.g. ["100", "20A", "120A"]
SECTION = None               # Must be a string! e.g "767325"
INSTRUCTOR_FIRST = None    # instructor's first name
INSTRUCTOR_LAST = None     # instructor's last name
LOWEST_SIZE = None            # Include only sections that have student number >= this variable
LOWEST_RESPONSE = None        # Include only Capes Questions that have number of response >= this variable
CAPES = [5,6,7,8,9,10,17,27,26]        # Must be a list of integer! Include only Capes question with an ID that is in the list. From 5 to 27, plus 51
FILENAME = "CSE_CAPES.csv"    # name of output file

df = pd.read_csv("CAPES.csv")
print(len(df))

In [157]:
# Read CSV
dtypes = {'LOCATION':str, 'ROOM':str, 'QUARTER_INDEX':int, 'QUATER':int, 'COURSE_ID':str, 'COURSE_NUM':str, 
          'SECTION':str, 'INSTRUCTOR_LAST':str, 'INSTRUCTOR_FIRST': str, 'CAPE_ID':int, 'QUESTION':str, 
          'SECTION_SIZE':int, 'RESPONSE_SIZE':int, "RATING":float}
df = pd.read_csv("CAPES.csv", usecols = ["LOCATION", "ROOM", "QUARTER_INDEX", "QUARTER", 
                                         "COURSE_ID", "COURSE_NUM", "SECTION", "INSTRUCTOR_LAST", 
                                         "INSTRUCTOR_FIRST", "CAPE_ID", "QUESTION", 
                                         "SECTION_SIZE", "RESPONSE_SIZE", "RATING"],
                 dtype = dtypes)

# clean out empty spaces in cells
df["INSTRUCTOR_FIRST"] = df["INSTRUCTOR_FIRST"].str.strip()
df["INSTRUCTOR_LAST"] = df["INSTRUCTOR_LAST"].str.strip()

def ClassExtractor(LOCATION, ROOM, QUARTER, COURSE_ID, COURSE_NUM, SECTION, INSTRUCTOR_LAST, 
                   INSTRUCTOR_FIRST, LOWEST_SIZE, LOWEST_RESPONSE, CAPES):
    result = df
    
    if LOCATION is not None:
        result = result[result["LOCATION"] == LOCATION]
    
    if ROOM is not None:
        reulst = result[result["ROOM"] == ROOM]
        
    if QUARTER is not None:
        result = result[result["QUATER"] == QUARTER]
    
    if COURSE_ID is not None:
        result = result[result["COURSE_ID"] == COURSE_ID]
        
    if COURSE_NUM is not None:
        result = result[result["COURSE_NUM"].isin(COURSE_NUM)]
        
    if SECTION is not None:
        result = result[result["SECTION"] == SECTION]
    
    if INSTRUCTOR_LAST is not None:
        result = result[result["INSTRUCTOR_LAST"] == INSTRUCTOR_LAST]
        
    if INSTRUCTOR_FIRST is not None:
        result = result[result["INSTRUCTOR_FIRST"] == INSTRUCTOR_FIRST]
        
    if LOWEST_SIZE is not None:
        result = result[result["SECTION_SIZE"] >= LOWEST_SIZE]
        
    if LOWEST_RESPONSE is not None:
        result = result[result["RESPONSE_SIZE"] >= LOWEST_RESPONSE]
        
    if isinstance(CAPES, list):
        result = result[result["CAPE_ID"].isin(CAPES)]
    
    
    return result

def AlltimeAvg(INSTRUCTOR_FIRST, INSTRUCTOR_LAST, CAPES):
    newdf = (df.loc[df["INSTRUCTOR_LAST"] == INSTRUCTOR_LAST]).loc[df["INSTRUCTOR_FIRST"] == INSTRUCTOR_FIRST]
    dic = {}

    for i in CAPES:
        temp = newdf.loc[newdf["CAPE_ID"] == i]
        dic[i] = temp["RATING"].mean()

    print("All Time Average Rating for", INSTRUCTOR_LAST + ",", INSTRUCTOR_FIRST, "in (CAPE_ID, AVG)")
    for key, value in dic.items():
        print(key, value)

        
aggregations = {
    'RATING':'mean',
    'SECTION_SIZE':'sum',
    'RESPONSE_SIZE':'sum'
}
def transpose(output):
    hall = output
    data = hall.groupby(['CAPE_ID','SECTION']).agg(aggregations).reset_index()

    know_rating = pd.Series(data[data.CAPE_ID == 5].RATING)
    prep_rating = pd.Series(data[data.CAPE_ID == 6].RATING)
    audible_rating = pd.Series(data[data.CAPE_ID == 7].RATING)
    expl_rating = pd.Series(data[data.CAPE_ID == 8].RATING)
    attent_rating = pd.Series(data[data.CAPE_ID == 9].RATING)
    note_rating = pd.Series(data[data.CAPE_ID == 10].RATING)
    stimu_rating = pd.Series(data[data.CAPE_ID == 17].RATING)
    rec_course_rating = pd.Series(data[data.CAPE_ID == 26].RATING)
    rec_professor_rating = pd.Series(data[data.CAPE_ID == 27].RATING)
    df = hall
    df = df.drop_duplicates(subset=['SECTION'], keep='first')

    df.loc[:,'know_rating'] = list(know_rating.values)
    df.loc[:,'prep_rating'] = prep_rating.values
    df.loc[:,'audible_rating'] = audible_rating.values
    df.loc[:,'expl_rating'] = expl_rating.values
    df.loc[:,'attent_rating'] = attent_rating.values
    df.loc[:,'note_rating'] = note_rating.values
    df.loc[:,'stimu_rating'] = stimu_rating.values
    df.loc[:,'rec_course_rating'] = rec_course_rating.values
    df.loc[:,'rec_professor_rating'] = rec_professor_rating.values
    df = df.drop(columns=['QUESTION','CAPE_ID'])

    
    return df

In [158]:
# Calling Function and Write Data
original = ClassExtractor(LOCATION, ROOM, QUARTER, COURSE_ID, COURSE_NUM, SECTION, INSTRUCTOR_LAST, 
                   INSTRUCTOR_FIRST, LOWEST_SIZE, LOWEST_RESPONSE, CAPES)
output = transpose(original)
# filename = "CAPE_" + (INSTRUCTOR_FIRST or "") + " " + (INSTRUCTOR_LAST or "") + ".csv"

# if not output.empty:
#     output.to_csv(FILENAME or ("CAPE_" + (INSTRUCTOR_FIRST or "") + " " + (INSTRUCTOR_LAST or "") + ".csv"))

# if None in (INSTRUCTOR_FIRST, INSTRUCTOR_LAST):
#     INSTRUCTOR_FIRST = output["INSTRUCTOR_FIRST"].iloc[0]
#     INSTRUCTOR_LAST = output["INSTRUCTOR_LAST"].iloc[0]

# if CAPES is None:
#     CAPES = list(range(5, 28))
#     CAPES.append(51)
    
# AlltimeAvg(INSTRUCTOR_FIRST, INSTRUCTOR_LAST, CAPES)
# An summary of overall capes rating of this professor will be display below after running this cell

In [159]:
output

Unnamed: 0,LOCATION,ROOM,QUARTER_INDEX,QUARTER,COURSE_ID,COURSE_NUM,SECTION,INSTRUCTOR_LAST,INSTRUCTOR_FIRST,SECTION_SIZE,...,RATING,know_rating,prep_rating,audible_rating,expl_rating,attent_rating,note_rating,stimu_rating,rec_course_rating,rec_professor_rating
7815,,,9,FA12,CSE,11,755925,Ord,Richard,190,...,4.814815,4.301471,4.207407,4.169118,4.007353,3.305970,3.185185,4.096296,0.970588,0.904412
8523,PCYNH,109,9,FA12,CSE,30,755935,Ord,Richard,169,...,4.855670,4.686957,4.706897,4.784483,4.637931,4.336207,3.695652,4.330435,0.939655,0.991379
8640,PCYNH,109,9,FA12,CSE,8A,755909,Gillespie,Gary N,178,...,4.301471,4.814815,4.785047,4.822430,4.588785,3.953271,4.443396,4.398148,0.963964,0.936937
8661,PCYNH,109,9,FA12,CSE,8A,755913,Alvarado,Christine J.,152,...,4.686957,4.855670,4.824742,4.845361,4.721649,4.391753,4.762887,4.656250,0.938144,0.979381
8682,PCYNH,109,9,FA12,CSE,8A,762476,Lee,Cynthia Bailey,149,...,4.413793,4.413793,4.465517,4.508621,4.301724,3.829060,3.234783,4.258621,0.974576,0.940678
42452,PETER,110,11,FA13,CSE,11,785772,Ord,Richard,179,...,4.704082,3.686275,3.245098,3.950980,3.245098,3.421569,2.774510,4.020000,0.867925,0.584906
42473,PCYNH,109,11,FA13,CSE,11,797084,Papadopoulos,Philip Micha,152,...,4.319149,3.940171,3.724138,3.948718,3.529915,3.179487,3.119658,4.034188,0.923077,0.735043
43229,PCYNH,109,11,FA13,CSE,30,785785,Ord,Richard,124,...,4.711111,3.829545,3.784091,3.584270,2.651685,2.606742,2.707865,4.089888,0.910112,0.393258
43250,PCYNH,109,11,FA13,CSE,30,785788,Ord,Richard,183,...,4.698113,3.877193,3.842105,3.912281,3.087719,3.000000,3.315789,4.035088,0.912281,0.491228
43333,PCYNH,109,11,FA13,CSE,8A,785759,Alvarado,Christine J.,94,...,4.830986,4.614286,4.600000,4.623188,4.492754,3.985507,4.304348,4.338235,0.900000,0.942857


69