## Minerva Grade Opportunities
### Nicolas A Gort Freitas

In [51]:
import requests
import numpy as np
import pandas as pd
from time import time

# Display settings: suppress scientific notation in NumPy output, let pandas
# show up to 150 rows, and format pandas floats with four decimal places.
np.set_printoptions(suppress=True)
pd.set_option('display.max_rows', 150)
pd.set_option('display.float_format', lambda x: '%.4f' % x)

# API token; it is sent as 'Token <value>' in the Authorization header of every
# request made below (the value is redacted here).
token = '****************************************'

# Weight of the assignment being considered; this is the default `weight`
# argument of impact4, impact3 and expected_impact.
weight_assignment = 8

# Perceived probability of getting a 4 (as opposed to a 3) on that assignment.
p = 0.5

# Cornerstone code to restrict the ranking to (it is compared against the
# 'Course' column in hc_ranking); leave as None to rank the HCs of all courses.
cornerstone_name = None

In [52]:
#find keys at the same level in nested dictionaries

def retrieve_parallel(key,key2,var):
    """Yield the value stored under ``key2`` beside every occurrence of ``key``.

    ``var`` is walked recursively through nested dicts and lists. Whenever a
    mapping contains ``key``, the value that the *same* mapping holds under
    ``key2`` is yielded; mappings that have ``key`` but lack ``key2`` are
    skipped silently.

    Results that bubble up through a nested dict value are only forwarded
    when their type is exactly ``int`` or ``str``. Results produced by the
    current mapping itself, or by the elements of a list value, are forwarded
    as they are.

    Args:
        key: Key whose presence marks a mapping of interest.
        key2: Sibling key whose value is yielded.
        var: Structure to search. Anything without an ``items`` attribute
            yields nothing.

    Yields:
        The ``key2`` values, in traversal order.
    """
    if not hasattr(var, 'items'):
        return

    for k, v in var.items():
        if k == key:
            # Do the lookup before yielding, so that no ``yield`` sits inside
            # a ``try``. A bare ``except`` around the ``yield`` would swallow
            # GeneratorExit and make closing a partly consumed generator fail
            # with "generator ignored GeneratorExit".
            try:
                sibling = var[key2]
            except KeyError:
                pass
            else:
                yield sibling

        if isinstance(v, dict):
            for result in retrieve_parallel(key, key2, v):
                # Exact type match on purpose: subclasses such as bool are
                # filtered out here, as are floats, lists and None.
                if type(result) in (int, str):
                    yield result
        elif isinstance(v, list):
            for d in v:
                for result in retrieve_parallel(key, key2, d):
                    yield result

In [56]:
# Download, in this order, the learning-outcome index, the HC index and the
# current HC tree. Every request carries the same token header and every
# response is decoded from JSON.
lo, hc, hctree = (
    requests.get(endpoint, headers={'Authorization':'Token '+token}).json()
    for endpoint in (
        'https://seminar.minerva.kgi.edu/api/v1/hc-index-items?outcomeType=lo',
        'https://seminar.minerva.kgi.edu/api/v1/hc-index-items',
        'https://seminar.minerva.kgi.edu/api/v1/hc-trees/current?tree',
    )
)

In [57]:
start = time()

# Collect, for every tree node, the 'id' stored next to a 'hashtag' and the
# 'hashtag' stored next to an 'id', then pair the two lists position by
# position into an {id: hashtag} lookup table.
hc_indices = list(retrieve_parallel('hashtag','id',hctree))
hc_names = list(retrieve_parallel('id','hashtag',hctree))
hc_index = dict(zip(hc_indices, hc_names))

print(time() - start)

0.0017311573028564453


In [58]:
def logrades(n):
    """Fetch the decoded JSON performance data for learning outcome ``n``.

    ``n`` is converted with ``str`` and appended to the ``learning-outcome``
    query parameter; the request is authenticated with the module token.
    """
    url = 'https://seminar.minerva.kgi.edu/api/v1/outcomeindex/performance?learning-outcome='+str(n)
    response = requests.get(url, headers={'Authorization':'Token '+token})
    return response.json()
def hcgrades(n):
    """Fetch the decoded JSON performance data for HC item ``n``.

    ``n`` is converted with ``str`` and appended to the ``hc-item`` query
    parameter; the request is authenticated with the module token.
    """
    url = 'https://seminar.minerva.kgi.edu/api/v1/outcomeindex/performance?hc-item='+str(n)
    response = requests.get(url, headers={'Authorization':'Token '+token})
    return response.json()


In [59]:
def hc_numgrades(hcid):
    """Return the summed weight of the grades recorded for one HC.

    The grade entries come from ``hcgrades(hcid)``. Each entry contributes:

    * ``0.0`` when its ``'score'`` is ``0.0``;
    * ``1.0`` when its ``'assignment'`` is ``None``;
    * the assignment's ``'weight'`` when the assignment has one;
    * otherwise the weight of the previous entry, or ``1.0`` when there is
      no previous entry.

    Args:
        hcid: Identifier of the HC, passed through to ``hcgrades``.

    Returns:
        The sum of the per-entry weights (``0`` when there are no entries).
    """
    weights = []
    for entry in hcgrades(hcid):
        if entry['score'] == 0.0:
            weights.append(0.0)
            continue

        assignment = entry['assignment']
        if assignment is None:
            weights.append(1.0)
        elif 'weight' in assignment:
            weights.append(assignment['weight'])
        elif weights:
            # No weight on this assignment: reuse the previous entry's weight.
            weights.append(weights[-1])
        else:
            # First entry and nothing to reuse. Indexing weights[-1] here used
            # to raise IndexError; fall back to the same unit weight given to
            # entries without an assignment.
            # NOTE(review): confirm 1.0 is the right fallback for this case.
            weights.append(1.0)

    return sum(weights)

def hc_mean(hcid):
    """Return the mean score recorded for one HC in the module-level ``hc`` list.

    Args:
        hcid: Identifier of the HC; it is converted with ``int`` before being
            compared with each entry's ``'hc-item'`` field.

    Returns:
        The ``'mean'`` of the first matching entry. ``np.nan`` is returned
        when that mean is ``0.0`` and also when no entry matches, so the
        caller always receives a float-compatible value (returning ``None``
        would turn the resulting NumPy array into an object array and break
        the arithmetic done on it later).
    """
    target = int(hcid)
    for item in hc:
        if item['hc-item'] == target:
            mean = item['mean']
            return np.nan if mean == 0.0 else mean
    # The HC has no entry in the index at all: report "no data" the same way
    # as for a zero mean.
    return np.nan
            
def impact4(x,weight=weight_assignment):
    """Change in the course mean if this HC received one more grade of 4.

    ``x`` is a row exposing ``Mean``, ``N``, ``Mean_course`` and ``HC_count``.

    * ``N > 0``: the HC's mean is replaced by the weighted mean of its
      current grades plus a 4 of weight ``weight``, and the course mean
      (the average of its ``HC_count`` HC means) is recomputed.
    * ``N == 0``: the HC enters the course average as a new HC with mean 4.

    The current course mean is subtracted from the result in both cases.
    For any other ``N`` (negative or NaN) the function returns ``None``.
    """
    n_grades = x.N
    # Computed up front, exactly as before, even if the N == 0 branch ends up
    # not needing it.
    new_hc_mean = (x.Mean*n_grades + 4*weight) / (n_grades + weight)

    if n_grades > 0:
        course_total = x.Mean_course*x.HC_count - x.Mean + new_hc_mean
        return course_total/(x.HC_count) - x.Mean_course

    if n_grades == 0:
        course_total = x.Mean_course*x.HC_count + 4
        return course_total/(x.HC_count + 1) - x.Mean_course

    return None
    
def impact3(x,weight=weight_assignment):
    """Change in the course mean if this HC received one more grade of 3.

    ``x`` is a row exposing ``Mean``, ``N``, ``Mean_course`` and ``HC_count``.

    * ``N > 0``: the HC's mean is replaced by the weighted mean of its
      current grades plus a 3 of weight ``weight``, and the course mean
      (the average of its ``HC_count`` HC means) is recomputed.
    * ``N == 0``: the HC enters the course average as a new HC with mean 3.

    The current course mean is subtracted from the result in both cases.
    For any other ``N`` (negative or NaN) the function returns ``None``.
    """
    n_grades = x.N
    # Computed up front, exactly as before, even if the N == 0 branch ends up
    # not needing it.
    new_hc_mean = (x.Mean*n_grades + 3*weight) / (n_grades + weight)

    if n_grades > 0:
        course_total = x.Mean_course*x.HC_count - x.Mean + new_hc_mean
        return course_total/(x.HC_count) - x.Mean_course

    if n_grades == 0:
        course_total = x.Mean_course*x.HC_count + 3
        return course_total/(x.HC_count + 1) - x.Mean_course

    return None

def expected_impact(x,weight=weight_assignment,p=0.5):
    """Expected change in the course mean from tagging this HC.

    Combines the two outcomes modelled by ``impact4`` and ``impact3``:
    ``p * impact4 + (1 - p) * impact3``.

    Args:
        x: Row exposing the fields read by ``impact4``/``impact3``.
        weight: Weight of the new grade. It is forwarded to both impact
            functions (it used to be accepted but ignored).
        p: Probability of the grade being a 4 rather than a 3.

    Returns:
        The probability-weighted impact on the course mean.
    """
    return p*impact4(x, weight) + (1 - p)*impact3(x, weight)

In [60]:
start = time()
# Look up the mean and the total grade weight of every HC, in the iteration
# order of hc_index, so both arrays line up with its keys.
means = np.array(list(map(hc_mean, hc_index)))
ngrades = np.array(list(map(hc_numgrades, hc_index)))
print(time() - start)

69.73791694641113


In [61]:
# 'cornerstone-code' of every tree node that has a 'hashtag'; used as the
# 'Course' column in hc_ranking.
# NOTE(review): nodes with a 'hashtag' but no 'cornerstone-code' are skipped,
# which would shift this list relative to hc_index — confirm every HC node
# carries both keys.
cornerstones = list(retrieve_parallel('hashtag','cornerstone-code',hctree))

In [62]:
def hc_ranking(cornerstone_name = cornerstone_name):
    """Rank HCs by the expected effect of one more grade on their course mean.

    Builds a table from the module-level ``hc_index``, ``means``, ``ngrades``
    and ``cornerstones``, computes per-course statistics, and adds the
    ``Impact_4``, ``Impact_3`` and ``Expected_impact`` columns. The expected
    impact uses the module-level probability ``p``.

    Args:
        cornerstone_name: Course code to restrict the result to. Any falsy
            value (the default is the module-level setting at definition
            time) returns the HCs of all courses.

    Returns:
        A DataFrame with columns ``HC``, ``Mean``, ``N``, ``Course``,
        ``Impact_4``, ``Impact_3`` and ``Expected_impact``, sorted by
        ``Expected_impact`` in descending order.
    """
    hc_data = pd.DataFrame({
        'HC' : list(hc_index.values()),
        'Mean' : means,
        'N' : ngrades,
        'Course' : cornerstones
    })

    def count_scored(col):
        # Number of HCs in the course with a mean above zero (NaN excluded).
        return len(col[col > 0])

    # Aggregate with an ordered list. A set of functions has no defined
    # order, so the columns previously had to be identified by guessing from
    # the first value, which mislabelled them for courses with five or fewer
    # scored HCs.
    scores = hc_data.groupby('Course')['Mean'].agg(['mean', count_scored])
    scores.columns = ['Mean', 'HC_count']

    # Attach the course statistics to every HC row; the course mean becomes
    # 'Mean_course' because 'Mean' already exists in hc_data.
    joint = hc_data.join(scores,on='Course',rsuffix='_course')

    #example: What HCs are most convenient to tag to improve my score in FA?

    hc_data['Impact_4']=joint.apply(impact4,axis=1)
    hc_data['Impact_3']=joint.apply(impact3,axis=1)
    # Pass the configured probability explicitly; otherwise expected_impact
    # silently falls back to its own default.
    hc_data['Expected_impact']=joint.apply(lambda row: expected_impact(row, p=p),axis=1)

    ranked = hc_data.sort_values('Expected_impact',ascending=False)
    if cornerstone_name:
        # Build the mask from the sorted frame so its index matches the frame
        # being filtered.
        return ranked[ranked.Course==cornerstone_name]
    return ranked

In [73]:
# Show the ranking using the default cornerstone filter that was bound when
# hc_ranking was defined.
hc_ranking()

Unnamed: 0,HC,Mean,N,Course,Impact_4,Impact_3,Expected_impact
53,#connotation,2.3182,11.0,MC,0.0373,0.0151,0.0262
36,#interviewsurvey,3.0,7.0,EA,0.0267,0.0,0.0133
74,#differences,2.9091,11.0,CS,0.0209,0.0017,0.0113
65,#negotiate,2.4194,31.0,CS,0.0147,0.0054,0.0101
71,#leadprinciples,3.1429,7.0,CS,0.0208,-0.0035,0.0087
66,#nudge,3.1429,7.0,CS,0.0208,-0.0035,0.0087
35,#casestudy,3.25,4.0,EA,0.025,-0.0083,0.0083
24,#decisiontrees,3.2,10.0,FA,0.0198,-0.0049,0.0074
78,#responsibility,2.3455,55.0,CS,0.0096,0.0038,0.0067
70,#confidence,3.1053,19.0,MC,0.014,-0.0016,0.0062
