In [1]:
import jupyterImport
jupyterImport.enableJupyterImports()
import relevance
from calc_percentiles import offeringsWithPercentiles, reports
from Utilities import find, pluck, copyKeys, allWith, analyze
from collections import defaultdict
from operator import itemgetter

In [30]:
def genProfessors(offerings):
  """Index the professors listed on `offerings` by their matchName.

  Only offerings that `allWith` selects as having a 'profs' entry contribute.

  Args:
    offerings: collection of offering records (indexed by key, e.g. 'objectID').

  Returns:
    dict mapping matchName -> professor record with keys:
      'displayName': the display name from the first offering the professor
        was seen on,
      'matchName': the key itself,
      'taught': empty dict (populated by linkPastReportsToProfessors),
      'teaching': dict mapping objectID -> summary (group, number, title,
        termYear, objectID) for EVERY offering the professor is listed on.
  """
  professors = {}
  for offering, profs in allWith('profs', offerings):
    for matchName, displayName in pluck('matchName', 'displayName').all(profs):
      if matchName not in professors:
        professors[matchName] = {
          'displayName': displayName, 'matchName': matchName, 'taught': {}, 'teaching': {}
        }
      # Register the offering on every sighting; doing it only when the record
      # is first created would drop all but the professor's first offering.
      professors[matchName]['teaching'][offering['objectID']] = copyKeys(
        offering, ['group', 'number', 'title', 'termYear', 'objectID'])
  return professors

def linkPastReportsToProfessors(professors, reports):
  """Attach past reports to the known professors who appear on them.

  For every report selected by `allWith('profs', ...)`, each listed prof whose
  matchName is a key of `professors` gets a summary stored at
  professors[matchName]['taught'][reportId]. The summary combines report-level
  fields (group, number, title, term, year, size, overall and workload scores,
  reportId) with that prof's own instructor score and percentile.

  `professors` is mutated in place (including a possible displayName
  override); the function returns nothing.
  """
  for report, reportProfs in allWith('profs', reports):
    # Resolve the known professors up front, before touching any record.
    knownProfs = [p for p in reportProfs if p['matchName'] in professors]
    for reportProf in knownProfs:
      reportName, key = pluck('displayName', 'matchName').getTuple(reportProf)
      entry = professors[key]
      currentName, _ = pluck('displayName', 'matchName').getTuple(entry)

      reportFields = [
        'group', 'number', 'title', 'term', 'year', 'size',
        ('overall', 'responses/overall/score'),
        ('workload', 'responses/workload/score'),
        'reportId',
      ]
      profFields = [
        ('instructor', 'responses/instructor/score'),
        ('instructorPercentile', 'responses/instructor/percentiles/size'),
      ]
      summary = copyKeys(report, reportFields)
      summary.update(copyKeys(reportProf, profFields))
      entry['taught'][report['reportId']] = summary

      # Q display names with the same number of words are usually better,
      # e.g. N. Mankiw -> Gregory Mankiw.
      sameWordCount = len(currentName.split(' ')) == len(reportName.split(' '))
      if sameWordCount and currentName != reportName:
        entry['displayName'] = reportName

def lastTimeTaughtReport(matchName, reports):
  """Return score info from the first report where `matchName` has an instructor score.

  Reports are scanned in the order given; within a report, profs are scanned
  in order. The first prof entry whose matchName equals `matchName` and for
  which `find('responses/instructor/score', ...)` is truthy wins.

  Returns:
    dict with 'relevantBecause' ('taughtPreviously'), 'matched' (summary of the
    report) and whatever copyKeys extracts for 'score' / 'percentile';
    None when no report qualifies.
  """
  for report, reportProfs in allWith('profs', reports):
    for candidate in reportProfs:
      sameProf = candidate['matchName'] == matchName
      if not (sameProf and find('responses/instructor/score', obj=candidate)):
        continue
      result = {'relevantBecause': 'taughtPreviously'}
      result['matched'] = copyKeys(
        report, ['group', 'number', 'title', 'term', 'year', 'reportId'])
      scoreFields = [
        ('score', 'responses/instructor/score'),
        ('percentile', 'responses/instructor/percentiles/size'),
      ]
      result.update(copyKeys(candidate, scoreFields))
      return result
  return None

def profRelevance(prof, ofReport):
  """Return 999. when `prof` (by matchName) is listed on `ofReport`, else 0.

  A report without a 'profs' entry always scores 0.
  """
  if 'profs' not in ofReport:
    return 0.
  listed = any(
    prof['matchName'] == reportMatchName
    for _, reportMatchName in allWith('matchName', ofReport['profs'])
  )
  return 999. if listed else 0.
      
def calcProfRelevance(prof, offering, toReport, reps, similarity):
  """Add a relevance score for every report in `reps` into `similarity`.

  Args:
    prof: professor record, passed through to relevance.profs.
    offering: the offering being scored against; must contain 'numberInt'.
    toReport: reference report for the offering, or a falsy value when there
      is none (then only the number and group components are scored).
    reps: iterable of past reports, each indexed by 'reportId'.
    similarity: mapping reportId -> accumulated score, updated in place with
      `+=`, so it must already contain (or default) every reportId seen,
      e.g. a defaultdict.

  Returns:
    None; results are accumulated in `similarity`.
  """
  toNumberInt = offering['numberInt']
  try:
    toScore = float(toReport['responses']['workload']['score'])
  except (KeyError, IndexError, TypeError, ValueError):
    # No report, a missing field, or a non-numeric score: there is simply no
    # workload score to compare. Anything else (KeyboardInterrupt, SystemExit,
    # genuine programming errors) is deliberately allowed to propagate.
    toScore = None
  for ofReport in reps:
    relevanceScore = 0
    if toReport:
      relevanceScore += relevance.size(offering, toReport, ofReport)
      relevanceScore += relevance.workload(offering, toReport, toScore, ofReport)
      relevanceScore += relevance.profs(prof, ofReport, toReport)
    relevanceScore += relevance.number(offering, toNumberInt, ofReport)
    relevanceScore += relevance.group(offering, ofReport)
    similarity[ofReport['reportId']] += relevanceScore

def mostRecentReport(offering):
  """Pick the reference report for `offering`.

  Returns offering['topReport'] whenever that key is present (even if its
  value is falsy); otherwise the first element of offering['reports'] when
  that list is non-empty; otherwise None.
  """
  if 'topReport' in offering:
    return offering['topReport']
  if 'reports' not in offering:
    return None
  pastReports = offering['reports']
  return pastReports[0] if len(pastReports) > 0 else None
    
def addMostRelevantScoreAndPercentileToOfferingProfs(offerings, professors):
  """Annotate each offering's profs, in place, with their most relevant past score.

  For every prof (with a matchName) on every offering (with 'profs'):
    1. If lastTimeTaughtReport finds the prof on one of the offering's own
       reports, that result is merged into the prof entry.
    2. Otherwise, if the professor has any 'taught' reports, each one is scored
       with calcProfRelevance against the offering's mostRecentReport, and the
       top-scoring one supplies 'matched', 'score' and 'percentile', tagged
       with relevantBecause='mostSimilarTaughtCourse'.
    3. Otherwise the prof entry is left untouched.

  `professors` must contain every matchName encountered. Returns nothing.
  """
  for offering, profs in allWith('profs', offerings):
    for offeringProf, matchName in allWith('matchName', profs):
      professor = professors[matchName]

      previous = None
      if 'reports' in offering:
        previous = lastTimeTaughtReport(matchName, offering['reports'])
      if previous:
        offeringProf.update(previous)
        continue

      if 'taught' not in professor:
        continue
      taught = professor['taught']
      if len(taught.keys()) == 0:
        continue

      # Score every past report the professor taught, then keep the best one.
      similarity = defaultdict(float)
      reference = mostRecentReport(offering)
      calcProfRelevance(offeringProf, offering, reference, taught.values(), similarity)
      ranked = sorted(similarity.items(), key=itemgetter(1), reverse=True)
      bestId = ranked[0][0]
      candidates = [past for past in taught.values() if past['reportId'] == bestId]
      best = candidates[0]

      matched = copyKeys(best, ['group', 'number', 'title', 'term', 'year', 'reportId'])
      offeringProf.update({'relevantBecause': 'mostSimilarTaughtCourse', 'matched': matched})
      offeringProf.update(copyKeys(best, [('score', 'instructor'), ('percentile', 'instructorPercentile')]))

In [3]:
# Pipeline, order-dependent:
#   1. build the professor index (keyed by matchName) from the current offerings,
#   2. fill each professor's 'taught' history from the past reports,
#   3. annotate every offering's prof entries in place, using that history.
professors = genProfessors(offeringsWithPercentiles)
linkPastReportsToProfessors(professors, reports)
addMostRelevantScoreAndPercentileToOfferingProfs(offeringsWithPercentiles, professors)

In [4]:
# Alias, not a copy: the prof entries were annotated in place by the cell above,
# so both names refer to the same (now professor-enriched) offerings object.
offeringsWithProfessors = offeringsWithPercentiles