# my.harvard Mappers
Below are all of the mappings from the raw my.harvard scrape to the parsed offerings objects in the database. Each one is broken into its own cell to make testing easier. For testing, `genSamples`, `genSample`, and `Mapper`'s `uniques` method come in handy.

In [1]:
from jupyterImport import enableJupyterImports
enableJupyterImports()
from Utilities import find, Mapper, MultiReplace, genSamples, genSample, name
from load_raw_data import myHarvard
import sys, json

importing Jupyter notebook from Utilities.ipynb
importing Jupyter notebook from load_raw_data.ipynb


In [2]:
deptReplace = MultiReplace({
  '&': 'and', 
  'Biol$': 'Biology', 
  ' Sci ': ' Science ', 
  ' Chem ': ' Chemistry ',
  ' Amer ': ' American ',
  '^Biolog ': 'Biological ',
  ' Bio ': ' Biology ',
  'Med$': 'Medicine',
  'Pharm$': 'Pharmacology',
  'GenEd: ': '',
  'Hlth$': 'Health',
  'Aesthetic and Interpretive': 'Aesthetic and Interpretive Understanding',
  'Pol$': 'Policy',
  ' Regen ': ' Regenerative ',
  'Science of Physical Universe': 'Science of the Physical Universe',
  '^E Asian': 'East Asian',
  'Math Reasoning': 'Mathematical Reasoning',
  'E Europe and Cntrl Asia': 'Eastern Europe, and Central Asia',
  'E Europe, Central Asia': 'Eastern Europe, and Central Asia',
  'Biomedical Sci': 'Biomedical Sciences',
  '^Adv Stud': 'Advanced Studies',
  'and Civ': 'and Civilizations',
  ' Langs ': ' Languages ',
  'Lit$': 'Literature',
  'Arch, Landscape and Urban Plan': 'Architecture, Landscape, and Urban Planning',
  'Envi Science': 'Environmental Science'
})

In [3]:
def mapDepartments(inputs):
  depts, primary = inputs
  depts = depts if isinstance(depts, list) else [depts]
  depts = [deptReplace.sub(dept) for dept in depts if dept != primary]
  return {'departments': depts, 'primaryDepartment': deptReplace.sub(primary)}
departmentsMapper = Mapper(['IS_SCL_DESCR_IS_SCL_DESCRM', 'IS_SCL_DESCR_IS_SCL_DESCRD'], mapDepartments)

In [4]:
prepReplace = MultiReplace({
  'Prerequisite: ': '',
  '^\*': '',
  '\.$': ''
})
def mapRecommendedPrep(prep):
  return {'recommendedPrep': prepReplace.sub(prep)} if prep else {}
recommendedPrepMapper = Mapper('RecPrepDescr', mapRecommendedPrep)

In [5]:
consentRequiredMapper = Mapper('IS_SCL_DESCRSHORT_HU_CONSENT', Mapper.noChange('consentRequired'))

In [6]:
formatMap = {
  'Read Rsch': 'Reading and Research',
  'Doc Diss': 'Doctoral Dissertation',
  'ReadingCrs': 'Reading Course',
  'Fresh Sem': 'Freshman Seminar',
  'JrTutorial': 'Junior Tutorial',
  'SrTutorial': 'Senior Tutorial',
  'Rsrch Wksh': 'Research Workshop',
  'Rsrch Sem': 'Research Seminar',
  'Soph Tutor': 'Sophomore Tutorial',
  'Lab Rsrch': 'Lab Research',
  'Sem Wrkshp': 'Seminar Workshop',
  'House Sem': 'House Seminar'
}
def mapFormat(frmt):
  return {'format': formatMap[frmt] if frmt in formatMap else frmt} if frmt else {}
formatMapper = Mapper('SSR_COMPONENT_NAME', mapFormat)

In [7]:
notesMapper = Mapper('HU_COURSE_PREQ', Mapper.noChange('notes'))

In [8]:
gradingBasisMapper = Mapper('IS_SCL_DESCR100_HU_SCL_GRADE_BASIS', Mapper.noChange('gradingBasis'))

In [9]:
unitsMapper = Mapper('UnitsDisplay', Mapper.noChange('units'))

In [10]:
classNumberMapper = Mapper('CLASS_NBR', Mapper.noChange('classNumber'))

In [11]:
courseIdMapper = Mapper('CRSE_ID', Mapper.noChange('courseId'))

In [12]:
enrollmentCapMapper = Mapper('ENRL_CAP', Mapper.noChange('enrollmentCap'))

In [13]:
groupMapper = Mapper('Subject', Mapper.noChange('group'))

In [14]:
titleReplace = MultiReplace({'Expository Writing 20: ': '' })
def mapTitle(inputs):
  (title, expos) = inputs
  fullTitle = title + ': ' + expos if expos else title
  return {'title': titleReplace.sub(fullTitle)}
titleMapper = Mapper(['Title', 'DESCRFORMAL_COURSE_TOPIC'], mapTitle)

In [15]:
def mapGenEds(genEds):
  return {'genEds': genEds if isinstance(genEds, list) else [genEds]} if genEds else {}
genEdsMapper = Mapper('IS_SCL_DESCR100_HU_SCL_ATTR_GE', mapGenEds)

In [16]:
levelMapper = Mapper('IS_SCL_DESCR100_HU_SCL_ATTR_LEVL', Mapper.noChange('level'))

In [17]:
courseSiteMapper = Mapper('URL_URLNAME', Mapper.noChange('courseSite'))

In [18]:
def mapCrossReg(crossReg):
  return {'crossReg': crossReg if isinstance(crossReg, list) else [crossReg]} if crossReg else {}
crossRegMapper = Mapper('IS_SCL_DESCR100_HU_SCL_ATTR_XREG', mapCrossReg)

In [19]:
def mapTermYear(inputs):
  (normal, likely) = inputs
  if not likely:
    return {'term': normal.split(' ')[1], 'year': normal.split(' ')[0]}
  else:
    return {'likelyTerm': likely.split(' ')[1], 'likelyYear': likely.split(' ')[0]}
termYearMapper = Mapper(['IS_SCL_DESCR_IS_SCL_DESCRH', 'IS_SCL_MEETING_PAT_HU_LIKELY_OFFERED'], mapTermYear)

In [20]:
descReplace = MultiReplace({'<p>': '', '</p>': ''})
def mapDescription(desc):
  return {'description': descReplace.sub(desc)} if desc else {}
descriptionMapper = Mapper('IS_SCL_DESCR', mapDescription)

In [21]:
import re
def mapNumber(inputs):
  (number, group) = inputs
  matched = re.compile('\d\d*').findall(number)
  return {
  'number': number,
  'numberInt': int(matched[0]) if len(matched) > 0 else -1,
  'numberAlt': group + number
}
numberMapper = Mapper(['CATALOG_NBR', 'Subject'], mapNumber)

In [22]:
def mapProfs(profs):
  if not profs:
    return {}
  if not isinstance(profs, list):
    print 'not list -------'
    print json.dumps(profs, indent=2)
  def mapProf(prof):
    fullName = prof['Name']
    return {'displayName': name(fullName), 'matchName': name(fullName, match=True)}
  return {'profs': [mapProf(p) for p in profs]}
profsMapper = Mapper('DESCRLONG_DETAILS', mapProfs)

In [23]:
def mapObjectID(inputs):
  (classNumber, courseId) = inputs
  classNumber = '0' if not classNumber else classNumber
  return {'objectID': '_'.join([courseId, classNumber])}
objectIDMapper = Mapper(['CLASS_NBR', 'CRSE_ID'], mapObjectID)   

In [41]:
def mapTime(time):
  if time is None:
    return None
  hour = int(time.split(':')[0])
  minutes = int(time.split(':')[1].replace('am', '').replace('pm', '')) if ':' in time else None
  if minutes and minutes == 59:
    return '1' if hour == 12 else unicode(hour + 1)
  elif minutes and minutes == 29:
    return unicode(hour) + ':' + '30'
  elif minutes:
    return unicode(hour) + ':' + unicode(minutes)
  else:
    return unicode(hour)
    
daysReplace = MultiReplace({'Mo': 'M', 'We': 'W', 'Fr': 'F'})
def mapDays(days):
    return daysReplace.sub(days).split(' ')

def assignIf(dictionary):
  def assign(key, value):
    if value:
      dictionary[key] = value
  return assign

locReplace = MultiReplace({
  ' \(FAS\)': '',
  ' \(SEAS\)': '',
  ' \(HLS\)': '',
  'Bldg': 'Building', 
  'Ctr': 'Center',
  ' \(HDS\)': '',
  ' \(GSD\)': ''
})
def mapLocName(name):
  return locReplace.sub(name) if name else None

def mapLatLong(latLong):
  return latLong if latLong != '0' else None

def mapSession(inputs):
  (start, end, days, locName, locNumber, latitude, longitude) = inputs
  session = {}; seshSet = assignIf(session)
  seshSet('start', mapTime(start))
  seshSet('end', mapTime(end))
  seshSet('days', mapDays(days)) 
  location = {}; locSet = assignIf(location)
  locSet('name', mapLocName(locName))
  locSet('number', locNumber)
  locSet('longitude', mapLatLong(longitude))
  locSet('latitude', mapLatLong(latitude))
  if len(location.keys()) > 1:
    session['location'] = location
  return session

sessionKeys = [
  'IS_SCL_TIME_START', 
  'IS_SCL_TIME_END', 
  'IS_SCL_MEETING_PAT', 
  'IS_SCL_DESCR_IS_SCL_DESCRG',
  'BLDG_CD',
  'HU_LATITUDE',
  'HU_LONGITUDE'
]
def mapSessions(inputs):
  (multiSection, start, end, days, locNameOrNames, locNumber, latitude, longitude) = inputs
  if days == 'TBA':
    return {}
  if multiSection:
    sessions = []
    locNameOrNames = locNameOrNames if locNameOrNames else [None] * len(multiSection)
    locNames = locNameOrNames if isinstance(locNameOrNames, list) else [locNameOrNames] * len(multiSection)
    for session, locName in zip(multiSection, locNames):
      mapped = {}
      session['IS_SCL_DESCR_IS_SCL_DESCRG'] = locName
      Mapper(sessionKeys, mapSession).map(session, mapped)
      sessions.append(mapped)
    return {'sessions': sessions}
  elif days is not None:
    return {'sessions': [mapSession((start, end, days, locNameOrNames, locNumber, latitude, longitude))]}
  else:
    return {}
    
sessionsMapper = Mapper(['MultiSection'] + sessionKeys, mapSessions)

In [30]:
examDateTimeMapper = Mapper('IS_SCL_DESCR_HU_SCL_EXAM_GROUP', Mapper.noChange('examDateTime'))

In [31]:
enrollmentStatusMapper = Mapper('EnrlDisplay', Mapper.noChange('enrollmentStatus'))

In [38]:
mappers = [
  departmentsMapper,
  recommendedPrepMapper,
  consentRequiredMapper,
  formatMapper,
  notesMapper,
  gradingBasisMapper,
  unitsMapper,
  classNumberMapper,
  courseIdMapper,
  enrollmentCapMapper,
  groupMapper,
  titleMapper,
  genEdsMapper,
  levelMapper,
  courseSiteMapper,
  crossRegMapper,
  termYearMapper,
  descriptionMapper,
  numberMapper,
  profsMapper,
  objectIDMapper,
  sessionsMapper,
  examDateTimeMapper,
  enrollmentStatusMapper
]

In [28]:
rawOfferings = myHarvard()

In [40]:
offerings = []
for raw in rawOfferings:
  parsed = {}
  for mapper in mappers:
    mapper.map(raw, to=parsed)
  offerings.append(parsed)

(<type 'exceptions.TypeError'>, TypeError('expected string or buffer',), <traceback object at 0x1072937e8>)
  File "<ipython-input-40-0078e727137e>", line 6, in <module>
    mapper.map(raw, to=parsed)
  File "<string>", line 48, in map
  File "<ipython-input-37-8ce01abfa325>", line 80, in mapSessions
    return {'sessions': [mapSession((start, end, days, locNameOrNames, locNumber, latitude, longitude))]}
  File "<ipython-input-37-8ce01abfa325>", line 45, in mapSession
    seshSet('days', mapDays(days))
  File "<ipython-input-37-8ce01abfa325>", line 17, in mapDays
    return daysReplace.sub(days).split(' ')
  File "<string>", line 8, in sub
{
  "IS_SCL_DESCR100_HU_SCL_ATTR_XREG": "Available for Harvard Cross Registration", 
  "IS_SCL_DESCR_IS_SCL_DESCRD": "Psychology", 
  "URL_URLNAME": "https://locator.tlt.harvard.edu/course/colgsas-113704/", 
  "Title": "Diagnostic Interviewing", 
  "IS_SCL_DESCR_IS_SCL_DESCRM": "Psychology", 
  "HU_UNITS_MAX": "4", 
  "HU_RECPREP_FLAG": "N", 
  "IS_S

KeyboardInterrupt: 