In [8]:
from jupyterImport import enableJupyterImports
enableJupyterImports()
from siblings import offeringsWithProfessors
from Utilities import genSample
import json

In [9]:
from copy import deepcopy
limit = 10*1024 - 100 # Buffer of a 100 bytes
overLimit = lambda off: calcSize(off) > limit
calcSize = lambda dct: len(json.dumps(dct))

def strippedResponse(response):
  stripped = {'score': response['score']}
  if 'percentiles' in response and 'similar' in response['percentiles']:
    stripped['percentiles'] = {'similar': response['percentiles']['similar']}
  return stripped

def stripReport(reportOrProf):
  if 'responses' in reportOrProf:
    for attribute, response in reportOrProf['responses'].iteritems():
      reportOrProf['responses'][attribute] = strippedResponse(response)
  return reportOrProf

def trimmedReports(reports, targetSize, debug=False):
  trimmed = deepcopy(reports)
  for i, report in enumerate(reversed(trimmed)):
    stripReport(report)
    if 'profs' in report:
      report['profs'] = [stripReport(p) for p in report['profs']]        
    if calcSize(trimmed) < targetSize:
      if debug:
        print '  Got under size limit by stripping', i + 1, 'of', len(reports)
      return trimmed 
  while calcSize(trimmed) > targetSize:
    if debug:
      print '  Still too big', calcSize(trimmed)
    if len(trimmed) > 1:
      trimmed = trimmed[:-1]
    else:
      if debug:
        print '  Removed all', len(reports), 'to get under size limit.'
      return []
  if debug:
    print '  Removed', len(reports), '->', len(trimmed), 'to get under size limit.',
    print '  Last:', reports[-1]['term'], reports[-1]['year'], '->', trimmed[-1]['term'], trimmed[-1]['year']
  return trimmed

def trimProfs(newOffering):
  for prof in newOffering['profs']:
    if 'matched' in prof:
      del prof['matched']

def trimTopReport(newOffering):
  if 'topReport' in newOffering:
    top = newOffering['topReport']
    del newOffering['topReport']
    trimmedTop = trimmedReports([top], 10*1024 - calcSize(newOffering))
    if len(trimmedTop) > 0:
      newOffering['topReport'] = trimmedTop[0]
      
def trimReports(newOffering):
  oldReports = [r for r in newOffering['reports'] if r['year'] < '2014']
  newReports = [r for r in newOffering['reports'] if r['year'] >= '2014']
  del newOffering['reports']
  if overLimit(newOffering):
    trimTopReport(newOffering)
  if (calcSize(newOffering) + calcSize(newReports)) < limit:
    newOffering['reports'] = newReports + trimmedReports(oldReports, limit - (calcSize(newOffering) + calcSize(newReports))) 
  elif calcSize(newOffering) < limit:
    trimmedNew = trimmedReports(newReports, limit - calcSize(newOffering))
    if len(trimmedNew) > 0:
      newOffering['reports'] = trimmedNew

from collections import Counter
def algoliaCompress(offerings):
  algoliaOfferings = []
  for offering in offerings:
    if overLimit(offering):
      newOffering = deepcopy(offering)
      if 'reports' in newOffering:
        trimReports(newOffering)
      if overLimit(newOffering):
        trimTopReport(newOffering)
      if overLimit(newOffering):
        trimProfs(newOffering)
      if overLimit(newOffering):
        print 'Could not get under limit'
    else:
      newOffering = offering
    algoliaOfferings.append(newOffering)
  return algoliaOfferings

In [10]:
algoliaOfferings = algoliaCompress(offeringsWithProfessors)

Could not get under limit
Could not get under limit
Could not get under limit


In [11]:
genSample(algoliaOfferings)

{'classNumber': u'20454',
 'consentRequired': u'No Consent',
 'courseId': u'205724',
 'crossReg': [u'Available for Harvard Cross Registration'],
 'departments': [u'Business MBA', u'Business School MBA'],
 'description': u'http://www.hbs.edu/coursecatalog/1624.html',
 'format': u'Elective',
 'gradingBasis': u'HBSM Graded',
 'group': u'HBSMBA',
 'number': u'1624',
 'numberInt': 1624,
 'objectID': u'205724_20454',
 'otherOfferings': [{'objectID': u'205724_20452',
   'profs': [{'displayName': u'Robert White', 'matchName': u'robert white'}],
   'sessions': [{'days': [u'W', u'Th', u'F'],
     'location': {'latitude': u'42.365878',
      'longitude': u'-71.121779',
      'name': u'Aldrich 208 (HBS)',
      'number': u'05040'},
     'time': {'end': 9.833333333333334, 'start': 8.5}}],
   'termYear': u"Fall '17"},
  {'objectID': u'205724_20453',
   'profs': [{'displayName': u'Ramana Nanda', 'matchName': u'ramana nanda'}],
   'sessions': [{'days': [u'W', u'Th', u'F'],
     'location': {'latitude'

In [None]:
len(algoliaOfferings)

In [12]:
from algoliasearch import algoliasearch
client = algoliasearch.Client("D86FK05FUD", '0cebfab0abe857cd36dd70e37f043a64')
index = client.init_index("offerings")
index.save_objects(algoliaOfferings)

{u'objectIDs': [u'113307_15826',
  u'113307_15827',
  u'113307_15824',
  u'113307_15825',
  u'116391_12689',
  u'113704_0',
  u'104604_19711',
  u'113307_15828',
  u'170226_0',
  u'117788_15901',
  u'110819_0',
  u'160536_13905',
  u'108658_18545',
  u'116549_15165',
  u'116549_15166',
  u'191143_11769',
  u'124526_0',
  u'122706_14509',
  u'122706_14508',
  u'122706_14507',
  u'110638_19745',
  u'191143_11767',
  u'122706_14504',
  u'122706_14503',
  u'122592_18337',
  u'108854_16480',
  u'205052_18436',
  u'111282_11990',
  u'115669_12733',
  u'112844_12107',
  u'116549_15162',
  u'112838_15296',
  u'116549_15163',
  u'205477_10218',
  u'126942_13661',
  u'126942_13660',
  u'190419_10591',
  u'121266_13092',
  u'119079_14209',
  u'119079_14208',
  u'119079_14207',
  u'113307_15829',
  u'180849_18888',
  u'112838_15292',
  u'116418_12710',
  u'112981_16569',
  u'111378_0',
  u'114925_12456',
  u'170499_18451',
  u'112981_16564',
  u'112981_16567',
  u'112838_15290',
  u'190222_11494',