Modes of correlation: Model (oracle, personal), Granularity (recipe level, user level, recipe flat, user flat, all recipes for all users), Surprise (95th percentile, max), Method of correlation (spearman, pearson)

Libraries


In [1]:
import os, pickle, numpy as np
from GloVex.evaluate_personalised import survey_reader
from scipy.stats import pearsonr, spearmanr
cwd = os.getcwd()

Get the oracle surprise estimates by the model

In [2]:
oracle_suprise_estimates_pickle_fn = cwd + '/GloVex/results/new/oracle_suprise_estimates.pickle'
oracle_suprise_estimates_dict = pickle.load(open(oracle_suprise_estimates_pickle_fn, 'rb'))

Get the oracle's surprise estimates (95th percentile and max score)

In [3]:
# Initialize arrays
oracle_surp_estimates_95perc = []
oracle_surp_estimates_max = []
# Iterate over the dict
for each_recipe in oracle_suprise_estimates_dict:
	# Get all of the ingredients combinations' surprise estimates
	oracle_ingr_surp_estimates = [each_ingr_comb[2] for each_ingr_comb in oracle_suprise_estimates_dict[each_recipe]['surprise_cuisine']]
	# Append the 95th_percentile surprise estimate
	oracle_surp_estimates_95perc.append(oracle_suprise_estimates_dict[each_recipe]['95th_percentile'])
	# Append the max score surprise estimate
	oracle_surp_estimates_max.append(max(oracle_ingr_surp_estimates))
# oracle_surp_estimates_95perc
# oracle_surp_estimates_max

Get the personal surprise estimates by the model

In [4]:
user_suprise_estimates_pickle_fn = cwd + '/GloVex/results/new/user_suprise_estimates.pickle'
user_suprise_estimates_dict = pickle.load(open(user_suprise_estimates_pickle_fn,'rb'))

Get the 95th percentils and max surprise estimates

In [5]:
# Initialize surprise estimates for 95th percentile and max score
personalized_surp_estimates_95perc = []
personalized_surp_estimates_max = []
# Iterate over the user surprise estimeates dict
for each_user in user_suprise_estimates_dict:
	# print 'each_user', each_user, user_suprise_estimates_dict[each_user]
	# Initialize the inner arrays for 95th percentile and max score
	users_surp_estimates_95perc = []
	users_surp_estimates_max = []
	# Iterate over the surprises for each user's surprise estimates
	for recipe_idx, each_recipe in enumerate(user_suprise_estimates_dict[each_user]['recipes_surp']):
		# print recipe_idx, user_suprise_estimates_dict[each_user]['recipes_surp'][recipe_idx]['95th_percentile']
		# Get all of the ingredients combinations' surprise estimates
		ingr_comb_suprise_arr = [each_ingr_comb[2] for each_ingr_comb in user_suprise_estimates_dict[each_user]['recipes_surp'][recipe_idx]['surprise_cuisine']]
		# print ingr_comb_suprise_arr
		# print max(ingr_comb_suprise_arr)
		# Append the 95th percentile for the surprise estimates
		users_surp_estimates_95perc.append(user_suprise_estimates_dict[each_user]['recipes_surp'][recipe_idx]['95th_percentile'])
		# Append the max score for the surprise estimates
		users_surp_estimates_max.append(max(ingr_comb_suprise_arr))
	# Append the user's surprise estimates to the personalized main 2D array (95th percentile and max score)
	personalized_surp_estimates_95perc.append(users_surp_estimates_95perc)
	personalized_surp_estimates_max.append(users_surp_estimates_max)
# personalized_surp_estimates_95perc
# personalized_surp_estimates_max


Transpose for recipe level comparison

In [6]:
personalized_surp_estimates_recipelvl_95perc = list(np.transpose(personalized_surp_estimates_95perc))
personalized_surp_estimates_recipelvl_max = list(np.transpose(personalized_surp_estimates_max))

Flatten the arrays in both dimnessions

In [7]:
personalized_surp_estimates_userflat_95perc = [item for sublist in personalized_surp_estimates_95perc for item in sublist]
personalized_surp_estimates_userflat_max = [item for sublist in personalized_surp_estimates_max for item in sublist]
personalized_surp_estimates_recipeflat_95perc = [item for sublist in personalized_surp_estimates_recipelvl_95perc for item in sublist]
personalized_surp_estimates_recipeflat_max = [item for sublist in personalized_surp_estimates_recipelvl_max for item in sublist]

Get the surprises of the users in the survey

In [8]:
survey_fp = '/Volumes/GoogleDrive/My Drive/Live/PQE/Omar/personalized-surprise/qchef_surveydata'
user_fam_scaled_arr, users_surp_ratings_arr = survey_reader(survey_fp)

Transpose for recipe level comparison

In [9]:
users_surp_ratings_userlvl = users_surp_ratings_arr
users_surp_ratings_recipelvl = list(np.transpose(users_surp_ratings_arr))

Flatten the arrays in both dimenssions

In [10]:
users_surp_ratings_user_flat = [item for sublist in users_surp_ratings_userlvl for item in sublist]
users_surp_ratings_recipe_flat = [item for sublist in list(np.transpose(users_surp_ratings_arr)) for item in sublist]

Mode: Oracle + Personalized, per user (default for oracle), 95th percentile

In [11]:
for user_idx, (each_personal_score, each_users_rating) in enumerate(zip(personalized_surp_estimates_95perc, users_surp_ratings_userlvl)):
	print user_idx
	print each_personal_score
	print each_users_rating
	print 'oracle_surp_estimates: spearmanr', spearmanr(oracle_surp_estimates_95perc, each_users_rating)
	print 'oracle_surp_estimates: pearsonr', pearsonr(oracle_surp_estimates_95perc, each_users_rating)
	print 'each_personal_score: spearmanr', spearmanr(each_personal_score, each_users_rating)
	print 'each_personal_score: pearsonr', pearsonr(each_personal_score, each_users_rating)

0
[0.028929721010827643, -0.8863879953910269, 0.6586530676765014, -0.4468323932292995, 0.7493311274020704, 0.7769672304116093, 0.25178225605472326, 1.820851219042163, 3.4992518111744015, -0.34728576461298233]
[1, 1, 4, 4, 1, 4, 5, 4, 3, 4]
oracle_surp_estimates: spearmanr (0.30813875924572515, 0.38638381485150064)
oracle_surp_estimates: pearsonr (0.22543514207124077, 0.5311675107950075)
each_personal_score: spearmanr (0.09834215720608248, 0.786944984154626)
each_personal_score: pearsonr (0.14720804886376246, 0.6848703190058377)
1
[0.5436363806717961, -0.24937998554519694, 1.3311161275546537, 0.1339915324871328, 1.3938903613409888, 1.49734122161952, 0.7757765059679845, 2.5636580994075437, 3.8311755317516774, 0.10003276519671964]
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
oracle_surp_estimates: spearmanr (nan, nan)
oracle_surp_estimates: pearsonr (nan, 1.0)
each_personal_score: spearmanr (nan, nan)
each_personal_score: pearsonr (nan, 1.0)
2
[0.501867080242336, -0.3333045942194819, 1.2144491472326793

  c /= stddev[:, None]
  c /= stddev[None, :]
  cond1 = (scale > 0) & (x > self.a) & (x < self.b)
  cond1 = (scale > 0) & (x > self.a) & (x < self.b)
  cond2 = cond0 & (x <= self.a)
  r = r_num / r_den


Mode: Oracle + Personalized, per user (default for oracle), max score

In [12]:
for user_idx, (each_personal_score, each_users_rating) in enumerate(zip(personalized_surp_estimates_max, users_surp_ratings_userlvl)):
	print user_idx
	print each_personal_score
	print each_users_rating
	print 'oracle_surp_estimates: spearmanr', spearmanr(oracle_surp_estimates_max, each_users_rating)
	print 'oracle_surp_estimates: pearsonr', pearsonr(oracle_surp_estimates_max, each_users_rating)
	print 'each_personal_score: spearmanr', spearmanr(each_personal_score, each_users_rating)
	print 'each_personal_score: pearsonr', pearsonr(each_personal_score, each_users_rating)

0
[0.48790048028225025, -0.707503280261354, 0.7603070766366502, -0.03543186751564214, 3.8059580332580722, 1.4716768665424258, 0.8105135465639222, 2.353106981887523, 4.407839848709313, 0.35901522166489186]
[1, 1, 4, 4, 1, 4, 5, 4, 3, 4]
oracle_surp_estimates: spearmanr (0.36058790975563576, 0.3060212830567241)
oracle_surp_estimates: pearsonr (0.2882286617927404, 0.4193198598657479)
each_personal_score: spearmanr (0.006556143813738832, 0.9856590518353308)
each_personal_score: pearsonr (-0.08827291315072675, 0.8084006134829851)
1
[1.021739912141993, -0.04521917008970724, 1.5656208905644358, 0.4826758817105218, 4.57327267745409, 2.1116708950495306, 1.3540113489072998, 2.6834683624340157, 4.546031735017476, 1.0647990501959337]
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
oracle_surp_estimates: spearmanr (nan, nan)
oracle_surp_estimates: pearsonr (nan, 1.0)
each_personal_score: spearmanr (nan, nan)
each_personal_score: pearsonr (nan, 1.0)
2
[0.9674690575713439, -0.12326203159868737, 1.4128388110656944, 0.

Mode: Personalized, per recipes, 95th percentile

In [13]:
for user_idx, (each_personal_score, each_users_rating) in enumerate(zip(personalized_surp_estimates_recipelvl_95perc, users_surp_ratings_recipelvl)):
	print user_idx
	print each_personal_score
	print each_users_rating
	print 'each_personal_score: spearmanr', spearmanr(each_personal_score, each_users_rating)
	print 'each_personal_score: pearsonr', pearsonr(each_personal_score, each_users_rating)

0
[ 0.02892972  0.54363638  0.50186708  0.46813093  0.49614864  0.30289717
  0.24000773 -0.32276945  0.58543223  0.13869066 -0.19585277  0.0339943
  0.09446871  0.12353877  0.1099622   0.00350265  0.09327732  0.13005212
  0.41974505  0.59027166  0.4796247   0.26830817  0.22001407  0.29641629
  0.18536243 -0.10571924  0.12068708  0.41234407  0.44094906  0.14147284
  0.57620164  0.02709159 -0.07477928 -0.22884929 -0.03179999  0.17785771
  0.57788923  0.37242334  0.26912608  0.26830817  0.46553757  0.48977486
  0.58251775  0.37664102  0.59027166 -0.13292282  0.59027166 -0.10154385
  0.10722372  0.43355441  0.00768528  0.14708432  0.14063802  0.13123458
  0.44172002  0.14009325  0.24637627  0.25328963  0.48100554  0.07424448
 -0.47064532 -0.17143878  0.23889219 -0.50028982 -0.10152953 -0.10754359
 -0.36777007 -0.03179999 -0.3994154   0.26830817 -1.28027137  0.17852841
  0.49184995  0.2905442   0.29641629 -0.22709063 -1.73222263  0.26830817
  0.4751392   0.1151406  -0.20656095  0.27855182  

Mode: Personalized, per recipes, max score

In [14]:
for user_idx, (each_personal_score, each_users_rating) in enumerate(zip(personalized_surp_estimates_recipelvl_max, users_surp_ratings_recipelvl)):
	print user_idx
	print each_personal_score
	print each_users_rating
	print 'each_personal_score: spearmanr', spearmanr(each_personal_score, each_users_rating)
	print 'each_personal_score: pearsonr', pearsonr(each_personal_score, each_users_rating)

0
[ 0.48790048  1.02173991  0.96746906  0.93303051  0.94728874  0.75428319
  0.73836534  0.15243059  1.04509754  0.56906887  0.27428706  0.49291496
  0.54888221  0.59485708  0.56340136  0.44966595  0.55954206  0.57246511
  0.91126494  1.04812184  0.9516403   0.72462306  0.70036529  0.76023613
  0.66665103  0.34974118  0.5963713   0.86310368  0.88441721  0.60453745
  1.04479532  0.4852749   0.36429535  0.21813645  0.39293742  0.67111946
  1.03837715  0.80529861  0.71852163  0.72462306  0.94998092  0.95315983
  1.04207007  0.82170863  1.04812184  0.35311536  1.04812184  0.40045014
  0.56312895  0.87647879  0.49087504  0.6022291   0.57535345  0.59451073
  0.9017204   0.59206399  0.70482313  0.71243027  0.92843915  0.5297503
 -0.07044177  0.27601695  0.68322541 -0.00235126  0.3783977   0.37415244
  0.10029285  0.39293742  0.09717144  0.72462306 -0.7089343   0.64891033
  0.95766167  0.73289735  0.76023613  0.2832819  -1.29871073  0.72462306
  0.93835242  0.56128654  0.26408465  0.74941089  

Modes: Personalized, all users, user flat, 95th percentile/max score

In [15]:
print '95th percentile'
print 'spearmanr', spearmanr(personalized_surp_estimates_userflat_95perc, users_surp_ratings_user_flat)
print 'pearsonr', pearsonr(personalized_surp_estimates_userflat_95perc, users_surp_ratings_user_flat)
print 'Max score'
print 'spearmanr', spearmanr(personalized_surp_estimates_userflat_max, users_surp_ratings_user_flat)
print 'pearsonr', pearsonr(personalized_surp_estimates_userflat_max, users_surp_ratings_user_flat)

95th percentile
spearmanr (0.25528637430985407, 2.9184672410628215e-14)
pearsonr (0.24902913226520892, 1.2689017212127914e-13)
Max score
spearmanr (0.2849274677345722, 1.584574937206444e-17)
pearsonr (0.2804369193116763, 5.2567341223892415e-17)


Modes: Personalized, all recipes, recipe flat, 95th percentile/max score

In [16]:
print '95th percentile'
print 'spearmanr', spearmanr(personalized_surp_estimates_recipeflat_95perc, users_surp_ratings_recipe_flat)
print 'pearsonr', pearsonr(personalized_surp_estimates_recipeflat_95perc, users_surp_ratings_recipe_flat)
print 'Max score'
print 'spearmanr', spearmanr(personalized_surp_estimates_recipeflat_max, users_surp_ratings_recipe_flat)
print 'pearsonr', pearsonr(personalized_surp_estimates_recipeflat_max, users_surp_ratings_recipe_flat)

95th percentile
spearmanr (0.25528637430985407, 2.9184672410628215e-14)
pearsonr (0.24902913226520898, 1.2689017212127914e-13)
Max score
spearmanr (0.2849274677345722, 1.584574937206444e-17)
pearsonr (0.28043691931167636, 5.2567341223892415e-17)


In [17]:
oracle_surp_estimates_repeated_95perc = [oracle_surp_estimates_95perc] * len(users_surp_ratings_userlvl)
oracle_surp_estimates_repeated_max = [oracle_surp_estimates_max] * len(users_surp_ratings_userlvl)

In [18]:
oracle_surp_estimates_userflat_95perc = [item for sublist in oracle_surp_estimates_repeated_95perc for item in sublist]
oracle_surp_estimates_userflat_max = [item for sublist in oracle_surp_estimates_repeated_max for item in sublist]

In [20]:
print '95th percentile'
print 'spearmanr', spearmanr(oracle_surp_estimates_userflat_95perc, users_surp_ratings_user_flat)
print 'pearsonr', pearsonr(oracle_surp_estimates_userflat_95perc, users_surp_ratings_user_flat)
print 'Max score'
print 'spearmanr', spearmanr(oracle_surp_estimates_userflat_max, users_surp_ratings_user_flat)
print 'pearsonr', pearsonr(oracle_surp_estimates_userflat_max, users_surp_ratings_user_flat)

95th percentile
spearmanr (0.037650674493277386, 0.2700620548249365)
pearsonr (0.041088435275697124, 0.22870352587937326)
Max score
spearmanr (0.10066169524054402, 0.0031247058077166225)
pearsonr (0.044049425321728605, 0.196866593866097)


In [19]:
personalized_surp_estimates_userflat_95perc

[0.028929721010827643,
 -0.8863879953910269,
 0.6586530676765014,
 -0.4468323932292995,
 0.7493311274020704,
 0.7769672304116093,
 0.25178225605472326,
 1.820851219042163,
 3.4992518111744015,
 -0.34728576461298233,
 0.5436363806717961,
 -0.24937998554519694,
 1.3311161275546537,
 0.1339915324871328,
 1.3938903613409888,
 1.49734122161952,
 0.7757765059679845,
 2.5636580994075437,
 3.8311755317516774,
 0.10003276519671964,
 0.501867080242336,
 -0.3333045942194819,
 1.2144491472326793,
 0.09937361813471979,
 1.2695517779900765,
 1.2280272141790771,
 0.6645857934601478,
 2.42695464967051,
 3.7035750118237103,
 0.0443524740941007,
 0.46813092687529967,
 -0.3615228944139048,
 1.2124324076803883,
 0.08705298889391463,
 1.246852779225628,
 1.2342896518953212,
 0.6569969757084289,
 2.343825893607176,
 3.845032353376318,
 0.03531593156786994,
 0.49614863506720613,
 -0.3807180697221458,
 1.2005658225526512,
 -0.0019874285791614005,
 1.24183448559447,
 1.298907500856568,
 0.6659822939426872,
 2.