In [50]:
import numpy as np
import pandas as pd
import scipy
from sklearn.metrics.pairwise import cosine_similarity
from scipy import sparse
from scipy import spatial
from scipy.spatial.distance import squareform, pdist, cdist

# Toy fixtures: one row per recipe, one 0/1 flag per feature column z1..z12
# (presumably ingredient presence - TODO confirm what the z-columns encode).
# The recipe labels are attached directly as an index named 'id'.

# Recipes the user already has.
user_recipes = pd.DataFrame(
    {
        'z1':  [0, 0, 0, 0, 0, 0],
        'z2':  [0, 1, 1, 1, 1, 0],
        'z3':  [1, 1, 1, 1, 1, 1],
        'z4':  [1, 0, 0, 0, 0, 0],
        'z5':  [0, 1, 1, 1, 1, 1],
        'z6':  [0, 1, 1, 1, 1, 0],
        'z7':  [0, 0, 0, 0, 0, 0],
        'z8':  [0, 0, 0, 0, 0, 1],
        'z9':  [0, 0, 0, 1, 0, 0],
        'z10': [1, 0, 0, 0, 0, 1],
        'z11': [1, 0, 0, 0, 0, 0],
        'z12': [0, 0, 1, 0, 0, 0],
    },
    index=pd.Index(['ur1', 'ur2', 'ur3', 'ur4', 'ur5', 'ur6'], name='id'),
)

# Candidate recipes that get ranked against the user's recipes.
recipe_db = pd.DataFrame(
    {
        'z1':  [0, 1, 0, 0, 0, 1, 1, 0, 0, 0],
        'z2':  [0, 0, 1, 1, 1, 0, 0, 0, 0, 0],
        'z3':  [1, 1, 0, 1, 1, 1, 0, 1, 0, 1],
        'z4':  [0, 0, 0, 0, 0, 0, 1, 1, 0, 0],
        'z5':  [0, 1, 1, 0, 1, 0, 0, 0, 1, 1],
        'z6':  [1, 1, 1, 1, 1, 0, 0, 0, 0, 0],
        'z7':  [0, 0, 0, 0, 1, 0, 1, 0, 0, 0],
        'z8':  [0, 0, 1, 0, 0, 1, 0, 0, 0, 1],
        'z9':  [0, 0, 0, 1, 0, 0, 0, 1, 0, 0],
        'z10': [1, 0, 0, 0, 1, 1, 0, 0, 1, 1],
        'z11': [1, 0, 0, 1, 0, 0, 1, 0, 0, 0],
        'z12': [0, 1, 1, 0, 0, 1, 0, 1, 0, 1],
    },
    index=pd.Index(['r1', 'r2', 'r3', 'r4', 'r5', 'r6', 'r7', 'r8', 'r9', 'r10'], name='id'),
)

In [None]:
from IPython.display import display_html
def display_side_by_side(*args):
    """Render several tables next to each other in a single notebook output.

    Parameters
    ----------
    *args
        Objects exposing ``to_html()`` (e.g. pandas DataFrames). They are
        shown left to right in the order given.

    The HTML of all tables is concatenated, every opening ``<table`` tag gets
    an inline ``display:inline`` style so the tables flow horizontally, and
    the result is handed to ``display_html`` as raw HTML.
    """
    html_str = ''.join(df.to_html() for df in args)
    # Patch only the opening tag: replacing the bare word 'table' would also
    # rewrite '</table>' and any label or cell text containing "table"
    # (e.g. "vegetable").
    inline_html = html_str.replace('<table', '<table style="display:inline"')
    display_html(inline_html, raw=True)

In [52]:
### Delta method - recipe-to-recipe comparison

# For every database recipe, add up the absolute element-wise differences to
# all user recipes; a smaller delta means the recipe is closer to the user's set.
deltas = {
    recipe_id: user_recipes.sub(recipe_row).abs().values.sum()
    for recipe_id, recipe_row in recipe_db.iterrows()
}

# Build the frame in one step: DataFrame.append was removed in pandas 2.0 and
# copied the whole frame on every iteration. float64 keeps the dtype that the
# row-by-row construction displayed (20.0, 24.0, ...).
result_1 = pd.DataFrame({'delta': pd.Series(deltas, dtype='float64')})

# mergesort is stable, so recipes with an equal delta keep their recipe_db order.
result_1 = result_1.sort_values(by='delta', kind='mergesort')
result_1

Unnamed: 0,delta
r5,20.0
r1,24.0
r2,24.0
r4,24.0
r9,24.0
r3,26.0
r10,26.0
r8,32.0
r6,36.0
r7,46.0


In [89]:
## Euclidean distance - recipe-to-recipe comparison

# Pairwise distances: one row per user recipe, one column per database recipe.
result_array = cdist(user_recipes, recipe_db, metric='euclidean')
result_2 = pd.DataFrame(
    result_array,
    index=user_recipes.index.values,
    columns=recipe_db.index.values,
)
result_2

# Total each database recipe's distances over all user recipes and rank
# ascending, so the smallest summed distance comes first.
result_3 = (
    result_2.sum()
    .to_frame('euclidiean_sum')
    .sort_values(by='euclidiean_sum')
)

result_3

Unnamed: 0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10
ur1,1.414214,2.645751,3.0,2.236068,2.44949,2.236068,2.0,2.0,2.0,2.236068
ur2,2.0,1.732051,1.732051,1.732051,1.414214,2.645751,2.828427,2.44949,2.0,2.236068
ur3,2.236068,1.414214,1.414214,2.0,1.732051,2.44949,3.0,2.236068,2.236068,2.0
ur4,2.236068,2.0,2.0,1.414214,1.732051,2.828427,3.0,2.236068,2.236068,2.44949
ur5,2.0,1.732051,1.732051,1.732051,1.414214,2.645751,2.828427,2.44949,2.0,2.236068
ur6,2.0,2.236068,2.236068,2.645751,2.0,1.732051,2.828427,2.44949,1.414214,1.0


Unnamed: 0,euclidiean_sum
r5,10.742018
r4,11.760134
r2,11.760134
r1,11.88635
r9,11.88635
r3,12.114383
r10,12.157694
r8,13.820605
r6,14.537538
r7,16.485281


In [78]:
## Euclidean distance - user preference
# 1. Assemble an array of the user's recipes (ideally indexed, so that every recipe has a fixed number).
# 2. Build one user-preference vector from all recipes in that array (recipe presence is summed up).

# Column-wise sum over the user's recipes, laid out as a single-row frame.
# The columns are taken in recipe_db's order because cdist compares values
# positionally; a column missing from user_recipes raises KeyError here
# instead of ending up as a misaligned or NaN entry in the vector.
user_pref = user_recipes[recipe_db.columns].sum().to_frame('user_pref').T

# Distance from the single preference vector to every database recipe,
# ranked ascending (smallest distance first).
distance_array = cdist(user_pref, recipe_db, metric='euclidean')
result_4 = pd.DataFrame(
    {'euclidiean_user_pref': distance_array[0]},
    index=recipe_db.index.values,
).sort_values(by='euclidiean_user_pref')
result_4

Unnamed: 0,euclidiean_user_pref
r5,8.124038
r2,8.660254
r4,8.660254
r3,8.774964
r10,8.774964
r1,8.944272
r6,9.327379
r8,9.380832
r9,9.486833
r7,10.099505


In [80]:

## Cosine similarity - recipe-to-recipe comparison

# Pairwise similarities: rows are user recipes, columns are database recipes.
# NOTE: this rebinds result_array and result_2, which the Euclidean cell also assigns.
result_array = cosine_similarity(user_recipes, recipe_db)
result_2 = pd.DataFrame(result_array, columns=recipe_db.index.values, index=user_recipes.index.values)

# Despite the column name, these are summed *similarities*, so a larger value
# means a closer recipe. A single descending sort is all that is needed.
result_5 = pd.DataFrame(result_2.sum(), columns=['cosine_distance_sum'])
result_5 = result_5.sort_values(ascending=False, by='cosine_distance_sum')

In [90]:
# Show the four rankings next to each other: delta sum, summed Euclidean
# distance, Euclidean distance to the user-preference vector, and summed
# cosine similarity.
# NOTE(review): expects result_3 to still hold the Euclidean sums; the last
# cell of the notebook rebinds result_3, so run the cells top to bottom.
display_side_by_side(result_1, result_3, result_4, result_5)


Unnamed: 0,delta
r5,20.0
r1,24.0
r2,24.0
r4,24.0
r9,24.0
r3,26.0
r10,26.0
r8,32.0
r6,36.0
r7,46.0

Unnamed: 0,euclidiean_sum
r5,10.742018
r4,11.760134
r2,11.760134
r1,11.88635
r9,11.88635
r3,12.114383
r10,12.157694
r8,13.820605
r6,14.537538
r7,16.485281

Unnamed: 0,euclidiean_user_pref
r5,8.124038
r2,8.660254
r4,8.660254
r3,8.774964
r10,8.774964
r1,8.944272
r6,9.327379
r8,9.380832
r9,9.486833
r7,10.099505

Unnamed: 0,cosine_distance_sum
r5,4.114207
r4,3.412461
r2,3.412461
r10,3.236068
r3,3.188854
r1,3.144427
r9,2.400222
r6,2.165248
r8,2.144427
r7,0.5


In [83]:

# Cosine similarity between every user recipe (rows) and every database
# recipe (columns).
result_array = cosine_similarity(user_recipes, recipe_db)
result_2 = pd.DataFrame(
    result_array,
    index=user_recipes.index.values,
    columns=recipe_db.index.values,
)

# NOTE(review): this rebinds result_3, which the Euclidean cell also assigns.
# The stored frame is sorted ascending; only the displayed view below is
# descending (most similar recipe first).
result_3 = result_2.sum().to_frame('distance_sum').sort_values(by='distance_sum')
result_3.sort_values(by='distance_sum', ascending=False)

Unnamed: 0,distance_sum
r5,4.114207
r4,3.412461
r2,3.412461
r10,3.236068
r3,3.188854
r1,3.144427
r9,2.400222
r6,2.165248
r8,2.144427
r7,0.5
