In [1]:
import pandas as pd
import numpy as np
from scipy.stats import pearsonr
import seaborn as sns
from sklearn.metrics.pairwise import cosine_similarity
import datetime
from sklearn.preprocessing import LabelEncoder

In [14]:
# Read the raw interaction log from disk
df = pd.read_csv('supermarket_data.csv')

# Reshape into an item-by-user matrix: one row per product, one column per user.
# When a (product, user) pair occurs more than once, the 'Interaction Type'
# values are averaged (pivot_table's default aggregation, spelled out here);
# pairs that never occur are left as NaN.
matrix = pd.pivot_table(
    df,
    index='Product Name',
    columns='User ID',
    values='Interaction Type',
    aggfunc='mean',
)
matrix.head()

User ID,0,1,2,3,4,5,6,7,8,9,...,90,91,92,93,94,95,96,97,98,99
Product Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
3-ring staple pack,,,,,,,1.0,,,,...,,,,1.5,,,0.0,,,
9-3/4 Diameter Round Wall Clock,0.0,1.0,1.0,1.0,2.0,2.0,,,3.0,2.5,...,3.0,2.0,0.5,2.0,,,,,1.0,
American Pencil,2.333333,,2.0,,1.0,0.0,0.0,,1.0,2.0,...,,2.5,,2.0,2.0,,3.0,1.0,1.5,
Avery 477,2.0,,,1.0,1.666667,,,1.0,,0.0,...,,,1.0,0.0,2.0,,,,0.0,
Avery Non-Stick Binders,3.0,,1.0,0.5,,,1.5,0.0,,1.0,...,3.0,1.5,,,2.0,1.0,1.0,0.0,1.0,1.5


In [15]:
# Mean-centre every product row: subtract the product's average interaction
# value (taken across users, NaN ignored) from each of its entries.
product_means = matrix.mean(axis='columns')
matrix_norm = matrix.sub(product_means, axis='index')
matrix_norm.head()



User ID,0,1,2,3,4,5,6,7,8,9,...,90,91,92,93,94,95,96,97,98,99
Product Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
3-ring staple pack,,,,,,,-0.60303,,,,...,,,,-0.10303,,,-1.60303,,,
9-3/4 Diameter Round Wall Clock,-1.383929,-0.383929,-0.383929,-0.383929,0.616071,0.616071,,,1.616071,1.116071,...,1.616071,0.616071,-0.883929,0.616071,,,,,-0.383929,
American Pencil,0.878571,,0.545238,,-0.454762,-1.454762,-1.454762,,-0.454762,0.545238,...,,1.045238,,0.545238,0.545238,,1.545238,-0.454762,0.045238,
Avery 477,0.583333,,,-0.416667,0.25,,,-0.416667,,-1.416667,...,,,-0.416667,-1.416667,0.583333,,,,-1.416667,
Avery Non-Stick Binders,1.570312,,-0.429688,-0.929688,,,0.070312,-1.429688,,-0.429688,...,1.570312,0.070312,,,0.570312,-0.429688,-0.429688,-1.429688,-0.429688,0.070312


In [16]:
# Product-by-product similarity: Pearson correlation between product rows.
# DataFrame.corr correlates columns, so the matrix is transposed first to put
# products on the column axis; each pair is computed over the users both
# products share, and pairs with too little overlap come out as NaN.
item_similarity = matrix_norm.transpose().corr(method='pearson')
item_similarity.head()

Product Name,3-ring staple pack,9-3/4 Diameter Round Wall Clock,American Pencil,Avery 477,Avery Non-Stick Binders,"Belkin 325VA UPS Surge Protector, 6'",Belkin F9G930V10-GRY 9 Outlet Surge,"Bevis Round Conference Table Top, X-Base","Bevis Traditional Conference Table Top, Plinth Base",Binding Machine Supplies,...,Xblue XB-1670-86 X16 Small Office Telephone - Titanium,Xerox 1934,Xerox 1979,Xerox 1985,Xerox 1995,Xerox 2,Xerox 21,Xerox 224,"XtraLife ClearVue Slant-D Ring Binder, White, 3""",XtraLife ClearVue Slant-D Ring Binders by Cardinal
Product Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
3-ring staple pack,1.0,-0.041222,0.058295,-0.062426,-0.265925,-0.055956,0.02185,0.045183,0.23536,0.230521,...,0.078696,-0.219516,-0.13875,0.008578,0.162673,0.012005,0.146988,-0.085959,0.273032,-0.031412
9-3/4 Diameter Round Wall Clock,-0.041222,1.0,0.071003,-0.32761,0.3825,0.194598,0.202139,-0.170898,0.029635,0.176432,...,-0.175588,-0.456468,-0.010051,-0.101544,0.282679,0.161065,-0.0582,0.023627,0.012887,-0.111152
American Pencil,0.058295,0.071003,1.0,-0.171252,0.018224,0.107681,0.219092,0.249481,-0.237776,0.073926,...,-0.513204,0.243468,-0.255048,0.054038,0.109329,0.121202,-0.125283,-0.143903,0.045226,0.112694
Avery 477,-0.062426,-0.32761,-0.171252,1.0,0.294632,-0.006621,-0.249883,0.015302,-0.090521,0.15401,...,0.091613,0.321177,-0.07051,-0.490059,-0.192985,0.204967,0.106069,-0.05254,-0.079876,-0.272208
Avery Non-Stick Binders,-0.265925,0.3825,0.018224,0.294632,1.0,-0.046882,0.017157,-0.047902,-0.233331,-0.205497,...,-0.188529,0.264739,-0.041105,-0.08507,0.059119,0.164654,-0.108911,-0.040247,0.057874,-0.261846


In [19]:

# Item-based recommendation function
def item_based_rec(picked_userid=1, number_of_similar_items=5, number_of_recommendations=3):
    """Recommend products the given user has not interacted with yet.

    For every product the user has no entry for in ``matrix_norm``, the
    products the user *did* interact with are ranked by their similarity to it
    (column of ``item_similarity``), the top ``number_of_similar_items`` are
    kept, and the prediction is the similarity-weighted average of the user's
    mean-centred interaction values for those neighbours:

        sum(similarity * interaction) / sum(|similarity|)

    Dividing by the sum of absolute similarities keeps the prediction bounded
    when negative correlations are among the neighbours and avoids a division
    by zero when the weights cancel out. When all selected similarities are
    positive this is the ordinary weighted average.

    Relies on the module-level ``matrix_norm`` (products x users, mean-centred)
    and ``item_similarity`` (products x products) frames.

    Parameters
    ----------
    picked_userid : column label of ``matrix_norm``
        User to generate recommendations for.
    number_of_similar_items : int
        Maximum number of already-seen neighbour products used per prediction.
    number_of_recommendations : int
        Maximum number of (product, score) pairs returned.

    Returns
    -------
    list of (product name, predicted interaction) tuples
        Sorted by predicted interaction, highest first; scores are on the
        mean-centred scale of ``matrix_norm`` and rounded to 6 decimals.
        Products with no usable neighbour (no non-NaN similarity to anything
        the user has seen, or all-zero similarities) are left out, so the list
        can be shorter than requested or empty.

    Raises
    ------
    KeyError
        If ``picked_userid`` is not a column of ``matrix_norm``.
    """
    user_scores = matrix_norm[picked_userid]

    # Split the catalogue into what the user has / has not interacted with.
    seen_scores = user_scores.dropna()
    unseen_items = user_scores[user_scores.isna()].index

    # Unseen product -> predicted interaction
    interaction_prediction = {}

    for picked_item in unseen_items:
        # Similarity of this unseen product to each product the user has seen.
        # NaN similarities (no co-rating users) carry no information and would
        # otherwise turn the whole prediction into NaN, so they are dropped.
        similarities = item_similarity[picked_item].reindex(seen_scores.index).dropna()

        # Keep only the most similar seen products.
        neighbours = similarities.sort_values(ascending=False).head(number_of_similar_items)

        weight_total = neighbours.abs().sum()
        if weight_total == 0:
            # No neighbours at all, or only zero similarities: nothing to predict from.
            continue

        weighted_sum = (neighbours * seen_scores.loc[neighbours.index]).sum()
        interaction_prediction[picked_item] = round(float(weighted_sum / weight_total), 6)

    # Highest predicted interaction first; ties keep catalogue order.
    ranked = sorted(interaction_prediction.items(), key=lambda pair: pair[1], reverse=True)
    return ranked[:number_of_recommendations]

# Top three suggestions for user 97, each scored from up to ten of the
# products that user has already interacted with
recommended_product = item_based_rec(
    picked_userid=97,
    number_of_similar_items=10,
    number_of_recommendations=3,
)
print(recommended_product)

[('Case Logic 2.4GHz Wireless Keyboard', 0.519904), ('Hot File 7-Pocket, Floor Stand', 0.492073), ('Eldon 200 Class Desk Accessories, Burgundy', 0.364229)]
