In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from random import sample
from surprise import KNNWithMeans
from surprise import Dataset
from surprise import accuracy
from surprise import Reader
import os
import random
from surprise.model_selection import train_test_split
from sklearn.decomposition import TruncatedSVD

In [2]:
# Load the ratings CSV. Because `names=` is supplied, pandas treats the first
# line of the file as data rather than as a header row.
raw_data = pd.read_csv("Data/Gift_Cards.csv", names=['Item', 'User', 'Rating', 'Timestamp'])
# Work on a random subset of 30,000 rows. The fixed seed makes the sample (and
# every result derived from it) reproducible under Restart Kernel -> Run All.
data = raw_data.sample(n=30000, random_state=42)
# Drop the timestamp and order the columns as (User, Item, Rating), building a
# new frame instead of mutating in place so the cell is safe to re-run.
data = data.drop(columns=['Timestamp'])[['User', 'Item', 'Rating']]

In [3]:
# Show the sampled (User, Item, Rating) frame as the cell output.
data

Unnamed: 0,User,Item,Rating
71353,A35BYNYXN1AXIA,B00BWDH4DK,5.0
90036,AZJPNK73JF3XP,B00G4IW938,5.0
49459,A1694RRHCSBY68,B007V6ETDK,5.0
20420,A3BL3EDEKSNN5O,B004LLIKVU,5.0
42801,AA6Z24GSPHM42,B0066AZGJI,4.0
...,...,...,...
14306,A7W82JXXBPCXB,B004LLJ6XG,5.0
25796,A2MRYNJGI1DQF6,B004Q7CK9M,1.0
14526,ADBT8Z5WQ4ANH,B004LLIL5U,5.0
84303,ASOF9K10JOSWO,B00CT76TBG,5.0


In [4]:
# Keep only the first 10,000 rows of the sampled ratings.
new_df = data.iloc[:10000]
# Build a User x Item table of ratings; user/item pairs with no rating get 0.
ratings_matrix = pd.pivot_table(
    new_df,
    index='User',
    columns='Item',
    values='Rating',
    fill_value=0,
)
ratings_matrix.head()

Item,1619923009,B001GXRQW0,B001H53QE4,B001M1UVQO,B002BSHDJK,B002DN7XS4,B002NZXF9S,B002O018DM,B002O0536U,B002OOBESC,...,B01GF7GNCA,B01GKWEH64,B01GKWEJTO,B01GKWEPBG,B01GKWETLC,B01GKZ37SA,B01GKZ3LH2,B01GMFJ8JU,B01GP1W4LA,B01H5PPJT4
User,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
A0389233WNHNUK4EAK4Y,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
A0415852RV6U7JD8OV9G,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
A07646477D6302BG20LZ,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
A0962950K4K3R28UIBBB,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
A100WO06OQR8BQ,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [5]:
# Transpose so that each row is an item and each column is a user.
# X1 is bound to the very same object as X (an alias, not a copy).
X = X1 = ratings_matrix.transpose()
X.head()

User,A0389233WNHNUK4EAK4Y,A0415852RV6U7JD8OV9G,A07646477D6302BG20LZ,A0962950K4K3R28UIBBB,A100WO06OQR8BQ,A10216DRBUBI66,A103CFCN4S77GK,A103GF8QHXLVWP,A103KP42L3JQSO,A1041TJQASGLI,...,AZQAJXHA6AVDK,AZS0FWMTG19I6,AZVDN7JDJWIGA,AZVIQ5SU7XPD5,AZVLT556GE854,AZVXKT4R1CXJW,AZWLV7TIEY2OK,AZY31SPDXGJQ,AZY9K2FIDN8N8,AZZVLL8QF9TKK
Item,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1619923009,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
B001GXRQW0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
B001H53QE4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
B001M1UVQO,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
B002BSHDJK,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [6]:
# Decomposing the matrix
#
# Project each item's row of user ratings onto 10 components.
# random_state is fixed so that the decomposition -- and the correlations and
# recommendations computed from it -- are identical on every run.
SVD = TruncatedSVD(n_components=10, random_state=42)
decomposed_matrix = SVD.fit_transform(X)
decomposed_matrix.shape

(922, 10)

In [7]:
# Pairwise correlation between the rows of the decomposed matrix.
# rowvar=True (the default) treats every row as one variable, so the result is
# square with one row/column per row of decomposed_matrix.
correlation_matrix = np.corrcoef(decomposed_matrix, rowvar=True)
correlation_matrix.shape

(922, 922)

In [8]:
# Pick one item at random to act as the "already purchased" product.
# random.randint(a, b) includes b, so randint(0, n) could return n and make
# X.index[n] raise IndexError; randrange(n) only yields 0 .. n-1.
i = X.index[random.randrange(np.size(correlation_matrix, 0))]

# Position of the chosen item within the item index; this is the row to read
# from correlation_matrix.
product_names = list(X.index)
product_ID = product_names.index(i)
product_ID

592

In [9]:
# Take the chosen item's row of the correlation matrix: one correlation value
# per item.
correlation_product_ID = correlation_matrix[product_ID, :]
correlation_product_ID.shape

(922,)

In [10]:
# Items whose correlation with the chosen item exceeds 0.65, kept in X.index
# order (they are not ranked by correlation strength).
#
# The item already bought by the customer is filtered out here rather than with
# list.remove(), which raises ValueError when the item is not in the list
# (e.g. if its own correlation entry is NaN and so fails the threshold test).
Recommend = [item for item in X.index[correlation_product_ID > 0.65] if item != i]

# Show up to the first nine recommendations.
Recommend[0:9]

['B002BSHDJK',
 'B002DN7XS4',
 'B002O0536U',
 'B002PY04EG',
 'B002UKLD7M',
 'B002XNLC04',
 'B002XNOVDE',
 'B00414Y7Y6',
 'B004KNWWOA']