In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from random import sample
from surprise import KNNWithMeans
from surprise import Dataset
from surprise import accuracy
from surprise import Reader
import os
import random
from surprise.model_selection import train_test_split
from sklearn.decomposition import TruncatedSVD

In [2]:
# Load the ratings file. Column names are passed explicitly via `names=`, so
# pandas treats the first line of the CSV as data rather than as a header.
raw_data = pd.read_csv("Data/Gift_Cards.csv", names=['Item', 'User', 'Rating', 'Timestamp'])

# Work on a random subset of at most 30,000 ratings.
# - random_state pins the sample so a Restart & Run All reproduces the same rows.
# - capping n at len(raw_data) avoids the ValueError that sample() raises when
#   asked for more rows than exist (without replacement).
data = raw_data.sample(n=min(30000, len(raw_data)), random_state=42)

# Selecting the three needed columns both drops 'Timestamp' and fixes the
# column order, without mutating the sampled frame in place.
data = data[['User', 'Item', 'Rating']]

In [3]:
# Show the sampled (User, Item, Rating) frame built in the previous cell.
data

Unnamed: 0,User,Item,Rating
687,ARW4YCYJJND9,B001GXRQW0,5.0
146795,A251GXMCOU9QWM,B01E4QPDV6,5.0
16365,A37YPXCC8U58UZ,B004LLIKVU,5.0
63066,A6T2B13WYCMJG,B00ADR2XIC,5.0
130232,A1L514XKC5MAB6,B01E4QUN0W,5.0
...,...,...,...
11036,A3OUP6QOEG2GVQ,B004LLIL3M,5.0
45666,A2DH8BIHFB5N4W,B006PJHP62,1.0
12054,A3JFMNXJTAFK1X,B004LLILGO,5.0
43600,ARATOBWQIBY13,B0066AZGD4,5.0


In [4]:
# Keep only the first 10,000 rows of the sampled ratings for the pivot.
new_df = data.iloc[:10000]

# Build a User x Item table of ratings. (User, Item) pairs with no rating are
# filled with 0; repeated pairs are combined by pivot_table's default
# aggregation (mean).
ratings_matrix = pd.pivot_table(
    new_df,
    index='User',
    columns='Item',
    values='Rating',
    fill_value=0,
)
ratings_matrix.head()

Item,B001GXRQW0,B001H53QE4,B002BSHDJK,B002DN7XS4,B002MS7BPA,B002O018DM,B002O0536U,B002OOBESC,B002PY04EG,B002QFXC7U,...,B01GF7GNCA,B01GKWEH64,B01GKWEJTO,B01GKWEPBG,B01GKZ326M,B01GKZ37SA,B01GKZ3SQG,B01GOQHGL4,B01GP1W4LA,B01H5PPJT4
User,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
A0990567BBBHIFRRPVKE,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
A100297S0KWNIB,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
A100H2245MF6NL,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
A100OKR33PD05B,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
A100RJ6JVXR6BP,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [5]:
# Transpose so that each row is an item and each column is a user.
# X1 is a second name bound to the same DataFrame object (an alias, not a copy).
X = X1 = ratings_matrix.transpose()
X.head()

User,A0990567BBBHIFRRPVKE,A100297S0KWNIB,A100H2245MF6NL,A100OKR33PD05B,A100RJ6JVXR6BP,A1011J2YDYORNO,A101MH9F0K7ME5,A101WFNV9RNIHU,A10225934GDDWL2ICVX6,A10230158VUYE548CAI7,...,AZWLV7TIEY2OK,AZWQ56HWVYO0Q,AZX88Q3FW7PLP,AZXBBI0YREY79,AZXLPHKI6Z0LB,AZY5WTW6SEMGY,AZYMTBVQ1HT80,AZZIP8Y3V1Y8I,AZZNK89PXD006,AZZQEHFVAGRLU
Item,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
B001GXRQW0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
B001H53QE4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
B002BSHDJK,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
B002DN7XS4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
B002MS7BPA,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [6]:
# Decompose the item-by-user matrix into 10 latent components per item.
# TruncatedSVD uses a randomized solver by default, so random_state is pinned
# to make the decomposition (and the correlations derived from it)
# reproducible across kernel restarts.
SVD = TruncatedSVD(n_components=10, random_state=42)
decomposed_matrix = SVD.fit_transform(X)
decomposed_matrix.shape

(920, 10)

In [7]:
# Pairwise Pearson correlation between the rows of the decomposed matrix.
# rowvar=True is NumPy's default: each row is treated as one variable, so the
# result is square with one row/column per input row.
correlation_matrix = np.corrcoef(decomposed_matrix, rowvar=True)
correlation_matrix.shape

(920, 920)

In [8]:
# Pick a random item to generate recommendations for.
# random.randrange(n) yields 0..n-1. The previous random.randint(0, n) is
# inclusive at both ends and could return n, indexing one past the end of
# X.index and raising IndexError.
i = X.index[random.randrange(np.size(correlation_matrix, 0))]

# Positional index of the chosen item; used to select its row in
# correlation_matrix.
product_names = list(X.index)
product_ID = product_names.index(i)
product_ID

439

In [9]:
# Pull out the row of correlations between the selected item and every item.
correlation_product_ID = correlation_matrix[product_ID, :]
correlation_product_ID.shape

(920,)

In [10]:
# Items whose correlation with the selected item exceeds 0.65.
is_similar = correlation_product_ID > 0.65
Recommend = X.index[is_similar].tolist()

# Drop the selected item itself from its own recommendation list.
Recommend.remove(i)

# Show the first 24 candidates (in index order, not ranked by correlation).
Recommend[:24]

['B004KNWWRC',
 'B004KNWWRW',
 'B004LLIL5A',
 'B004LLILF0',
 'B005DHN6E2',
 'B005ESMMWW',
 'B0062ON8M0',
 'B007EE5PEQ',
 'B00B2TFSO6',
 'B00BT1XF5Q',
 'B00BXLTJ8O',
 'B00BXLUI5M',
 'B00BXLV55Y',
 'B00CRQ4YDE',
 'B00EDYJC8A',
 'B00FGEHO0O',
 'B00FTGF3P2',
 'B00G4IUQVK',
 'B00G4IURXW',
 'B00O3YIGOG',
 'B00O3YJMS0',
 'B00OGME00O',
 'B00P9VD9ZS',
 'B00PMLDTO6']