In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from random import sample
from surprise import KNNWithMeans
from surprise import Dataset
from surprise import accuracy
from surprise import Reader
import os
import random
from surprise.model_selection import train_test_split
from sklearn.decomposition import TruncatedSVD

In [2]:
# Load the raw ratings. Column names are supplied explicitly via `names`,
# so pandas treats the first line of the CSV as data rather than a header.
raw_data = pd.read_csv("Data/AMAZON_FASHION.csv", names=['Item', 'User', 'Rating', 'Timestamp'])

# Draw a 30,000-row random sample. The seed is pinned so that a fresh
# "Restart & Run All" reproduces the same sample (and therefore the same
# matrices and recommendations downstream).
data = raw_data.sample(n=30000, random_state=42)

# Keep only the columns used downstream, in (User, Item, Rating) order.
# Selecting these three columns also discards 'Timestamp', so no separate
# in-place drop is needed.
data = data[['User', 'Item', 'Rating']]

In [3]:
# Display the sampled ratings frame (pandas truncates long frames when rendering).
data

Unnamed: 0,User,Item,Rating
423269,A3PRIORGJARWCE,B0068D5I2S,3.0
441997,A2M49TY3ALK2EB,B00ARBUGQK,5.0
439311,AMP3FJ7JCRINJ,B00AA0K0M8,3.0
770668,A1FP4HXOVMQHOF,B019ZYRVWG,5.0
249192,A1JP7OAOTT0V2H,B00SY4TLXG,1.0
...,...,...,...
544179,A10K54UM4UCEKN,B00MI3H5AU,4.0
725438,A3H8MHT2585W5A,B016N70W8I,3.0
120567,A1A6QHU0YFXGI,B00CZ5067A,5.0
254041,A11ATUDRNCYG3O,B00U5QQCI8,3.0


In [4]:
# Restrict the working set to the first 10,000 rows of the sampled ratings.
new_df = data.iloc[:10000]

# Build the User x Item rating table: one row per user, one column per item.
# pivot_table aggregates repeated (User, Item) pairs with its default
# aggregation (mean); pairs with no rating are filled with 0.
ratings_matrix = pd.pivot_table(
    new_df,
    index='User',
    columns='Item',
    values='Rating',
    fill_value=0,
)
ratings_matrix.head()

Item,1291691480,B00007GDFV,B0000A98WO,B0000AOE9U,B0000EVWYO,B0000EVXVG,B0000U0HBC,B0000ZG44Q,B00011QQB0,B00015VKT4,...,B01HH3WKYQ,B01HH8GOU2,B01HHAKVK4,B01HHJZNBM,B01HHRLGTC,B01HI7K476,B01HITZ0PA,B01HIWLG64,B01HJEOBCC,B01HJEOC9E
User,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
A08261361L1DAN2OSHSZ,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
A0971645OPXCUP95QITK,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
A10074U9M272RL,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
A1008MTX9ZTFEG,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
A100C1Z111U34R,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [5]:
# Flip the rating table so that items become the rows and users the columns.
X = ratings_matrix.transpose()
# X1 is a second name for the same object, not an independent copy.
X1 = X
X.head()

User,A08261361L1DAN2OSHSZ,A0971645OPXCUP95QITK,A10074U9M272RL,A1008MTX9ZTFEG,A100C1Z111U34R,A101465AJXVSN5,A101Q01ACJYD9E,A102S25GK6LQPZ,A102VVYZ2AYL9Z,A102W23QVFGNDO,...,AZTXSNXV81KNW,AZUXHVSLZBOM0,AZWEPIW89KBRM,AZWZ5PITVG2O8,AZX1U53TGAXPV,AZX54DTSNCNOK,AZXHLTA2PUB1K,AZYN955ZFYXVW,AZZC7RYE3RMLL,AZZMQ85DPFEG3
Item,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1291691480,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
B00007GDFV,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
B0000A98WO,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
B0000AOE9U,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
B0000EVWYO,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [6]:
# Decomposing the matrix
#
# Reduce each row of X (one row per item) to 10 latent components.
# TruncatedSVD's default solver is randomized, so random_state is pinned to
# make the decomposition -- and the correlations computed from it --
# reproducible across kernel restarts.
SVD = TruncatedSVD(n_components=10, random_state=42)
decomposed_matrix = SVD.fit_transform(X)
decomposed_matrix.shape

(7476, 10)

In [7]:
# Pairwise Pearson correlation between the rows of decomposed_matrix.
# rowvar=True (numpy's default) treats each row as one variable, so the
# result is square with one row/column per row of the input.
correlation_matrix = np.corrcoef(decomposed_matrix, rowvar=True)
correlation_matrix.shape

(7476, 7476)

In [8]:
# Pick a random item from X.index to act as the query product.
# random.randrange(n) draws from 0..n-1. The previous random.randint(0, n)
# included n itself, which is one past the last valid position and would
# intermittently raise IndexError.
i = X.index[random.randrange(np.size(correlation_matrix, 0))]

# Positional index of the chosen item, used to select its row of the
# correlation matrix.
product_names = list(X.index)
product_ID = product_names.index(i)
product_ID

2969

In [9]:
# Row of the correlation matrix for the chosen item: its correlation with
# every item, including itself.
correlation_product_ID = correlation_matrix[product_ID, :]
correlation_product_ID.shape

(7476,)

In [10]:
# Items whose correlation with the query item exceeds 0.65, listed in
# X.index order (they are not ranked by correlation).
#
# The query item itself is filtered out here instead of using
# list.remove(i): if the query item's own correlation is NaN (np.corrcoef
# yields NaN for a zero-variance row), the mask excludes it and remove(i)
# would raise ValueError.
Recommend = [item for item in X.index[correlation_product_ID > 0.65] if item != i]

# Show the first 24 candidates.
Recommend[0:24]

['1291691480',
 'B0000ZG44Q',
 'B00015VKT4',
 'B00062NHH0',
 'B0006HB4XE',
 'B0006SE0WK',
 'B0007MV6PO',
 'B0007NBT2S',
 'B0007OTES8',
 'B0007Y2BL0',
 'B0008F6WMM',
 'B0009A1EA6',
 'B0009A6OZG',
 'B0009JMGM2',
 'B0009JO9NQ',
 'B0009NAGS4',
 'B000CRU952',
 'B000E3AUIA',
 'B000E8J9RI',
 'B000FSIFC2',
 'B000FXSQ2Q',
 'B000G1MHF4',
 'B000GHRZN2',
 'B000HZBFJI']