In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from random import sample
from surprise import KNNWithMeans
from surprise import Dataset
from surprise import accuracy
from surprise import Reader
import os
import random
from surprise.model_selection import train_test_split
from sklearn.decomposition import TruncatedSVD

In [2]:
# Load the raw ratings; column names are supplied explicitly via `names`.
raw_data = pd.read_csv("Data/Digital_Music.csv", names=['Item', 'User', 'Rating', 'Timestamp'])
# Work on a fixed-seed 30,000-row sample so that a fresh "Restart & Run All"
# reproduces the same rows (and therefore the same matrices and recommendations
# downstream). Selecting the three needed columns also leaves 'Timestamp' behind
# without mutating any frame in place.
data = raw_data.sample(n=30000, random_state=42)[['User', 'Item', 'Rating']]

In [3]:
# Display the sampled ratings frame (columns: User, Item, Rating).
data

Unnamed: 0,User,Item,Rating
1306297,A1XH98D6JD6HKW,B00H48BXTI,5.0
1340145,A1R8VH3NG80O82,B00J62YPZ4,5.0
1133721,A56WB4XRA3SM1,B008C69LRC,5.0
970003,A30LWFPVNW7MQO,B003LCYG72,3.0
1565428,A3GK52JYZNHNYY,B01DVDLJWA,5.0
...,...,...,...
583064,AYMTGFOQI3ZQ8,B01DLS3HGG,4.0
1182853,A54TCKUG05LTW,B00AOMR7K0,5.0
803051,A2NK4VYMFSUY7P,B0013FYQRE,5.0
507637,A160NCW0GEKJAZ,B00UXOEEQO,5.0


In [4]:
# Keep only the first 10,000 sampled ratings, then spread them into a
# User x Item grid; (User, Item) pairs without a rating are filled with 0.
new_df = data.iloc[:10000]
ratings_matrix = pd.pivot_table(
    new_df,
    values='Rating',
    index='User',
    columns='Item',
    fill_value=0,
)
ratings_matrix.head()

Item,0001377647,0618866760,0633076341,0977624617,1582703272,1882513274,1932192077,1934532142,1937509095,278472414X,...,B01HCGZZ40,B01HEIQ1JE,B01HF1L9WY,B01HGWUEQE,B01HHN1R8G,B01HHZO2QS,B01HI945EW,B01HIUYFRS,B01HIY9HSG,B01HJ91B2K
User,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
A0516219AIWHJPISHDZ0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
A1017F8A0W4X1O,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
A101821AVE9NOU,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
A102KMGPKK000Y,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
A102N1O57JP398,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [5]:
# Flip the grid so that each row is an item and each column is a user.
X = ratings_matrix.transpose()
# X1 is bound to the very same object as X (an alias, not a copy).
X1 = X
X.head()

User,A0516219AIWHJPISHDZ0,A1017F8A0W4X1O,A101821AVE9NOU,A102KMGPKK000Y,A102N1O57JP398,A103JEX1YIUA17,A103JKBYKSSF3V,A103VWISMNIVCG,A103WYBAWMAA59,A103Z3UXM0W2Q4,...,AZUNMZASVESI0,AZV0YCIQA8NQO,AZVAYRBOTPET7,AZVSOV0PCDSVK,AZXWUZ9PPSOTL,AZY1HUEW2XZ6F,AZZ0TPXC0LEJO,AZZJAZJWEX63I,AZZRFRQA9U05K,AZZSKEO8CQK1Y
Item,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1377647,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
618866760,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
633076341,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
977624617,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1582703272,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [6]:
# Decomposing the matrix
#
# Project every item row of X onto 10 latent components. TruncatedSVD's default
# solver is randomized, so random_state is pinned to make the embedding (and
# everything derived from it) repeatable across kernel restarts.
SVD = TruncatedSVD(n_components=10, random_state=42)
decomposed_matrix = SVD.fit_transform(X)
decomposed_matrix.shape

(8557, 10)

In [7]:
# Pearson correlation between every pair of rows of the decomposed matrix
# (rowvar=True is NumPy's default: each row is treated as one variable).
correlation_matrix = np.corrcoef(decomposed_matrix, rowvar=True)
correlation_matrix.shape

(8557, 8557)

In [8]:
# Pick one item at random to act as the reference product.
# random.randint includes BOTH endpoints, so an upper bound of
# np.size(correlation_matrix, 0) could yield an index one past the end of
# X.index and raise IndexError; randrange excludes the stop value.
i = X.index[random.randrange(np.size(correlation_matrix, 0))]

# Position of the chosen item within X's index.
product_names = list(X.index)
product_ID = product_names.index(i)
product_ID

8141

In [9]:
# Row of correlation_matrix holding the chosen item's correlation with every item.
correlation_product_ID = correlation_matrix[product_ID, :]
correlation_product_ID.shape

(8557,)

In [10]:
# Items whose correlation with the chosen item exceeds 0.65, in index order.
# The item already bought by the customer (i) is filtered out here instead of
# calling list.remove(i): remove() raises ValueError whenever i itself did not
# pass the threshold (for example if its own correlation comes out as NaN
# because its decomposed row has zero variance).
Recommend = [item for item in X.index[correlation_product_ID > 0.65] if item != i]

Recommend[0:24]

['0977624617',
 '1932192077',
 '1934532142',
 '5557505946',
 '555807230X',
 '555820690X',
 'B0000041EV',
 'B000007RPD',
 'B000025EWH',
 'B000025FKH',
 'B000026BCL',
 'B000026P8M',
 'B00003IBF6',
 'B00004S9VC',
 'B00004SUCL',
 'B00004UFYR',
 'B0000561W5',
 'B000059O53',
 'B00005FX8C',
 'B000069KOG',
 'B00007EB95',
 'B00008LOAX',
 'B00008OD4P',
 'B000092A6B']