In [29]:
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from ast import literal_eval
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.metrics.pairwise import linear_kernel, cosine_similarity
from nltk.stem.snowball import SnowballStemmer
from nltk.stem.wordnet import WordNetLemmatizer
from nltk.corpus import wordnet

from surprise import SVD
from surprise import Dataset
from surprise.model_selection import cross_validate
import warnings; warnings.simplefilter('ignore')

In [30]:
# Load the raw transaction table; every later cell derives its data from this frame.
full_df = pd.read_csv('full_dataset.csv')

In [31]:
# Customer ids are used as integer keys further down (grouping, rating lookups).
full_df['CustomerID'] = full_df['CustomerID'].astype(int)

# Parse the invoice timestamp and keep only the calendar date, rendered as a
# 'DD-MM-YYYY' string (the column is text, not datetime, after this line).
full_df['InvoiceDate'] = pd.to_datetime(full_df['InvoiceDate']).dt.strftime('%d-%m-%Y')

# Work on a copy so the per-product aggregation below leaves full_df intact.
df = full_df.copy()

In [32]:
# Duplicate the description column: 'Description' is stemmed in a later cell,
# while 'Desc' keeps the original text for display.
df['Desc'] = df['Description']

# Collapse the transaction rows into one row per StockCode, then turn the
# StockCode group key back into an ordinary column.
df = (
    df.groupby(['StockCode'])
    .agg({
        'Quantity': 'sum',
        'UnitPrice': 'first',
        'Description': 'first',
        'Desc': 'first',
        'InvoiceNo': 'count',
    })
    .reset_index()
)

# Keep only products whose summed quantity is positive.
df = df.loc[df['Quantity'] > 0]

In [33]:
# Replace every whitespace-separated word of each description with its
# English Snowball stem; the result feeds the TF-IDF vectoriser.
stemmer = SnowballStemmer('english')
df['Description'] = df['Description'].map(
    lambda text: ' '.join(map(stemmer.stem, text.split()))
)

In [34]:
# Persist the per-product table (with stemmed descriptions) without the row index.
df.to_csv('rec.csv', index=False)


In [35]:
from surprise import Dataset, NormalPredictor, Reader

# Seed NumPy's global RNG so the fold split and the SVD initialisation are
# repeatable across kernel restarts.
# NOTE(review): relies on Surprise falling back to NumPy's global RNG when no
# random_state is passed - confirm against the installed Surprise version.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)

# Per-customer product ratings; the Reader declares them to lie on a 1-10 scale.
ratings = pd.read_csv('product_rating.csv')
reader = Reader(rating_scale=(1, 10))
data = Dataset.load_from_df(ratings[['CustomerID', 'StockCode', 'purchase_rate']], reader)

# Pass the epoch count to the constructor instead of patching the attribute
# on the instance afterwards.
svd = SVD(n_epochs=10)

# Keep the cross-validation result and print the per-fold RMSE; previously the
# returned dict was discarded, so the 5-fold run produced no visible output.
cv_results = cross_validate(svd, data, measures=['RMSE'], cv=5, verbose=True)

# Refit on every available rating; this fitted model is the one queried later.
trainset = data.build_full_trainset()
svd.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x104575a50>

In [36]:
# TF-IDF over unigrams and bigrams of the stemmed descriptions.
# min_df must be an int >= 1 (or a float in [0, 1]); the previous integer 0 is
# rejected by scikit-learn's parameter validation in recent releases, and 1
# keeps every term that occurs in at least one description.
tf = TfidfVectorizer(analyzer='word', ngram_range=(1, 2), min_df=1, stop_words='english')
tfidf_matrix = tf.fit_transform(df['Description'])

# Dot product of the TF-IDF rows, giving one similarity row/column per product.
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

# Renumber rows 0..n-1 so that a row label in df equals its row/column in
# cosine_sim. drop=True avoids adding a leftover 'index' column and keeps the
# cell re-runnable (repeated reset_index() calls eventually raise).
df = df.reset_index(drop=True)

# Lookup table: StockCode -> row position in df / cosine_sim.
indices = pd.Series(df.index, index=df['StockCode'])

In [37]:
def get_recommendations(title, top_n=9):
    """Return the products whose descriptions are most similar to one product.

    Similarities are read from the module-level ``cosine_sim`` matrix;
    ``indices`` maps a stock code to its row in that matrix and in ``df``.

    Parameters
    ----------
    title
        Stock code of the reference product, looked up in ``indices``.
    top_n : int, default 9
        Maximum number of recommendations to return.

    Returns
    -------
    pandas.DataFrame or str
        The ``StockCode``, ``Desc`` and ``UnitPrice`` columns of the matches,
        most similar first. If none of the candidates has a positive
        similarity, the message ``'No Such elements like this'`` is returned
        instead of a frame.

    Raises
    ------
    KeyError
        If ``title`` is not present in ``indices``.
    """
    idx = indices[title]

    # Exclude the reference product by position. Skipping "the first sorted
    # entry" is wrong when another product has an identical description: the
    # tie at the top could drop that product and keep the reference itself.
    candidates = [
        (position, score)
        for position, score in enumerate(cosine_sim[idx])
        if position != idx
    ]
    # list.sort is stable, so tied scores keep their catalogue order.
    candidates.sort(key=lambda pair: pair[1], reverse=True)

    results = [pair for pair in candidates[:top_n] if pair[1] > 0]
    if not results:
        return 'No Such elements like this'

    item_indices = [position for position, _ in results]
    return df.loc[item_indices, ['StockCode', 'Desc', 'UnitPrice']]

In [38]:
# Content-based recommendations for stock code 85211.
x1 = get_recommendations('85211')

In [54]:
# Content-based recommendations for stock code 16043, printed as plain text
# without the row index.
x1 = get_recommendations('16043')
print(x1.to_string(index=False))


StockCode                             Desc  UnitPrice
    16052         TEATIME PUSH DOWN RUBBER       0.42
   47518F      ICON PLACEMAT POP ART ELVIS       0.42
   46115B GREEN POP ART MAO CUSHION COVER        0.19
    10120                     DOGGY RUBBER       0.21
   16258A   SWIRLY CIRCULAR RUBBERS IN BAG       0.42
   82011B     BATHROOM SCALES RUBBER DUCKS       3.75
   47310M       SMALL POP BOX,FUNKY MONKEY       1.25
    23528                SPACEBOY WALL ART       2.66
    23543              KEEP CALM WALL ART        7.45


In [44]:
def get_scores(title, top_n=10, min_score=0.2):
    """Return the products most similar to one product, with their scores.

    Similarities are read from the module-level ``cosine_sim`` matrix;
    ``indices`` maps a stock code to its row in that matrix and in ``df``.

    Parameters
    ----------
    title
        Stock code of the reference product, looked up in ``indices``.
    top_n : int, default 10
        Number of best-scoring candidates considered before thresholding.
    min_score : float, default 0.2
        Only candidates scoring strictly above this value are kept.

    Returns
    -------
    pandas.DataFrame or str
        ``StockCode``, ``Desc``, ``UnitPrice`` and ``score`` of the matches,
        most similar first. The frame may be empty when every candidate
        scores at or below ``min_score``. If none of the candidates has a
        positive similarity at all, the message
        ``'No Such elements like this'`` is returned instead of a frame.

    Raises
    ------
    KeyError
        If ``title`` is not present in ``indices``.
    """
    idx = indices[title]

    # Exclude the reference product by position. Skipping "the first sorted
    # entry" is wrong when another product has an identical description: the
    # tie at the top could drop that product and keep the reference itself.
    candidates = [
        (position, score)
        for position, score in enumerate(cosine_sim[idx])
        if position != idx
    ]
    # list.sort is stable, so tied scores keep their catalogue order.
    candidates.sort(key=lambda pair: pair[1], reverse=True)

    results = [pair for pair in candidates[:top_n] if pair[1] > 0]
    if not results:
        return 'No Such elements like this'

    item_indices = [position for position, _ in results]
    scores = [score for _, score in results]

    # assign() builds a new frame, so the catalogue in df is never modified.
    res = df.loc[item_indices, ['StockCode', 'Desc', 'UnitPrice']].assign(score=scores)
    return res[res['score'] > min_score]

In [84]:
# Similar products for stock code 85123A, renumbered from 1 for display.
x1 = get_scores('85123A').reset_index()
x1.index = x1.index + 1
x1[['StockCode', 'Desc', 'UnitPrice']]

Unnamed: 0,StockCode,Desc,UnitPrice
1,21733,RED HANGING HEART T-LIGHT HOLDER,2.55
2,21814,HEART T-LIGHT HOLDER,1.45
3,85118,HEART T-LIGHT HOLDER,1.25
4,23087,ZINC HEART T-LIGHT HOLDER,1.25
5,21313,GLASS HEART T-LIGHT HOLDER,0.85
6,84970S,HANGING HEART ZINC T-LIGHT HOLDER,0.64
7,84978,HANGING HEART JAR T-LIGHT HOLDER,1.25
8,71038,WHITE HANGING BEADS CANDLE HOLDER,5.45
9,23273,HEART T-LIGHT HOLDER WILLIE WINKIE,1.65
10,35968,FOLK ART METAL HEART T-LIGHT HOLDER,0.38


In [52]:
def fav_clients(title, top_n=5):
    """Rank customers by how much they bought of products similar to one product.

    The similar products and their scores come from ``get_scores(title)``.
    Every matching row of ``full_df`` contributes ``Quantity * score`` to the
    total of its ``CustomerID``.

    Parameters
    ----------
    title
        Stock code of the reference product, passed to ``get_scores``.
    top_n : int, default 5
        Number of customers to return.

    Returns
    -------
    pandas.Series
        Summed ``rate`` per ``CustomerID``, largest first, at most ``top_n``
        entries. Empty when ``get_scores`` finds no similar product or when
        nobody bought any of the similar products.
    """
    scored = get_scores(title)
    if isinstance(scored, str):
        # get_scores reports "nothing similar" with a message string; answer
        # with an empty ranking of the usual shape instead of failing on it.
        return pd.Series(dtype='float64', name='rate').rename_axis('CustomerID')

    # One vectorised filter instead of growing a frame with concat in a loop.
    # It also copes with an empty score table, which previously raised
    # KeyError in the merge.
    purchases = full_df[full_df['StockCode'].isin(scored['StockCode'])]

    # Only the score is needed from the similarity table.
    merged = purchases.merge(scored[['StockCode', 'score']], on='StockCode')
    merged = merged.assign(rate=merged['Quantity'] * merged['score'])

    customer_totals = merged.groupby('CustomerID')['rate'].sum()
    return customer_totals.sort_values(ascending=False).head(top_n)

In [68]:
# Customers who bought the most of the products similar to stock code 16043.
s = fav_clients('16043')

In [69]:
# Show the ranking held in `s` (CustomerID -> summed rate).
s

CustomerID
13694    171.107709
14298    120.125914
18041     45.985701
14911     32.935641
15241     30.031478
Name: rate, dtype: float64

In [82]:
# Same ranking as above, reduced to the customer ids and renumbered from 1.
x = fav_clients('16043').reset_index()
x.index = x.index + 1
x = x.loc[:, ['CustomerID']]
x

Unnamed: 0,CustomerID
0,13694
1,14298
2,18041
3,14911
4,15241


In [51]:
def colab_filter(user, title, top_n=5):
    """Re-rank the products similar to one product by a user's predicted rating.

    Candidates come from ``get_scores(title)``; each one is scored with
    ``svd.predict(user, stock_code).est`` and the best-rated are returned.

    Parameters
    ----------
    user
        Customer id handed to ``svd.predict``.
    title
        Stock code of the reference product, passed to ``get_scores``.
    top_n : int, default 5
        Number of products to return.

    Returns
    -------
    pandas.DataFrame or str
        ``StockCode``, ``Desc`` and ``UnitPrice`` of the candidates with the
        highest predicted rating, best first. When ``get_scores`` returns its
        "no similar items" message, that message is passed through unchanged.
    """
    candidates = get_scores(title)
    if isinstance(candidates, str):
        # Nothing similar was found: propagate the message rather than
        # failing while trying to index a string like a DataFrame.
        return candidates

    predicted = [svd.predict(user, stock_code).est for stock_code in candidates['StockCode']]

    # assign() leaves the frame returned by get_scores untouched.
    ranked = candidates.assign(rates=predicted).sort_values('rates', ascending=False)
    return ranked[['StockCode', 'Desc', 'UnitPrice']].head(top_n)

In [None]:
# Products similar to stock code 16043, ordered by the rating predicted for customer 12971.
colab_filter(12971, '16043')