# Content-Based Recommendation System

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings("ignore")
from sklearn.cluster import KMeans
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import sigmoid_kernel
import datetime as dt
%matplotlib inline

In [2]:
# Load the source CSV; later cells read its 'StockCode' and 'Description' columns.
rating_df = pd.read_csv("Datasets/updated_new.csv")

In [3]:
# Drop rows whose StockCode is one of these four codes, using a single
# membership mask instead of four successive filters.
rating_df = rating_df[
    ~rating_df['StockCode'].isin(["BANK CHARGES", "DOT", "C2", "PADS"])
]

In [4]:
# Keep the first occurrence of every distinct description. The original row
# labels are retained, so the resulting index is not contiguous.
product_description = (
    rating_df.loc[:, 'Description']
    .drop_duplicates(keep='first')
)
product_description

0          WHITE HANGING HEART T-LIGHT HOLDER
1                         WHITE METAL LANTERN
2              CREAM CUPID HEARTS COAT HANGER
3         KNITTED UNION FLAG HOT WATER BOTTLE
4              RED WOOLLY HOTTIE WHITE HEART.
                         ...                 
263907              LETTER "W" BLING KEY RING
263909              LETTER "Z" BLING KEY RING
264772         PINK CRYSTAL SKULL PHONE CHARM
266980     CREAM HANGING HEART T-LIGHT HOLDER
267864            PAPER CRAFT , LITTLE BIRDIE
Name: Description, Length: 3872, dtype: object

In [5]:
# TF-IDF over word 1- to 3-grams with English stop words removed; terms must
# occur in at least 3 descriptions and the vocabulary is capped at 1000.
tfv = TfidfVectorizer(
    min_df=3,
    max_features=1000,
    analyzer="word",
    stop_words="english",
    ngram_range=(1, 3),
    strip_accents='unicode',
    token_pattern=r'\w{1,}',
)

# Replace missing descriptions with a single space before vectorizing.
product_description = product_description.fillna(" ")

In [6]:
# Only the first 3000 descriptions are vectorized, so row i of the matrix
# corresponds to position i of product_description for i < 3000.
tfv_matrix = tfv.fit_transform(product_description.iloc[:3000])
tfv_matrix

<3000x1000 sparse matrix of type '<class 'numpy.float64'>'
	with 13941 stored elements in Compressed Sparse Row format>

In [7]:
# Pairwise sigmoid-kernel similarity of the TF-IDF rows with themselves
# (Y is omitted, so the kernel is computed between X and X).
sig = sigmoid_kernel(tfv_matrix)

In [8]:
# Similarity scores of the item at position 1 against every vectorized item.
sig[1, :]

array([0.76163876, 0.76201381, 0.76159416, ..., 0.76166455, 0.76159416,
       0.76166272])

In [9]:
# Map each product description to its *position* in product_description.
# The similarity matrix and the `.iloc` lookup in content_recomm are both
# positional, whereas product_description keeps the original (non-contiguous)
# row labels after drop_duplicates -- storing those labels here would point
# at the wrong row (or past the end) of the matrix.
# The length is taken from the data instead of a hard-coded 3872.
indices = pd.Series(
    np.arange(len(product_description)),
    index=product_description,
)

In [10]:
# Preview the first ten description -> index entries.
indices.iloc[:10]

Description
WHITE HANGING HEART T-LIGHT HOLDER     0
WHITE METAL LANTERN                    1
CREAM CUPID HEARTS COAT HANGER         2
KNITTED UNION FLAG HOT WATER BOTTLE    3
RED WOOLLY HOTTIE WHITE HEART.         4
SET 7 BABUSHKA NESTING BOXES           5
GLASS STAR FROSTED T-LIGHT HOLDER      6
HAND WARMER UNION JACK                 7
HAND WARMER RED POLKA DOT              8
ASSORTED COLOUR BIRD ORNAMENT          9
dtype: int64

In [21]:
def content_recomm(title,sig=sig, top_n=10):
    """Recommend the products whose descriptions are most similar to ``title``.

    Parameters
    ----------
    title : str
        A product description exactly as it appears in ``product_description``.
    sig : 2-D array-like, optional
        Pairwise similarity matrix whose row/column ``i`` corresponds to
        position ``i`` of ``product_description``. Defaults to the
        module-level ``sig``.
    top_n : int, optional
        Maximum number of recommendations to return (default 10).

    Returns
    -------
    pandas.Series
        Up to ``top_n`` entries of ``product_description``, most similar
        first, never including the queried product itself.

    Raises
    ------
    KeyError
        If ``title`` does not occur in ``product_description``.
    IndexError
        If ``title`` sits at a position that ``sig`` has no row for.
    """
    sig = np.asarray(sig)

    # Resolve the title to its position (not its row label): the similarity
    # matrix and the final `.iloc` lookup are both positional.
    hits = np.flatnonzero((product_description == title).to_numpy())
    if hits.size == 0:
        raise KeyError(title)
    ind = int(hits[0])

    n_items = sig.shape[0]
    if ind >= n_items:
        raise IndexError(
            f"{title!r} is at position {ind}, but the similarity matrix only "
            f"covers the first {n_items} products"
        )

    # Stable descending sort keeps ties in their original order.
    ranked = np.argsort(-sig[ind], kind="stable")
    # Remove the queried product explicitly: with tied scores it is not
    # guaranteed to be first, so slicing off element 0 could drop a real
    # recommendation and return the query itself.
    ranked = ranked[ranked != ind][:top_n]
    return product_description.iloc[ranked]

In [20]:
# Example query.
# NOTE(review): the saved output below (a printed index list followed by a
# Python list of names) does not match what content_recomm returns -- it
# returns a pandas Series -- and this cell's execution count (20) precedes the
# definition's (21). Re-run the notebook top to bottom to refresh the output.
content_recomm('WHITE HANGING HEART T-LIGHT HOLDER')

[57, 500, 1907, 1036, 165, 1563, 303, 430, 1549, 1908]


['RED HANGING HEART T-LIGHT HOLDER',
 'HEART T-LIGHT HOLDER ',
 'HEART T-LIGHT HOLDER',
 'GLASS HEART T-LIGHT HOLDER ',
 'HANGING HEART ZINC T-LIGHT HOLDER',
 'HANGING HEART JAR T-LIGHT HOLDER',
 'SILVER HANGING T-LIGHT HOLDER',
 'ROTATING LEAVES T-LIGHT HOLDER',
 'PERIWINKLE T-LIGHT HOLDER',
 'FOLK ART METAL HEART T-LIGHT HOLDER']