# Collaborative Filtering

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings("ignore")
from sklearn.cluster import KMeans
from sklearn.feature_extraction.text import TfidfVectorizer
import datetime as dt
%matplotlib inline

In [3]:
df = pd.read_csv("Datasets/data.csv")

In [4]:
df.head(5)

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country
0,536365,85123A,WHITE HANGING HEART T-LIGHT HOLDER,6,12/1/2010 8:26,2.55,17850.0,United Kingdom
1,536365,71053,WHITE METAL LANTERN,6,12/1/2010 8:26,3.39,17850.0,United Kingdom
2,536365,84406B,CREAM CUPID HEARTS COAT HANGER,8,12/1/2010 8:26,2.75,17850.0,United Kingdom
3,536365,84029G,KNITTED UNION FLAG HOT WATER BOTTLE,6,12/1/2010 8:26,3.39,17850.0,United Kingdom
4,536365,84029E,RED WOOLLY HOTTIE WHITE HEART.,6,12/1/2010 8:26,3.39,17850.0,United Kingdom


In [5]:
Sub_df = df[['CustomerID','StockCode','Description']]
Sub_df = Sub_df.drop_duplicates()

In [6]:
rating_df = pd.read_csv("Datasets/updated_new.csv")

In [7]:
rating_df.head()

Unnamed: 0,CustomerID,StockCode,Description,Rating
0,17850,85123A,WHITE HANGING HEART T-LIGHT HOLDER,5
1,17850,71053,WHITE METAL LANTERN,5
2,17850,84406B,CREAM CUPID HEARTS COAT HANGER,2
3,17850,84029G,KNITTED UNION FLAG HOT WATER BOTTLE,5
4,17850,84029E,RED WOOLLY HOTTIE WHITE HEART.,3


In [8]:
rating_df = rating_df[rating_df['StockCode'] != "BANK CHARGES"]
rating_df = rating_df[rating_df['StockCode'] != "DOT"]
rating_df = rating_df[rating_df['StockCode'] != "C2"]
rating_df = rating_df[rating_df['StockCode'] != "PADS"]

In [9]:
rating_df.groupby('Description')['Rating'].mean().sort_values(ascending=False).head()

Description
CHRISTMAS TABLE CANDLE SILVER SPIKE    5.0
MIDNIGHT BLUE CRYSTAL DROP EARRINGS    5.0
BLUE/YELLOW CERAMIC CANDLE HOLDER      5.0
PURPLE ANEMONE ARTIFICIAL FLOWER       5.0
PURPLE CHUNKY GLASS+BEAD NECKLACE      5.0
Name: Rating, dtype: float64

In [10]:
rating_df.groupby('Description')['Rating'].count().sort_values(ascending=False).head()

Description
REGENCY CAKESTAND 3 TIER              881
WHITE HANGING HEART T-LIGHT HOLDER    856
PARTY BUNTING                         708
ASSORTED COLOUR BIRD ORNAMENT         678
SET OF 3 CAKE TINS PANTRY DESIGN      640
Name: Rating, dtype: int64

In [11]:
mean_rating = pd.DataFrame(rating_df.groupby('Description')['Rating'].mean())
mean_rating.head()

Unnamed: 0_level_0,Rating
Description,Unnamed: 1_level_1
4 PURPLE FLOCK DINNER CANDLES,3.387097
50'S CHRISTMAS GIFT BAG LARGE,2.773585
DOLLY GIRL BEAKER,3.06
I LOVE LONDON MINI BACKPACK,2.810345
I LOVE LONDON MINI RUCKSACK,3.0


In [12]:
mean_rating['No. of ratings'] = rating_df.groupby('Description')['Rating'].count()

In [13]:
mean_rating.head()

Unnamed: 0_level_0,Rating,No. of ratings
Description,Unnamed: 1_level_1,Unnamed: 2_level_1
4 PURPLE FLOCK DINNER CANDLES,3.387097,31
50'S CHRISTMAS GIFT BAG LARGE,2.773585,106
DOLLY GIRL BEAKER,3.06,100
I LOVE LONDON MINI BACKPACK,2.810345,58
I LOVE LONDON MINI RUCKSACK,3.0,1


In [14]:
ratings_matrix = rating_df.pivot_table(values="Rating",index="CustomerID",columns="Description")

In [15]:
ratings_matrix

Description,4 PURPLE FLOCK DINNER CANDLES,50'S CHRISTMAS GIFT BAG LARGE,DOLLY GIRL BEAKER,I LOVE LONDON MINI BACKPACK,I LOVE LONDON MINI RUCKSACK,NINE DRAWER OFFICE TIDY,OVAL WALL MIRROR DIAMANTE,RED SPOT GIFT BAG LARGE,SET 2 TEA TOWELS I LOVE LONDON,SPACEBOY BABY GIFT SET,...,ZINC STAR T-LIGHT HOLDER,ZINC SWEETHEART SOAP DISH,ZINC SWEETHEART WIRE LETTER RACK,ZINC T-LIGHT HOLDER STAR LARGE,ZINC T-LIGHT HOLDER STARS LARGE,ZINC T-LIGHT HOLDER STARS SMALL,ZINC TOP 2 DOOR WOODEN SHELF,ZINC WILLIE WINKIE CANDLE STICK,ZINC WIRE KITCHEN ORGANISER,ZINC WIRE SWEETHEART LETTER TRAY
CustomerID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
12346,,,,,,,,,,,...,,,,,,,,,,
12347,,,,,,,,,,,...,,,,,,,,,,
12348,,,,,,,,,,,...,,,,,,,,,,
12349,,,,,,,,,,,...,,,,,,,,,,
12350,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18280,,,,,,,,,,,...,,,,,,,,,,
18281,,,,,,,,,,2.0,...,,,,,,,,,,
18282,,,,,,,,,,,...,,,,,,,,,,
18283,,,,,,,,1.0,,,...,,,,,,,,,,


In [16]:
mean_rating.sort_values('No. of ratings',ascending=False).head()

Unnamed: 0_level_0,Rating,No. of ratings
Description,Unnamed: 1_level_1,Unnamed: 2_level_1
REGENCY CAKESTAND 3 TIER,3.00454,881
WHITE HANGING HEART T-LIGHT HOLDER,3.044393,856
PARTY BUNTING,2.95904,708
ASSORTED COLOUR BIRD ORNAMENT,2.926254,678
SET OF 3 CAKE TINS PANTRY DESIGN,2.98125,640


In [17]:
Regency_cakestand_ratings = ratings_matrix['REGENCY CAKESTAND 3 TIER'] 
Regency_cakestand_ratings.head()

CustomerID
12346    NaN
12347    1.0
12348    NaN
12349    2.0
12350    NaN
Name: REGENCY CAKESTAND 3 TIER, dtype: float64

In [18]:
def recommended_items(item):
    item_ratings = ratings_matrix[item]
    item_ratings_corr = ratings_matrix.corrwith(item_ratings)
    item_ratings_corrdf = pd.DataFrame(item_ratings_corr,columns=["Corrleation"]) 
    item_ratings_corrdf.dropna(inplace=True)
    item_ratings_corrdf = item_ratings_corrdf.join(mean_rating['No. of ratings'])
    item_ratings_corrdf = item_ratings_corrdf[item_ratings_corrdf['No. of ratings']>100].sort_values('Corrleation',ascending=False)
    return item_ratings_corrdf.head(10)

In [19]:
recommended_items('REGENCY CAKESTAND 3 TIER')

Unnamed: 0_level_0,Corrleation,No. of ratings
Description,Unnamed: 1_level_1,Unnamed: 2_level_1
REGENCY CAKESTAND 3 TIER,1.0,881
PACK OF 12 SKULL TISSUES,0.449672,129
BUNDLE OF 3 SCHOOL EXERCISE BOOKS,0.422428,145
HAPPY STENCIL CRAFT,0.408874,171
SWALLOW SQUARE TISSUE BOX,0.408523,103
SMALL PURPLE BABUSHKA NOTEBOOK,0.363655,110
RED RETROSPOT CHILDRENS UMBRELLA,0.360465,138
SET 12 KIDS COLOUR CHALK STICKS,0.353196,193
COLOURING PENCILS BROWN TUBE,0.343098,193
JAM JAR WITH GREEN LID,0.341867,165


In [20]:
recommended_items('SWALLOW SQUARE TISSUE BOX')

Unnamed: 0_level_0,Corrleation,No. of ratings
Description,Unnamed: 1_level_1,Unnamed: 2_level_1
SWALLOW SQUARE TISSUE BOX,1.0,103
SET OF 6 T-LIGHTS SNOWMEN,0.981981,106
PACK OF 12 COLOURED PENCILS,0.95533,162
POPPY'S PLAYHOUSE BATHROOM,0.94388,123
ZINC WILLIE WINKIE CANDLE STICK,0.939336,137
BLUE POLKADOT PLATE,0.888235,107
UNION STRIPE WITH FRINGE HAMMOCK,0.872872,113
SMALL IVORY HEART WALL ORGANISER,0.834441,115
SWALLOW WOODEN CHRISTMAS DECORATION,0.790569,102
HOLIDAY FUN LUDO,0.769897,136
