In [1]:
%matplotlib inline

import pandas as pd
from sklearn.cross_validation import train_test_split
import numpy as np
import time
import Recommenders as Recommenders






In [2]:
df = pd.read_csv('./data/finalData.csv')

## Most popular items from the dataset

In [3]:
items_grouped = df.groupby(['Items']).agg({'Quantity': 'count'}).reset_index()
quantity_sum = items_grouped['Quantity'].sum()
items_grouped['percentage']  = items_grouped['Quantity'].div(quantity_sum)*100
items_grouped.sort_values(['Quantity', 'Items'], ascending = [0,1])



Unnamed: 0,Items,Quantity,percentage
3787,WHITE HANGING HEART T-LIGHT HOLDER,1885,0.410213
2806,REGENCY CAKESTAND 3 TIER,1691,0.367995
1790,JUMBO BAG RED RETROSPOT,1618,0.352109
2376,PARTY BUNTING,1484,0.322948
1973,LUNCH BAG RED RETROSPOT,1334,0.290305
3084,SET OF 3 CAKE TINS PANTRY DESIGN,1332,0.289870
2164,NATURAL SLATE HEART CHALKBOARD,1196,0.260273
1965,LUNCH BAG BLACK SKULL.,1178,0.256356
3411,SPOTTY BUNTING,1086,0.236335
1757,JAM MAKING SET WITH JARS,1074,0.233724


## Number of unique users in the dataset

In [4]:
users = df.CustomerID.unique()

In [5]:
len(users)

5534

## Number of unique items in the dataset

In [6]:
items = df.Items.unique()

In [7]:
len(items)

3947

## Item recommender

In [8]:
train_data, test_data = train_test_split(df, test_size = 0.20, random_state=0)
train_data.head(5)

Unnamed: 0,InvoiceNo,StockCode,Items,Quantity,InvoiceDate,UnitPrice,CustomerID,Country
12911,537608.0,22554,PLASTERS IN TIN WOODLAND ANIMALS,1,2010-12-07 13:20:00,1.65,15024.0,United Kingdom
385928,575739.0,23482,PEARLISED IVORY HEART LARGE,1,2011-11-11 09:05:00,3.29,575739.0,United Kingdom
420319,578347.0,82567,"AIRLINE LOUNGE,METAL SIGN",2,2011-11-24 09:26:00,1.63,578347.0,United Kingdom
139152,550500.0,21212,PACK OF 72 RETROSPOT CAKE CASES,1,2011-04-18 15:26:00,1.25,550500.0,United Kingdom
401599,576900.0,22736,RIBBON REEL MAKING SNOWMEN,10,2011-11-17 08:53:00,1.65,15489.0,United Kingdom


## Create an instance of popularity based recommender class

In [9]:
pm = Recommenders.popularity_recommender_py()
pm.create(train_data, 'CustomerID', 'Items')

In [10]:
user_id = users[5]
pm.recommend(user_id)

Unnamed: 0,user_id,Items,score,Rank
3741,16098.0,WHITE HANGING HEART T-LIGHT HOLDER,1497,1.0
2769,16098.0,REGENCY CAKESTAND 3 TIER,1328,2.0
1762,16098.0,JUMBO BAG RED RETROSPOT,1284,3.0
2342,16098.0,PARTY BUNTING,1215,4.0
1941,16098.0,LUNCH BAG RED RETROSPOT,1085,5.0
3043,16098.0,SET OF 3 CAKE TINS PANTRY DESIGN,1029,6.0
2132,16098.0,NATURAL SLATE HEART CHALKBOARD,961,7.0
1933,16098.0,LUNCH BAG BLACK SKULL.,933,8.0
3367,16098.0,SPOTTY BUNTING,886,9.0
1943,16098.0,LUNCH BAG SPACEBOY DESIGN,867,10.0


In [11]:
user_id = users[8]
pm.recommend(user_id)

Unnamed: 0,user_id,Items,score,Rank
3741,16250.0,WHITE HANGING HEART T-LIGHT HOLDER,1497,1.0
2769,16250.0,REGENCY CAKESTAND 3 TIER,1328,2.0
1762,16250.0,JUMBO BAG RED RETROSPOT,1284,3.0
2342,16250.0,PARTY BUNTING,1215,4.0
1941,16250.0,LUNCH BAG RED RETROSPOT,1085,5.0
3043,16250.0,SET OF 3 CAKE TINS PANTRY DESIGN,1029,6.0
2132,16250.0,NATURAL SLATE HEART CHALKBOARD,961,7.0
1933,16250.0,LUNCH BAG BLACK SKULL.,933,8.0
3367,16250.0,SPOTTY BUNTING,886,9.0
1943,16250.0,LUNCH BAG SPACEBOY DESIGN,867,10.0


The above model will be same for all as it is most popular recommendations used for new users

## personalized model to predict items 

### (input -> user)

### (output -> items)

In [12]:
is_model = Recommenders.item_similarity_recommender_py()
is_model.create(train_data, 'CustomerID', 'Items')

In [13]:
user_id = users[5]
user_items = is_model.get_user_items(user_id)
#
print("------------------------------------------------------------------------------------")
print("Training data items for the user userid: %s:" % user_id)
print("------------------------------------------------------------------------------------")

for user_item in user_items:
    print(user_item)

print("----------------------------------------------------------------------")
print("Recommendation process going on:")
print("----------------------------------------------------------------------")

#Recommend items for the user using personalized model
is_model.recommend(user_id)

------------------------------------------------------------------------------------
Training data items for the user userid: 16098.0:
------------------------------------------------------------------------------------
ALARM CLOCK BAKELIKE RED 
INFLATABLE POLITICAL GLOBE 
VINTAGE SNAKES & LADDERS
3 TIER CAKE TIN GREEN AND CREAM
ALARM CLOCK BAKELIKE PINK
BINGO SET
IVORY REFECTORY CLOCK
PINK FLY SWAT
ALARM CLOCK BAKELIKE IVORY
IVORY GIANT GARDEN THERMOMETER
LE GRAND TRAY CHIC SET
ALARM CLOCK BAKELIKE GREEN
ALARM CLOCK BAKELIKE CHOCOLATE
JUMBO SHOPPER VINTAGE RED PAISLEY
SET/5 RED RETROSPOT LID GLASS BOWLS
TOY TIDY PINK POLKADOT
RED METAL BOX TOP SECRET
REVOLVER WOODEN RULER 
RECYCLING BAG RETROSPOT 
ALARM CLOCK BAKELIKE ORANGE
ANTIQUE GLASS DRESSING TABLE POT
S/15 SILVER GLASS BAUBLES IN BAG
CHOCOLATE CALCULATOR
SET 3 WICKER OVAL BASKETS W LIDS
BLUE FLY SWAT
----------------------------------------------------------------------
Recommendation process going on:
--------------------------

Unnamed: 0,user_id,item,score,rank
0,16098.0,JUMBO BAG RED RETROSPOT,0.075109,1
1,16098.0,PACK OF 72 RETROSPOT CAKE CASES,0.074269,2
2,16098.0,SUKI SHOULDER BAG,0.072922,3
3,16098.0,RECIPE BOX PANTRY YELLOW DESIGN,0.072304,4
4,16098.0,SKULL SHOULDER BAG,0.072112,5
5,16098.0,DOLLY GIRL LUNCH BOX,0.072085,6
6,16098.0,LUNCH BAG RED RETROSPOT,0.071943,7
7,16098.0,BOX OF 24 COCKTAIL PARASOLS,0.07148,8
8,16098.0,PHOTO CUBE,0.071246,9
9,16098.0,JUMBO BAG APPLES,0.070518,10


In [14]:


user_id = users[7]
#Fill in the code here
user_items = is_model.get_user_items(user_id)
#
print("------------------------------------------------------------------------------------")
print("Training data items for the user userid: %s:" % user_id)
print("------------------------------------------------------------------------------------")

for user_item in user_items:
    print(user_item)

print("----------------------------------------------------------------------")
print("Recommendation process going on:")
print("----------------------------------------------------------------------")

#Recommend items for the user using personalized model
is_model.recommend(user_id)



------------------------------------------------------------------------------------
Training data items for the user userid: 17420.0:
------------------------------------------------------------------------------------
JUMBO BAG DOLLY GIRL DESIGN
JUMBO BAG SCANDINAVIAN BLUE PAISLEY
VINTAGE BELLS GARLAND
JUMBO BAG SPACEBOY DESIGN
JUMBO BAG PINK VINTAGE PAISLEY
LUNCH BAG SPACEBOY DESIGN 
JUMBO BAG 50'S CHRISTMAS 
JUMBO BAG VINTAGE LEAF
ZINC FOLKART SLEIGH BELLS
TREASURE TIN BUFFALO BILL 
JAM MAKING SET PRINTED
ORGANISER WOOD ANTIQUE WHITE 
JUMBO BAG PEARS
JAM MAKING SET WITH JARS
SET 3 WICKER OVAL BASKETS W LIDS
TREASURE TIN GYMKHANA DESIGN
16 PIECE CUTLERY SET PANTRY DESIGN
LUNCH BAG APPLE DESIGN
WHITE WOOD GARDEN PLANT LADDER
JUMBO BAG APPLES
LUNCH BAG ALPHABET DESIGN
----------------------------------------------------------------------
Recommendation process going on:
----------------------------------------------------------------------
No. of unique items for the user: 21
no. of u

Unnamed: 0,user_id,item,score,rank
0,17420.0,JUMBO BAG ALPHABET,0.148389,1
1,17420.0,JUMBO BAG VINTAGE DOILY,0.14151,2
2,17420.0,JUMBO BAG RED RETROSPOT,0.13584,3
3,17420.0,LUNCH BAG RED RETROSPOT,0.131548,4
4,17420.0,LUNCH BAG WOODLAND,0.130845,5
5,17420.0,LUNCH BAG SUKI DESIGN,0.129479,6
6,17420.0,LUNCH BAG PINK POLKADOT,0.129108,7
7,17420.0,LUNCH BAG CARS BLUE,0.129092,8
8,17420.0,LUNCH BAG BLACK SKULL.,0.127004,9
9,17420.0,JUMBO BAG STRAWBERRY,0.125203,10


In [15]:
is_model.get_similar_items(['LUNCH BAG BLACK SKULL'])

no. of unique items in the training set: 3898
Non zero values in cooccurence_matrix :0


Unnamed: 0,user_id,item,score,rank
0,,SET/4 2 TONE EGG SHAPE MIXING BOWLS,0.0,1
1,,WRAP I LOVE LONDON,0.0,2
2,,ORANGE/FUSCHIA STONES NECKLACE,0.0,3
3,,PURPLE ANEMONE ARTIFICIAL FLOWER,0.0,4
4,,LARGE PARLOUR FRAME,0.0,5
5,,PINK SPOTS CHOCOLATE NESTING BOXES,0.0,6
6,,WRAP ENGLISH ROSE,0.0,7
7,,SET OF 4 KNICK KNACK TINS DOILEY,0.0,8
8,,OCEAN SCENT CANDLE JEWELLED DRAWER,0.0,9
9,,PASTEL PINK PHOTO ALBUM,0.0,10
