# Building Recommendation System

In [None]:
!pip install rake_nltk

In [29]:
import pandas as pd
from rake_nltk import Rake
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import CountVectorizer

#### Importing dataset

In [30]:
dataframe = pd.read_excel('products.xlsx')

dataframe = pd.DataFrame(dataframe)
dataframe.head()

Unnamed: 0,ProductId,Title,Image URL,Division,Category,Sub category,Article type,Offer price,Sale price,Color,Gender
0,E8AC41D107EA5DBB5DF561A684899B38,Color Dose Pearls Bronze 59 Nail Polish,"https://media6.ppl-media.com/tr:h-750,w-750,c-...",Makeup,Nails,Nail Polish,Nail Polish,59,59,Color Dose,women
1,04AE770377AABCCBCCDD5FBC93CC83E1,Rimmel Space Dust Nail Polish - Luna Love #004,"https://media6.ppl-media.com/tr:h-750,w-750,c-...",Makeup,Nails,Nail Polish,Nail Polish,310,310,Rimmel,women
2,21B0CF144B3CFFDDCCE4937C7731A7CE,Color Dose Pearls Copper Rust 58 Nail Polish,"https://media6.ppl-media.com/tr:h-750,w-750,c-...",Makeup,Nails,Nail Polish,Nail Polish,59,59,Color Dose,women
3,129CE0C7596D2D346E5CDC54E55A3A67,Color Dose Matt Nude 25 Nail Polish,"https://media6.ppl-media.com/tr:h-235,w-235,c-...",,Nails,Nail Polish,Nail Polish,59,59,color dose,women
4,A5E51E0F6C9929ECD16D875EC9A4AFA5,SUGAR Tip Tac Toe Nail Lacquer - 032 Mary Popp...,"https://media6.ppl-media.com/tr:h-750,w-750,c-...",Makeup,Nails,Nail Polish,Nail Polish,237,249,Sugar,women


In [31]:
dataframe = dataframe.fillna(' ')

#### Creating Bag of Words

In [32]:
dataframe['bag_of_words'] =dataframe["Division"] + dataframe["Category"] + dataframe["Sub category"] + dataframe["Gender"] 

In [33]:
dataframe = dataframe[["ProductId", "bag_of_words","Title"]]
dataframe.head()

Unnamed: 0,ProductId,bag_of_words,Title
0,E8AC41D107EA5DBB5DF561A684899B38,MakeupNailsNail Polishwomen,Color Dose Pearls Bronze 59 Nail Polish
1,04AE770377AABCCBCCDD5FBC93CC83E1,MakeupNailsNail Polishwomen,Rimmel Space Dust Nail Polish - Luna Love #004
2,21B0CF144B3CFFDDCCE4937C7731A7CE,MakeupNailsNail Polishwomen,Color Dose Pearls Copper Rust 58 Nail Polish
3,129CE0C7596D2D346E5CDC54E55A3A67,NailsNail Polishwomen,Color Dose Matt Nude 25 Nail Polish
4,A5E51E0F6C9929ECD16D875EC9A4AFA5,MakeupNailsNail Polishwomen,SUGAR Tip Tac Toe Nail Lacquer - 032 Mary Popp...


In [34]:
dataframe.set_index('ProductId', inplace = True)
dataframe.head()

Unnamed: 0_level_0,bag_of_words,Title
ProductId,Unnamed: 1_level_1,Unnamed: 2_level_1
E8AC41D107EA5DBB5DF561A684899B38,MakeupNailsNail Polishwomen,Color Dose Pearls Bronze 59 Nail Polish
04AE770377AABCCBCCDD5FBC93CC83E1,MakeupNailsNail Polishwomen,Rimmel Space Dust Nail Polish - Luna Love #004
21B0CF144B3CFFDDCCE4937C7731A7CE,MakeupNailsNail Polishwomen,Color Dose Pearls Copper Rust 58 Nail Polish
129CE0C7596D2D346E5CDC54E55A3A67,NailsNail Polishwomen,Color Dose Matt Nude 25 Nail Polish
A5E51E0F6C9929ECD16D875EC9A4AFA5,MakeupNailsNail Polishwomen,SUGAR Tip Tac Toe Nail Lacquer - 032 Mary Popp...


#### Generating the count vectorizer matrix

In [None]:
count = CountVectorizer()
count_matrix = count.fit_transform(dataframe['bag_of_words'])

# Creating ordered Series
# list to be used later for index matching
indices = pd.Series(dataframe.index)
indices[:5]

#### Generating the cosine similarity matrix

In [36]:
cosine_sim = cosine_similarity(count_matrix, count_matrix)
cosine_sim

array([[1., 1., 1., ..., 0., 0., 0.],
       [1., 1., 1., ..., 0., 0., 0.],
       [1., 1., 1., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 1., 1., 1.],
       [0., 0., 0., ..., 1., 1., 1.],
       [0., 0., 0., ..., 1., 1., 1.]])

In [37]:
# Associating product ids with ordered numerical values
# function will be used to match indices
indices = pd.Series(dataframe.index)

#  defining the function that takes in userid 
# as input and returns the top 10 recommendations
def recommendations(productId, cosine_sim = cosine_sim):
    
    # initializing the empty list of recommended products
    recommended_products = []
    
    # gettin the index of the product that matches the productId
    idx = indices[indices == productId].index[0]

    # creating a Series with the similarity scores in descending order
    score_series = pd.Series(cosine_sim[idx]).sort_values(ascending = False)

    # getting the indices of the 10 most similar products
    top_10_indexes = list(score_series.iloc[1:11].index)
    
    # populating the list with the titles of the best 10 matching products
    for i in top_10_indexes:
        recommended_products.append(list(dataframe.Title)[i])
        
    return recommended_products

#### Output

In [38]:
recommendations('E8AC41D107EA5DBB5DF561A684899B38')

['Stay Quirky Nail Polish, Matte Finish, Pastel - Matt-nificent 1079 (6 ml)',
 'Sally Hansen Triple Shine Nail - Pixie Slicks #200 (10 ml)',
 'SUGAR Tip Tac Toe Nail Lacquer - 004 Mauve Mountains (Dusty Pink)',
 'Avon Nailpaint True Color Nwp+ Golden Wine (8 ml)',
 'Stay Quirky Nail Polish, Mauve - Owned 263 (6 ml)',
 'Sally Hansen Sugar Coat Textured Nail Color - Sweetie #400',
 'Faces Magnetix Nail Lacquer Silver Charm 9 5 ml Diagonal Pattern',
 'Rimmel Lycra Pro Professional Finish - Celebrity Bash #391',
 'Modelones Temperature Change Color Thermal Gel Polish UV Led Nail Gel Polish Soak off UV Gel Nail Polish Chameleon Varnish (#5756) (10 ml)',
 'Yves Rocher Nail Polish Peony 33 (5 ml)']

In [39]:
recommendations('129CE0C7596D2D346E5CDC54E55A3A67')

["Stay Quirky Nail Polish, It'S Pinke 466 (6 ml)",
 'Teen Beauty Wow Factor ! Matte Nail Paints Combo Pack Of Two (9 ml + 9 ml)',
 'LYN Live Your Now Nail Polish In the Nude to Party (8 ml)',
 'Stay Quirky Nail Polish, Maroon Mod 113 (6 ml)',
 'Teen Beauty Matte Nail Polish Hot Chocolate (9 ml)',
 'Stay Quirky Nail Polish, Pink-Iness 403 (6 ml)',
 'Eylina Ultra Hd Nail Polish Melon Red NP004 (9 ml)',
 'Stay Quirky Nail Polish, Hitched To Red 218 (6 ml)',
 'Avon Color Nailwear Pro Plus Sizzling Red (8 ml)',
 'Teen Beauty Breezy Feelings ! Matte Nail Paints Combo Pack Of Two (9 ml + 9 ml)']