In [61]:
# import libraries
import os

import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.impute import SimpleImputer

In [62]:
# loading the dataset
# The CSV location is machine-specific, so it can be overridden by setting the
# NIKE_DATA_CSV environment variable; the original path remains the default.
DATA_CSV = os.environ.get(
    'NIKE_DATA_CSV',
    r'C:\Users\jayak\OneDrive\Desktop\Data Analytics\Filtering\nike_data_2022_09.csv',
)
ds = pd.read_csv(DATA_CSV)

In [63]:
# Preview the first five rows of the freshly loaded frame.
ds.head(n=5)

Unnamed: 0,index,url,name,sub_title,brand,model,color,price,currency,availability,description,raw_description,avg_rating,review_count,images,available_sizes,uniq_id,scraped_at
0,0,https://www.nike.com/t/dri-fit-team-minnesota-...,Nike Dri-FIT Team (MLB Minnesota Twins),Men's Long-Sleeve T-Shirt,Nike,14226571,Navy,40.0,USD,InStock,SWEAT-WICKING COMFORT.The Nike Dri-FIT Team (M...,"<div class=""pi-pdpmainbody""><p><b class=""headl...",,,https://static.nike.com/a/images/t_PDP_1280_v1...,S | M | L | XL | 2XL,c3229e54-aa58-5fdd-9f71-fbe66366b2b2,20/09/2022 23:32:28
1,1,https://www.nike.com/t/club-américa-womens-dri...,Club América,Women's Nike Dri-FIT Soccer Jersey Dress,Nike,13814665,Black/Black,90.0,USD,InStock,"Inspired by traditional soccer jerseys, the Cl...","<div class=""pi-pdpmainbody""><br/><p>Inspired b...",5.0,1.0,https://static.nike.com/a/images/t_PDP_1280_v1...,L (12–14),f8ebb2ed-17ae-5719-b750-5ea3ec69b75c,20/09/2022 23:32:40
2,2,https://www.nike.com/t/sportswear-swoosh-mens-...,Nike Sportswear Swoosh,Men's Overalls,Nike,13015648,Black/White,140.0,USD,OutOfStock,WORKING HARD TO KEEP YOU COMFORTABLE.The Nike ...,"<div class=""pi-pdpmainbody""><p><b class=""headl...",4.9,11.0,https://static.nike.com/a/images/t_PDP_1280_v1...,,88120081-e6cb-5399-b9dc-a2d3d5dd5206,20/09/2022 23:33:16
3,3,https://www.nike.com/t/dri-fit-one-luxe-big-ki...,Nike Dri-FIT One Luxe,Big Kids' (Girls') Printed Tights (Extended Size),Nike,13809796,Black/Rush Pink,22.97,USD,OutOfStock,ELEVATED COMFORT GOES FULL BLOOM.The Nike Dri-...,"<div class=""pi-pdpmainbody""><p><b class=""headl...",,,https://static.nike.com/a/images/t_PDP_1280_v1...,,98348cc5-1520-5b6e-a5f6-c42547b6a092,20/09/2022 23:33:17
4,4,https://www.nike.com/t/paris-saint-germain-rep...,Paris Saint-Germain Repel Academy AWF,Big Kids' Soccer Jacket,Nike,13327415,Dark Grey/Black/Siren Red/Siren Red,70.0,USD,InStock,WATER-REPELLENT COVERAGE GETS PSG DETAILS.The ...,"<div class=""pi-pdpmainbody""><p><b class=""headl...",,,https://static.nike.com/a/images/t_PDP_1280_v1...,XS | S | M | L | XL,f15981a5-d8c9-53fa-880d-80606be188fe,20/09/2022 23:33:22


In [64]:
# dropping unnecessary columns
# Reassign instead of mutating in place, and ignore labels that are already
# gone, so re-running this cell does not raise KeyError after the first run.
ds = ds.drop(
    columns=['url', 'raw_description', 'images', 'uniq_id', 'scraped_at'],
    errors='ignore',
)

In [65]:
# Confirm the column drop by previewing the first five rows again.
ds.head(n=5)

Unnamed: 0,index,name,sub_title,brand,model,color,price,currency,availability,description,avg_rating,review_count,available_sizes
0,0,Nike Dri-FIT Team (MLB Minnesota Twins),Men's Long-Sleeve T-Shirt,Nike,14226571,Navy,40.0,USD,InStock,SWEAT-WICKING COMFORT.The Nike Dri-FIT Team (M...,,,S | M | L | XL | 2XL
1,1,Club América,Women's Nike Dri-FIT Soccer Jersey Dress,Nike,13814665,Black/Black,90.0,USD,InStock,"Inspired by traditional soccer jerseys, the Cl...",5.0,1.0,L (12–14)
2,2,Nike Sportswear Swoosh,Men's Overalls,Nike,13015648,Black/White,140.0,USD,OutOfStock,WORKING HARD TO KEEP YOU COMFORTABLE.The Nike ...,4.9,11.0,
3,3,Nike Dri-FIT One Luxe,Big Kids' (Girls') Printed Tights (Extended Size),Nike,13809796,Black/Rush Pink,22.97,USD,OutOfStock,ELEVATED COMFORT GOES FULL BLOOM.The Nike Dri-...,,,
4,4,Paris Saint-Germain Repel Academy AWF,Big Kids' Soccer Jacket,Nike,13327415,Dark Grey/Black/Siren Red/Siren Red,70.0,USD,InStock,WATER-REPELLENT COVERAGE GETS PSG DETAILS.The ...,,,XS | S | M | L | XL


In [66]:
# TF-IDF vectorizer that turns free-text descriptions into numeric features
tfidf = TfidfVectorizer(
    analyzer='word',
    token_pattern=r'\w{1,}',   # any run of one or more word characters is a token
    strip_accents='unicode',
    stop_words='english',
    ngram_range=(1, 3),        # unigrams, bigrams and trigrams
    min_df=2,                  # skip terms found in fewer than two documents
    max_features=None,         # no cap on vocabulary size
)
    

In [67]:
# Fitting the TF-IDF on the 'description' column
# A missing description (NaN) makes fit_transform raise, so treat it as an
# empty document; rows that already contain text are unaffected.
tfv_matrix = tfidf.fit_transform(ds['description'].fillna(''))

In [68]:
# Pairwise sigmoid-kernel similarity between every pair of TF-IDF rows.
# With no second argument the kernel is evaluated of the matrix against itself.
from sklearn.metrics.pairwise import sigmoid_kernel
sig = sigmoid_kernel(tfv_matrix)

In [69]:
# assigning the indices to the products
# Map each product name to its row position, which is what `sig[...]` and
# `.iloc[...]` in `rec` are indexed by.
# Series.drop_duplicates() compares the *values* (the row numbers, which are
# always unique), so it never removed repeated names. De-duplicate on the
# index instead, keeping the first occurrence of each name, so that
# `indices[name]` always yields a single integer.
indices = pd.Series(np.arange(len(ds)), index=ds['name'])
indices = indices[~indices.index.duplicated(keep='first')]

In [70]:
# Display the product-name -> row-index lookup Series.
indices

name
Nike Dri-FIT Team (MLB Minnesota Twins)                             0
Club América                                                        1
Nike Sportswear Swoosh                                              2
Nike Dri-FIT One Luxe                                               3
Paris Saint-Germain Repel Academy AWF                               4
                                                                 ... 
Air Jordan XXXV Low DS PF                                         107
Nike College Dri-FIT (UCLA)                                       108
Jordan                                                            109
NFL Los Angeles Rams Nike Vapor Untouchable (Matthew Stafford)    110
Nike Team First (MLB Houston Astros)                              111
Length: 112, dtype: int64

In [71]:
# creating a function to recommend products
def rec(name, sig=sig, top_n=None):
    """Rank catalogue products by similarity to the product called ``name``.

    Parameters
    ----------
    name : str
        Product name to look up in the module-level ``indices`` Series.
    sig : array-like, optional
        Pairwise similarity matrix; row ``i`` holds the scores of product ``i``
        against every product. The default is bound to the ``sig`` object that
        exists when this cell runs, so re-run the cell after recomputing it.
    top_n : int or None, optional
        Keep only the ``top_n`` best-scoring rows. ``None`` (the default)
        returns the complete ranking.

    Returns
    -------
    pandas.Series
        Values of ``ds['name']`` ordered from highest to lowest score. The
        queried product is itself part of the ranking.

    Raises
    ------
    KeyError
        If ``name`` is not present in ``indices``.
    """
    try:
        idx = indices[name]
    except KeyError:
        raise KeyError(f"product {name!r} not found in `indices`") from None

    # A name that occurs more than once in `indices` yields a Series rather
    # than a scalar; fall back to its first occurrence so `sig[idx]` is one row.
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]

    scores = np.asarray(sig[idx])
    # Stable descending sort: ties keep their original row order.
    ranked = np.argsort(-scores, kind='stable')
    if top_n is not None:
        ranked = ranked[:top_n]
    return ds['name'].iloc[ranked]

In [72]:
# Sanity check: rank the catalogue against one known product name.
rec(name='Nike College Dri-FIT (UCLA)')

108              Nike College Dri-FIT (UCLA)
101    Nike College Dri-FIT (Michigan State)
80                              Nike Dri-FIT
92                          U.S. Academy Pro
66           Nike Dri-FIT Performance Select
                       ...                  
56                             Nike 8-Pound 
67                       Air Jordan 1 Low SE
20      Denver Nuggets Nike x Filip Pagowski
30              Nike Air Force 1 LV8 Utility
104                                     Nike
Name: name, Length: 112, dtype: object