In [1]:
import pandas as pd
import numpy as np
import matplotlib as mpl  
import matplotlib.pyplot as plt  
import seaborn as sns  
import sklearn
from textblob import TextBlob
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

In [2]:
# Calling Clean Data
data = pd.read_csv('cleaned_data.csv')

In [3]:
data.shape

(19662, 13)

In [4]:
data.head()

Unnamed: 0,Clothing ID,Age,Title,Review Text,Rating,Recommended IND,Positive Feedback Count,Division Name,Department Name,Class Name,Polarity,Subjectivity,Sentiment Label
0,1077,60,Some major design flaws,I had such high hopes for this dress and reall...,3,0,0,General,Dresses,Dresses,0.073675,0.356294,Positive
1,1049,50,My favorite buy!,"I love, love, love this jumpsuit. it's fun, fl...",5,1,0,General Petite,Bottoms,Pants,0.55,0.625,Positive
2,847,47,Flattering shirt,This shirt is very flattering to all due to th...,5,1,6,General,Tops,Blouses,0.512891,0.56875,Positive
3,1080,49,Not for the very petite,"I love tracy reese dresses, but this one is no...",2,0,4,General,Dresses,Dresses,0.17875,0.533125,Positive
4,858,39,Cagrcoal shimmer fun,I aded this in my basket at hte last mintue to...,5,1,1,General Petite,Tops,Knits,0.13375,0.607778,Positive


In [5]:
data.describe()

Unnamed: 0,Clothing ID,Age,Rating,Recommended IND,Positive Feedback Count,Polarity,Subjectivity
count,19662.0,19662.0,19662.0,19662.0,19662.0,19662.0,19662.0
mean,921.297274,43.260808,4.183145,0.818177,2.652477,0.246685,0.557902
std,200.227528,12.258122,1.112224,0.385708,5.834285,0.173206,0.12405
min,1.0,18.0,1.0,0.0,0.0,-0.975,0.0
25%,861.0,34.0,4.0,1.0,0.0,0.139286,0.479
50%,936.0,41.0,5.0,1.0,1.0,0.237373,0.551641
75%,1078.0,52.0,5.0,1.0,3.0,0.346154,0.631481
max,1205.0,99.0,5.0,1.0,122.0,1.0,1.0


In [6]:
data.describe().T.drop('count',axis=1)

Unnamed: 0,mean,std,min,25%,50%,75%,max
Clothing ID,921.297274,200.227528,1.0,861.0,936.0,1078.0,1205.0
Age,43.260808,12.258122,18.0,34.0,41.0,52.0,99.0
Rating,4.183145,1.112224,1.0,4.0,5.0,5.0,5.0
Recommended IND,0.818177,0.385708,0.0,1.0,1.0,1.0,1.0
Positive Feedback Count,2.652477,5.834285,0.0,0.0,1.0,3.0,122.0
Polarity,0.246685,0.173206,-0.975,0.139286,0.237373,0.346154,1.0
Subjectivity,0.557902,0.12405,0.0,0.479,0.551641,0.631481,1.0


In [7]:
data['Clothing ID'].unique()

array([1077, 1049,  847, ...,  721,  262,  522])

In [8]:
data['Clothing ID'].value_counts()

1078    871
862     658
1094    651
1081    487
829     452
       ... 
201       1
1200      1
525       1
1127      1
522       1
Name: Clothing ID, Length: 1095, dtype: int64

In [9]:
data = data[['Clothing ID', 'Title', 'Review Text','Rating']]

In [10]:
# Getting Feedback of customers about Clothing ID
data = data.dropna(subset=['Review Text'])

In [11]:
 # TF-IDF used for feacture Vectorizer scikit-learn
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(data['Review Text'])

In [12]:
 #Applying Cosine Similarity
cosine_similarities = linear_kernel(tfidf_matrix, tfidf_matrix)

In [None]:
def recommend_items(Clothing_id, cosine_similarities, data, top_n=5):
    indices = pd.Series(data.index, index=data['Clothing ID']).drop_duplicates()
    if Clothing_id not in indices:
        print(f"Clothing ID {Clothing_id} not found in the data.")
        return None
    
    idx = indices[Clothing_id]
    
    # Locating the cosine similarities for the specific Clothing_id 
    similarity_scores = cosine_similarities[idx, :]
    
    # Displaying the indices of the top_n items with highest similarity 
    top_items_indices = similarity_scores.argsort()[::-1][1:top_n+1].flatten()
    
    return data.iloc[top_items_indices]

Clothing_id = 861  # Selected Clothing ID
recommended_items = recommend_items(Clothing_id, cosine_similarities, data)

if recommended_items is not None:
    print(recommended_items)

    # Visualising the recommended Clothing ID
    plt.figure(figsize=(12, 6))
    plt.bar(recommended_items['Clothing ID'], recommended_items['Rating'], color='red')
    plt.xlabel('Clothing ID')
    plt.ylabel('Rating')
    plt.title(f'Recommended Items for Clothing ID {Clothing_id}')
    plt.xticks(rotation=45)  
    plt.grid(axis='y')  
    plt.show()


       Clothing ID                            Title  \
19661         1104  Please make more like this one!   
14575          820                 Wonderful shirt!   
7012          1092              Fabulous fall dress   
14574          860                     Gorgeous top   
7014          1092              Tons of compliments   
...            ...                              ...   
15674          859             Good just not great.   
5462          1020                             Cute   
16652         1086                   Classic winter   
4811           857     Tee shirt material... blowsy   
18873          861                         Awesome!   

                                             Review Text  Rating  
19661  This dress in a lovely platinum is feminine an...       5  
14575  I love this shirt! the pictures do not do it j...       5  
7012   This dress is beautiful - the pattern is prett...       5  
14574  I purchased this top in the purple. it is much...       5  
7014