In [87]:
import spacy
from textblob import TextBlob
import plotly.express as px
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [88]:
file_path = "../data/All_Beauty.jsonl"
df_reviews = pd.read_json(file_path, lines=True)
file_path = "../data/meta_All_Beauty.jsonl"
df_meta = pd.read_json(file_path, lines=True)

In [104]:
# df = df_reviews.join(df_meta, on=["parent_asin"], how="inner")
df = pd.merge(df_reviews, df_meta, on=["parent_asin"], how="inner")

In [105]:
df = df.sample(frac=0.01).reset_index(drop=True)

In [106]:
df.rename(columns={"title_x": "review_title", "title_y": "title", "text": "review"}, inplace=True)
df["review_input"] = df["review_title"] + df["review"]

In [107]:
nlp = spacy.load('en_core_web_sm')

def clean_text(text):
    doc = nlp(text)
    cleaned_tokens = [token.lemma_.lower() for token in doc if not token.is_stop and not token.is_punct and token.is_alpha]
    return " ".join(cleaned_tokens)

In [108]:
def get_sentiment(text):
    blob = TextBlob(text)
    return blob.sentiment.polarity

In [109]:
# df['cleaned_text'] = df['text'].apply(clean_text)

In [110]:
df['sentiment'] = df['review_input'].apply(get_sentiment)

In [111]:
px.histogram(df, x='sentiment', nbins=50, title="Sentiment scores histogram").show()

In [112]:
px.violin(df, x='rating', y='sentiment', title="Violin plots for sentiment by rating", box=True).show()

In [113]:
vectorizer = TfidfVectorizer(stop_words='english', max_df=0.95, min_df=2, ngram_range=(1, 1))
tfidf_matrix = vectorizer.fit_transform(df['review_input'])

In [114]:
cosine_sim = cosine_similarity(tfidf_matrix)

In [115]:
product_id = df["asin"].values[0]

In [116]:
idx = df.index[df['asin'] == product_id][0]

In [121]:
df.loc[idx]

rating                                                               5
review_title                                            Good hairspray
review                          Great hold. Brushes out. No real smell
images_x                                                            []
asin                                                        B00H37H8TE
parent_asin                                                 B09C2ZW2GL
user_id                                   AHYW6FOFR3F2CESDWKS3IAGUS3FA
timestamp                                   2019-01-02 02:26:12.369000
helpful_vote                                                         0
verified_purchase                                                 True
main_category                                               All Beauty
title                        Tigi Bedhead Hard Head Hairspray (6 Pack)
average_rating                                                     4.8
rating_number                                                     7060
featur

In [118]:
top_n = 10

sim_scores = list(enumerate(cosine_sim[idx]))
sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
content_recommendations_idx = [i[0] for i in sim_scores[1:top_n+1]]

In [126]:
content_recommendations_idx

[6733, 6495, 5451, 4147, 4267, 6954, 827, 2619, 5120, 3876]

In [124]:
df.loc[content_recommendations_idx][["title"]].to_dict("records")

[{'title': 'Professional 24 Piece All Natural Makeup Brushes Kit Includes Make Up Brush Leather Organizer Case/Bag - Top Rated Set'},
 {'title': 'Farmona Perfect Beauty Very Mature Skin 60 + Moisturizing Day Cream'},
 {'title': 'Easkep Nail Art Brushes, Acrylic Art Design Dotting Painting Brush Pen Set, Nail Art Tips Builder Brush Nail Painting Pen Set for acrylic application, Nail Art Dust Remover Brushes Powder 6 PCS'},
 {'title': 'Duke Cannon Supply Co. THICK High-Viscosity Body Wash for Men - Smells Like Old Glory - Tobacco, Cedarwood, Amber, 3.4 Fl Oz, Travel Size Mini'},
 {'title': '1 oz, Light Orange - Bargz Perfume - Rossas The Chlo Body Oil For Women Scented Fragrance'},
 {'title': 'Ebeauty Makeup Brushes 24 Piece Set Professional Natural Soft Hair Wooden Handle Cosmetic Tool Foundation Brush Kit with Black Synthetic Leather Case'},
 {'title': 'Beyond The Zone Frozen Stiff Ultimate Hold Hair Spray, 10oz'},
 {'title': 'Tigi Bedhead Hard Head Hairspray (6 Pack)'},
 {'title': "Po