<a href="https://colab.research.google.com/github/Abubakar-2004/abubakar.github.io/blob/main/Reviews_of_HydraGlow.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# IMPORTING LIBRARIES

In [None]:
import re
import pandas as pd
from collections import Counter
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

with open("moisturizer_reviews_X.txt", "r", encoding="utf-8") as f:
    moisturizer_reviews_X = f.readlines()

with open("moisturizer_reviews_lnstagram.txt", "r", encoding="utf-8") as f:
    moisturizer_reviews_Instagram = f.readlines()

reviews = moisturizer_reviews_X + moisturizer_reviews_Instagram
print("Total reviews:", len(reviews))

Total reviews: 1465


# Load Dataset


In [None]:
df=pd.read_csv("moisturizer_reviews_X.txt", sep='\t')
df.head()

Unnamed: 0,R001 | HydraGlow Moisturizer | eczema-prone skin | AM | ⭐️⭐️⭐️⭐️⭐️ | decent. too floral / absorbs fast. btw
0,R002 | HydraGlow | eczema-prone skin | before ...
1,R003 | Hydra-Glow | acne-prone skin | morning ...
2,R004 | Hydra-Glow | dry skin | before makeup |...
3,R005 | HydraGlow | oily skin | before makeup |...
4,R006 | Hydra-Glow | normal skin | AM | 5/5 | n...


In [None]:
df = pd.read_csv("moisturizer_reviews_lnstagram.txt", sep='\t')
df.head()

Unnamed: 0,"R001 | HydraGlow | sensitive skin | after shower | 4 stars | It works, but not perfectly. I used it for 30 days and tracked how my skin felt. I"
0,appreciated that separates with primer. It ten...
1,"floral. A plus is that shiny finish. For me, t..."
2,"Under makeup, it reduced the appearance of fla..."
3,due to the inclusion of barrier-friendly compo...
4,sensory irritation for some users.


# SENTIMENT REGEX

In [None]:

positive_re = re.compile(r"\b(good|great|amazing|love|hydrating|smooth|perfect|nice|soft|lightweight)\b", re.I)
negative_re = re.compile(r"\b(bad|greasy|burn|sting|breakout|pimples|irritating|heavy|dry|sticky)\b", re.I)

negation_re = re.compile(r"\b(not|didn't|never)\s+(good|great|like|love|bad)\b", re.I)
mixed_re = re.compile(r"\bbut\b", re.I)

#features
features = {
    "hydration": re.compile(r"hydration|moistur|dry", re.I),
    "texture": re.compile(r"texture|greasy|sticky|lightweight|heavy", re.I),
    "fragrance": re.compile(r"fragrance|scent|smell|perfume", re.I),
    "absorption": re.compile(r"absorb|sink|soak", re.I),
    "acne": re.compile(r"acne|pimple|breakout|zit", re.I),
    "sensitivity": re.compile(r"sensitive|sting|burn|irritat", re.I),
    "packaging": re.compile(r"packaging|bottle|pump|tube", re.I),
    "price": re.compile(r"price|cost|expensive|cheap|overpriced", re.I),
    "spf": re.compile(r"spf|sun|uv", re.I)
}


superiority_re = re.compile(r"best|holy grail|better than|beats|more .* than", re.I)

#superiority
weak_re = re.compile(r"not great|disappoint|overpriced|burn|sting|pimple|greasy", re.I)

# Comparison & brands
compare_re = re.compile(r"better than|worse than|vs|compared to|switched from", re.I)
brands_re = re.compile(r"cerave|cetaphil|olay|neutrogena", re.I)


# SENTIMENT CLASSIFICATION


In [None]:
sentiment_count = Counter()

for r in reviews:
    pos = bool(positive_re.search(r))
    neg = bool(negative_re.search(r))
    negation = bool(negation_re.search(r))
    mixed = bool(mixed_re.search(r))

    if negation:
        sentiment_count["Negative"] += 1
    elif pos and neg or mixed:
        sentiment_count["Mixed"] += 1
    elif pos:
        sentiment_count["Positive"] += 1
    elif neg:
        sentiment_count["Negative"] += 1
    else:
        sentiment_count["Neutral"] += 1

sentiment_count


Counter({'Neutral': 946, 'Negative': 210, 'Mixed': 142, 'Positive': 167})

# Feature Frequency

In [None]:
feature_counts = Counter()

for r in reviews:
    for f, pattern in features.items():
        if pattern.search(r):
            feature_counts[f] += 1

feature_counts


Counter({'hydration': 526,
         'absorption': 95,
         'fragrance': 178,
         'price': 104,
         'acne': 185,
         'spf': 152,
         'packaging': 109,
         'texture': 137,
         'sensitivity': 208})

# Positive & Negative Features


In [None]:
feature_sentiment = {f: {"positive":0, "negative":0} for f in features}

for r in reviews:
    for f, pattern in features.items():
        if pattern.search(r):
            if positive_re.search(r):
                feature_sentiment[f]["positive"] += 1
            if negative_re.search(r):
                feature_sentiment[f]["negative"] += 1

feature_sentiment


{'hydration': {'positive': 192, 'negative': 179},
 'texture': {'positive': 71, 'negative': 90},
 'fragrance': {'positive': 77, 'negative': 43},
 'absorption': {'positive': 32, 'negative': 17},
 'acne': {'positive': 68, 'negative': 43},
 'sensitivity': {'positive': 71, 'negative': 39},
 'packaging': {'positive': 54, 'negative': 28},
 'price': {'positive': 40, 'negative': 26},
 'spf': {'positive': 68, 'negative': 35}}

# Superiorty Decision

In [None]:
superiority_lines = [r for r in reviews if superiority_re.search(r)]
len(superiority_lines)


155

# Weak Points

In [None]:
weak_points = Counter()

for r in reviews:
    if weak_re.search(r):
        for f, pattern in features.items():
            if pattern.search(r):
                weak_points[f] += 1

weak_points


Counter({'fragrance': 52,
         'price': 63,
         'acne': 65,
         'hydration': 143,
         'packaging': 38,
         'texture': 90,
         'absorption': 19,
         'sensitivity': 105,
         'spf': 44})

# Comparision and Brand Count

In [None]:
comparison_count = 0
brand_mentions = Counter()

for r in reviews:
    if compare_re.search(r):
        comparison_count += 1
    brands = brands_re.findall(r.lower())
    for b in brands:
        brand_mentions[b] += 1

comparison_count, brand_mentions


(79, Counter({'cerave': 18, 'cetaphil': 15, 'olay': 10, 'neutrogena': 17}))

# Export Report

In [None]:
report = {
    "Sentiment": dict(sentiment_count),
    "Top Features": dict(feature_counts.most_common(5)),
    "Feature Sentiment": feature_sentiment,
    "Superiority Mentions": {"count": len(superiority_lines)},
    "Weak Points": dict(weak_points),
    "Comparisons": {"count": comparison_count},
    "Brand Mentions": dict(brand_mentions)
}

pd.DataFrame.from_dict(report, orient="index").to_csv("hackathon_report.csv")
report

{'Sentiment': {'Neutral': 946, 'Negative': 210, 'Mixed': 142, 'Positive': 167},
 'Top Features': {'hydration': 526,
  'sensitivity': 208,
  'acne': 185,
  'fragrance': 178,
  'spf': 152},
 'Feature Sentiment': {'hydration': {'positive': 192, 'negative': 179},
  'texture': {'positive': 71, 'negative': 90},
  'fragrance': {'positive': 77, 'negative': 43},
  'absorption': {'positive': 32, 'negative': 17},
  'acne': {'positive': 68, 'negative': 43},
  'sensitivity': {'positive': 71, 'negative': 39},
  'packaging': {'positive': 54, 'negative': 28},
  'price': {'positive': 40, 'negative': 26},
  'spf': {'positive': 68, 'negative': 35}},
 'Superiority Mentions': {'count': 155},
 'Weak Points': {'fragrance': 52,
  'price': 63,
  'acne': 65,
  'hydration': 143,
  'packaging': 38,
  'texture': 90,
  'absorption': 19,
  'sensitivity': 105,
  'spf': 44},
 'Comparisons': {'count': 79},
 'Brand Mentions': {'cerave': 18,
  'cetaphil': 15,
  'olay': 10,
  'neutrogena': 17}}



---

