# Customer Segmentation and Sentiment Analysis

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from textblob import TextBlob
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
import re


In [None]:
# Read the reviews CSV into a DataFrame. The path is relative, so it is
# resolved against the current working directory.
df = pd.read_csv(
    filepath_or_buffer="../data/Womens Clothing E-Commerce Reviews.csv",
)
# Preview the first five rows.
df.head(n=5)


In [None]:
def clean_text(text):
    """Normalize a raw review into lowercase, letters-only, single-spaced text.

    Args:
        text: The raw review. Anything that is not a ``str`` (for example
            ``None`` or a float NaN) is treated as "no review".

    Returns:
        The lowercased text containing only the letters a-z, with words
        separated by single spaces and no leading/trailing whitespace.
        Returns ``""`` for non-string input.
    """
    if not isinstance(text, str):
        return ""

    lowered = text.lower()
    # Drop apostrophes (straight and curly) outright so that contractions
    # stay a single token: "don't" -> "dont".
    without_apostrophes = re.sub(r"['\u2019]", "", lowered)
    # Turn every other run of non-letters into ONE space instead of deleting
    # it; deleting would fuse words that are separated only by punctuation
    # ("fit,love" -> "fitlove"). This also collapses repeated whitespace.
    return re.sub(r"[^a-z]+", " ", without_apostrophes).strip()


In [None]:
# Store a normalized copy of every review in a new column; the raw
# "Review Text" column itself is not modified.
raw_reviews = df["Review Text"]
df["Cleaned_Review"] = raw_reviews.apply(clean_text)
# Preview the first few cleaned reviews.
df["Cleaned_Review"].head()


In [None]:
def get_sentiment(text):
    """Return the TextBlob polarity score for ``text``.

    Args:
        text: The review text to score.

    Returns:
        ``TextBlob(text).sentiment.polarity`` for a non-empty string, and
        ``0.0`` for an empty string or any non-string value (``None``, NaN, ...).
    """
    # A bare truthiness check is not enough: a float NaN (pandas' marker for
    # a missing value) is truthy and would be handed to TextBlob. Treat
    # anything that is not a non-empty string as neutral instead.
    if not isinstance(text, str) or not text:
        return 0.0
    return TextBlob(text).sentiment.polarity

# Score every cleaned review and keep the result alongside the review.
cleaned_reviews = df["Cleaned_Review"]
df["Sentiment"] = cleaned_reviews.apply(get_sentiment)
# Summary statistics of the sentiment scores.
df["Sentiment"].describe()


In [None]:
# Segment reviews with k-means on three numeric columns.
# Rows with a missing value in any of the three columns are dropped, and
# .copy() makes `features` an independent frame so that adding the "Cluster"
# column below can never be treated as a write into a slice of `df`.
features = df[["Age", "Rating", "Positive Feedback Count"]].dropna().copy()

# Standardize the columns so they contribute on comparable scales to the
# distances k-means computes.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(features)

# n_init is set explicitly: scikit-learn changed its default from 10 to
# "auto" in 1.4 (with a FutureWarning in 1.2/1.3), so leaving it implicit
# makes the clustering depend on the installed version even though
# random_state is fixed.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)
labels = kmeans.fit_predict(X_scaled)

# One label per row of X_scaled, in the same row order as `features`.
features["Cluster"] = labels
features.head()


In [None]:
# Scatter plot of age against positive-feedback count, coloured by cluster.
cluster_ax = sns.scatterplot(
    data=features,
    x="Age",
    y="Positive Feedback Count",
    hue="Cluster",
    palette="tab10",
)
cluster_ax.set_title("Customer Clusters")
plt.show()


In [None]:
# Histogram of the sentiment scores with a kernel density estimate overlaid.
sentiment_ax = sns.histplot(df["Sentiment"], kde=True)
sentiment_ax.set_title("Sentiment Score Distribution")
plt.show()
