# In [5]:
import streamlit as st
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Load the product table and, in the same step, discard every row that lacks
# one of the columns the filters, metrics and charts below rely on.
df = pd.read_csv("product_info.csv").dropna(
    subset=["brand_name", "primary_category", "rating", "child_max_price"],
)

# Sidebar filters
st.sidebar.title("Filters")

# Brand / category pickers: every distinct value is selectable, and the first
# few distinct values are pre-selected as the default.
brand_options = df["brand_name"].unique()
category_options = df["primary_category"].unique()
brands = st.sidebar.multiselect(
    "Select Brands", options=brand_options, default=brand_options[:5]
)
categories = st.sidebar.multiselect(
    "Select Categories", options=category_options, default=category_options[:3]
)

# The slider bounds come from the data itself.
price_min = float(df["child_max_price"].min())
price_max = float(df["child_max_price"].max())
if price_min < price_max:
    # Streamlit rejects a default value that lies outside [min, max], so the
    # preferred 10-100 window is clamped to whatever the data actually spans.
    default_low = min(max(10.0, price_min), price_max)
    default_high = min(max(100.0, price_min), price_max)
    price_range = st.sidebar.slider(
        "Price Range", price_min, price_max, (default_low, default_high)
    )
else:
    # Degenerate range (a single distinct price, or NaN bounds when no rows
    # survived the cleaning step): a slider cannot be drawn, so fall back to
    # the data bounds as the selected range.
    price_range = (price_min, price_max)

# Apply the sidebar selections: a row is kept only if its brand and category
# are among the chosen ones and its price lies inside the chosen range
# (both ends inclusive).
brand_mask = df["brand_name"].isin(brands)
category_mask = df["primary_category"].isin(categories)
price_mask = df["child_max_price"].between(price_range[0], price_range[1])
filtered_df = df[brand_mask & category_mask & price_mask]

# Metrics
st.title("Sephora Product Explorer")
if filtered_df.empty:
    # The mean of an empty selection is NaN; show a readable placeholder
    # instead of "nan" / "$nan".
    st.warning("No products match the current filters.")
    st.metric("Average Rating", "N/A")
    st.metric("Average Price", "N/A")
else:
    st.metric("Average Rating", round(filtered_df["rating"].mean(), 2))
    # Fixed two-decimal formatting so prices read as "$12.50", not "$12.5".
    st.metric("Average Price", f"${filtered_df['child_max_price'].mean():.2f}")

# Scatter plot of rating against price for the filtered products
st.subheader("Rating vs. Price")
fig1, ax1 = plt.subplots()
sns.scatterplot(data=filtered_df, x="child_max_price", y="rating", ax=ax1, color="purple")
st.pyplot(fig1)
# Streamlit re-runs this script on every widget interaction; without an
# explicit close, each run leaves another figure registered with pyplot.
plt.close(fig1)

# Rating histogram (20 bins) with a KDE curve overlaid, for the filtered products
st.subheader("Rating Distribution")
fig2, ax2 = plt.subplots()
sns.histplot(filtered_df["rating"], bins=20, kde=True, ax=ax2, color="orange")
st.pyplot(fig2)
# Streamlit re-runs this script on every widget interaction; without an
# explicit close, each run leaves another figure registered with pyplot.
plt.close(fig2)

# Table of the filtered products, restricted to the display columns and
# renumbered from zero so the original row labels are not shown.
st.subheader("Filtered Products")
display_columns = ["product_name", "brand_name", "rating", "child_max_price"]
products_table = filtered_df[display_columns].reset_index(drop=True)
st.dataframe(products_table)



# Notebook cell output: DeltaGenerator()