In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Notebook-wide display defaults: seaborn "whitegrid" theme, and no cap on
# how many DataFrame columns pandas renders.
sns.set_theme(style="whitegrid")
pd.options.display.max_columns = None

# Read the cleaned deals CSV (path is relative to the working directory)
# and preview its first rows as the cell output.
DEALS_CSV = "cleaned_ebay_deals.csv"
df = pd.read_csv(DEALS_CSV)
df.head(5)


In [None]:
# Parse the scrape timestamp and derive the hour of day.
# errors="coerce" turns unparseable values into NaT instead of raising, which
# leaves NaN in "hour" for those rows.
df["timestamp"] = pd.to_datetime(df["timestamp"], errors="coerce")
df["hour"] = df["timestamp"].dt.hour

# Plot whole-number hours only: once any timestamp fails to parse, "hour" is a
# float column and the categorical axis would be labelled from floats
# (e.g. "13.0" instead of "13"). The df["hour"] column itself is left as-is.
valid_hours = df["hour"].dropna().astype(int)

# Number of deals per hour.
# `palette` without `hue` is deprecated in seaborn 0.13 (removal planned for
# 0.14); assigning the x variable to `hue` with legend=False is the documented
# replacement. Note: `legend=` on categorical plots needs seaborn >= 0.13.
hour_fig, hour_ax = plt.subplots(figsize=(10, 5))
sns.countplot(
    x=valid_hours,
    hue=valid_hours,
    palette="Greens_d",
    legend=False,
    ax=hour_ax,
)
hour_ax.set_title("Number of Deals Scraped per Hour")
hour_ax.set_xlabel("Hour of Day")
hour_ax.set_ylabel("Count of Deals")
plt.show()


In [None]:
# Two side-by-side views of the price column:
# left  - histogram (30 bins) with a KDE overlay
# right - box plot
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(14, 5))
hist_panel, box_panel = ax
prices = df["price"]

sns.histplot(prices, bins=30, kde=True, ax=hist_panel)
hist_panel.set(title="Distribution of Product Prices")

sns.boxplot(x=prices, ax=box_panel)
box_panel.set(title="Boxplot of Product Prices")

plt.show()


In [None]:
# Scatter of the original price (x) against the discounted deal price (y).
scatter_fig, scatter_ax = plt.subplots(figsize=(7, 5))
sns.scatterplot(data=df, x="original_price", y="price", ax=scatter_ax)
scatter_ax.set(
    title="Original vs Discounted Prices",
    xlabel="Original Price ($)",
    ylabel="Discounted Price ($)",
)
plt.show()


In [None]:
# Histogram (30 bins, KDE overlay) of the discount_percentage column.
discount_fig, discount_ax = plt.subplots(figsize=(8, 5))
sns.histplot(
    df["discount_percentage"],
    bins=30,
    kde=True,
    color="green",
    ax=discount_ax,
)
discount_ax.set(
    title="Distribution of Discount Percentage",
    xlabel="Discount (%)",
    ylabel="Frequency",
)
plt.show()


In [None]:
# How often each distinct value of the "shipping" column occurs
# (value_counts orders them from most to least frequent).
shipping_counts = df["shipping"].value_counts()

# One bar per shipping option.
# `palette` without `hue` is deprecated in seaborn 0.13 (removal planned for
# 0.14); assigning the x variable to `hue` with legend=False is the documented
# replacement. Note: `legend=` on categorical plots needs seaborn >= 0.13.
shipping_fig, shipping_ax = plt.subplots(figsize=(10, 5))
sns.barplot(
    x=shipping_counts.index,
    y=shipping_counts.values,
    hue=shipping_counts.index,
    palette="Blues_r",
    legend=False,
    ax=shipping_ax,
)
shipping_ax.set_title("Frequency of Shipping Options")
shipping_ax.set_ylabel("Count")
shipping_ax.set_xlabel("Shipping Info")
# Tilt the tick labels and right-align them so they line up with their bars.
plt.setp(shipping_ax.get_xticklabels(), rotation=45, ha="right")
plt.show()


In [None]:
keywords = ["Apple", "Samsung", "Laptop", "iPhone", "Tablet", "Gimbal"]

# Number of titles containing each keyword as a substring (case-insensitive;
# rows with a missing title count as no match).
# regex=False matches the keyword as literal text, so a keyword added later
# that contains regex metacharacters (e.g. "C++") is not read as a pattern.
freq = {
    k: df["title"].str.contains(k, case=False, na=False, regex=False).sum()
    for k in keywords
}

# One bar per keyword.
# `palette` without `hue` is deprecated in seaborn 0.13 (removal planned for
# 0.14); assigning the x variable to `hue` with legend=False is the documented
# replacement. Note: `legend=` on categorical plots needs seaborn >= 0.13.
keyword_labels = list(freq.keys())
keyword_fig, keyword_ax = plt.subplots(figsize=(8, 5))
sns.barplot(
    x=keyword_labels,
    y=list(freq.values()),
    hue=keyword_labels,
    palette="Greens_r",
    legend=False,
    ax=keyword_ax,
)
keyword_ax.set_title("Keyword Frequency in Product Titles")
keyword_ax.set_ylabel("Occurrences")
keyword_ax.set_xlabel("Keyword")
plt.show()


In [None]:
# NOTE(review): this cell repeats the keyword-frequency chart of the preceding
# cell (same keywords, same plot). It is a candidate for removal; kept here so
# the notebook's cell structure is unchanged.
keywords = ["Apple", "Samsung", "Laptop", "iPhone", "Tablet", "Gimbal"]

# Number of titles containing each keyword as a substring (case-insensitive;
# rows with a missing title count as no match).
# regex=False matches the keyword as literal text, so a keyword added later
# that contains regex metacharacters (e.g. "C++") is not read as a pattern.
freq = {
    k: df["title"].str.contains(k, case=False, na=False, regex=False).sum()
    for k in keywords
}

# One bar per keyword.
# `palette` without `hue` is deprecated in seaborn 0.13 (removal planned for
# 0.14); assigning the x variable to `hue` with legend=False is the documented
# replacement. Note: `legend=` on categorical plots needs seaborn >= 0.13.
keyword_labels = list(freq.keys())
keyword_fig, keyword_ax = plt.subplots(figsize=(8, 5))
sns.barplot(
    x=keyword_labels,
    y=list(freq.values()),
    hue=keyword_labels,
    palette="Greens_r",
    legend=False,
    ax=keyword_ax,
)
keyword_ax.set_title("Keyword Frequency in Product Titles")
keyword_ax.set_ylabel("Occurrences")
keyword_ax.set_xlabel("Keyword")
plt.show()


In [None]:
# Unsigned gap between original and deal price, stored as a new column.
# Taking .abs() means the sign (which of the two prices is larger) is discarded.
price_gap = df["original_price"].sub(df["price"]).abs()
df["price_difference"] = price_gap

# Histogram (25 bins, KDE overlay) of that gap.
gap_fig, gap_ax = plt.subplots(figsize=(8, 5))
sns.histplot(
    df["price_difference"],
    bins=25,
    kde=True,
    color="orange",
    ax=gap_ax,
)
gap_ax.set(
    title="Distribution of Price Differences",
    xlabel="Price Difference ($)",
    ylabel="Frequency",
)
plt.show()


In [None]:
# The five rows with the largest discount_percentage, shown with only the
# columns needed to read the deal.
summary_columns = ["title", "price", "original_price", "discount_percentage"]
ranked_by_discount = df.sort_values(by="discount_percentage", ascending=False)
top5 = ranked_by_discount.iloc[:5]
top5.loc[:, summary_columns]