In [None]:
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
import re

%matplotlib inline

# Path of the deals CSV; it is relative, so it is resolved against the
# kernel's current working directory.
csv_file = 'cleaned_ebay_deals.csv'
# Load the whole file into the notebook-wide DataFrame used by the cells below.
df = pd.read_csv(csv_file)

In [None]:
def prepare_timestamp_and_hour(dataframe):
    """Return a copy of ``dataframe`` sorted by time with an ``hour`` column.

    The ``timestamp`` column is parsed with the strict format
    ``%Y-%m-%d %H:%M:%S``, the rows are ordered by the parsed timestamp
    (original index labels are kept), and an ``hour`` column holding the
    hour-of-day of each timestamp is appended.

    The input frame is left untouched: a new DataFrame is built and returned,
    so re-running the calling cell or reusing the raw frame elsewhere is safe.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must contain a ``timestamp`` column.

    Returns
    -------
    pandas.DataFrame
        New frame with parsed ``timestamp``, sorted ascending by it, plus
        the derived ``hour`` column.

    Raises
    ------
    KeyError
        If ``dataframe`` has no ``timestamp`` column.
    ValueError
        Raised by ``pd.to_datetime`` when a value does not match the format
        (pandas' default ``errors='raise'`` behaviour).
    """
    parsed = pd.to_datetime(dataframe['timestamp'], format='%Y-%m-%d %H:%M:%S')
    return (
        dataframe
        # assign() works on a copy, so the caller's frame is never modified.
        .assign(timestamp=parsed)
        .sort_values(by='timestamp')
        .assign(hour=lambda frame: frame['timestamp'].dt.hour)
    )

def get_deals_per_hour(dataframe):
    """Count the rows of ``dataframe`` for each distinct ``hour`` value.

    Returns a pandas Series indexed by ``hour`` whose values are the group
    sizes. Only hours that actually occur in the data appear in the result.
    """
    rows_by_hour = dataframe.groupby(by='hour')
    hourly_counts = rows_by_hour.size()
    return hourly_counts

def plot_deals_bar_chart(deals_per_hour_series):
    """Draw and show a bar chart of the given per-hour deal counts.

    Parameters
    ----------
    deals_per_hour_series : pandas.Series
        Series to plot; its values become the bar heights and its index the
        x-axis categories.

    The figure is shown with ``plt.show()``; nothing is returned.
    """
    fig, ax = plt.subplots(figsize=(10, 6))
    deals_per_hour_series.plot(kind='bar', ax=ax, color='blue', edgecolor='black')

    ax.set_title('Number of Deals per Hour')
    ax.set_xlabel('Hour of Day')
    ax.set_ylabel('Number of Deals')

    # Force horizontal x tick labels.
    plt.setp(ax.get_xticklabels(), rotation=0)

    fig.tight_layout()
    plt.show()

# Rebind df to the frame with a parsed timestamp and the derived hour column,
# then aggregate the number of deals per hour and chart the result.
df = df.pipe(prepare_timestamp_and_hour)
deals_per_hour = df.pipe(get_deals_per_hour)
plot_deals_bar_chart(deals_per_hour)

In [None]:
# Histogram of the price column (30 bins).
fig, ax = plt.subplots(figsize=(12, 6))
sns.histplot(df['price'], bins=30, color='orange', edgecolor='black', ax=ax)
ax.set_title('Distribution of Product Prices - Histogram')
ax.set_xlabel('Price (USD)')
ax.set_ylabel('Frequency')
plt.show()

# Box plot of the same price column.
fig, ax = plt.subplots(figsize=(8, 6))
sns.boxplot(x=df['price'], color='mediumpurple', ax=ax)
ax.set_title('Distribution of Product Prices - Boxplot')
ax.set_xlabel('Price (USD)')
plt.show()

# Scatter of original_price (x) against price (y), with a background grid.
fig, ax = plt.subplots(figsize=(8, 6))
sns.scatterplot(x='original_price', y='price', data=df, alpha=0.7, color='darkcyan', ax=ax)
ax.set_title('Scatter Plot: Original Price vs Price')
ax.set_xlabel('Original Price (USD)')
ax.set_ylabel('Price (USD)')
ax.grid(True)
plt.show()

# Histogram of discount_percentage (30 bins) with a KDE curve overlaid.
fig, ax = plt.subplots(figsize=(12, 6))
sns.histplot(df['discount_percentage'], bins=30, kde=True, color='goldenrod', edgecolor='black', ax=ax)
ax.set_title('Distribution of Discount Percentage')
ax.set_xlabel('Discount Percentage (%)')
ax.set_ylabel('Count')
plt.show()

In [None]:
# Tally how many rows carry each distinct value of the shipping column.
shipping_counts = df['shipping'].value_counts()
print(shipping_counts)

# One bar per shipping option, coloured from a fixed five-colour list.
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(
    x=shipping_counts.index,
    y=shipping_counts.values,
    palette=['#FF7F50', '#6495ED', '#FFD700', '#90EE90', '#DA70D6'],
    ax=ax,
)

ax.set_title('Frequency of Shipping Options')
ax.set_xlabel('Shipping Options')
ax.set_ylabel('Count')
# Tilt the category labels by 45 degrees.
plt.setp(ax.get_xticklabels(), rotation=45)
fig.tight_layout()
plt.show()

In [None]:
keywords = ["Apple", "Samsung", "Laptop", "iPhone", "Tablet", "Gimbal"]

# Total case-insensitive occurrences of each keyword across all titles.
# Series.str.count interprets its pattern as a regular expression, so every
# keyword is escaped to guarantee a literal match even if a keyword containing
# regex metacharacters (e.g. "C++" or "(Renewed)") is added to the list later.
# Missing titles yield NaN counts that .sum() skips (turning the total into a
# float), hence the int() cast to keep plain integer totals.
keyword_counts = {
    keyword: int(
        df['title'].str.count(re.escape(keyword), flags=re.IGNORECASE).sum()
    )
    for keyword in keywords
}

keyword_counts_series = pd.Series(keyword_counts)

# One bar per keyword, in the order the keywords are listed above; the palette
# supplies one colour per keyword.
plt.figure(figsize=(10, 6))
ax = sns.barplot(
    x=keyword_counts_series.index,
    y=keyword_counts_series.values,
    palette=['#FF6347', '#4682B4', '#32CD32', '#FFD700', '#9370DB', '#20B2AA'] 
)

plt.title('Keyword Frequency in Product Titles')
plt.xlabel('Keyword')
plt.ylabel('Frequency')
plt.tight_layout()
plt.show()

In [None]:
# Absolute discount = original_price minus price, stored as a new column on df.
df['discount_abs'] = df['original_price'].sub(df['price'])

# Histogram of the absolute discount (30 bins) with a KDE curve overlaid.
fig, ax = plt.subplots(figsize=(10, 6))
sns.histplot(
    df['discount_abs'],
    bins=30,
    kde=True,
    color='mediumslateblue',
    edgecolor='black',
    ax=ax,
)
ax.set_title('Histogram of Absolute Price Discount')
ax.set_xlabel('Absolute Discount (USD)')
ax.set_ylabel('Frequency')
fig.tight_layout()
plt.show()

In [None]:
def get_top_discounted_deals(dataframe, top_n=5):
    """Return the ``top_n`` rows with the highest ``discount_percentage``.

    Rows are ordered by ``discount_percentage`` descending, then only the
    first row for each ``title`` is kept (i.e. the one with the largest
    discount for that title), and finally the leading ``top_n`` rows are
    returned.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must contain ``discount_percentage`` and ``title`` columns.
    top_n : int, default 5
        Number of rows to return.

    Returns
    -------
    pandas.DataFrame
        At most ``top_n`` rows, one per title, highest discount first.
    """
    ranked = dataframe.sort_values(by="discount_percentage", ascending=False)
    one_row_per_title = ranked.drop_duplicates(subset=["title"], keep="first")
    return one_row_per_title.head(top_n)

# Select the five highest-discount deals (one row per title) and render them
# with the notebook's rich display.
top5_deals = df.pipe(get_top_discounted_deals, top_n=5)
display(top5_deals)
