In [None]:
# NOTE(review): `google.colab` is presumably only importable inside a Google
# Colab runtime, so this cell is expected to fail elsewhere — confirm before
# running locally.
from google.colab import files
# Ask the user for a file upload; presumably this is how the CSV read in a
# later cell reaches the working directory. The result is kept in `uploaded`,
# which no other visible cell reads.
uploaded = files.upload()

In [None]:
import pandas as pd
import matplotlib.pyplot as plt


In [None]:
# Load the retail sales CSV from the current working directory into `df`,
# the frame every later cell reads from.
df = pd.read_csv("retail_sales_dataset.csv")

# Quick view: show the first rows as this cell's output.
df.head()


In [None]:
# Print the frame summary, then show the column labels as the cell's output
# (only the last expression of a cell is displayed automatically).
df.info()
df.columns


# **Convert Date to real datetime**

In [None]:
# Replace the "Date" column with its pandas-parsed datetime form; the monthly
# trend cell later uses the `.dt` accessor on this column.
df["Date"] = pd.to_datetime(df["Date"])

In [None]:
# Print the frame summary again — presumably to confirm the "Date" conversion.
df.info()

# **Null Value Checking**

In [None]:
# Count the missing (NaN/NaT) entries in each column.
df.isna().sum()


# **Bar Chart (Top Categories by Total Sales)**

In [None]:
# Sum "Total Amount" within each product category, then rank the categories
# from the largest total to the smallest.
totals_by_category = df.groupby("Product Category")["Total Amount"].sum()
category_sales = totals_by_category.sort_values(ascending=False)

# One bar per category, drawn through the explicit Figure/Axes interface.
fig, ax = plt.subplots(figsize=(8, 5))
ax.bar(category_sales.index, category_sales.values)
ax.set(
    title="Total Sales by Product Category",
    xlabel="Product Category",
    ylabel="Total Sales (Total Amount)",
)
# Horizontal grid lines only, to make bar heights easier to compare.
ax.grid(True, axis="y")
plt.show()


# **Line Chart (Monthly Sales Trend)**

In [None]:
# Tag every row with the timestamp that starts its calendar month, so that
# all transactions from the same month share one grouping key.
month_periods = df["Date"].dt.to_period("M")
df["Month"] = month_periods.dt.to_timestamp()

# Total "Total Amount" per month, ordered chronologically by the month key.
monthly_sales = df.groupby("Month")["Total Amount"].sum().sort_index()

# Line chart of the monthly totals with a marker on every month.
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(monthly_sales.index, monthly_sales.values, marker="o")
ax.set(
    title="Monthly Sales Trend",
    xlabel="Month",
    ylabel="Total Sales (Total Amount)",
)
ax.grid(True)
plt.show()


In [None]:
# Find the month with the highest total sales and the value it reached.
# `monthly_sales` is the per-month "Total Amount" series built in the
# previous cell, indexed by month-start timestamps.
peak_month = monthly_sales.idxmax()
peak_value = monthly_sales.max()

# Redraw the monthly trend and mark the peak on top of it.
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(monthly_sales.index, monthly_sales.values, marker="o")
# Highlight point. Scatter collections are drawn below lines by default, so
# raise the z-order to keep the marker above the trend line, and give it an
# explicit contrasting color instead of relying on the default color cycle.
ax.scatter([peak_month], [peak_value], s=150, color="red", zorder=3)
ax.set_title(f"Monthly Sales Trend (Peak: {peak_month.strftime('%b %Y')} = {peak_value})")
ax.set_xlabel("Month")
ax.set_ylabel("Total Sales (Total Amount)")
ax.grid(True)
plt.show()


# **Histogram (Distribution of Total Amount)**

In [None]:
# Histogram of per-transaction "Total Amount", split into 20 bins.
fig, ax = plt.subplots(figsize=(8, 5))
ax.hist(df["Total Amount"], bins=20)
ax.set(
    title="Distribution of Total Amount (Transaction Value)",
    xlabel="Total Amount",
    ylabel="Count of Transactions",
)
ax.grid(True)
plt.show()


# **Scatter Plot**

In [None]:
# Scatter of unit price against transaction total; the partial transparency
# lets overlapping points show up as darker spots.
fig, ax = plt.subplots(figsize=(7, 5))
ax.scatter(df["Price per Unit"], df["Total Amount"], alpha=0.6)
ax.set(
    title="Price per Unit vs Total Amount",
    xlabel="Price per Unit",
    ylabel="Total Amount",
)
ax.grid(True)
plt.show()


# **3 Insights**

1. Category insight: Electronics has the highest total sales, slightly above Clothing, while Beauty has the lowest. This suggests that Electronics drives the most revenue overall.

2. Trend insight: Monthly sales peak around May 2023, showing a strong spike compared to other months. January 2024 is extremely low, most likely because the dataset contains only a few days of data for that month (a partial month) rather than because of a real drop in sales.

3. Distribution + correlation insight: The histogram shows that many transactions are small, while a few very large transactions exist (outliers). The scatter plot shows that Total Amount increases strongly with Price per Unit, meaning that expensive items are a major reason for high bills.