In [5]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os

# Apply the seaborn "whitegrid" theme to every chart produced below.
# `sns.set` is only an alias of `sns.set_theme`; the latter is the preferred
# interface, so call it directly.
sns.set_theme(style="whitegrid")

# Ensure charts folder exists (no error if it is already there)
os.makedirs("charts", exist_ok=True)

# ✅ Load dataset from local file
# Relative path: resolved against the current working directory.
file_path = "Sample - Superstore.csv"

try:
    # The file is decoded as ISO-8859-1 rather than pandas' default UTF-8.
    df = pd.read_csv(file_path, encoding="ISO-8859-1")
    print("✅ Dataset loaded successfully!")
except FileNotFoundError:
    # Report the missing path, then let the error propagate.
    print(f"❌ File not found: {file_path}")
    raise
except Exception as exc:
    # Any other load failure: report it and re-raise unchanged.
    print("❌ Error loading dataset:", exc)
    raise

# ✅ Preprocessing
# Parse order dates; values that cannot be parsed become NaT (errors='coerce').
df['Order Date'] = pd.to_datetime(df['Order Date'], errors='coerce')

# Rows without a usable order date are dropped, but report how many first so
# that a date-format problem in the input does not silently shrink every
# chart generated below.
invalid_dates = int(df['Order Date'].isna().sum())
if invalid_dates:
    print(f"⚠️ Dropping {invalid_dates} row(s) with a missing or unparseable 'Order Date'.")
df.dropna(subset=['Order Date'], inplace=True)

# 'YYYY-MM' string key used to group sales by calendar month.
df['YearMonth'] = df['Order Date'].dt.to_period('M').astype(str)

# 1. Sales and Profit by Category
# Sum Sales and Profit per Category and draw them as side-by-side bars.
category_profit = df.groupby('Category')[['Sales', 'Profit']].sum()
ax = category_profit.plot.bar(figsize=(8, 5), rot=0)
ax.set_title('Sales and Profit by Category')
ax.set_ylabel('Amount')
ax.figure.tight_layout()
ax.figure.savefig("charts/sales_profit_by_category.png")
plt.close(ax.figure)  # release the figure once it is written to disk

# 2. Sales by Region
# One bar per Region; bar height is the sum of Sales, with no error bars.
fig, ax = plt.subplots(figsize=(7, 5))
sns.barplot(data=df, x='Region', y='Sales', estimator=sum, errorbar=None, ax=ax)
ax.set_title('Total Sales by Region')
ax.set_ylabel('Total Sales')
ax.tick_params(axis='x', labelrotation=45)
fig.tight_layout()
fig.savefig("charts/sales_by_region.png")
plt.close(fig)

# 3. Monthly Sales Trend
# Total Sales per 'YearMonth' key, kept as a two-column frame for plotting.
monthly_sales = df.groupby('YearMonth', as_index=False)['Sales'].sum()
fig, ax = plt.subplots(figsize=(10, 5))
sns.lineplot(data=monthly_sales, x='YearMonth', y='Sales', marker='o', ax=ax)
ax.set_title('Monthly Sales Trend')
ax.set_xlabel('Month')
ax.set_ylabel('Sales')
ax.tick_params(axis='x', labelrotation=45)
fig.tight_layout()
fig.savefig("charts/monthly_sales_trend.png")
plt.close(fig)

# 4. Discount vs Profit Scatter Plot
# One point per row, coloured by Category; alpha keeps overlapping points visible.
fig, ax = plt.subplots(figsize=(8, 5))
sns.scatterplot(data=df, x='Discount', y='Profit', hue='Category', alpha=0.6, ax=ax)
ax.set_title('Discount vs Profit by Category')
fig.tight_layout()
fig.savefig("charts/discount_vs_profit.png")
plt.close(fig)

# 5. Profit Heatmap by Sub-Category & Region
# Rows are Sub-Category, columns are Region, cells hold summed Profit.
pivot = pd.pivot_table(
    df,
    values='Profit',
    index='Sub-Category',
    columns='Region',
    aggfunc='sum',
)
fig, ax = plt.subplots(figsize=(10, 6))
# center=0 puts the midpoint of the diverging colormap at zero profit.
sns.heatmap(pivot, annot=True, fmt='.0f', cmap='coolwarm', center=0, ax=ax)
ax.set_title('Profit Heatmap: Sub-Category vs Region')
fig.tight_layout()
fig.savefig("charts/profit_heatmap.png")
plt.close(fig)

# 6. Top 10 Products by Sales
# Total Sales per product, largest first, keeping the first ten entries.
sales_by_product = df.groupby('Product Name')['Sales'].sum()
top_products = sales_by_product.sort_values(ascending=False).iloc[:10]
fig, ax = plt.subplots(figsize=(10, 6))
# Horizontal bars: sales on the x-axis, product names on the y-axis.
sns.barplot(x=top_products.values, y=top_products.index, ax=ax)
ax.set_title('Top 10 Products by Sales')
ax.set_xlabel('Sales')
fig.tight_layout()
fig.savefig("charts/top_10_products.png")
plt.close(fig)

print("✅ All charts generated and saved in the 'charts/' folder.")


✅ Dataset loaded successfully!
✅ All charts generated and saved in the 'charts/' folder.
