# Day_03 – Rossmann Sales Forecasting and Store Performance Analysis

### 📌 Objective:
- Clean and explore Rossmann store data
- Analyze patterns in sales, promotions, holidays, and store types
- Prepare insights for Power BI dashboard


In [None]:
# 📦 Importing Libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

# `sns.set` is only an alias for `sns.set_theme`; seaborn documents
# `set_theme` as the preferred entry point (requires seaborn >= 0.11).
sns.set_theme(style="whitegrid")

In [None]:
# 📂 Load Dataset
# Read the whole CSV in one pass (low_memory=False) so pandas infers each
# column's dtype from the full file rather than chunk by chunk.
df = pd.read_csv(
    filepath_or_buffer='Dataset/Rossmann.csv',
    low_memory=False,
)
# Preview the first five rows.
df.head(5)

In [None]:
# 🧼 Basic Data Cleaning
# Parse the date strings once so the `.dt` accessor is available downstream.
df['Date'] = pd.to_datetime(df['Date'])

# Columns with a sensible default when the value is missing:
#   Open -> 1, Promo -> 0, SchoolHoliday -> 0,
#   CompetitionDistance -> median of the observed distances.
fill_defaults = {
    'Open': 1,
    'Promo': 0,
    'SchoolHoliday': 0,
    'CompetitionDistance': df['CompetitionDistance'].median(),
}
df = df.fillna(fill_defaults)

# Only discard rows that lack a value in a column this analysis actually reads.
# A blanket `dropna()` removes a row as soon as *any* column is NaN, which
# silently throws away otherwise valid sales records.
# NOTE(review): the CSV schema is not visible here — if later cells rely on
# other columns being NaN-free, add them to this list.
required_columns = ['Date', 'Sales', 'Customers', 'StoreType']
df = df.dropna(subset=required_columns)
df.info()

In [None]:
# 🧠 Feature Engineering
# Calendar parts derived from the parsed `Date` column.
df['Year'] = df['Date'].dt.year
df['Month'] = df['Date'].dt.month
df['Day'] = df['Date'].dt.day
# ISO-8601 week number. Around New Year the ISO week can belong to a different
# ISO year than `Year` (the calendar year), so do not treat (Year, WeekOfYear)
# as a unique week key without checking.
df['WeekOfYear'] = df['Date'].dt.isocalendar().week

# Average basket value. Rows with zero customers would otherwise yield
# inf (sales / 0) or NaN (0 / 0); masking the zero denominators makes the
# ratio consistently NaN there, so aggregations skip those rows instead of
# being dominated by infinities.
customers_nonzero = df['Customers'].where(df['Customers'] != 0)
df['Sales_per_Customer'] = df['Sales'] / customers_nonzero
df.head()

In [None]:
# 📊 Sales Distribution by StoreType
# One box per store type, showing the spread of the Sales column.
fig, ax = plt.subplots(figsize=(8, 6))
sns.boxplot(data=df, x='StoreType', y='Sales', ax=ax)
ax.set_title('Sales Distribution by StoreType')
plt.show()

In [None]:
# 📈 Monthly Sales Trend
# Total sales per (Year, Month), kept as regular columns.
monthly_sales = df.groupby(['Year', 'Month'], as_index=False)['Sales'].sum()

# Build a first-of-month timestamp for each group so the x-axis is a real
# date axis rather than two separate integer columns.
month_start_parts = monthly_sales[['Year', 'Month']].assign(DAY=1)
monthly_sales['Date'] = pd.to_datetime(month_start_parts)

fig, ax = plt.subplots(figsize=(12, 6))
sns.lineplot(data=monthly_sales, x='Date', y='Sales', ax=ax)
ax.set_title('Monthly Sales Trend')
ax.set_xlabel('Date')
ax.set_ylabel('Total Sales')
plt.show()

In [None]:
# 🎯 Promo vs Non-Promo Sales
# Mean of Sales for each value of the Promo flag.
promo_comparison = df.groupby('Promo', as_index=False)['Sales'].mean()

ax = sns.barplot(data=promo_comparison, x='Promo', y='Sales')
ax.set_title('Average Sales: Promo vs No Promo')
# Replace the raw flag values on the x-axis with readable labels.
ax.set_xticks([0, 1])
ax.set_xticklabels(['No Promo', 'Promo'])
plt.show()