# Import Libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
from sklearn.preprocessing import MinMaxScaler

# Load Data

In [None]:
# Input CSV, given as an absolute Windows-style path.
file_path = r"D:\Project\sales-analysis\data\AusApparalSales4thQrt2020.csv"

# Read the file into a DataFrame, asking pandas to parse the 'Date' column
# as datetimes rather than leaving it as plain strings.
df = pd.read_csv(filepath_or_buffer=file_path, parse_dates=["Date"])

# Data Wrangling

In [None]:
# Report how many values are missing in each column before any cleaning.
print("Missing values summary:")
print(df.isna().sum())

# Drop every row that lacks one of the fields the analysis relies on.
# The explicit .copy() makes df_clean an independent frame, so the column
# assignment below writes to df_clean itself rather than to something pandas
# may treat as a slice of df (which triggers SettingWithCopyWarning).
df_clean = df.dropna(subset=['State', 'Sales', 'Units', 'Group']).copy()

# Min-max scale Sales and Units (scaler left at its default settings) and
# store the scaled values in new columns next to the raw ones.
scaler = MinMaxScaler()
df_clean[['Sales_norm', 'Units_norm']] = scaler.fit_transform(df_clean[['Sales', 'Units']])

# Data Analysis

In [None]:
# Summary statistics for the two numeric measures, with one extra row
# holding the mode (the first row returned by DataFrame.mode()).
numeric_cols = ['Sales', 'Units']
desc_stats = df_clean[numeric_cols].describe()
desc_stats.loc['mode'] = df_clean[numeric_cols].mode().iloc[0]

# Total sales per state, largest total first.
state_sales = (
    df_clean.groupby('State')['Sales']
    .sum()
    .sort_values(ascending=False)
)

# Total sales per group, largest total first.
group_sales = (
    df_clean.groupby('Group')['Sales']
    .sum()
    .sort_values(ascending=False)
)

# Visualization

In [None]:
# Configure plots
%matplotlib inline
# One 15x10 figure laid out as a 2x2 grid; both charts are drawn onto it.
fig = plt.figure(figsize=(15, 10))

# State-wise sales
# DataFrame.plot opens a brand-new figure when it is not handed an Axes, so
# the target subplot is passed explicitly to keep the chart inside this grid.
ax_state = fig.add_subplot(2, 2, 1)
state_group_sales = df_clean.groupby(['State', 'Group'])['Sales'].sum().unstack()
state_group_sales.plot(kind='bar', stacked=True, ax=ax_state)
ax_state.set_title('State-wise Sales by Group')

# Time analysis
ax_hour = fig.add_subplot(2, 2, 2)
# NOTE(review): assumes the 'Time' column holds values pd.to_datetime can
# parse into timestamps -- confirm against the CSV.
# assign() returns a new frame with the extra 'Hour' column, which avoids
# writing into a frame pandas may consider a slice of the raw data.
df_clean = df_clean.assign(Hour=pd.to_datetime(df_clean['Time']).dt.hour)
sns.lineplot(x='Hour', y='Sales', data=df_clean, ax=ax_hour)
ax_hour.set_title('Hourly Sales Pattern')

# Generate Reports

In [None]:
# Where the report files are written.
reports_dir = r"D:\Project\sales-analysis\reports"
cleaned_csv_path = r"D:\Project\sales-analysis\reports\cleaned_data.csv"

# Make sure the output directory exists; an existing one is not an error.
os.makedirs(reports_dir, exist_ok=True)

# Write the cleaned data as CSV, leaving out the DataFrame index column.
df_clean.to_csv(cleaned_csv_path, index=False)