#  AAL Sales Analysis (Q4 2020)

This notebook analyzes AAL's Q4 2020 sales data by state and demographic group to assist in strategic planning for expansion.

In [None]:

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')
from scipy import stats
from sklearn.preprocessing import MinMaxScaler


In [None]:

# Read the Q4 2020 sales CSV into a DataFrame and preview its first rows
csv_path = 'AusApparalSales4thQrt2020.csv'
data = pd.read_csv(csv_path)
data.head()


##  Data Wrangling
Check for missing values and normalize relevant numeric columns.

In [None]:

# Count the missing entries in each column
data.isnull().sum()


In [None]:

# Min-max scale the numeric features into separate '<column>_norm' columns.
# The raw 'Sales' and 'Units' values are deliberately left untouched: the
# totals, descriptive statistics and periodic reports computed in later cells
# sum these columns, and sums of 0-1 scaled values are not real sales totals.
scaler = MinMaxScaler()
numeric_cols = ['Sales', 'Units']
missing_cols = [col for col in numeric_cols if col not in data.columns]
if not missing_cols:
    scaled_values = scaler.fit_transform(data[numeric_cols])
    # fit_transform returns one output column per input column, in the same order
    for position, col in enumerate(numeric_cols):
        data[col + '_norm'] = scaled_values[:, position]
else:
    # Report the problem instead of skipping silently; later cells use these columns
    print('Skipping normalization; missing columns:', missing_cols)
data.head()


pandas' `groupby` is well suited to aggregating this data; for example, `data.groupby('State')['Sales'].sum()` gives the total sales for each state.

##  Data Analysis
Descriptive statistics and identifying top/bottom performers.

In [None]:

# Summary statistics for the Sales and Units columns
data.loc[:, ['Sales', 'Units']].describe()


In [None]:

# Total sales per state, ordered from the largest total down to the smallest,
# so the top and bottom states sit at the two ends of the Series
state_sales = (
    data.groupby('State')['Sales']
    .sum()
    .sort_values(ascending=False)
)
state_sales


In [None]:

# Parse 'Date' into datetimes (entries that cannot be parsed become NaT),
# then derive the ISO week number, calendar month and quarter from it
data['Date'] = pd.to_datetime(data['Date'], errors='coerce')
date_parts = data['Date'].dt
data['Week'] = date_parts.isocalendar().week
data['Month'] = date_parts.month
data['Quarter'] = date_parts.quarter


In [None]:

# Sum Sales and Units per week, per month and per quarter
value_cols = ['Sales', 'Units']
weekly_report, monthly_report, quarterly_report = [
    data.groupby(period)[value_cols].sum()
    for period in ('Week', 'Month', 'Quarter')
]
weekly_report, monthly_report, quarterly_report


## Visualization Dashboard

In [None]:

# Grouped bar chart of Sales per state, with one bar per demographic group
fig, ax = plt.subplots(figsize=(12, 6))
sns.barplot(data=data, x='State', y='Sales', hue='Demographic_Group', ax=ax)
ax.set_title('State-wise Sales by Demographic Group')
# Tilt the state labels so they do not overlap
plt.setp(ax.get_xticklabels(), rotation=45)
fig.tight_layout()
plt.show()


In [None]:

# Total Sales per demographic group, drawn as a bar chart
group_sales = data.groupby('Demographic_Group', as_index=False)['Sales'].sum()
ax = sns.barplot(x='Demographic_Group', y='Sales', data=group_sales)
ax.set_title('Total Sales by Demographic Group')
plt.show()


In [None]:

# Distribution of records over the hour of day.
# 'Hour' is derived from the timestamp in 'Date' unless the column already exists.
if 'Hour' not in data.columns:
    data['Hour'] = data['Date'].dt.hour

# When 'Date' carries no time component every row gets the same hour, and the
# histogram would be a single bar that says nothing about peak hours. Only
# draw the plot when the data really contains more than one distinct hour.
hours = data['Hour'].dropna()
if hours.nunique() > 1:
    sns.histplot(hours, bins=24, kde=True)
    plt.title('Sales Distribution by Hour')
    plt.xlabel('Hour of Day')
    plt.ylabel('Sales Frequency')
    plt.show()
else:
    print('No time-of-day information found in the data; skipping the hourly distribution plot.')


In [None]:

# Box plot comparing the spread of Sales across demographic groups
ax = sns.boxplot(data=data, x='Demographic_Group', y='Sales')
ax.set_title('Sales Distribution by Demographic Group')
plt.show()


##  Final Insights
- States with top and bottom sales identified.
- Demographic group performance visualized.
- Time-of-day analysis highlights peak hours.

###  Recommendations:
- Focus marketing on underperforming states.
- Launch personalized campaigns during peak hours.
- Use Seaborn for advanced statistical visualizations.