# Athletic Sales Analysis

This notebook analyzes sales data for athletic wear across the United States, combining the 2020 and 2021 datasets.

In [None]:
import pandas as pd
import numpy as np

## 1. Data Loading and Cleaning

In [None]:
# Read the two yearly sales exports.
df_2020 = pd.read_csv('athletic_sales_2020.csv')
df_2021 = pd.read_csv('athletic_sales_2021.csv')

# Stack both years into a single frame; ignore_index renumbers the rows
# so the combined frame does not carry duplicate index labels.
df = pd.concat((df_2020, df_2021), ignore_index=True)

# Parse invoice_date into datetime values.
df = df.assign(invoice_date=pd.to_datetime(df['invoice_date']))

print('Data info:')
df.info()

## 2. Regional Products Analysis

In [None]:
# Total units sold per (region, state, city), largest totals first.
region_products = (
    df.pivot_table(
        index=['region', 'state', 'city'],
        values='units_sold',
        aggfunc='sum',
    )
    .sort_values(by='units_sold', ascending=False)
)

print('Top 5 Regions by Products Sold:')
display(region_products.head(5))

## 3. Regional Sales Analysis

In [None]:
# Total sales per (region, state, city), largest totals first.
# The currency formatting is applied last, after the numeric sort, because
# it turns the column into strings.
region_sales = (
    df.pivot_table(
        index=['region', 'state', 'city'],
        values='total_sales',
        aggfunc='sum',
    )
    .sort_values(by='total_sales', ascending=False)
    .assign(
        total_sales=lambda totals: totals['total_sales'].map(
            lambda amount: f'${amount:,.2f}'
        )
    )
)

print('Top 5 Regions by Sales:')
display(region_sales.head(5))

## 4. Retailer Analysis

In [None]:
# Total sales per (retailer, region, state, city), largest totals first.
# The currency formatting is applied last, after the numeric sort, because
# it turns the column into strings.
retailer_sales = (
    df.pivot_table(
        index=['retailer', 'region', 'state', 'city'],
        values='total_sales',
        aggfunc='sum',
    )
    .sort_values(by='total_sales', ascending=False)
    .assign(
        total_sales=lambda totals: totals['total_sales'].map(
            lambda amount: f'${amount:,.2f}'
        )
    )
)

print('Top 5 Retailers by Sales:')
display(retailer_sales.head(5))

## 5. Women's Athletic Footwear Analysis

In [None]:
# Filter for women's athletic footwear (case-insensitive regex match on the
# product name).
# na=False treats rows with a missing product as non-matches; without it the
# mask would contain NaN and boolean indexing would raise a ValueError.
womens_footwear = df[df['product'].str.contains('Women.*Athletic.*Footwear', case=False, na=False)]

# Total women's footwear sales per (retailer, region, state, city).
womens_sales = pd.pivot_table(womens_footwear, values='total_sales', index=['retailer', 'region', 'state', 'city'], aggfunc='sum')
# Sort while the column is still numeric; the formatting below turns it into strings.
womens_sales = womens_sales.sort_values('total_sales', ascending=False)
womens_sales['total_sales'] = womens_sales['total_sales'].apply(lambda x: f'${x:,.2f}')

print('Top 5 Retailers for Women\'s Athletic Footwear:')
display(womens_sales.head())

## 6. Time-Based Sales Analysis

In [None]:
# Daily totals: sum women's footwear sales per invoice date, rank the days
# by revenue, then format the totals as currency strings for display.
daily_sales = (
    womens_footwear.pivot_table(
        index='invoice_date',
        values='total_sales',
        aggfunc='sum',
    )
    .sort_values(by='total_sales', ascending=False)
    .assign(
        total_sales=lambda totals: totals['total_sales'].map(
            lambda amount: f'${amount:,.2f}'
        )
    )
)

print('Top 10 Days by Women\'s Athletic Footwear Sales:')
display(daily_sales.head(10))

# Weekly totals: rebuild the numeric per-date totals (daily_sales now holds
# strings), bucket them into weeks with resample('W'), then rank and format
# the same way.
weekly_sales = (
    womens_footwear.pivot_table(
        index='invoice_date',
        values='total_sales',
        aggfunc='sum',
    )
    .resample('W')
    .sum()
    .sort_values(by='total_sales', ascending=False)
    .assign(
        total_sales=lambda totals: totals['total_sales'].map(
            lambda amount: f'${amount:,.2f}'
        )
    )
)

print('\nTop 10 Weeks by Women\'s Athletic Footwear Sales:')
display(weekly_sales.head(10))