# 📊 Sales Data Analysis
This notebook demonstrates data manipulation and visualization using **pandas**, **matplotlib.pyplot**, and **matplotlib.dates**.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

In [None]:
# Load the sales records. 'Date' is parsed while reading so that the
# .dt accessors and date-axis plots in later cells get a datetime column.
df = pd.read_csv(
    'sales_data.csv',
    parse_dates=['Date'],
)
df.head()

In [None]:
# Print the column names, non-null counts and dtypes of the loaded frame
# (a quick way to confirm that 'Date' was parsed as a datetime column).
df.info()

In [None]:
# Descriptive statistics (count, mean, std, min/max, quartiles) for the
# columns that describe() summarizes by default.
df.describe()

In [None]:
# Total revenue per product, sorted ascending so the largest bar is drawn on top.
revenue_by_product = (
    df.groupby('Product')['Revenue']
    .sum()
    .sort_values()
)

# Horizontal bar chart; the x-axis label is set on the Axes that pandas returns.
product_ax = revenue_by_product.plot.barh(title='Total Revenue by Product', color='skyblue')
product_ax.set_xlabel('Revenue')
plt.tight_layout()
plt.show()

In [None]:
# Revenue summed over all products for each date.
daily_revenue = df.groupby('Date')['Revenue'].sum()

fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(daily_revenue.index, daily_revenue.values, label='Total Revenue')
ax.set(title='Daily Revenue Trend', xlabel='Date', ylabel='Revenue')

# One major tick per month, labelled like "Jan 2024" and rotated for legibility.
ax.xaxis.set_major_locator(mdates.MonthLocator())
ax.xaxis.set_major_formatter(mdates.DateFormatter('%b %Y'))
ax.tick_params(axis='x', rotation=45)

ax.grid(True)
fig.tight_layout()
ax.legend()
plt.show()

In [None]:
fig, ax = plt.subplots(figsize=(12, 6))

# Draw one line per product: that product's revenue summed per date.
for product in df['Product'].unique():
    product_rows = df.loc[df['Product'] == product]
    product_data = product_rows.groupby('Date')['Revenue'].sum()
    ax.plot(product_data.index, product_data.values, label=product)

ax.set(title='Revenue Over Time by Product', xlabel='Date', ylabel='Revenue')

# One major tick per month, labelled like "Jan 2024" and rotated for legibility.
ax.xaxis.set_major_locator(mdates.MonthLocator())
ax.xaxis.set_major_formatter(mdates.DateFormatter('%b %Y'))
ax.tick_params(axis='x', rotation=45)

ax.grid(True)
ax.legend()
fig.tight_layout()
plt.show()

## 🧹 Data Cleaning and Transformation

In [None]:
# Check for missing values: number of NaN/NaT entries in each column
df.isna().sum()

In [None]:
# Derive three columns in a single .assign call:
#   Weekday     - day name of each row's 'Date'
#   CostPerUnit - example assumption: a fixed cost of 20 per unit
#   Profit      - revenue minus unit cost times units sold; written as a
#                 callable so it can read the CostPerUnit column created just before it
df = df.assign(
    Weekday=df['Date'].dt.day_name(),
    CostPerUnit=20,
    Profit=lambda d: d['Revenue'] - d['CostPerUnit'] * d['Units Sold'],
)
df.head()

## 📊 Aggregations and Pivot Tables

In [None]:
# Pivot table: total revenue with one row per calendar month and one column per product.
# 'Month' is a monthly Period derived from each row's 'Date'.
df['Month'] = df['Date'].dt.to_period('M')
pivot = pd.pivot_table(
    df,
    index='Month',
    columns='Product',
    values='Revenue',
    aggfunc='sum',
)
pivot

In [None]:
# Line chart of the pivot table: one marked line per product column.
# Labels and grid are applied to the Axes object that pandas returns.
pivot_ax = pivot.plot(figsize=(10, 6), marker='o', title='Monthly Revenue by Product')
pivot_ax.set_ylabel('Revenue')
pivot_ax.set_xlabel('Month')
pivot_ax.grid(True)
plt.tight_layout()
plt.show()

## 📈 Rolling Averages

In [None]:
# 7-day rolling average of total daily revenue.
# groupby('Date') yields one row per date that actually has sales, so applying
# rolling(window=7) directly would average 7 *rows*, which covers more than
# 7 days whenever a date is missing. Resampling to daily frequency first
# (dates without sales count as 0 revenue) makes the 7-row window a true
# 7-day window. When every date is present the result is unchanged.
rolling_avg = (
    df.groupby('Date')['Revenue'].sum()
    .resample('D').sum()
    .rolling(window=7).mean()
)
rolling_avg.plot(figsize=(10, 5), title='7-Day Rolling Average of Revenue')
plt.ylabel('Revenue')
plt.grid(True)
plt.tight_layout()
plt.show()

## 🧮 Value Counts and Crosstab

In [None]:
# How often is each product sold per weekday?
# crosstab sorts the day names alphabetically (Friday, Monday, ...), so the
# rows are reindexed into calendar order. A weekday with no sales appears as
# a row of zeros. The names match the English output of .dt.day_name().
weekday_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
pd.crosstab(df['Weekday'], df['Product']).reindex(weekday_order, fill_value=0)

In [None]:
# Value counts for weekday sales frequency, listed Monday through Sunday.
# sort_index() would order the day names alphabetically rather than
# chronologically, so the counts are reindexed into calendar order instead.
# A weekday with no rows is reported as 0.
weekday_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
df['Weekday'].value_counts().reindex(weekday_order, fill_value=0)