# Sales Data Analysis
## Exploratory Data Analysis on Sales Dataset

### 1. Import Libraries

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

### 2. Load Data

In [None]:
# Read the raw sales records from the CSV file into a DataFrame.
# The path is relative, so it resolves against the current working directory.
SALES_CSV = "data/sales.csv"
df = pd.read_csv(SALES_CSV)

### 3. Basic Data Inspection

In [None]:
# Preview the top of the table (head() returns the first 5 rows by default)
# as a quick sanity check that the CSV was parsed into the expected columns.
df.head()

In [None]:
# Print a structural overview of the DataFrame: column names, non-null
# counts, dtypes, and memory usage.
df.info()

In [None]:
# Descriptive statistics (count, mean, std, min, quartiles, max).
# With default arguments describe() summarises the numeric columns only.
df.describe()

### 4. Data Preprocessing

In [None]:
# Parse the 'date' column into pandas datetime values; the date-based
# grouping and the .dt accessor used in later cells rely on this dtype.
parsed_dates = pd.to_datetime(df['date'])
df['date'] = parsed_dates

# Count the missing entries in every column (isna is the same check as isnull).
print("Missing values per column:")
missing_per_column = df.isna().sum()
missing_per_column

### 5. Time Series Analysis - Monthly Sales Trend

In [None]:
# Aggregate sales by calendar month; each bin is labelled with its month-end
# date. An explicit MonthEnd offset object is passed instead of the string
# alias 'M': pandas 2.2+ deprecates 'M' in favour of 'ME' (FutureWarning),
# while 'ME' is rejected by older releases. The offset object yields the same
# bins on both without a warning.
month_end_bins = pd.Grouper(key='date', freq=pd.offsets.MonthEnd())
monthly_sales = df.groupby(month_end_bins)['sales'].sum()

# Plot the monthly totals as a line chart to show the overall trend.
plt.figure(figsize=(10, 4))
ax = monthly_sales.plot()
ax.set_title("Monthly Sales Trend")
ax.set_xlabel("Date")
ax.set_ylabel("Sales")
ax.grid(True, alpha=0.3)  # faint grid so the line stays the visual focus
plt.show()

### 6. Seasonality Analysis

In [None]:
# Tag every row with the calendar month number taken from its 'date' value.
df['month'] = df['date'].dt.month

# Mean sales for each calendar month; rows from the same month number are
# pooled together regardless of year.
avg_sales_by_month = df.groupby('month')['sales'].mean()

# Bar chart of the monthly averages with horizontal tick labels.
plt.figure(figsize=(8, 4))
ax = avg_sales_by_month.plot.bar(rot=0)
ax.set_title("Average Sales by Month")
ax.set_xlabel("Month")
ax.set_ylabel("Average Sales")
ax.grid(True, alpha=0.3, axis='y')  # horizontal guide lines only
plt.show()

### 7. Outlier Detection

In [None]:
# Box-and-whisker plot of the 'sales' column; points drawn beyond the
# whiskers are the candidate outliers to inspect.
plt.figure(figsize=(6, 4))
ax = df.boxplot(column='sales')
ax.set_title("Sales Distribution - Outlier Check")
ax.set_ylabel("Sales")
plt.show()

### 8. Summary Insights

**Key Observations:**
- Monthly sales trends show [describe pattern]
- Seasonal patterns indicate [describe seasonality]
- Outliers detected: [describe outliers if any]

**Next Steps:**
- Feature engineering for forecasting
- Model building (ARIMA, Prophet, or ML models)
- Evaluate model performance