3. forecasting_notebook.ipynb
Here’s an updated Jupyter notebook that includes more detailed exploratory data analysis (EDA) and model training:

In [None]:
# Sales Forecasting Analysis

## 1. Import Libraries

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
from sklearn.preprocessing import LabelEncoder


## 2. Load and Explore Data

In [None]:
# Load the dataset; the 'date' column is parsed to datetime while reading
data = pd.read_csv('../data/sales_data.csv', parse_dates=['date'])

# Display basic info (info() prints its report itself, so no wrapper is needed)
data.info()

# A notebook cell only renders its final bare expression. The intermediate
# results below are therefore passed to display() (provided by the IPython
# kernel) explicitly; as bare expressions they would be computed and discarded.

# Show the first few rows
display(data.head())

# Check for missing values
display(data.isnull().sum())

# Summary statistics (last expression of the cell, rendered automatically)
data.describe()


## 3. Exploratory Data Analysis

In [None]:
# Units sold against date, drawn semi-transparent so overlapping points stay visible
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(data['date'], data['units_sold'], alpha=0.5)
ax.set(title='Units Sold Over Time', xlabel='Date', ylabel='Units Sold')
plt.show()

# Histogram of revenue (50 bins) with a kernel density estimate overlaid
fig, ax = plt.subplots(figsize=(12, 6))
sns.histplot(data['revenue'], bins=50, kde=True, ax=ax)
ax.set(title='Revenue Distribution', xlabel='Revenue', ylabel='Frequency')
plt.show()

# Box plot of revenue for each weather condition
fig, ax = plt.subplots(figsize=(12, 6))
sns.boxplot(x='weather_condition', y='revenue', data=data, ax=ax)
ax.set(title='Revenue by Weather Condition', xlabel='Weather Condition', ylabel='Revenue')
plt.show()


## 4. Feature Engineering

In [None]:
# Calendar features derived from the parsed 'date' column
data['month'] = data['date'].dt.month
data['day_of_week'] = data['date'].dt.dayofweek

# Fit one LabelEncoder per categorical column and keep it in `label_encoders`
# (keyed by column name) so the integer codes can be mapped back later.
categorical_columns = ['store_id', 'product_id', 'weather_condition', 'season']
label_encoders = {
    name: LabelEncoder().fit(data[name]) for name in categorical_columns
}

# Replace each categorical column, in place, with its integer codes
for name, encoder in label_encoders.items():
    data[name] = encoder.transform(data[name])


## 5. Train-Test Split

In [None]:
# Define features and target
# NOTE(review): 'units_sold' is recorded alongside 'revenue'; if it is not known
# at forecast time it leaks the target — confirm before trusting the score.
features = ['store_id', 'product_id', 'units_sold', 'discount_rate', 'holiday_flag', 'weather_condition', 'advertisement_spend', 'month', 'day_of_week', 'season']

# Order rows chronologically so the hold-out set is the most recent part of the
# data. A shuffled split would mix later rows into the training set and give an
# optimistic error estimate for a forecasting task.
data_by_date = data.sort_values('date', kind='stable')
X = data_by_date[features]
y = data_by_date['revenue']

# Split data: shuffle=False keeps the order, so the first 80% of rows train the
# model and the final 20% are held out for evaluation.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# Sanity check: every row ends up in exactly one of the two partitions
assert len(X_train) + len(X_test) == len(X)


## 6. Build and Evaluate Model

In [None]:
# Build a 100-tree random forest (fixed seed for repeatable results) and fit it
# on the training split; fit() returns the estimator itself.
model = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)

# Predict the target for the held-out rows
y_pred = model.predict(X_test)

# Score the predictions with mean absolute error and report it
mae = mean_absolute_error(y_true=y_test, y_pred=y_pred)
print(f'Mean Absolute Error: {mae}')


## 7. Feature Importances

In [None]:
# Rank the features from most to least important according to the fitted model
importances = model.feature_importances_
indices = np.argsort(importances)[::-1]

# One bar per feature column, in ranked order
n_features = X.shape[1]
positions = list(range(n_features))

fig, ax = plt.subplots(figsize=(12, 8))
ax.set_title('Feature Importances')
ax.bar(positions, importances[indices], align='center')
# Label each bar with its feature name, rotated so long names do not overlap
ax.set_xticks(positions)
ax.set_xticklabels(np.array(features)[indices], rotation=90)
ax.set_xlim(-1, n_features)
plt.show()


Final Directory Structure

In [None]:
predictive_analytics_forecasting/
├── README.md
├── data/
│   ├── sales_data.csv
│   └── forecast_results.csv (Generated after running the model)
├── code/
│   └── forecast_model.py
├── notebooks/
│   └── forecasting_notebook.ipynb
└── generate_data.py
