# **Inventory Demand Forecasting - Model Exploration**

# 1. Setup and Initial Imports

In [None]:
# Basic data manipulation and analysis
import pandas as pd
import numpy as np

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go

# Time series analysis
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.stattools import adfuller

# Machine learning
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Deep learning
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

# Project modules
import sys
sys.path.append('..')
from src.data_processor import DataProcessor
from src.feature_engineering import AdvancedFeatureEngineer
from src.model_ensemble import ModelEnsemble
from src.evaluator import ModelEvaluator
from src.visualizer import Visualizer

# Settings
import warnings
warnings.filterwarnings('ignore')
plt.style.use('seaborn')
%matplotlib inline

# Display settings
pd.set_option('display.max_columns', None)
pd.set_option('display.float_format', lambda x: '%.3f' % x)

# 2. Data Loading and Initial Exploration

In [None]:
# Load the training CSV through the project's DataProcessor
processor = DataProcessor()
df = processor.load_data('../data/train.csv')

# Structural overview: dimensions first, then one labelled section each for
# the per-column dtypes and the per-column count of missing values
print("Dataset Shape:", df.shape)
for heading, summary in (
    ("\nData Types:", df.dtypes),
    ("\nMissing Values:", df.isna().sum()),
):
    print(heading)
    print(summary)

In [None]:
# Basic statistics
# Summary statistics of the loaded frame. Kept as the cell's final bare
# expression so the notebook renders the resulting table as the cell output.
df.describe()

In [None]:
# Display first few rows
# Preview of the leading rows of the loaded frame. Kept as the cell's final
# bare expression so the notebook renders it as the cell output.
df.head()

# 3. Time Series Analysis

### 3.1 Sales Distribution and Trends

In [None]:
# Histogram of the raw 'sales' column, 50 bins
hist_fig, hist_ax = plt.subplots(figsize=(12, 6))
hist_ax.hist(df['sales'], bins=50)
hist_ax.set_title('Distribution of Sales')
hist_ax.set_xlabel('Sales')
hist_ax.set_ylabel('Frequency')
plt.show()

# Mean of 'sales' for each distinct 'date', drawn as a line.
# daily_sales is reused by the seasonal decomposition cell.
daily_sales = df.groupby('date')['sales'].mean()
trend_fig, trend_ax = plt.subplots(figsize=(15, 6))
daily_sales.plot(ax=trend_ax)
trend_ax.set_title('Average Daily Sales Over Time')
trend_ax.set_xlabel('Date')
trend_ax.set_ylabel('Average Sales')
plt.show()

### 3.2 Seasonal Decomposition

In [None]:
# Decompose the per-date mean sales series using a seasonal period of
# 30 observations
decomposition = seasonal_decompose(daily_sales, period=30)

# The four series to draw, listed top to bottom with their panel titles
panels = (
    ('Observed', decomposition.observed),
    ('Trend', decomposition.trend),
    ('Seasonal', decomposition.seasonal),
    ('Residual', decomposition.resid),
)

# One stacked panel per component on a single tall figure
plt.figure(figsize=(15, 12))
for position, (panel_title, component) in enumerate(panels, start=1):
    plt.subplot(len(panels), 1, position)
    plt.plot(component)
    plt.title(panel_title)

plt.tight_layout()
plt.show()

### 3.3 Store and Item Analysis

In [None]:
# Sales by store
# Build a date x store table of mean sales. Unstacking the 'store' level
# yields one column per store, which is the layout seaborn's wide-form
# boxplot needs to draw one box per store. The default unstack() pivots the
# last index level ('date') into the columns and would draw one box per date,
# contradicting the chart title.
store_sales = df.groupby(['store', 'date'])['sales'].mean().unstack(level='store')
plt.figure(figsize=(15, 6))
sns.boxplot(data=store_sales)
plt.title('Sales Distribution by Store')
plt.xlabel('Store')
plt.ylabel('Average Sales')
plt.xticks(rotation=45)
plt.show()

# Top selling items
# Total sales per item, largest first; only the ten largest are charted.
item_sales = df.groupby('item')['sales'].sum().sort_values(ascending=False)
plt.figure(figsize=(12, 6))
item_sales.head(10).plot(kind='bar')
plt.title('Top 10 Items by Total Sales')
plt.xlabel('Item')
plt.ylabel('Total Sales')
plt.show()

# 4. Feature Engineering

In [None]:
# Run the project's feature engineer over the loaded frame
engineer = AdvancedFeatureEngineer()
processed_df = engineer.create_features(df)

# Columns present in the engineered frame but absent from the input frame,
# printed in sorted order
new_features = set(processed_df.columns).difference(df.columns)
print("Created Features:")
print(sorted(new_features))

### 4.1 Feature Correlations

In [None]:
# Pairwise correlations, restricted to the numeric columns
numeric_features = processed_df.select_dtypes(include='number')
corr_matrix = numeric_features.corr()

# Annotated heatmap on a diverging palette centred at zero
heat_fig, heat_ax = plt.subplots(figsize=(15, 12))
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', center=0, ax=heat_ax)
heat_ax.set_title('Feature Correlations')
plt.show()

# 5. Model Development and Evaluation

### 5.1 Data Preparation

In [None]:
# Chronological hold-out: rows dated before the cutoff go to training, rows
# dated on or after it go to testing. Both comparisons are written out
# explicitly rather than negating one mask.
split_date = '2017-12-01'
row_dates = processed_df['date']
train_data = processed_df.loc[row_dates < split_date]
test_data = processed_df.loc[row_dates >= split_date]

print("Train set shape:", train_data.shape)
print("Test set shape:", test_data.shape)

### 5.2 Model Training and Evaluation

In [None]:
# Create the ensemble and let it construct its member models
ensemble = ModelEnsemble()
ensemble.build_models()

# Model inputs are every column except the date and the target itself
non_feature_cols = ['date', 'sales']
features = train_data.drop(columns=non_feature_cols)
target = train_data['sales']

# Train once per name yielded by iterating ensemble.models, always on the
# same feature matrix and target
for model_name in ensemble.models:
    print(f"Training {model_name}...")
    ensemble.train_model(model_name, features, target)

# Predict the held-out rows with the combined ensemble
test_features = test_data.drop(columns=non_feature_cols)
predictions = ensemble.ensemble_predict(test_features)

### 5.3 Performance Evaluation

In [None]:
# Score the ensemble predictions against the held-out sales values
evaluator = ModelEvaluator()
actual_sales = test_data['sales'].values
metrics = evaluator.calculate_metrics(actual_sales, predictions, 'ensemble')

# Print every returned metric to four decimal places
print("\nModel Performance Metrics:")
for name, score in metrics.items():
    print(f"{name}: {score:.4f}")

### 5.4 Visualization of Results

In [None]:
# Shared inputs for both result charts
visualizer = Visualizer()
actual_sales = test_data['sales'].values
forecasts_by_model = {'Ensemble': predictions}

# Actual versus predicted sales over the test dates
fig = visualizer.plot_forecast_comparison(actual_sales, forecasts_by_model, test_data['date'])
fig.show()

# Residual diagnostics for the same predictions
fig = visualizer.plot_residuals_analysis(actual_sales, predictions)
fig.show()

# 6. Feature Importance Analysis

In [None]:
# Feature importances taken from the ensemble's 'xgboost' entry, labelled
# with the training feature column names
xgb_model = ensemble.models['xgboost']
feature_names = features.columns
importance_df = engineer.get_feature_importance(xgb_model, feature_names)

# Chart the importances; left as the cell's final bare expression so any
# returned object is rendered by the notebook
visualizer.plot_feature_importance(importance_df)

# 7. Conclusions and Recommendations

### Key findings:

1. Sales show strong seasonal patterns with [describe patterns]
2. Most important features are [list top features]
3. The ensemble model achieves [state performance] accuracy


### Recommendations:

1. [Recommend how inventory planning should respond to the seasonal patterns]
2. [Recommend data collection or feature work based on the most important features]
3. [Recommend next steps for deploying, monitoring, or improving the ensemble model]