In [None]:

# Demand Forecasting AI Model

## Import Libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error
import matplotlib.pyplot as plt
from xgboost import XGBRegressor


## Load and Preprocess Dataset

def load_and_preprocess(filepath):
    """Read a CSV file and forward-fill its missing values.

    Args:
        filepath: Location of the CSV; passed unchanged to ``pandas.read_csv``.

    Returns:
        pandas.DataFrame: The loaded data in which each missing value has been
        replaced by the last preceding non-missing value of the same column.
        Missing values at the very top of a column have nothing to carry
        forward and therefore stay missing.
    """
    df = pd.read_csv(filepath)
    # Basic cleaning. DataFrame.ffill() is the supported spelling of the
    # deprecated fillna(method='ffill'); it returns a new frame rather than
    # modifying in place.
    return df.ffill()


## Feature Engineering: Lag Features and Rolling Mean

def feature_engineering(df, target_col, lags=3, rolling_window=3):
    """Add lag and rolling-mean features derived from the target column.

    The input frame is copied first, so the caller's DataFrame is not
    modified and the function can be re-run on the same input.

    Args:
        df: DataFrame containing ``target_col``. Assumes rows are already in
            chronological order (TODO confirm against the data source).
        target_col: Name of the column the features are derived from.
        lags: Number of lag columns to create, named
            ``<target_col>_lag_1`` .. ``<target_col>_lag_<lags>``.
        rolling_window: Number of rows averaged for
            ``<target_col>_rolling_mean``.

    Returns:
        pandas.DataFrame: A new frame with the added feature columns; every
        row that contains a missing value in any column is removed.
    """
    features = df.copy()
    target = features[target_col]
    for lag in range(1, lags + 1):
        features[f'{target_col}_lag_{lag}'] = target.shift(lag)
    # Shift by one row before averaging so the window covers earlier rows
    # only. Averaging the unshifted series would fold the current target
    # value into its own feature (target leakage).
    features[f'{target_col}_rolling_mean'] = (
        target.shift(1).rolling(window=rolling_window).mean()
    )
    # Drop rows left incomplete by shifting/rolling (and any other NaN rows).
    return features.dropna()


## Train Model

def train_model(df, target_col):
    """Fit an XGBoost regressor and report its error on the most recent rows.

    The frame is split in row order (no shuffling): the first 80% of rows are
    used for training and the final 20% for evaluation. Features are
    standardised with a scaler fitted on the training part only. MSE and MAE
    on the hold-out part are printed and an actual-vs-predicted plot is shown.

    Args:
        df: Feature frame that includes ``target_col``. Assumes rows are in
            chronological order and that every other column is numeric
            (TODO confirm against the dataset).
        target_col: Name of the column to predict.

    Returns:
        tuple: ``(model, scaler)`` - the fitted ``XGBRegressor`` and the
        ``StandardScaler`` fitted on the training features.
    """
    X = df.drop(columns=[target_col])
    y = df[target_col]
    # shuffle=False keeps the rows in order so the model is always evaluated
    # on observations that come after everything it was trained on. A random
    # split would let lagged features leak later values into training and
    # would scramble the time axis of the plot below.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, shuffle=False
    )

    # Fit the scaler on the training part only; the test part is transformed
    # with the training statistics.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    model = XGBRegressor(objective='reg:squarederror',
                         learning_rate=0.1,
                         n_estimators=100,
                         max_depth=5,
                         subsample=0.8,
                         colsample_bytree=0.8,
                         random_state=42)
    model.fit(X_train_scaled, y_train)

    y_pred = model.predict(X_test_scaled)

    mse = mean_squared_error(y_test, y_pred)
    mae = mean_absolute_error(y_test, y_pred)

    print(f'Mean Squared Error: {mse}')
    print(f'Mean Absolute Error: {mae}')

    # Plot actual vs predicted over the hold-out rows, in row order
    plt.figure(figsize=(10,6))
    plt.plot(y_test.values, label='Actual')
    plt.plot(y_pred, label='Predicted')
    plt.legend()
    plt.title('Actual vs Predicted Demand')
    plt.show()

    return model, scaler


# Example usage
# df = load_and_preprocess('your_dataset.csv')
# df = feature_engineering(df, target_col='demand')
# model, scaler = train_model(df, target_col='demand')


In [None]:
# NOTE(review): placeholder cell - the bare string below defines nothing used
# by the rest of the notebook; consider removing this cell.
'''HI'''

In [None]:

from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import PolynomialFeatures


def advanced_feature_engineering(df, target_col, lags=3, rolling_window=3):
    """Build the basic lag/rolling features plus EWM and polynomial lag terms.

    On top of the columns produced by ``feature_engineering`` this adds:

    * ``<target_col>_ewm`` - exponentially weighted mean (``alpha=0.3``) of
      the lag-1 column, i.e. of previous target values only.
    * The degree-2 polynomial terms (squares and pairwise products) of the
      lag columns.

    Args:
        df: DataFrame containing ``target_col``. A copy is processed, so the
            caller's frame is not modified.
        target_col: Name of the column the features are derived from.
        lags: Number of lag columns to create; must be at least 1.
        rolling_window: Window size forwarded to ``feature_engineering``.

    Returns:
        pandas.DataFrame: New frame with all engineered feature columns.

    Raises:
        ValueError: If ``lags`` is smaller than 1 (there would be no lag
            columns to build the EWM and polynomial features from).
    """
    if lags < 1:
        raise ValueError('lags must be at least 1 to build lag-based features')

    # Pass a copy so the caller's frame stays untouched even if the helper
    # modifies its argument.
    df = feature_engineering(df.copy(), target_col, lags, rolling_window)
    lag_features = [f'{target_col}_lag_{i}' for i in range(1, lags + 1)]

    # Exponential weighted mean over the lag-1 column: an EWM of the raw
    # target would include the value being predicted (target leakage).
    df[f'{target_col}_ewm'] = df[f'{target_col}_lag_1'].ewm(alpha=0.3).mean()

    # Polynomial features for lagged values
    poly = PolynomialFeatures(degree=2, include_bias=False)
    poly_features = poly.fit_transform(df[lag_features])
    poly_feature_names = poly.get_feature_names_out(lag_features)
    poly_df = pd.DataFrame(poly_features, columns=poly_feature_names, index=df.index)
    # The expansion also emits the degree-1 terms, which are the lag columns
    # already present in df; drop them to avoid duplicate column names.
    poly_df = poly_df.drop(columns=lag_features)
    return pd.concat([df, poly_df], axis=1)


def hyperparameter_tuning(X_train, y_train, param_grid=None, cv=3):
    """Grid-search XGBoost hyperparameters and return the best estimator.

    Candidates are scored with negative mean squared error. The best
    parameter combination is printed.

    Args:
        X_train: Training feature matrix.
        y_train: Training target values.
        param_grid: Optional mapping of ``XGBRegressor`` parameter names to
            lists of candidate values. When omitted, a small default grid
            over ``n_estimators``, ``max_depth``, ``learning_rate``,
            ``subsample`` and ``colsample_bytree`` is used.
        cv: Cross-validation strategy forwarded to ``GridSearchCV``; the
            default of 3 keeps the original 3-fold behaviour. For
            time-ordered data a splitter such as ``TimeSeriesSplit`` can be
            passed instead.

    Returns:
        The ``best_estimator_`` of the fitted ``GridSearchCV``.
    """
    if param_grid is None:
        param_grid = {
            'n_estimators': [50, 100],
            'max_depth': [3, 5],
            'learning_rate': [0.01, 0.1],
            'subsample': [0.8, 1],
            'colsample_bytree': [0.8, 1]
        }
    model = XGBRegressor(objective='reg:squarederror', random_state=42)
    grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=cv, scoring='neg_mean_squared_error', verbose=1)
    grid_search.fit(X_train, y_train)
    print(f'Best parameters: {grid_search.best_params_}')
    return grid_search.best_estimator_


def train_model_advanced(df, target_col):
    """Tune, fit and evaluate an XGBoost regressor on a chronological split.

    The frame is split in row order (no shuffling): the first 80% of rows are
    used for hyperparameter tuning and training, the final 20% for
    evaluation. Features are standardised with a scaler fitted on the
    training part only. MSE and MAE on the hold-out part are printed, and a
    residual plot plus an actual-vs-predicted plot are shown.

    Args:
        df: Feature frame that includes ``target_col``. Assumes rows are in
            chronological order and that every other column is numeric
            (TODO confirm against the dataset).
        target_col: Name of the column to predict.

    Returns:
        tuple: ``(model, scaler)`` - the estimator returned by
        ``hyperparameter_tuning`` and the ``StandardScaler`` fitted on the
        training features.
    """
    X = df.drop(columns=[target_col])
    y = df[target_col]
    # shuffle=False keeps the rows in order so evaluation happens on
    # observations that come after the training data. A random split would
    # let lagged features leak later values into training and would scramble
    # the time axis of the actual-vs-predicted plot.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, shuffle=False
    )

    # Fit the scaler on the training part only; the test part is transformed
    # with the training statistics.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    model = hyperparameter_tuning(X_train_scaled, y_train)

    y_pred = model.predict(X_test_scaled)

    mse = mean_squared_error(y_test, y_pred)
    mae = mean_absolute_error(y_test, y_pred)

    print(f'Mean Squared Error: {mse}')
    print(f'Mean Absolute Error: {mae}')

    # Residual analysis: residual = actual - predicted, plotted against the
    # prediction with a reference line at zero.
    residuals = y_test - y_pred
    plt.figure(figsize=(10,6))
    plt.scatter(y_pred, residuals)
    plt.axhline(y=0, color='r', linestyle='--')
    plt.xlabel('Predicted values')
    plt.ylabel('Residuals')
    plt.title('Residual Analysis')
    plt.show()

    # Plot actual vs predicted over the hold-out rows, in row order
    plt.figure(figsize=(10,6))
    plt.plot(y_test.values, label='Actual')
    plt.plot(y_pred, label='Predicted')
    plt.legend()
    plt.title('Actual vs Predicted Demand')
    plt.show()

    return model, scaler


# Example usage for advanced training
# df = load_and_preprocess('your_dataset.csv')
# df = advanced_feature_engineering(df, target_col='demand')
# model, scaler = train_model_advanced(df, target_col='demand')
