In [10]:
# FINAL WORKING ONE

import os
import joblib
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Setting paths
MODEL_DIR = os.getenv("SM_MODEL_DIR", "model_output")
DATA_PATH = os.getenv("SM_CHANNEL_TRAIN", "demand_forecasting_data.csv")

def safe_save(obj, path):
    os.makedirs(os.path.dirname(path), exist_ok=True)
    joblib.dump(obj, path)

def load_and_preprocess():
    data = pd.read_csv(DATA_PATH)
    data['Marketing_Campaign'].fillna('None', inplace=True)
    
    # Date features
    data['Date'] = pd.to_datetime(data['Date'])
    data['Year'] = data['Date'].dt.year
    data['Month'] = data['Date'].dt.month
    data['Day'] = data['Date'].dt.day
    data['Weekday'] = data['Date'].dt.weekday
    data.drop(columns=['Date'], inplace=True)
    
    # Categorical encoding
    label_encoder = LabelEncoder()
    cat_cols = ['Marketing_Campaign', 'Product_ID', 'Seasonal_Trend']
    for col in cat_cols:
        data[col] = label_encoder.fit_transform(data[col])
    data['Public_Holiday'] = data['Public_Holiday'].astype(int)
    
    safe_save(label_encoder, f"{MODEL_DIR}/diffusion_price_label_encoder.joblib")
    
    # Train-test split
    train_data = data[data['Year'] < 2021]
    test_data = data[data['Year'] == 2021]
    
    # Feature selection for Price prediction
    features = ['Year', 'Month', 'Day', 'Weekday', 'Product_ID',
                'Marketing_Campaign', 'Seasonal_Trend', 'Stock_Availability',
                'Base_Sales', 'Marketing_Effect', 'Seasonal_Effect',
                'Discount', 'Competitor_Price', 'Public_Holiday', 'Demand']
    
    # Feature scaling
    X_scaler = MinMaxScaler(feature_range=(-1, 1))
    X_train = X_scaler.fit_transform(train_data[features])
    X_test = X_scaler.transform(test_data[features])
    safe_save(X_scaler, f"{MODEL_DIR}/diffusion_price_X_scaler.joblib")
    
    # Target scaling for Price
    y_scaler = MinMaxScaler(feature_range=(-1, 1))
    y_train = y_scaler.fit_transform(train_data['Price'].values.reshape(-1, 1))
    y_test = test_data['Price'].values  # Keep original prices for evaluation
    safe_save(y_scaler, f"{MODEL_DIR}/diffusion_price_y_scaler.joblib")
    
    return (X_train.astype(np.float32), X_test.astype(np.float32),
            y_train.astype(np.float32), y_test, features)

class DiffusionModel(nn.Module):
    def __init__(self, input_dim, latent_dim=10):
        super().__init__()
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, 128),
            nn.ReLU(),
            nn.Linear(128, latent_dim),
            nn.ReLU()
        )
        self.decoder = nn.Sequential(
            nn.Linear(latent_dim, 128),
            nn.ReLU(),
            nn.Linear(128, 1)
        )

    def forward(self, x):
        z = self.encoder(x)
        return self.decoder(z)

def train():
    X_train, X_test, y_train, y_test, features = load_and_preprocess()
    
    model = DiffusionModel(input_dim=len(features))
    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    criterion = nn.MSELoss()
    epochs = 500
    
    for epoch in range(epochs):
        model.train()
        permutation = np.random.permutation(len(X_train))
        for i in range(0, len(X_train), 64):
            indices = permutation[i:i+64]
            batch_x, batch_y = X_train[indices], y_train[indices]
            
            optimizer.zero_grad()
            outputs = model(torch.tensor(batch_x).float())
            loss = criterion(outputs, torch.tensor(batch_y).float())
            loss.backward()
            optimizer.step()
        
        if epoch % 50 == 0:
            print(f"Epoch {epoch}: Loss={loss.item():.6f}")

    model.eval()
    with torch.no_grad():
        predictions = model(torch.tensor(X_test).float()).numpy()
        y_pred = joblib.load(f"{MODEL_DIR}/diffusion_price_y_scaler.joblib").inverse_transform(predictions)
        print("MAE:", mean_absolute_error(y_test, y_pred))
        print("MSE:", mean_squared_error(y_test, y_pred))
        print("RMSE:", np.sqrt(mean_squared_error(y_test, y_pred)))
        print("R2:", r2_score(y_test, y_pred))
    
    # Save the model
    torch.save(model.state_dict(), os.path.join(MODEL_DIR, 'diffusion_price_model.pth'))
    print("Model saved.")

    predictions_price = y_pred.flatten()

    # Create DataFrame for saving
    df_price_preds = pd.DataFrame({
        "Date": test_data["Date"],
        "Diffusion_Predicted_Price": predictions_price
    })
    
    # Save to CSV
    output_path_price = os.path.join(MODEL_DIR, "diffusion_price_predictions.csv")
    df_price_preds.to_csv(output_path_price, index=False)
    
    print(f"Diffusion price predictions saved to {output_path_price}")
    
if __name__ == "__main__":
    train()


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  data['Marketing_Campaign'].fillna('None', inplace=True)


Epoch 0: Loss=0.007301
Epoch 50: Loss=0.002225
Epoch 100: Loss=0.000908
Epoch 150: Loss=0.002202
Epoch 200: Loss=0.001570
Epoch 250: Loss=0.000565
Epoch 300: Loss=0.000639
Epoch 350: Loss=0.000818
Epoch 400: Loss=0.000890
Epoch 450: Loss=0.001539
MAE: 1.6141762056007851
MSE: 4.786164563694214
RMSE: 2.187730459561738
R2: 0.9881627800578641
Model saved.
Diffusion price predictions saved to model_output/diffusion_price_predictions.csv


In [11]:
# Imports
import os
import joblib
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Setting paths again
MODEL_DIR = os.getenv("SM_MODEL_DIR", "model_output")
DATA_PATH = os.getenv("SM_CHANNEL_TRAIN", "demand_forecasting_data.csv")

def safe_save(obj, path):
    os.makedirs(os.path.dirname(path), exist_ok=True)
    joblib.dump(obj, path)

# Load and Preprocess 
def load_and_preprocess():
    data = pd.read_csv(DATA_PATH)
    
    # log transform Demand and fill NA for Marketing_Campaign
    data['Demand'] = np.log1p(data['Demand'])
    data['Marketing_Campaign'].fillna('None', inplace=True)
    
    # Date features
    data['Date'] = pd.to_datetime(data['Date'])
    data['Year'] = data['Date'].dt.year
    data['Month'] = data['Date'].dt.month
    data['Day'] = data['Date'].dt.day
    data['Weekday'] = data['Date'].dt.weekday
    data = data.drop(columns=['Date'])
    
    # Categorical encoding
    label_encoder = LabelEncoder()
    cat_cols = ['Marketing_Campaign', 'Product_ID', 'Seasonal_Trend']
    for col in cat_cols:
        data[col] = label_encoder.fit_transform(data[col])
    data['Public_Holiday'] = data['Public_Holiday'].astype(int)
    
    safe_save(label_encoder, f"{MODEL_DIR}/diffusion_label_encoder.joblib")
    
    # Train-test split
    train_data = data[data['Year'] < 2021]
    test_data = data[data['Year'] == 2021]
    
    # Feature selection (for Demand model)
    features = ['Year', 'Month', 'Day', 'Weekday', 'Product_ID',
                'Marketing_Campaign', 'Seasonal_Trend', 'Stock_Availability',
                'Base_Sales', 'Marketing_Effect', 'Seasonal_Effect',
                'Discount', 'Competitor_Price', 'Public_Holiday', 'Price']
    
    # Use MinMaxScaler for X
    X_scaler = MinMaxScaler(feature_range=(-1, 1))
    X_train = X_scaler.fit_transform(train_data[features])
    X_test = X_scaler.transform(test_data[features])
    safe_save(X_scaler, f"{MODEL_DIR}/diffusion_X_scaler.joblib")
    
    # Use MinMaxScaler for y 
    y_scaler = MinMaxScaler(feature_range=(-1, 1))
    y_train = y_scaler.fit_transform(train_data['Demand'].values.reshape(-1, 1))
    safe_save(y_scaler, f"{MODEL_DIR}/diffusion_y_scaler.joblib")
    
    # Return test target in log-domain 
    return (X_train.astype(np.float32), 
            X_test.astype(np.float32),
            y_train.astype(np.float32),
            test_data['Demand'].values,  # log-domain values
            features)

# Model Architecture
class DiffusionModel(nn.Module):
    def __init__(self, input_dim, latent_dim=10):
        super().__init__()
        def init_weights(m):
            if isinstance(m, nn.Linear):
                nn.init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity='relu')
                m.bias.data.fill_(0.01)
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, 128),
            nn.ReLU(),
            nn.Linear(128, latent_dim),
            nn.ReLU()
        )
        self.decoder = nn.Sequential(
            nn.Linear(latent_dim, 128),
            nn.ReLU(),
            nn.Linear(128, 1)
        )
        self.apply(init_weights)
    
    def forward(self, x):
        return self.decoder(self.encoder(x))

# Training and Evaluation
def train():
    X_train, X_test, y_train, y_test, features = load_and_preprocess()
    
    # Convert to tensors
    X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
    y_train_tensor = torch.tensor(y_train, dtype=torch.float32)
    X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
    
    # Model setup
    model = DiffusionModel(input_dim=X_train.shape[1])
    optimizer = optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-5)
    criterion = nn.MSELoss()
    
    # Training loop with mini-batches
    epochs = 500
    batch_size = 64
    for epoch in range(epochs):
        model.train()
        perm = torch.randperm(X_train_tensor.size(0))
        epoch_loss = 0
        for i in range(0, X_train_tensor.size(0), batch_size):
            batch_x = X_train_tensor[perm[i:i+batch_size]]
            batch_y = y_train_tensor[perm[i:i+batch_size]]
            
            optimizer.zero_grad()
            outputs = model(batch_x)
            loss = criterion(outputs, batch_y)
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()
        if epoch % 50 == 0:
            print(f"Epoch {epoch}: Loss={epoch_loss/len(X_train_tensor):.6f}")
    
    # Evaluation
    model.eval()
    with torch.no_grad():
        # Load the scaler
        y_scaler = joblib.load(f"{MODEL_DIR}/diffusion_y_scaler.joblib")
        
        # Get predictions (they are in the scaled domain of log-transformed demand)
        y_pred = model(X_test_tensor).numpy()
        
        # Inverse scale to obtain predictions in the log-domain
        y_pred_log = y_scaler.inverse_transform(y_pred)
        
        # Convert predictions back to the original demand scale
        y_pred_original = np.expm1(y_pred_log)
        y_test_original = np.expm1(y_test)  # Since y_test is log1p-transformed
        
    # Compute metrics on original demand values
    print("=== Evaluation Metrics (Original Demand Scale) ===")
    print("MAE:", mean_absolute_error(y_test_original, y_pred_original))
    print("MSE:", mean_squared_error(y_test_original, y_pred_original))
    print("RMSE:", np.sqrt(mean_squared_error(y_test_original, y_pred_original)))
    print("R2:", r2_score(y_test_original, y_pred_original))
    
    # Save the model
    torch.save(model.state_dict(), f"{MODEL_DIR}/diffusion_demand_model.pth")
    print(f"Model saved to {MODEL_DIR}")
    
    # Save Diffusion Demand Predictions as CSV
    raw_data = pd.read_csv(DATA_PATH)
    raw_data["Date"] = pd.to_datetime(raw_data["Date"])
    # Select test data rows (Year == 2021)
    test_data = raw_data[raw_data["Date"].dt.year == 2021].copy()
    
    # The predictions are now in original demand scale; flatten the array
    predictions_demand = y_pred_original.flatten()
    
    # Create DataFrame for saving
    df_demand_preds = pd.DataFrame({
        "Date": test_data["Date"],
        "Diffusion_Predicted_Demand": predictions_demand
    })
    
    # Save to CSV
    output_path_demand = os.path.join(MODEL_DIR, "diffusion_demand_predictions.csv")
    df_demand_preds.to_csv(output_path_demand, index=False)
    print(f"Diffusion demand predictions saved to {output_path_demand}")

if __name__ == "__main__":
    train()


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  data['Marketing_Campaign'].fillna('None', inplace=True)


Epoch 0: Loss=0.000666
Epoch 50: Loss=0.000002
Epoch 100: Loss=0.000001
Epoch 150: Loss=0.000001
Epoch 200: Loss=0.000001
Epoch 250: Loss=0.000001
Epoch 300: Loss=0.000001
Epoch 350: Loss=0.000001
Epoch 400: Loss=0.000001
Epoch 450: Loss=0.000001
=== Evaluation Metrics (Original Demand Scale) ===
MAE: 3494.148477572301
MSE: 22836900.563300584
RMSE: 4778.796978665298
R2: 0.9841406631509436
Model saved to model_output
Diffusion demand predictions saved to model_output/diffusion_demand_predictions.csv
