# In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score


# Load the supply-chain dataset; rows are indexed by the parsed 'Timestamp' column.
url = "https://raw.githubusercontent.com/CharlesCLuo/Application-of-AI-in-Supply-Chain-Risk-Management-Series/main/Demand_Forecsting/comprehensive_supply_chain_data_with_weather_spike.csv"
df = pd.read_csv(url, parse_dates=['Timestamp'], index_col='Timestamp')

# Predictor columns fed to the model; 'Demand' is the regression target.
features = ['Inventory Levels', 'Lead Time (days)', 'Supplier Reliability (%)',
            'Economic Indicator (GDP Growth %)', 'Marketing Spend ($)',
            'Price Fluctuation (%)', 'Competitor Pricing ($)', 'Customer Sentiment',
            'Temperature (°C)', 'Weather Spike', 'Transportation Costs ($)',
            'Stockouts (Binary)', 'Promotions and Discounts (%)', 'Product Quality (%)']

X = df[features].values
y = df['Demand'].values

# Use one scaler per array. Re-fitting a single MinMaxScaler on y would
# overwrite the min/scale it learned for the features, leaving no way to
# transform new feature rows or to undo the feature scaling.
feature_scaler = MinMaxScaler()
scaled_X = feature_scaler.fit_transform(X)

# `scaler` stays bound to the target scaler because the evaluation code
# further down calls `scaler.inverse_transform` to recover demand units.
scaler = MinMaxScaler()
scaled_y = scaler.fit_transform(y.reshape(-1, 1))
target_scaler = scaler

# NOTE(review): both scalers are fit on the full dataset, i.e. before the
# chronological train/test split done later in this script, so the min/max of
# the test range influence the training inputs. Fit on the training range only
# if a leakage-free evaluation is required.

def create_sequences(data_X, data_y, window_size):
    """Build sliding-window samples for sequence models.

    For every start index ``s`` in ``range(len(data_X) - window_size)`` the
    sample is ``data_X[s:s + window_size]`` and its target is
    ``data_y[s + window_size]``, i.e. the value right after the window.

    Args:
        data_X: Indexable, sliceable sequence of feature rows.
        data_y: Indexable sequence of targets aligned with ``data_X``.
        window_size: Number of consecutive rows in each sample.

    Returns:
        Tuple ``(X_seq, y_seq)`` of NumPy arrays holding the windows and
        their targets. Both are empty when ``data_X`` has no more than
        ``window_size`` rows.
    """
    starts = range(len(data_X) - window_size)
    windows = [data_X[s:s + window_size] for s in starts]
    targets = [data_y[s + window_size] for s in starts]
    return np.array(windows), np.array(targets)


# Each sample is a window of 30 consecutive rows; its target is the scaled
# demand of the row that follows the window.
window_size = 30
X_seq, y_seq = create_sequences(scaled_X, scaled_y, window_size)

# Chronological hold-out: the first 80% of windows train the model and the
# remaining windows are kept for testing (no shuffling before the cut).
split = int(len(X_seq) * 0.8)
train_part, test_part = slice(None, split), slice(split, None)
X_train, y_train = X_seq[train_part], y_seq[train_part]
X_test, y_test = X_seq[test_part], y_seq[test_part]

# Two stacked LSTM layers (128 then 64 units), each followed by 20% dropout,
# ending in a single linear unit that outputs the scaled demand.
# The first LSTM returns the full sequence so the second LSTM can consume it.
model = Sequential([
    LSTM(
        128,
        activation='relu',
        return_sequences=True,
        input_shape=X_train.shape[1:],  # (time steps, features) per sample
    ),
    Dropout(0.2),
    LSTM(64, activation='relu'),
    Dropout(0.2),
    Dense(1),
])

optimizer = Adam(learning_rate=0.001)
model.compile(optimizer=optimizer, loss='mean_squared_error')

# NOTE: the test windows double as validation data here, so the per-epoch
# val_loss is measured on the same samples used for the final evaluation.
history = model.fit(
    X_train,
    y_train,
    epochs=50,
    batch_size=32,
    validation_data=(X_test, y_test),
    verbose=1,
)

# Predict scaled demand for every held-out window.
y_pred = model.predict(X_test)

# Convert predictions and targets back to original demand units.
# `scaler` was last fit on the single-column target, so it must be given
# one-column input. Zero-padding to the feature width only produced the right
# numbers through NumPy broadcasting, and would silently return wrong units if
# `scaler` ever held per-feature parameters.
y_pred_rescaled = scaler.inverse_transform(y_pred.reshape(-1, 1)).ravel()
y_test_rescaled = scaler.inverse_transform(y_test.reshape(-1, 1)).ravel()

# Error metrics, computed in original demand units.
mse = mean_squared_error(y_test_rescaled, y_pred_rescaled)
mae = mean_absolute_error(y_test_rescaled, y_pred_rescaled)
r2 = r2_score(y_test_rescaled, y_pred_rescaled)

print(f"Mean Squared Error (MSE): {mse}")
print(f"Mean Absolute Error (MAE): {mae}")
print(f"R-squared (R2 Score): {r2}")

# Residual = actual minus predicted demand, in original units.
residuals = y_test_rescaled - y_pred_rescaled

# One figure with two side-by-side panels: fit on the left, errors on the right.
fig, (ax_fit, ax_resid) = plt.subplots(1, 2, figsize=(15, 5))

# Left panel: actual and predicted demand over the test windows.
ax_fit.plot(y_test_rescaled, label='Actual Demand', color='blue')
ax_fit.plot(y_pred_rescaled, label='Predicted Demand', color='red')
ax_fit.set_title('Figure 8.6 Actual vs Predicted Demand')
ax_fit.legend()

# Right panel: residuals over the same test windows.
ax_resid.plot(residuals, label='Residuals', color='green')
ax_resid.set_title('Residuals')
ax_resid.legend()

fig.tight_layout()
plt.show()

import statsmodels.api as sm
from statsmodels.stats.diagnostic import linear_harvey_collier


# Columns to test individually for a linear relationship with 'Demand'.
all_features = [
    'Inventory Levels', 'Lead Time (days)', 'Supplier Reliability (%)',
    'Economic Indicator (GDP Growth %)', 'Marketing Spend ($)',
    'Price Fluctuation (%)', 'Competitor Pricing ($)', 'Customer Sentiment',
    'Temperature (°C)', 'Weather Spike', 'Transportation Costs ($)',
    'Stockouts (Binary)', 'Promotions and Discounts (%)', 'Product Quality (%)'
]

# For each feature, fit a simple OLS regression (intercept + that feature) of
# Demand and run the Harvey-Collier test on the fit. Its null hypothesis is
# that the linear specification is correct, so a small p-value suggests
# non-linearity.
for feature in all_features:
    X_feature = sm.add_constant(df[feature])
    # Use a dedicated name so the loop does not rebind the global `model`,
    # which holds the trained Keras network earlier in this script.
    ols_result = sm.OLS(df['Demand'], X_feature).fit()
    test_result = linear_harvey_collier(ols_result)
    print(f"{feature} - Harvey-Collier p-value: {test_result.pvalue}")