In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf

# Load dataset
DATA_PATH = "/content/Retail and wherehouse Sale.csv"  # Replace with actual filename
df = pd.read_csv(DATA_PATH)

# Display dataset info.
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() appends a stray "None" line.
df.info()
print(df.head())


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 30000 entries, 0 to 29999
Data columns (total 9 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   YEAR              30000 non-null  int64  
 1   MONTH             30000 non-null  int64  
 2   SUPPLIER          29967 non-null  object 
 3   ITEM CODE         30000 non-null  object 
 4   ITEM DESCRIPTION  30000 non-null  object 
 5   ITEM TYPE         30000 non-null  object 
 6   RETAIL SALES      29999 non-null  float64
 7   RETAIL TRANSFERS  30000 non-null  float64
 8   WAREHOUSE SALES   30000 non-null  float64
dtypes: float64(3), int64(2), object(4)
memory usage: 2.1+ MB
None
   YEAR  MONTH                           SUPPLIER ITEM CODE  \
0  2020      1  REPUBLIC NATIONAL DISTRIBUTING CO    100009   
1  2020      1                          PWSWN INC    100024   
2  2020      1            RELIABLE CHURCHILL LLLP      1001   
3  2020      1          LANTERNA DISTRIBUTORS INC    100

# New Section

In [None]:
# Numeric columns: replace gaps with the per-column median
num_cols = ['RETAIL SALES', 'RETAIL TRANSFERS', 'WAREHOUSE SALES']
numeric_medians = df[num_cols].median()
df[num_cols] = df[num_cols].fillna(numeric_medians)

# Categorical columns: replace gaps with the per-column most frequent value
# (mode() can return several rows on ties; the first row is used)
cat_cols = ['SUPPLIER', 'ITEM CODE', 'ITEM DESCRIPTION', 'ITEM TYPE']
most_frequent = df[cat_cols].mode().iloc[0]
df[cat_cols] = df[cat_cols].fillna(most_frequent)

# Report the remaining null count per column (all zeros expected)
remaining_nulls = df.isnull().sum()
print(remaining_nulls)


YEAR                0
MONTH               0
SUPPLIER            0
ITEM CODE           0
ITEM DESCRIPTION    0
ITEM TYPE           0
RETAIL SALES        0
RETAIL TRANSFERS    0
WAREHOUSE SALES     0
dtype: int64


In [None]:
from sklearn.preprocessing import MinMaxScaler, StandardScaler

# Rescale the numeric columns with min-max normalization.
# NOTE(review): the scaler is fitted on every row before any train/test split,
# so held-out rows influence the scaling bounds - confirm this is acceptable.
scaler = MinMaxScaler().fit(df[num_cols])
df[num_cols] = scaler.transform(df[num_cols])

# Persist the fitted scaler so the same transform can be reapplied at inference
import joblib
joblib.dump(scaler, "scaler.pkl")

# Preview the scaled frame
print(df.head())


   YEAR  MONTH                           SUPPLIER ITEM CODE  \
0  2020      1  REPUBLIC NATIONAL DISTRIBUTING CO    100009   
1  2020      1                          PWSWN INC    100024   
2  2020      1            RELIABLE CHURCHILL LLLP      1001   
3  2020      1          LANTERNA DISTRIBUTORS INC    100145   
4  2020      1               DIONYSOS IMPORTS INC    100293   

                      ITEM DESCRIPTION ITEM TYPE  RETAIL SALES  \
0                  BOOTLEG RED - 750ML      WINE      0.000153   
1            MOMENT DE PLAISIR - 750ML      WINE      0.000153   
2  S SMITH ORGANIC PEAR CIDER - 18.7OZ      BEER      0.000153   
3        SCHLINK HAUS KABINETT - 750ML      WINE      0.000153   
4       SANTORINI GAVALA WHITE - 750ML      WINE      0.000453   

   RETAIL TRANSFERS  WAREHOUSE SALES  
0          0.003966         0.179288  
1          0.004627         0.179378  
2          0.003966         0.179244  
3          0.003966         0.179244  
4          0.003966         0

**Feature Engineering**

In [None]:
# Create lag features for sales data (1-month and 3-month lags).
# The frame holds many different items for the same YEAR/MONTH, so a lag has to
# be taken within one item's own history; a plain Series.shift() would give each
# row the value of whichever unrelated item sits on the preceding row.
# NOTE(review): assumes 'ITEM CODE' identifies a product and that an item has at
# most one row per (YEAR, MONTH). Months missing for an item are not padded, so
# "lag 1" means "the item's previous recorded month" - confirm against the data.
LAG_GROUP_KEYS = ['ITEM CODE']
LAG_PERIODS = [1, 3]

# Stable chronological ordering. Results are written back by index label, so
# the row order of df itself is left untouched.
_sales_by_item = (
    df.sort_values(['YEAR', 'MONTH'], kind='stable')
      .groupby(LAG_GROUP_KEYS, sort=False)
)

for lag in LAG_PERIODS:
    # fill_value=0 covers the leading rows of each item that have no history yet
    df[f'RETAIL_SALES_LAG_{lag}'] = _sales_by_item['RETAIL SALES'].shift(lag, fill_value=0)
    df[f'WAREHOUSE_SALES_LAG_{lag}'] = _sales_by_item['WAREHOUSE SALES'].shift(lag, fill_value=0)


In [None]:
# 3-month moving average for sales, computed within each item's own history.
# The frame holds many different items for the same YEAR/MONTH, so a rolling
# window over the raw row order would average values of unrelated items.
# NOTE(review): assumes 'ITEM CODE' identifies a product and that an item has at
# most one row per (YEAR, MONTH) - confirm against the data.
MA_GROUP_KEYS = ['ITEM CODE']

_ma_by_item = (
    df.sort_values(['YEAR', 'MONTH'], kind='stable')
      .groupby(MA_GROUP_KEYS, sort=False)
)


def _item_moving_average(column):
    """Return the per-item 3-row trailing mean of `column`, indexed like df."""
    rolled = _ma_by_item[column].rolling(window=3).mean()
    # groupby-rolling prepends the group keys to the index; drop them so the
    # result aligns with df's own index when assigned. The first two rows of
    # each item have an incomplete window (NaN) and are set to 0.
    return rolled.droplevel(MA_GROUP_KEYS).fillna(0)


df['RETAIL_SALES_MA_3'] = _item_moving_average('RETAIL SALES')
df['WAREHOUSE_SALES_MA_3'] = _item_moving_average('WAREHOUSE SALES')


In [None]:
def create_sequences(data, seq_length=12, target_idx=None):
    """Slice a time-ordered array into sliding windows and next-step targets.

    Window ``i`` is ``data[i:i + seq_length]`` and its target is the row that
    immediately follows it, ``data[i + seq_length]``.

    Parameters
    ----------
    data : array-like
        Observations ordered along the first axis; any further axes (for
        example a feature axis) are carried through unchanged.
    seq_length : int, default 12
        Number of consecutive rows in each window. Must be at least 1.
    target_idx : int or None, default None
        When None, each target is the complete next row (same trailing shape
        as a row of ``data``). When an int, only that column of the next row
        is returned, giving a 1-D target vector for 2-D ``data``.

    Returns
    -------
    X : numpy.ndarray
        Shape ``(n_windows, seq_length, *data.shape[1:])`` where
        ``n_windows = max(len(data) - seq_length, 0)``.
    y : numpy.ndarray
        Shape ``(n_windows, *data.shape[1:])``, or ``(n_windows,)`` when
        ``target_idx`` is given for 2-D ``data``.

    Raises
    ------
    ValueError
        If ``seq_length`` is smaller than 1, or ``target_idx`` is given for
        data without a feature axis.
    """
    data = np.asarray(data)
    if seq_length < 1:
        raise ValueError("seq_length must be a positive integer")
    if target_idx is not None and data.ndim < 2:
        raise ValueError("target_idx requires data with a feature axis (ndim >= 2)")

    # Clamp at zero so input shorter than one window yields empty, but
    # correctly shaped, arrays instead of shapeless ones.
    n_windows = max(len(data) - seq_length, 0)

    # Row indices of every window at once: shape (n_windows, seq_length).
    window_rows = np.arange(n_windows)[:, None] + np.arange(seq_length)[None, :]
    X = data[window_rows]

    # The target of window i is row i + seq_length, i.e. everything after the
    # first window; copy so the result does not alias the caller's array.
    y = data[seq_length:].copy()
    if target_idx is not None:
        y = y[:, target_idx]
    return X, y

# Select relevant numerical features for training
features = ['RETAIL SALES', 'RETAIL TRANSFERS', 'WAREHOUSE SALES',
            'RETAIL_SALES_LAG_1', 'WAREHOUSE_SALES_LAG_1',
            'RETAIL_SALES_LAG_3', 'WAREHOUSE_SALES_LAG_3',
            'RETAIL_SALES_MA_3', 'WAREHOUSE_SALES_MA_3']

# Column the model is trained to forecast.
# NOTE(review): 'RETAIL SALES' is assumed to be the intended target - confirm.
TARGET_COL = 'RETAIL SALES'

# Convert dataset to sequences. create_sequences yields the complete next-step
# feature row for every window, but the network ends in a single-unit Dense
# layer, so only the target column is kept: one value per sample. Leaving all
# nine columns in y is what makes the evaluation metrics reject the shapes.
X, y_all_features = create_sequences(df[features].values)
y = y_all_features[:, features.index(TARGET_COL)]

# Print shapes
print("X shape:", X.shape)  # (samples, sequence_length, features)
print("y shape:", y.shape)  # (samples,)


X shape: (29988, 12, 9)
y shape: (29988, 9)


**CNN+LSTM Hybrid Model**

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, LSTM, Dense, Flatten, Dropout, BatchNormalization

# Correct optimizer import for Keras 3
from tensorflow.keras.optimizers import Adam

# Define CNN+LSTM Model
def build_cnn_lstm(seq_length, num_features, num_outputs=1, learning_rate=0.001):
    """Build and compile the CNN+LSTM regression network.

    Layer stack: Conv1D (64 filters, kernel 3, 'same' padding) ->
    BatchNormalization -> Dropout(0.2) -> LSTM(100, last step only) ->
    Dropout(0.2) -> Dense(64, relu) -> Dense(32, relu) -> Dense(num_outputs).

    Parameters
    ----------
    seq_length : int
        Number of time steps in each input window.
    num_features : int
        Number of features per time step.
    num_outputs : int, default 1
        Width of the final Dense layer, i.e. how many values are predicted per
        sample. It must equal the number of target columns the model is fitted
        against.
    learning_rate : float, default 0.001
        Learning rate passed to the Adam optimizer.

    Returns
    -------
    A compiled Sequential model using MSE loss and reporting MAE.
    """
    model = Sequential([
        # Explicit Input layer (Keras 3 style) instead of input_shape= on the
        # first real layer
        tf.keras.layers.Input(shape=(seq_length, num_features)),

        # 1D convolution over the time axis
        Conv1D(filters=64, kernel_size=3, activation='relu', padding='same'),
        BatchNormalization(),
        Dropout(0.2),

        # Recurrent layer; return_sequences=False keeps only the final step
        LSTM(100, return_sequences=False),
        Dropout(0.2),

        # Fully connected head
        Dense(64, activation='relu'),
        Dense(32, activation='relu'),
        Dense(num_outputs),  # Output layer for prediction
    ])

    model.compile(optimizer=Adam(learning_rate=learning_rate), loss='mse', metrics=['mae'])

    return model

# Placeholder dimensions for a first look at the architecture
# (swap in the values from X.shape once the real dataset is prepared)
seq_length, num_features = 12, 9  # 12 time steps, 9 features per step

# Instantiate the network and show its layer-by-layer summary
cnn_lstm_model = build_cnn_lstm(seq_length=seq_length, num_features=num_features)
cnn_lstm_model.summary()




In [None]:
# Compare the feature count the model was built for with what X provides
shape_report = [
    ("Shape of X:", X.shape),  # Expected (samples, time_steps, num_features)
    ("Number of Features (Expected):", num_features),
    ("Number of Features (Actual in X):", X.shape[2]),
]
for label, value in shape_report:
    print(label, value)


Shape of X: (29988, 12, 9)
Number of Features (Expected): 9
Number of Features (Actual in X): 9


**CNN+LSTM Model Training**

In [None]:
# Take the input dimensions from X itself instead of hard-coded values
seq_length, num_features = X.shape[1:]  # (time_steps, features)

# Rebuild CNN+LSTM model with correct input shape
cnn_lstm_model = build_cnn_lstm(seq_length, num_features)

# Train the model. The returned History is kept so the per-epoch loss/metric
# values stay available for inspection, and so the cell no longer ends by
# echoing the History object's repr.
# validation_split=0.2 holds out the last 20% of the samples for validation.
history = cnn_lstm_model.fit(X, y, epochs=50, batch_size=32, validation_split=0.2)



Epoch 1/50
[1m750/750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 18ms/step - loss: 0.0081 - mae: 0.0876 - val_loss: 0.0078 - val_mae: 0.0872
Epoch 2/50
[1m750/750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 15ms/step - loss: 0.0079 - mae: 0.0875 - val_loss: 0.0078 - val_mae: 0.0872
Epoch 3/50
[1m750/750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 15ms/step - loss: 0.0078 - mae: 0.0874 - val_loss: 0.0078 - val_mae: 0.0874
Epoch 4/50
[1m750/750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 16ms/step - loss: 0.0079 - mae: 0.0875 - val_loss: 0.0078 - val_mae: 0.0874
Epoch 5/50
[1m750/750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 16ms/step - loss: 0.0079 - mae: 0.0875 - val_loss: 0.0078 - val_mae: 0.0878
Epoch 6/50
[1m750/750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 16ms/step - loss: 0.0078 - mae: 0.0874 - val_loss: 0.0078 - val_mae: 0.0879
Epoch 7/50
[1m750/750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0

<keras.src.callbacks.history.History at 0x7f5b05452710>

In [None]:
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# 🛠 Ensure X and y are numpy arrays (asarray does not copy when they already are)
X = np.asarray(X)
y = np.asarray(y)

# 🛠 Split chronologically: with shuffle=False the test set is the final 20% of
# the windows. The model was fitted on X with validation_split=0.2, which
# withholds that same tail from the weight updates; a shuffled split would
# instead score the model mostly on windows it has already been trained on.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# 🛠 Check dataset sizes
print(f"X_train shape: {X_train.shape}, y_train shape: {y_train.shape}")
print(f"X_test shape: {X_test.shape}, y_test shape: {y_test.shape}")  # Should match `y_pred`


X_train shape: (23990, 12, 9), y_train shape: (23990, 9)
X_test shape: (5998, 12, 9), y_test shape: (5998, 9)


In [None]:
# ✅ Make predictions on the correct test set
y_pred = cnn_lstm_model.predict(X_test)

# ✅ Ensure shapes match
print(f"Shape of y_test before reshaping: {y_test.shape}")  # Expected (n_samples,)
print(f"Shape of y_pred before reshaping: {y_pred.shape}")  # Expected (n_samples, 1)

# Promote a 1-D target vector to a single column so it lines up with y_pred
if y_test.ndim == 1:
    y_test = y_test.reshape(-1, 1)

# Stop here with an explicit message if the target still does not line up with
# the predictions, rather than reporting success and failing later inside the
# metric functions.
if y_test.shape != y_pred.shape:
    raise ValueError(
        f"y_test has shape {y_test.shape} but y_pred has shape {y_pred.shape}; "
        "the target must contain exactly the columns the model predicts."
    )

print(f"✅ Shape of y_test after fixing: {y_test.shape}")  # Should match `y_pred`


[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step
Shape of y_test before reshaping: (5998, 9)
Shape of y_pred before reshaping: (5998, 1)
✅ Shape of y_test after fixing: (5998, 9)


In [None]:
# ✅ Compute Performance Metrics
mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
r2 = r2_score(y_test, y_pred)

# MAPE is undefined wherever the true value is 0, and min-max scaling maps each
# column's minimum to exactly 0, so dividing by y_test directly yields inf/nan.
# Average the percentage error over the non-zero targets only.
_y_true = np.asarray(y_test, dtype=float).reshape(len(y_pred), -1)
_y_hat = np.asarray(y_pred, dtype=float).reshape(len(y_pred), -1)
_nonzero = _y_true != 0
if _nonzero.any():
    mape = np.mean(np.abs((_y_true[_nonzero] - _y_hat[_nonzero]) / _y_true[_nonzero])) * 100
else:
    mape = float("nan")  # every target is 0: MAPE cannot be computed

# ✅ Print Metrics
print("📊 Model Performance Metrics:")
print(f"✅ MAE  : {mae:.4f}")
print(f"✅ RMSE : {rmse:.4f}")
print(f"✅ R² Score : {r2:.4f}")
print(f"✅ MAPE : {mape:.2f}%")


ValueError: y_true and y_pred have different number of output (9!=1)

**Positional Encoding**

**Transformer Encoder Block**

**Transformer-Based LSTM Combined Model**