In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout,SimpleRNN,Conv1D,MaxPooling1D,Flatten,Reshape
from tensorflow.keras.callbacks import EarlyStopping

# Load and process the dataset
data = pd.read_csv(r'C:\Users\shash\fetch_assessment\artifacts\data_ingestion\data_daily.csv')

# Adding additional features
data['# Date'] = pd.to_datetime(data['# Date'])
data['Day_of_Week'] = data['# Date'].dt.dayofweek
data['Month'] = data['# Date'].dt.month
data['Day'] = data['# Date'].dt.day
data['Year'] = data['# Date'].dt.year
data['Lag_1'] = data['Receipt_Count'].shift(1)
data['Lag_2'] = data['Receipt_Count'].shift(2)
data['Lag_3'] = data['Receipt_Count'].shift(3)

# Dropping rows with NaN values after adding lag features
data.dropna(inplace=True)

# Scaling the features
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(data.drop(['# Date'], axis=1))

# Function to create dataset
def create_dataset(data, look_back):
    X, y = [], []
    for i in range(len(data) - look_back):
        X.append(data[i:(i + look_back), :])
        y.append(data[i + look_back, 0])  # 0 index for 'Receipt_Count'
    return np.array(X), np.array(y)

# Creating the dataset with look back
look_back = 7
X, y = create_dataset(scaled_data, look_back)

# Splitting the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print(X_train.shape, y_train.shape,X_test.shape,y_test.shape)

# Define the LSTM model
model = Sequential()
# CNN Layer
model.add(Conv1D(filters=64, kernel_size=2, activation='relu', input_shape=(look_back, X.shape[2])))
# Removing MaxPooling1D layer to maintain the sequence length
# LSTM Layer
model.add(LSTM(50, activation='relu', return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(50, activation='relu'))
model.add(Dropout(0.2))
# Output Layer
model.add(Dense(1))

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

# Early stopping callback
early_stopping = EarlyStopping(monitor='val_loss', min_delta=0.001, patience=10, restore_best_weights=True)

# Train the model
history = model.fit(X_train, y_train, epochs=100, batch_size=32, validation_split=0.2, callbacks=[early_stopping])
# Predicting on the test set
y_pred = model.predict(X_test)

# Calculating RMSE
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print("Test RMSE:", rmse)
# Reshape y_pred_scaled for inverse transformation
temp_shape = np.zeros((len(y_pred), scaled_data.shape[1]))
temp_shape[:, 0] = y_pred[:, 0]
y_pred = scaler.inverse_transform(temp_shape)[:, 0]

# Reshape y_test for inverse transformation
y_test_temp_shape = np.zeros((len(y_test), scaled_data.shape[1]))
y_test_temp_shape[:, 0] = y_test
y_test_rescaled = scaler.inverse_transform(y_test_temp_shape)[:, 0]

# Calculating RMSE on the rescaled data
rmse = np.sqrt(mean_squared_error(y_test_rescaled, y_pred))
print("Test RMSE on original scale:", rmse)


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Test RMSE: 0.07079313403373098
Test RMSE on original scale: 250903.46819340062


## LSTM CNN:0.07481992549716468
## LSTM: 0.08222140069384448
## RNN: 0.08422675959263284
