In [1]:
# Import libraries 
#import tqdm
import tensorflow as tf 
import pandas as pd 
import numpy as np

import sklearn.metrics as metrics
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor

# Data Preparation

In [2]:
# If you are working on local GPU, uncomment the code below
print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))

Num GPUs Available:  0


In [3]:
# Get the preprocessed data 
train_df = pd.read_csv(r"preprocessed_dataset\train_dataset.csv")
val_df = pd.read_csv(r"preprocessed_dataset/validation_dataset.csv")
test_df = pd.read_csv(r"preprocessed_dataset\test_dataset.csv")

In [4]:
X_train = train_df.drop(["sales"], axis=1).to_numpy()
y_train = train_df["sales"].to_numpy()

X_val = val_df.drop(["sales"], axis=1).to_numpy()
y_val = val_df["sales"].to_numpy()

X_train.shape, y_train.shape

((1643131, 3), (1643131,))

In [5]:
# Optimize the data 
train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train)) 
validation_dataset = tf.data.Dataset.from_tensor_slices((X_val, y_val))

train_dataset.batch(32).prefetch(tf.data.AUTOTUNE)
validation_dataset.batch(32).prefetch(tf.data.AUTOTUNE)

<PrefetchDataset element_spec=(TensorSpec(shape=(None, 3), dtype=tf.int64, name=None), TensorSpec(shape=(None,), dtype=tf.float64, name=None))>

# Model 1: Random Forest Regressor

In [6]:

# The accuracy will be lesssssss
model_1 = RandomForestRegressor(n_estimators=100, max_depth=10, random_state=42).fit(X_train, y_train)

In [7]:
# Predict
predictions = model_1.predict(X_val)

# Evaluate 
RF_MAE = metrics.mean_absolute_error(y_val, predictions)
RF_MSE = metrics.mean_squared_error(y_val, predictions)
RF_RMSE = metrics.root_mean_squared_error(y_val, predictions)
RF_RMSLE = metrics.root_mean_squared_log_error(y_val, predictions)

model_1_results = {
    "RF_MAE": RF_MAE,
    "RF_MSE": RF_MSE,
    "RF_RMSE": RF_RMSE}
model_1_results

{'RF_MAE': 158.83819765428268,
 'RF_MSE': 297578.63178024115,
 'RF_RMSE': 545.5076826042335}

# Model 2: LSTM Model

In [8]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

model_2 = Sequential([
    LSTM(128, activation="relu", return_sequences=True, input_shape=(X_train.shape[1], 1)),
    LSTM(64, return_sequences=False, activation = "relu"),
    Dense(25, activation="relu"),
    Dense(1)
])

model_2.compile(loss="mean_squared_error", optimizer="adam", metrics=["mae"])

model_2_history = model_2.fit(X_train,
                              y_train,
                              steps_per_epoch=len(X_train) * 0.01, # Train on 1% of the data
                              epochs=10,
                              validation_data=(X_val, y_val),
                              callbacks=[tf.keras.callbacks.ModelCheckpoint(model_2.name, monitor="mae", save_best_only=True)])

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10

In [None]:
# Predict
predictions = model_2.predict(X_val)

# Evaluate 
LSTM_RMSE = metrics.root_mean_squared_error(y_val, predictions)

model_2_results = {
    "LSTM_MAE": LSTM_MAE,
    "LSTM_MSE": LSTM_MSE,
    "LSTM_RMSE": LSTM_RMSE}
model_2_results

In [None]:
# Model 3: 


# Think outside the box 
**Or create your own training algorithm**

In [None]:
# Your critical thinking goes here...