## LSTM

In [2]:
import pandas as pd
import numpy as np

# Example dataset, already encoded
data = {
    'cat_feature_1': [0, 1, 0, 1, 0, 1],  # Categorical feature 1 encoded (e.g., 0 and 1)
    'cat_feature_2': [0, 1, 0, 1, 0, 1],  # Categorical feature 2 encoded (e.g., 0 and 1)
    'num_feature_1': [1.2, 3.4, 2.3, 4.5, 5.6, 6.7],  # Numerical feature 1
    'num_feature_2': [7.8, 8.9, 7.6, 6.5, 5.4, 4.3],  # Numerical feature 2
    'target': [10, 15, 14, 19, 18, 25]  # Target variable (continuous)
}

df = pd.DataFrame(data)

# Display the dataframe
print(df)

   cat_feature_1  cat_feature_2  num_feature_1  num_feature_2  target
0              0              0            1.2            7.8      10
1              1              1            3.4            8.9      15
2              0              0            2.3            7.6      14
3              1              1            4.5            6.5      19
4              0              0            5.6            5.4      18
5              1              1            6.7            4.3      25


In [3]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# Normalize numerical features (scaling them)
scaler = StandardScaler()
num_features = scaler.fit_transform(df[['num_feature_1', 'num_feature_2']])

# Combine the encoded categorical features with the normalized numerical features
X = np.hstack((df[['cat_feature_1', 'cat_feature_2']].values, num_features))

# Target variable
y = df['target'].values

# Reshape the data to fit LSTM input format (samples, time_steps, features)
X = X.reshape((X.shape[0], 1, X.shape[1]))  # 1 time step per sample

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print(f"Training data shape: {X_train.shape}")
print(f"Testing data shape: {X_test.shape}")

Training data shape: (4, 1, 4)
Testing data shape: (2, 1, 4)


In [5]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

# Define the LSTM model
model = Sequential()

# LSTM layer
model.add(LSTM(units=50, activation='relu', input_shape=(X_train.shape[1], X_train.shape[2])))

# Output layer (for regression, single output neuron)
model.add(Dense(1))

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

# Display model summary
model.summary()

In [6]:
# Train the model
history = model.fit(X_train, y_train, epochs=100, batch_size=16, validation_data=(X_test, y_test), verbose=1)

Epoch 1/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step - loss: 376.4146 - val_loss: 163.2334
Epoch 2/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 120ms/step - loss: 376.2198 - val_loss: 163.1132
Epoch 3/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 112ms/step - loss: 376.0245 - val_loss: 162.9949
Epoch 4/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 115ms/step - loss: 375.8313 - val_loss: 162.8746
Epoch 5/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 124ms/step - loss: 375.6359 - val_loss: 162.7540
Epoch 6/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 129ms/step - loss: 375.4392 - val_loss: 162.6332
Epoch 7/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 127ms/step - loss: 375.2448 - val_loss: 162.5121
Epoch 8/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 137ms/step - loss: 375.0497 - val_loss: 162.3888
Epoch 9/100
[1m1/1

In [7]:
# Evaluate the model on the test set
test_loss = model.evaluate(X_test, y_test, verbose=0)
print(f"Test Loss (MSE): {test_loss}")

# Make predictions on the test set
y_pred = model.predict(X_test)

# Compare predictions with actual values
print(f"Predictions: {y_pred.flatten()}")
print(f"True values: {y_test}")

Test Loss (MSE): 145.30303955078125
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 318ms/step
Predictions: [0.57971936 0.79210126]
True values: [10 15]
