In [37]:
import pandas as pd
from src.utils import evaluate

In [68]:
from src.const import CATEGORICAL_TARGETS, ATTRIBUTES, LINEAR_TARGETS

# Load the preprocessed STORM table; the first CSV column becomes the index.
# NOTE(review): the original inline comment read "200 column" - presumably the
# number of columns in this file; verify against the CSV.
df = pd.read_csv('data/STORM_preprocessed_medianfill_1.csv', index_col=0)

# Metric results collected per method name (filled in by the evaluation cells).
evaluate_dict = {}

| Mạng Deep Learning     | Điểm Mạnh                                                 | Điểm Yếu                                               | Khi Nào Dùng                                      |
|------------------------|-----------------------------------------------------------|--------------------------------------------------------|--------------------------------------------------|
| **MLP (Multi-Layer Perceptron)** | - Đơn giản, dễ triển khai<br>- Hiệu quả cho dữ liệu nhỏ, đơn giản | - Không tốt cho dữ liệu có cấu trúc phức tạp<br>- Dễ overfitting với dữ liệu lớn | - Dữ liệu tabular (bảng)<br>- Khi không cần xử lý dữ liệu tuần tự hoặc không gian |
| **RNN (Recurrent Neural Network)** | - Tốt cho dữ liệu tuần tự<br>- Phân tích chuỗi thời gian hiệu quả | - Khó huấn luyện do vanishing gradient<br>- Chậm khi xử lý chuỗi dài | - Dự báo chuỗi thời gian<br>- Phân tích lịch sử giao dịch hoặc chuỗi sự kiện |
| **LSTM (Long Short-Term Memory)** | - Giải quyết vanishing gradient của RNN<br>- Ghi nhớ thông tin dài hạn tốt | - Tốn nhiều tài nguyên tính toán<br>- Khó tinh chỉnh | - Chuỗi thời gian dài<br>- Khi cần ghi nhớ các sự kiện quan trọng từ xa |
| **GRU (Gated Recurrent Unit)** | - Nhẹ và nhanh hơn LSTM<br>- Hiệu quả với chuỗi ngắn | - Khả năng biểu diễn thông tin dài hạn kém hơn LSTM | - Khi cần tốc độ nhanh hơn LSTM<br>- Chuỗi thời gian ngắn |
| **CNN (Convolutional Neural Network)** | - Khả năng trích xuất đặc trưng mạnh<br>- Phù hợp với dữ liệu hình ảnh và không gian | - Không hiệu quả với dữ liệu tuần tự | - Hồi quy trên hình ảnh (VD: dự đoán giá từ ảnh)<br>- Phân tích dữ liệu không gian |
| **TabTransformer** | - Mô hình hóa quan hệ phức tạp với cơ chế attention.<br>- Tự động hóa trong việc học tham số. | - Tốn nhiều tài nguyên tính toán<br>- Cần nhiều dữ liệu để tránh overfitting | - Khi cần xây dựng mạng sâu<br>- Khi cần độ chính xác cao. |
| **TabNet**             | - Hiệu quả với dữ liệu bảng (tabular)<br>- Có thể giải thích mô hình nhờ cơ chế chú ý (attention) | - Khó tinh chỉnh và tối ưu<br>- Cần nhiều dữ liệu hơn so với MLP | - Khi cần mô hình vừa mạnh vừa có thể giải thích<br>- Phù hợp với các bài toán dữ liệu bảng phức tạp |


# Model Selection for Hurricane Data Regression

1. **`TabNet`** 
- TabNet is highly effective for **structured tabular data**, such as historical hurricane records where features may include wind speed, pressure, sea surface temperature, and atmospheric conditions.
- It uses a **sequential attention mechanism** that allows the model to focus on relevant features at different steps, improving interpretability.
- Unlike traditional neural networks, TabNet balances both **accuracy and interpretability**, making it useful when understanding the contribution of each feature to predictions is essential.

2. **`TabTransformer`**
- TabTransformer is a powerful architecture for **tabular data**, designed to learn complex relationships between features through the **attention mechanism**.
- It allows the model to automatically identify and learn interactions between different features, enhancing predictive capability for regression and classification tasks.
- With the ability to handle **non-linear relationships** and **various types of data**, TabTransformer is a good choice for tasks that require high accuracy and deep understanding of the data.


3. **`LSTM (Long Short-Term Memory)`**
- LSTM networks are ideal for **time-series data** because they can **remember long-term dependencies** and handle sequential relationships effectively. 
- Unlike standard RNNs, LSTM avoids the **vanishing gradient problem**, making it suitable for learning patterns in long-term hurricane data.
- Since hurricanes are influenced by **seasonal cycles and long-term climatic trends**, LSTM is a great choice to capture these **temporal dependencies** across years.


## 1. Target 1 : TotalDeaths

In [69]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler


# Feature matrix: the columns listed in ATTRIBUTES and CATEGORICAL_TARGETS.
# Label: the first entry of LINEAR_TARGETS (TotalDeaths per the section title).
X = df[ATTRIBUTES + CATEGORICAL_TARGETS]
y = df[LINEAR_TARGETS[0]]

# Hold out 20% of the rows; random_state pins the split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=1
)

# standardize: statistics are learned on the training rows only and then
# applied unchanged to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

### 1.1. TabNet

In [40]:
import numpy as np
import torch
from pytorch_tabnet.tab_model import TabNetRegressor
from sklearn.metrics import mean_squared_error

# The scaled feature matrices are reused as-is; the targets are turned into
# 2-D column arrays of shape (n_samples, 1).
X_train_np = X_train_scaled
X_test_np = X_test_scaled
y_train_np = y_train.to_numpy().reshape(-1, 1)
y_test_np = y_test.to_numpy().reshape(-1, 1)

# TabNet regressor with library-default hyperparameters
tabnet_model = TabNetRegressor()

# Train for up to 1000 epochs, stopping early after 100 epochs without an RMSE
# improvement on the eval set. Per-epoch progress is printed (verbose is left
# at its default, it is NOT silenced).
# NOTE(review): the eval set is the held-out test split, so early stopping is
# tuned on the same data the model is later scored on.
tabnet_model.fit(
    X_train_np,
    y_train_np,
    eval_set=[(X_test_np, y_test_np)],
    eval_metric=['rmse'],
    max_epochs=1000,
    patience=100,
    batch_size=32,
    virtual_batch_size=8,
)



epoch 0  | loss: 376441.50232| val_0_rmse: 100.54486|  0:00:00s
epoch 1  | loss: 375458.58862| val_0_rmse: 99.87958|  0:00:00s
epoch 2  | loss: 274127.84131| val_0_rmse: 99.18263|  0:00:00s
epoch 3  | loss: 373156.69751| val_0_rmse: 98.65387|  0:00:00s
epoch 4  | loss: 369642.80573| val_0_rmse: 98.07378|  0:00:00s
epoch 5  | loss: 377426.93372| val_0_rmse: 96.40723|  0:00:00s
epoch 6  | loss: 372532.89429| val_0_rmse: 94.22036|  0:00:01s
epoch 7  | loss: 374651.86304| val_0_rmse: 94.0992 |  0:00:01s
epoch 8  | loss: 313433.85742| val_0_rmse: 90.95168|  0:00:01s
epoch 9  | loss: 367165.26538| val_0_rmse: 89.04234|  0:00:01s
epoch 10 | loss: 360553.55435| val_0_rmse: 88.12437|  0:00:01s
epoch 11 | loss: 370285.04657| val_0_rmse: 88.4688 |  0:00:01s
epoch 12 | loss: 365319.63763| val_0_rmse: 87.54145|  0:00:01s
epoch 13 | loss: 363844.06989| val_0_rmse: 85.31394|  0:00:01s
epoch 14 | loss: 355700.70096| val_0_rmse: 83.03426|  0:00:02s
epoch 15 | loss: 350343.12427| val_0_rmse: 82.41663|  



In [41]:
# Score the fitted TabNet model on the test split.
eval_values = evaluate(
    tabnet_model, X_test_np, y_test_np, threshold=0.3, mode="regression"
)

# Start a fresh results table for this target with TabNet as its first entry.
evaluate_dict = {"TabNet": eval_values}

eval_values

{'mae': 48.32,
 'mse': 6010.4,
 'rmse': 77.53,
 'mae_upperbound_tolerance': -32.68,
 'rmse_upperbound_tolerance': -51.47,
 'mse_upperbound_tolerance': -3747.03}

### 1.2. TabTransformer

In [42]:
import torch
import torch.nn as nn
import torch.optim as optim

In [43]:
# Define the TabTransformer model
class TabTransformer(nn.Module):
    """Transformer-encoder regressor over a dense embedding of the feature vector.

    The whole feature vector is projected to a single ``dim_embedding``-sized
    token, passed through a stack of ``nn.TransformerEncoderLayer`` blocks and
    mapped to one regression output.

    Note: ``forward`` inserts a sequence dimension of length 1, so the encoder
    sees exactly one token per sample.

    Args:
        num_features: Size of the last dimension of the input.
        dim_embedding: Width of the embedding / transformer model dimension.
        num_heads: Number of attention heads per encoder layer.
        num_layers: Number of stacked encoder layers.
    """

    def __init__(self, num_features, dim_embedding=64, num_heads=4, num_layers=4):
        super().__init__()
        self.embedding = nn.Linear(num_features, dim_embedding)
        encoder_layer = nn.TransformerEncoderLayer(d_model=dim_embedding, nhead=num_heads, batch_first=True)
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.regressor = nn.Linear(dim_embedding, 1)  # Regression head (1 value)

    def forward(self, x):
        """Map a ``(batch, num_features)`` tensor to ``(batch, 1)`` predictions."""
        x = self.embedding(x)
        x = x.unsqueeze(1)  # Adding a sequence length dimension -> (batch, 1, dim)
        x = self.transformer(x)
        x = torch.mean(x, dim=1)  # Pooling over the sequence dimension
        x = self.regressor(x)  # Single output for regression
        return x

    # scikit-learn style predict so the shared evaluate() helper can call it.
    # The first parameter keeps its original name ``model``; it plays the role
    # of ``self``.
    def predict(model, X):
        """Return predictions for array-like ``X`` as a NumPy array of shape (n, 1).

        The input is converted to float32 and placed on the same device as the
        model parameters, so prediction also works after ``.to('cuda')``.
        """
        model.eval()  # Disable dropout for inference
        device = next(model.parameters()).device
        with torch.no_grad():
            X_tensor = torch.as_tensor(X, dtype=torch.float32, device=device)
            predictions = model(X_tensor).cpu().numpy()  # Back to host memory as numpy
        return predictions


In [44]:
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR


In [45]:
# Model parameters
num_features = X_train_scaled.shape[1]

# The model and its training tensors must live on the same device; previously
# the model was moved to CUDA (when available) while the tensors stayed on the
# CPU, which raises a device-mismatch error on GPU machines.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Initialize the model, loss, and optimizer
model = TabTransformer(num_features).to(device)
criterion = nn.MSELoss()  # Use MSELoss for regression
optimizer = optim.Adam(model.parameters(), lr=0.001)

# NOTE(review): gamma=0.001 multiplies the learning rate by 1e-3 every 100
# steps (1e-3 -> 1e-6 -> 1e-9 ...), so updates become negligible after the
# first 100 epochs. Confirm this is intended (a value such as 0.1 is more usual).
scheduler = StepLR(optimizer, step_size=100, gamma=0.001)

# Converting data to tensors on the training device
X_train_tensor = torch.FloatTensor(X_train_scaled).to(device)
y_train_tensor = torch.FloatTensor(y_train.values).unsqueeze(1).to(device)  # shape [n_samples, 1]

# Training loop: full-batch gradient descent, one optimizer step per epoch
model.train()  # Training mode (nothing in the loop switches it off)
for epoch in range(1000):
    optimizer.zero_grad()
    output = model(X_train_tensor)
    loss = criterion(output, y_train_tensor)
    loss.backward()
    optimizer.step()
    scheduler.step()

    # if epoch % 10 == 0:
    #     print(f'Epoch {epoch}, Loss: {loss.item()}')

In [46]:
# Evaluation using evaluate function
# Targets as a plain NumPy array (despite the variable name, not a torch tensor).
y_test_tensor = y_test.to_numpy()

# NOTE(review): presumably evaluate() calls model.predict(X_test); confirm in
# src.utils. Targets here are 1-D while predict() returns shape (n, 1).
eval_values = evaluate(
    model=model,
    X_test=X_test_scaled,
    y_test=y_test_tensor,
    threshold=0.3,  # Adjust as needed
    mode="regression",
)

evaluate_dict.update({"TabTransformer": eval_values})

eval_values

{'mae': 47.11,
 'mse': 9098.51,
 'rmse': 95.39,
 'mae_upperbound_tolerance': -31.47,
 'rmse_upperbound_tolerance': -69.33,
 'mse_upperbound_tolerance': -6835.14}

### 1.3 LSTM

In [47]:
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import LSTM, BatchNormalization, Dense, Dropout, Input
from tensorflow.keras.models import Sequential

In [48]:
# Reshape the data for LSTM
def create_dataset(X, y, time_steps=1):
    """Slice ``X`` into overlapping windows paired with the following target.

    Window ``i`` holds rows ``i .. i + time_steps - 1`` of ``X``; its label is
    ``y.iloc[i + time_steps]``, i.e. the target of the row right after the
    window.

    Args:
        X: Row-sliceable 2-D array of features.
        y: Object supporting positional ``.iloc`` access (e.g. a pandas Series).
        time_steps: Number of consecutive rows per window.

    Returns:
        Tuple ``(windows, targets)`` of NumPy arrays with
        ``len(X) - time_steps`` entries each.
    """
    starts = range(len(X) - time_steps)
    windows = [X[start:start + time_steps] for start in starts]
    targets = [y.iloc[start + time_steps] for start in starts]
    return np.array(windows), np.array(targets)

# Window the scaled rows into sequences of `time_steps` consecutive rows.
# NOTE(review): train_test_split shuffles rows by default, so consecutive rows
# here are presumably not in chronological order - confirm this windowing is
# meaningful for the data.
time_steps = 10
X_train_seq, y_train_seq = create_dataset(X_train_scaled, y_train, time_steps)
X_test_seq, y_test_seq = create_dataset(X_test_scaled, y_test, time_steps)

# Build the LSTM model
model = Sequential([
    # Explicit Input layer instead of `input_shape=` on the first LSTM, which
    # Keras warns about for Sequential models.
    Input(shape=(time_steps, X_train.shape[1])),

    # Layer 1
    LSTM(128, activation='relu', return_sequences=True),
    BatchNormalization(),
    Dropout(0.2),

    # Layer 2
    LSTM(64, activation='relu', return_sequences=True),
    BatchNormalization(),
    Dropout(0.2),

    # Layer 3
    LSTM(32, activation='relu', return_sequences=True),
    BatchNormalization(),
    Dropout(0.2),

    # Layer 4 (returns only the last step, feeding the dense head)
    LSTM(16, activation='relu'),
    BatchNormalization(),
    Dropout(0.2),

    # Dense layers
    Dense(32, activation='relu'),
    BatchNormalization(),
    Dropout(0.1),

    Dense(16, activation='relu'),
    BatchNormalization(),
    Dropout(0.1),

    Dense(8, activation='relu'),
    BatchNormalization(),

    # Single linear output for regression
    Dense(1)
])

# Compile the model: Adam with its default settings (no learning-rate schedule
# is configured), MSE loss, MAE tracked as an extra metric.
model.compile(optimizer='adam', loss='mse', metrics=['mae'])

# Callbacks: stop after 100 epochs without val_loss improvement and roll back
# to the best weights seen.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=100,
    restore_best_weights=True
)

# NOTE(review): validation_data is the test split, so early stopping is tuned
# on the data used for the final score.
history = model.fit(
    X_train_seq, y_train_seq,
    epochs=1000,
    batch_size=32,
    validation_data=(X_test_seq, y_test_seq),
    callbacks=[early_stopping],
    verbose=0
)

  super().__init__(**kwargs)


In [49]:
# Score the trained LSTM on the windowed test sequences and record the result.
eval_values = evaluate(
    model, X_test_seq, y_test_seq, threshold=0.3, mode="regression"
)
evaluate_dict.update({"LSTM": eval_values})

eval_values

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step


{'mae': 48.41,
 'mse': 11088.3,
 'rmse': 105.3,
 'mae_upperbound_tolerance': -33.24,
 'rmse_upperbound_tolerance': -77.18,
 'mse_upperbound_tolerance': -8452.82}

In [50]:
# compare metrics value
def highlight_max(s):
    """Return one CSS string per element of ``s``: red text for the maximum, '' otherwise."""
    peak = s.max()
    styles = []
    for value in s:
        styles.append('color: red' if value == peak else '')
    return styles

def highlight_min(s):
    """Return one CSS string per element of ``s``: red text for the minimum, '' otherwise."""
    lowest = s.min()
    styles = []
    for value in s:
        styles.append('color: red' if value == lowest else '')
    return styles

def highlight_row(row, selected_method):
    """Return one CSS string per cell of ``row``.

    Every cell gets a black background when ``row['Method']`` is contained in
    ``selected_method``; otherwise every cell gets an empty style.
    """
    styles = []
    for _ in row:
        if row['Method'] in selected_method:
            styles.append('background-color: black;')
        else:
            styles.append('')
    return styles

# Highlight the row of the best method for this target: the lowest RMSE.
# The earlier lookup via model.__class__.__name__ produced "Sequential" (the
# Keras model built in the LSTM cell), which never matches a "Method" label,
# so no row was ever highlighted.
selected_method = [min(evaluate_dict, key=lambda name: evaluate_dict[name]["rmse"])]

# One row per method, one column per metric.
eval_value_df = pd.DataFrame(evaluate_dict).T.reset_index().rename(columns={"index":"Method"})

# Red text marks the best value per column: the maximum for the
# *_upperbound_tolerance columns, the minimum for the error columns.
eval_value_df = (
    eval_value_df.style
    .apply(highlight_max, subset=["mae_upperbound_tolerance", "rmse_upperbound_tolerance", "mse_upperbound_tolerance"])
    .apply(highlight_min, subset=["mae", "mse", "rmse"])
    .apply(lambda row: highlight_row(row, selected_method), axis=1 )
    .format(precision=2)
)

eval_value_df

Unnamed: 0,Method,mae,mse,rmse,mae_upperbound_tolerance,rmse_upperbound_tolerance,mse_upperbound_tolerance
0,TabNet,48.32,6010.4,77.53,-32.68,-51.47,-3747.03
1,TabTransformer,47.11,9098.51,95.39,-31.47,-69.33,-6835.14
2,LSTM,48.41,11088.3,105.3,-33.24,-77.18,-8452.82


## 2. Target 2 : NoInjured

In [71]:
# Feature matrix: the columns listed in ATTRIBUTES and CATEGORICAL_TARGETS.
# Label: the second entry of LINEAR_TARGETS (NoInjured per the section title).
X = df[ATTRIBUTES + CATEGORICAL_TARGETS]
y = df[LINEAR_TARGETS[1]]

# Hold out 20% of the rows; random_state pins the split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=1
)

# standardize: statistics are learned on the training rows only and then
# applied unchanged to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

### 2.1. TabNet

In [52]:
# The scaled feature matrices are reused as-is; the targets are turned into
# 2-D column arrays of shape (n_samples, 1).
X_train_np = X_train_scaled
X_test_np = X_test_scaled
y_train_np = y_train.to_numpy().reshape(-1, 1)
y_test_np = y_test.to_numpy().reshape(-1, 1)

# TabNet regressor with library-default hyperparameters
tabnet_model = TabNetRegressor()

# Train for up to 1000 epochs, stopping early after 100 epochs without an RMSE
# improvement on the eval set. Per-epoch progress is printed (verbose is left
# at its default, it is NOT silenced).
# NOTE(review): the eval set is the held-out test split, so early stopping is
# tuned on the same data the model is later scored on.
tabnet_model.fit(
    X_train_np,
    y_train_np,
    eval_set=[(X_test_np, y_test_np)],
    eval_metric=['rmse'],
    max_epochs=1000,
    patience=100,
    batch_size=32,
    virtual_batch_size=8,
)



epoch 0  | loss: 51176.87402| val_0_rmse: 419.46671|  0:00:00s
epoch 1  | loss: 41880.02051| val_0_rmse: 419.15594|  0:00:00s
epoch 2  | loss: 43763.24365| val_0_rmse: 418.67334|  0:00:00s
epoch 3  | loss: 32999.54761| val_0_rmse: 417.40374|  0:00:00s
epoch 4  | loss: 41008.11987| val_0_rmse: 415.96654|  0:00:00s
epoch 5  | loss: 43509.63452| val_0_rmse: 415.42674|  0:00:00s
epoch 6  | loss: 47634.48047| val_0_rmse: 414.59451|  0:00:00s
epoch 7  | loss: 35576.15259| val_0_rmse: 416.31958|  0:00:00s
epoch 8  | loss: 35163.25983| val_0_rmse: 415.82687|  0:00:01s
epoch 9  | loss: 41436.69873| val_0_rmse: 414.20771|  0:00:01s
epoch 10 | loss: 35672.5542| val_0_rmse: 412.48017|  0:00:01s
epoch 11 | loss: 42418.5011| val_0_rmse: 410.94681|  0:00:01s
epoch 12 | loss: 42225.78857| val_0_rmse: 410.6045|  0:00:01s
epoch 13 | loss: 29692.9375| val_0_rmse: 410.91237|  0:00:01s
epoch 14 | loss: 37681.67114| val_0_rmse: 411.19666|  0:00:01s
epoch 15 | loss: 36882.25058| val_0_rmse: 410.53452|  0:00:



In [53]:
# Score the fitted TabNet model on the test split.
eval_values = evaluate(
    tabnet_model, X_test_np, y_test_np, threshold=0.3, mode="regression"
)

# Start a fresh results table for this target with TabNet as its first entry.
evaluate_dict = {"TabNet": eval_values}

eval_values

{'mae': 122.22,
 'mse': 154905.79,
 'rmse': 393.58,
 'mae_upperbound_tolerance': -88.02,
 'rmse_upperbound_tolerance': -272.35,
 'mse_upperbound_tolerance': -105915.95}

### 2.2. TabTransformer

In [54]:
# Model parameters
num_features = X_train_scaled.shape[1]

# The model and its training tensors must live on the same device; previously
# the model was moved to CUDA (when available) while the tensors stayed on the
# CPU, which raises a device-mismatch error on GPU machines.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Initialize the model, loss, and optimizer
model = TabTransformer(num_features).to(device)
criterion = nn.MSELoss()  # Use MSELoss for regression
optimizer = optim.Adam(model.parameters(), lr=0.001)

# NOTE(review): gamma=0.001 multiplies the learning rate by 1e-3 every 100
# steps (1e-3 -> 1e-6 -> 1e-9 ...), so updates become negligible after the
# first 100 epochs. Confirm this is intended (a value such as 0.1 is more usual).
scheduler = StepLR(optimizer, step_size=100, gamma=0.001)

# Converting data to tensors on the training device
X_train_tensor = torch.FloatTensor(X_train_scaled).to(device)
y_train_tensor = torch.FloatTensor(y_train.values).unsqueeze(1).to(device)  # shape [n_samples, 1]

# Training loop: full-batch gradient descent, one optimizer step per epoch
model.train()  # Training mode (nothing in the loop switches it off)
for epoch in range(1000):
    optimizer.zero_grad()
    output = model(X_train_tensor)
    loss = criterion(output, y_train_tensor)
    loss.backward()
    optimizer.step()
    scheduler.step()

In [55]:
# Evaluation using evaluate function
# Targets as a plain NumPy array (despite the variable name, not a torch tensor).
y_test_tensor = y_test.to_numpy()

# NOTE(review): presumably evaluate() calls model.predict(X_test); confirm in
# src.utils. Targets here are 1-D while predict() returns shape (n, 1).
eval_values = evaluate(
    model=model,
    X_test=X_test_scaled,
    y_test=y_test_tensor,
    threshold=0.3,  # Adjust as needed
    mode="regression",
)

evaluate_dict.update({"TabTransformer": eval_values})

eval_values

{'mae': 104.48,
 'mse': 173420.46,
 'rmse': 416.44,
 'mae_upperbound_tolerance': -70.28,
 'rmse_upperbound_tolerance': -295.21,
 'mse_upperbound_tolerance': -124430.62}

### 2.3 LSTM

In [56]:
# Reshape the data for LSTM
def create_dataset(X, y, time_steps=1):
    """Slice ``X`` into overlapping windows paired with the following target.

    Window ``i`` holds rows ``i .. i + time_steps - 1`` of ``X``; its label is
    ``y.iloc[i + time_steps]``, i.e. the target of the row right after the
    window.

    Args:
        X: Row-sliceable 2-D array of features.
        y: Object supporting positional ``.iloc`` access (e.g. a pandas Series).
        time_steps: Number of consecutive rows per window.

    Returns:
        Tuple ``(windows, targets)`` of NumPy arrays with
        ``len(X) - time_steps`` entries each.
    """
    starts = range(len(X) - time_steps)
    windows = [X[start:start + time_steps] for start in starts]
    targets = [y.iloc[start + time_steps] for start in starts]
    return np.array(windows), np.array(targets)

# Window the scaled rows into sequences of `time_steps` consecutive rows.
# NOTE(review): train_test_split shuffles rows by default, so consecutive rows
# here are presumably not in chronological order - confirm this windowing is
# meaningful for the data.
time_steps = 10
X_train_seq, y_train_seq = create_dataset(X_train_scaled, y_train, time_steps)
X_test_seq, y_test_seq = create_dataset(X_test_scaled, y_test, time_steps)

# Build the LSTM model
model = Sequential([
    # Explicit Input layer instead of `input_shape=` on the first LSTM, which
    # Keras warns about for Sequential models.
    Input(shape=(time_steps, X_train.shape[1])),

    # Layer 1
    LSTM(128, activation='relu', return_sequences=True),
    BatchNormalization(),
    Dropout(0.2),

    # Layer 2
    LSTM(64, activation='relu', return_sequences=True),
    BatchNormalization(),
    Dropout(0.2),

    # Layer 3
    LSTM(32, activation='relu', return_sequences=True),
    BatchNormalization(),
    Dropout(0.2),

    # Layer 4 (returns only the last step, feeding the dense head)
    LSTM(16, activation='relu'),
    BatchNormalization(),
    Dropout(0.2),

    # Dense layers
    Dense(32, activation='relu'),
    BatchNormalization(),
    Dropout(0.1),

    Dense(16, activation='relu'),
    BatchNormalization(),
    Dropout(0.1),

    Dense(8, activation='relu'),
    BatchNormalization(),

    # Single linear output for regression
    Dense(1)
])

# Compile the model: Adam with its default settings (no learning-rate schedule
# is configured), MSE loss, MAE tracked as an extra metric.
model.compile(optimizer='adam', loss='mse', metrics=['mae'])

# Callbacks: stop after 100 epochs without val_loss improvement and roll back
# to the best weights seen.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=100,
    restore_best_weights=True
)

# NOTE(review): validation_data is the test split, so early stopping is tuned
# on the data used for the final score.
history = model.fit(
    X_train_seq, y_train_seq,
    epochs=1000,
    batch_size=32,
    validation_data=(X_test_seq, y_test_seq),
    callbacks=[early_stopping],
    verbose=0
)

  super().__init__(**kwargs)


In [57]:
# Score the trained LSTM on the windowed test sequences and record the result.
eval_values = evaluate(
    model, X_test_seq, y_test_seq, threshold=0.3, mode="regression"
)
evaluate_dict.update({"LSTM": eval_values})

eval_values

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 605ms/step


{'mae': 142.01,
 'mse': 239771.11,
 'rmse': 489.66,
 'mae_upperbound_tolerance': -99.36,
 'rmse_upperbound_tolerance': -349.08,
 'mse_upperbound_tolerance': -173887.45}

In [58]:
# compare metrics value
def highlight_max(s):
    """Return one CSS string per element of ``s``: red text for the maximum, '' otherwise."""
    peak = s.max()
    styles = []
    for value in s:
        styles.append('color: red' if value == peak else '')
    return styles

def highlight_min(s):
    """Return one CSS string per element of ``s``: red text for the minimum, '' otherwise."""
    lowest = s.min()
    styles = []
    for value in s:
        styles.append('color: red' if value == lowest else '')
    return styles

def highlight_row(row, selected_method):
    """Return one CSS string per cell of ``row``.

    Every cell gets a black background when ``row['Method']`` is contained in
    ``selected_method``; otherwise every cell gets an empty style.
    """
    styles = []
    for _ in row:
        if row['Method'] in selected_method:
            styles.append('background-color: black;')
        else:
            styles.append('')
    return styles

# Highlight the row of the best method for this target: the lowest RMSE.
# The earlier lookup via model.__class__.__name__ produced "Sequential" (the
# Keras model built in the LSTM cell), which never matches a "Method" label,
# so no row was ever highlighted.
selected_method = [min(evaluate_dict, key=lambda name: evaluate_dict[name]["rmse"])]

# One row per method, one column per metric.
eval_value_df = pd.DataFrame(evaluate_dict).T.reset_index().rename(columns={"index":"Method"})

# Red text marks the best value per column: the maximum for the
# *_upperbound_tolerance columns, the minimum for the error columns.
eval_value_df = (
    eval_value_df.style
    .apply(highlight_max, subset=["mae_upperbound_tolerance", "rmse_upperbound_tolerance", "mse_upperbound_tolerance"])
    .apply(highlight_min, subset=["mae", "mse", "rmse"])
    .apply(lambda row: highlight_row(row, selected_method), axis=1 )
    .format(precision=2)
)

eval_value_df

Unnamed: 0,Method,mae,mse,rmse,mae_upperbound_tolerance,rmse_upperbound_tolerance,mse_upperbound_tolerance
0,TabNet,122.22,154905.79,393.58,-88.02,-272.35,-105915.95
1,TabTransformer,104.48,173420.46,416.44,-70.28,-295.21,-124430.62
2,LSTM,142.01,239771.11,489.66,-99.36,-349.08,-173887.45


## 3. Target 3 : TotalDamageAdjusted(000US$)

In [72]:
# Feature matrix: the columns listed in ATTRIBUTES and CATEGORICAL_TARGETS.
# Label: the third entry of LINEAR_TARGETS (TotalDamageAdjusted(000US$) per the
# section title).
X = df[ATTRIBUTES + CATEGORICAL_TARGETS]
y = df[LINEAR_TARGETS[2]]

# Hold out 20% of the rows; random_state pins the split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=1
)

# standardize: statistics are learned on the training rows only and then
# applied unchanged to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

### 3.1 TabNet

In [60]:
# The scaled feature matrices are reused as-is; the targets are turned into
# 2-D column arrays of shape (n_samples, 1).
X_train_np = X_train_scaled
X_test_np = X_test_scaled
y_train_np = y_train.to_numpy().reshape(-1, 1)
y_test_np = y_test.to_numpy().reshape(-1, 1)

# TabNet regressor with library-default hyperparameters
tabnet_model = TabNetRegressor()

# Train for up to 1000 epochs, stopping early after 100 epochs without an RMSE
# improvement on the eval set. Per-epoch progress is printed (verbose is left
# at its default, it is NOT silenced).
# NOTE(review): the eval set is the held-out test split, so early stopping is
# tuned on the same data the model is later scored on.
tabnet_model.fit(
    X_train_np,
    y_train_np,
    eval_set=[(X_test_np, y_test_np)],
    eval_metric=['rmse'],
    max_epochs=1000,
    patience=100,
    batch_size=32,
    virtual_batch_size=8,
)



epoch 0  | loss: 80834277376.0| val_0_rmse: 111230.12582|  0:00:00s
epoch 1  | loss: 71588611072.0| val_0_rmse: 111229.40948|  0:00:00s
epoch 2  | loss: 74787424256.0| val_0_rmse: 111228.98446|  0:00:00s
epoch 3  | loss: 58221058048.0| val_0_rmse: 111227.06913|  0:00:00s
epoch 4  | loss: 66301756416.0| val_0_rmse: 111226.37414|  0:00:00s
epoch 5  | loss: 69256152064.0| val_0_rmse: 111224.56016|  0:00:00s
epoch 6  | loss: 79422047232.0| val_0_rmse: 111222.99023|  0:00:00s
epoch 7  | loss: 67533618176.0| val_0_rmse: 111219.85812|  0:00:01s
epoch 8  | loss: 66835597824.0| val_0_rmse: 111219.45679|  0:00:01s
epoch 9  | loss: 71169016320.0| val_0_rmse: 111216.7922|  0:00:01s
epoch 10 | loss: 65461849088.0| val_0_rmse: 111213.91041|  0:00:01s
epoch 11 | loss: 72556908032.0| val_0_rmse: 111209.91132|  0:00:01s
epoch 12 | loss: 71420956160.0| val_0_rmse: 111205.8656|  0:00:01s
epoch 13 | loss: 75314334208.0| val_0_rmse: 111203.0334|  0:00:01s
epoch 14 | loss: 72367283456.0| val_0_rmse: 111198.



In [61]:
# Score the fitted TabNet model on the test split.
eval_values = evaluate(
    tabnet_model, X_test_np, y_test_np, threshold=0.3, mode="regression"
)

# Start a fresh results table for this target with TabNet as its first entry.
evaluate_dict = {"TabNet": eval_values}

eval_values

{'mae': 51982.95,
 'mse': 7294618633.08,
 'rmse': 85408.54,
 'mae_upperbound_tolerance': -34091.39,
 'rmse_upperbound_tolerance': -57241.09,
 'mse_upperbound_tolerance': -4649934043.64}

### 3.2 TabTransformer

In [62]:
# Model parameters
num_features = X_train_scaled.shape[1]

# The model and its training tensors must live on the same device; previously
# the model was moved to CUDA (when available) while the tensors stayed on the
# CPU, which raises a device-mismatch error on GPU machines.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Initialize the model, loss, and optimizer
model = TabTransformer(num_features).to(device)
criterion = nn.MSELoss()  # Use MSELoss for regression
optimizer = optim.Adam(model.parameters(), lr=0.001)

# NOTE(review): gamma=0.001 multiplies the learning rate by 1e-3 every 100
# steps (1e-3 -> 1e-6 -> 1e-9 ...), so updates become negligible after the
# first 100 epochs. Confirm this is intended (a value such as 0.1 is more usual).
scheduler = StepLR(optimizer, step_size=100, gamma=0.001)

# Converting data to tensors on the training device
X_train_tensor = torch.FloatTensor(X_train_scaled).to(device)
y_train_tensor = torch.FloatTensor(y_train.values).unsqueeze(1).to(device)  # shape [n_samples, 1]

# Training loop: full-batch gradient descent, one optimizer step per epoch
model.train()  # Training mode (nothing in the loop switches it off)
for epoch in range(1000):
    optimizer.zero_grad()
    output = model(X_train_tensor)
    loss = criterion(output, y_train_tensor)
    loss.backward()
    optimizer.step()
    scheduler.step()

In [63]:
# Evaluation using evaluate function
# Targets as a plain NumPy array (despite the variable name, not a torch tensor).
y_test_tensor = y_test.to_numpy()

# NOTE(review): presumably evaluate() calls model.predict(X_test); confirm in
# src.utils. Targets here are 1-D while predict() returns shape (n, 1).
eval_values = evaluate(
    model=model,
    X_test=X_test_scaled,
    y_test=y_test_tensor,
    threshold=0.3,  # Adjust as needed
    mode="regression",
)

evaluate_dict.update({"TabTransformer": eval_values})

eval_values

{'mae': 59625.35,
 'mse': 12370708007.22,
 'rmse': 111223.68,
 'mae_upperbound_tolerance': -41733.79,
 'rmse_upperbound_tolerance': -83056.23,
 'mse_upperbound_tolerance': -9726023417.77}

### 3.3 LSTM

In [64]:
# Reshape the data for LSTM
def create_dataset(X, y, time_steps=1):
    """Slice ``X`` into overlapping windows paired with the following target.

    Window ``i`` holds rows ``i .. i + time_steps - 1`` of ``X``; its label is
    ``y.iloc[i + time_steps]``, i.e. the target of the row right after the
    window.

    Args:
        X: Row-sliceable 2-D array of features.
        y: Object supporting positional ``.iloc`` access (e.g. a pandas Series).
        time_steps: Number of consecutive rows per window.

    Returns:
        Tuple ``(windows, targets)`` of NumPy arrays with
        ``len(X) - time_steps`` entries each.
    """
    starts = range(len(X) - time_steps)
    windows = [X[start:start + time_steps] for start in starts]
    targets = [y.iloc[start + time_steps] for start in starts]
    return np.array(windows), np.array(targets)

# Window the scaled rows into sequences of `time_steps` consecutive rows.
# NOTE(review): train_test_split shuffles rows by default, so consecutive rows
# here are presumably not in chronological order - confirm this windowing is
# meaningful for the data.
time_steps = 10
X_train_seq, y_train_seq = create_dataset(X_train_scaled, y_train, time_steps)
X_test_seq, y_test_seq = create_dataset(X_test_scaled, y_test, time_steps)

# Build the LSTM model
model = Sequential([
    # Explicit Input layer instead of `input_shape=` on the first LSTM, which
    # Keras warns about for Sequential models.
    Input(shape=(time_steps, X_train.shape[1])),

    # Layer 1
    LSTM(128, activation='relu', return_sequences=True),
    BatchNormalization(),
    Dropout(0.2),

    # Layer 2
    LSTM(64, activation='relu', return_sequences=True),
    BatchNormalization(),
    Dropout(0.2),

    # Layer 3
    LSTM(32, activation='relu', return_sequences=True),
    BatchNormalization(),
    Dropout(0.2),

    # Layer 4 (returns only the last step, feeding the dense head)
    LSTM(16, activation='relu'),
    BatchNormalization(),
    Dropout(0.2),

    # Dense layers
    Dense(32, activation='relu'),
    BatchNormalization(),
    Dropout(0.1),

    Dense(16, activation='relu'),
    BatchNormalization(),
    Dropout(0.1),

    Dense(8, activation='relu'),
    BatchNormalization(),

    # Single linear output for regression
    Dense(1)
])

# Compile the model: Adam with its default settings (no learning-rate schedule
# is configured), MSE loss, MAE tracked as an extra metric.
model.compile(optimizer='adam', loss='mse', metrics=['mae'])

# Callbacks: stop after 100 epochs without val_loss improvement and roll back
# to the best weights seen.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=100,
    restore_best_weights=True
)

# NOTE(review): validation_data is the test split, so early stopping is tuned
# on the data used for the final score.
history = model.fit(
    X_train_seq, y_train_seq,
    epochs=1000,
    batch_size=32,
    validation_data=(X_test_seq, y_test_seq),
    callbacks=[early_stopping],
    verbose=0
)

  super().__init__(**kwargs)


In [65]:
# Score the trained LSTM on the windowed test sequences and record the result.
eval_values = evaluate(
    model, X_test_seq, y_test_seq, threshold=0.3, mode="regression"
)
evaluate_dict.update({"LSTM": eval_values})

eval_values

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 586ms/step


{'mae': 54755.88,
 'mse': 11340180368.68,
 'rmse': 106490.28,
 'mae_upperbound_tolerance': -38323.33,
 'rmse_upperbound_tolerance': -79089.06,
 'mse_upperbound_tolerance': -8837423844.39}

In [66]:
# compare metrics value
def highlight_max(s):
    """Return one CSS string per element of ``s``: red text for the maximum, '' otherwise."""
    peak = s.max()
    styles = []
    for value in s:
        styles.append('color: red' if value == peak else '')
    return styles

def highlight_min(s):
    """Return one CSS string per element of ``s``: red text for the minimum, '' otherwise."""
    lowest = s.min()
    styles = []
    for value in s:
        styles.append('color: red' if value == lowest else '')
    return styles

def highlight_row(row, selected_method):
    """Return one CSS string per cell of ``row``.

    Every cell gets a black background when ``row['Method']`` is contained in
    ``selected_method``; otherwise every cell gets an empty style.
    """
    styles = []
    for _ in row:
        if row['Method'] in selected_method:
            styles.append('background-color: black;')
        else:
            styles.append('')
    return styles

# Highlight the row of the best method for this target: the lowest RMSE.
# The earlier lookup via model.__class__.__name__ produced "Sequential" (the
# Keras model built in the LSTM cell), which never matches a "Method" label,
# so no row was ever highlighted.
selected_method = [min(evaluate_dict, key=lambda name: evaluate_dict[name]["rmse"])]

# One row per method, one column per metric.
eval_value_df = pd.DataFrame(evaluate_dict).T.reset_index().rename(columns={"index":"Method"})

# Red text marks the best value per column: the maximum for the
# *_upperbound_tolerance columns, the minimum for the error columns.
eval_value_df = (
    eval_value_df.style
    .apply(highlight_max, subset=["mae_upperbound_tolerance", "rmse_upperbound_tolerance", "mse_upperbound_tolerance"])
    .apply(highlight_min, subset=["mae", "mse", "rmse"])
    .apply(lambda row: highlight_row(row, selected_method), axis=1 )
    .format(precision=2)
)

eval_value_df

Unnamed: 0,Method,mae,mse,rmse,mae_upperbound_tolerance,rmse_upperbound_tolerance,mse_upperbound_tolerance
0,TabNet,51982.95,7294618633.08,85408.54,-34091.39,-57241.09,-4649934043.64
1,TabTransformer,59625.35,12370708007.22,111223.68,-41733.79,-83056.23,-9726023417.77
2,LSTM,54755.88,11340180368.68,106490.28,-38323.33,-79089.06,-8837423844.39


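TabNet hoạt động tốt nhất với cả ba target (target 1, target 2 và target 3) xét theo RMSE và MSE; riêng về MAE, TabTransformer thấp hơn TabNet ở target 1 và target 2.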