In [1]:
import numpy as np
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
import math
import time
from scipy import stats

from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler

from sklearn.metrics import mean_squared_error, mean_absolute_error

from transformers import BertTokenizer, BertModel
from tqdm import tqdm

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

pd.set_option('display.max_columns', 300)
pd.set_option('display.max_rows', 300)

In [2]:
train_df = pd.read_csv('clean_data.csv')
val_df = pd.read_csv('clean_val.csv')
test_df = pd.read_csv('clean_test.csv')

In [3]:
train_df.dropna(inplace = True)

In [4]:
with open("numerical.txt", "r", encoding="utf-8") as file:
    numeric_features = [line.strip() for line in file.readlines()]
    
with open("cat.txt", "r", encoding="utf-8") as file:
    cat_features = [line.strip() for line in file.readlines()]
    
with open("text.txt", "r", encoding="utf-8") as file:
    text_features = [line.strip() for line in file.readlines()]

In [5]:
print('Total number of Numerical Features and one target:', len(numeric_features))
print('Total number of Categorical Features:',len(cat_features))
print('Total number of Text Features:',len(text_features))

Total number of Numerical Features and one target: 9
Total number of Categorical Features: 70
Total number of Text Features: 2


In [6]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

tokenizer = BertTokenizer.from_pretrained('bert-base-multilingual-cased')
embedding_model = BertModel.from_pretrained('bert-base-multilingual-cased')
embedding_model.to(device)

Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


BertModel(
  (embeddings): BertEmbeddings(
    (word_embeddings): Embedding(119547, 768, padding_idx=0)
    (position_embeddings): Embedding(512, 768)
    (token_type_embeddings): Embedding(2, 768)
    (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (encoder): BertEncoder(
    (layer): ModuleList(
      (0): BertLayer(
        (attention): BertAttention(
          (self): BertSelfAttention(
            (query): Linear(in_features=768, out_features=768, bias=True)
            (key): Linear(in_features=768, out_features=768, bias=True)
            (value): Linear(in_features=768, out_features=768, bias=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (output): BertSelfOutput(
            (dense): Linear(in_features=768, out_features=768, bias=True)
            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
            (dropout): Dropout(p=0.1, inplace=False)
         

In [7]:
class BoxCoxTransformer(BaseEstimator, TransformerMixin):
    def __init__(self):
        self.lambdas_ = {}

    def fit(self, X, y=None):
        X = X.copy()
        for column in X.columns:
            if any(X[column] <= 0):
                X[column] = X[column] + abs(X[column].min()) + 1
            _, lambda_optimal = stats.boxcox(X[column])
            self.lambdas_[column] = lambda_optimal
        return self

    def transform(self, X):
        X = X.copy()
        for column, lambda_optimal in self.lambdas_.items():
            if any(X[column] <= 0):
                X[column] = X[column] + abs(X[column].min()) + 1
            X[column] = stats.boxcox(X[column], lmbda=lambda_optimal)
        return X

def get_bert_embeddings(text_series, batch_size):
    embeddings = []
    embedding_model.eval()  
    
    for i in range(0, len(text_series), batch_size):
        batch_texts = text_series[i:i + batch_size]
        
        if not batch_texts: 
            continue
        
        inputs = tokenizer(batch_texts, return_tensors='pt', truncation=True, padding=True, max_length=128)
        inputs = {key: value.to(device) for key, value in inputs.items()}

        with torch.no_grad():
            outputs = embedding_model(**inputs)
            batch_embeddings = outputs.last_hidden_state[:, 0, :].cpu().numpy()
            embeddings.append(batch_embeddings)
    
    if embeddings:
        return np.vstack(embeddings)
    else:
        return np.array([])
    
def concat(df, text_features, batch_size, pooling, is_train = False):

    embeddings_list = []

    for feature in text_features:
        print(feature)
        text_embeddings = get_bert_embeddings(df[feature].tolist(), batch_size)

        if pooling == 'max':
            text_embeddings = np.max(text_embeddings, axis=1)  
        elif pooling == 'mean':
            text_embeddings = np.mean(text_embeddings, axis=1)  

        if text_embeddings.ndim == 1:
            text_embeddings = text_embeddings.reshape(-1, 1)

        embeddings_list.append(text_embeddings)
        
    if is_train:
        transformed_features = preprocessor.fit_transform(df).toarray()
    else:
        transformed_features = preprocessor.transform(df).toarray()

    combined_features = np.concatenate(embeddings_list + [transformed_features], axis=1)

    return combined_features

In [8]:
class MyDataset(Dataset):
    def __init__(self, combined_features, norm_target, target):
        self.combined_features = np.array(combined_features)
        self.norm_target = np.array(norm_target)
        self.target = np.array(target)

    def __len__(self):
        return len(self.target)

    def __getitem__(self, index):
        combined_features = self.combined_features[index]
        norm_target = self.norm_target[index] 
        target = self.target[index] 
        
        return {'combined_features': torch.tensor(combined_features, dtype=torch.float32),
                'norm_target':torch.tensor(norm_target, dtype=torch.float32),
                'target':torch.tensor(target, dtype=torch.float32)
        }

In [9]:
def normalize(x):
    return (x - train_df['price'].min()) / (train_df['price'].max() - train_df['price'].min())

def denormalize(x):
    return x * (train_df['price'].max() - train_df['price'].min()) + train_df['price'].min()

In [10]:
train_df['normalized_price'] = train_df['price'].apply(normalize)
val_df['normalized_price'] = val_df['price'].apply(normalize)
test_df['normalized_price'] = test_df['price'].apply(normalize)

In [11]:
numeric_transformer = Pipeline([
    ('boxcox', BoxCoxTransformer()),
    ('scaler', StandardScaler())
])


preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', OneHotEncoder(handle_unknown='ignore'), cat_features)
    ]
)

In [12]:
train_combined_features = concat(train_df, text_features, batch_size = 32, pooling = 'mean', is_train = True)
val_combined_features = concat(val_df, text_features, batch_size = 32, pooling = 'mean', is_train = False)
test_combined_features = concat(test_df, text_features, batch_size = 32, pooling = 'mean', is_train = False)

summary
host_about
summary
host_about
summary
host_about


In [13]:
batch_size = 32

train_norm_y = train_df['normalized_price']
train_y = train_df['price']

val_norm_y = val_df['normalized_price']
val_y = val_df['price']

test_norm_y = test_df['normalized_price']
test_y = test_df['price']

train_dataset = MyDataset(train_combined_features, train_norm_y, train_y)
val_dataset = MyDataset(val_combined_features, val_norm_y, val_y)
test_dataset = MyDataset(test_combined_features, test_norm_y, test_y)

train_loader = DataLoader(train_dataset, batch_size = batch_size, shuffle = True)
val_loader = DataLoader(val_dataset, batch_size = batch_size, shuffle = True)
test_loader = DataLoader(test_dataset, batch_size = batch_size, shuffle = True)

In [20]:
class MLPRegressionModel(nn.Module):
    def __init__(self, input_dim, hidden_dim=64):
        super(MLPRegressionModel, self).__init__()
        
        self.fc1 = nn.Linear(input_dim, 64)
        self.bn1 = nn.BatchNorm1d(64)
        
        self.fc2 = nn.Linear(64, 32)
        self.bn2 = nn.BatchNorm1d(32)
        
        self.fc3 = nn.Linear(32, 1)
        
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.2)
        
    def forward(self, x):
        
        x = self.fc1(x)
        x = self.bn1(x)        
        x = self.relu(x)
        x = self.dropout(x)
        
        x = self.fc2(x)
        x = self.bn2(x)        
        x = self.relu(x)
        x = self.dropout(x)
        
        x = self.fc3(x)
        return x

In [21]:
input_dim = train_combined_features.shape[1]
model = MLPRegressionModel(input_dim=input_dim)
model = model.to(device)

In [22]:
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = nn.MSELoss()

In [23]:
from tqdm import tqdm

def rmse(y, y_predict):
    mse = mean_squared_error(y, y_predict)
    return math.sqrt(mse)

def train(model, data_loader, optimizer, criterion, device):
    model.train()
    total_loss = 0
    all_preds = []
    all_labels = []
    
    start_time = time.time()
    
    for batch in tqdm(data_loader, desc="Training", unit="batch"):
        inputs = batch['combined_features'].to(device)
        norm_target = batch['norm_target'].to(device)
        targets = batch['target'].to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        
        loss = criterion(outputs, norm_target.unsqueeze(1))
        total_loss += loss.item()
        
        loss.backward()
        optimizer.step()
        
        outputs = denormalize(outputs)
        
        all_preds.extend(outputs.cpu().tolist())
        all_labels.extend(targets.cpu().tolist())
        
    avg_loss = total_loss/len(data_loader)
    train_mae = mean_absolute_error(all_labels, all_preds)
    train_rmse = rmse(all_labels, all_preds)

    end_time = time.time()  
    epoch_duration = end_time - start_time  
    
    print(f'Training Loss: {avg_loss:.4f}, Train MAE: {train_mae:.4f}, Train RMSE: {train_rmse:.4f}, Time: {epoch_duration:.2f} seconds')
    return avg_loss, train_mae

def evaluate(model, data_loader, criterion, device):
    
    model.eval()
    total_loss = 0
    all_preds = []
    all_labels = []
    
    start_time = time.time()
    
    with torch.no_grad():
        for batch in tqdm(data_loader, desc="Validation", unit="batch"):
            
            inputs = batch['combined_features'].to(device)
            norm_target = batch['norm_target'].to(device)
            targets = batch['target'].to(device)
            
            outputs = model(inputs)
            
            loss = criterion(outputs, norm_target.unsqueeze(1))
            total_loss += loss.item()
            
            outputs = denormalize(outputs)
            all_preds.extend(outputs.cpu().tolist())
            all_labels.extend(targets.cpu().tolist())
    

    avg_loss = total_loss/len(data_loader)
    val_mae = mean_absolute_error(all_labels, all_preds)
    val_rmse = rmse(all_labels, all_preds)

    end_time = time.time()  
    epoch_duration = end_time - start_time  
    
    print(f'Validation Loss: {avg_loss:.4f}, Validation MAE: {val_mae:.4f}, Validation RMSE: {val_rmse:.4f}, Time: {epoch_duration:.2f} seconds')
    
    return avg_loss, val_mae

In [24]:
best_val_loss = float('inf')  
best_val_mae = float('inf')              
model_save_path = "9_.pth"

num_epochs = 50

train_losses = []
val_losses = []
train_mae_scores = []
val_mae_scores = []

for epoch in range(num_epochs):
    print(f"Epoch {epoch + 1}/{num_epochs}")
    
    train_loss, train_mae = train(model, train_loader, optimizer, criterion, device)
    val_loss, val_mae = evaluate(model, val_loader, criterion, device)
    
    train_losses.append(train_loss)  
    train_mae_scores.append(train_mae)  
    val_losses.append(val_loss)  
    val_mae_scores.append(val_mae)  
    
    if val_mae < best_val_mae:
        best_val_mae = val_mae
        torch.save(model.state_dict(), model_save_path)   
        print(f"Best model saved with validation MAE: {best_val_mae:.4f}")

    print('-' * 100)

Epoch 1/50


Training: 100%|██████████████████████████████████████████████████████████████████| 561/561 [00:03<00:00, 170.15batch/s]


Training Loss: 0.0301, Train MAE: 93.7624, Train RMSE: 139.2252, Time: 3.33 seconds


Validation: 100%|██████████████████████████████████████████████████████████████████| 72/72 [00:00<00:00, 543.32batch/s]


Validation Loss: 0.1213, Validation MAE: 59.8697, Validation RMSE: 280.4562, Time: 0.14 seconds
Best model saved with validation MAE: 59.8697
----------------------------------------------------------------------------------------------------
Epoch 2/50


Training: 100%|██████████████████████████████████████████████████████████████████| 561/561 [00:03<00:00, 173.27batch/s]


Training Loss: 0.0093, Train MAE: 48.7780, Train RMSE: 77.3471, Time: 3.27 seconds


Validation: 100%|██████████████████████████████████████████████████████████████████| 72/72 [00:00<00:00, 486.38batch/s]


Validation Loss: 0.1212, Validation MAE: 55.2516, Validation RMSE: 280.6017, Time: 0.16 seconds
Best model saved with validation MAE: 55.2516
----------------------------------------------------------------------------------------------------
Epoch 3/50


Training: 100%|██████████████████████████████████████████████████████████████████| 561/561 [00:04<00:00, 122.27batch/s]


Training Loss: 0.0083, Train MAE: 45.2995, Train RMSE: 73.2129, Time: 4.62 seconds


Validation: 100%|██████████████████████████████████████████████████████████████████| 72/72 [00:00<00:00, 505.21batch/s]


Validation Loss: 0.1198, Validation MAE: 53.9530, Validation RMSE: 278.9222, Time: 0.15 seconds
Best model saved with validation MAE: 53.9530
----------------------------------------------------------------------------------------------------
Epoch 4/50


Training: 100%|██████████████████████████████████████████████████████████████████| 561/561 [00:04<00:00, 124.47batch/s]


Training Loss: 0.0081, Train MAE: 44.4819, Train RMSE: 72.1780, Time: 4.54 seconds


Validation: 100%|██████████████████████████████████████████████████████████████████| 72/72 [00:00<00:00, 539.28batch/s]


Validation Loss: 0.1204, Validation MAE: 53.6960, Validation RMSE: 279.6000, Time: 0.14 seconds
Best model saved with validation MAE: 53.6960
----------------------------------------------------------------------------------------------------
Epoch 5/50


Training: 100%|██████████████████████████████████████████████████████████████████| 561/561 [00:03<00:00, 174.80batch/s]


Training Loss: 0.0080, Train MAE: 44.3000, Train RMSE: 71.8017, Time: 3.24 seconds


Validation: 100%|██████████████████████████████████████████████████████████████████| 72/72 [00:00<00:00, 551.66batch/s]


Validation Loss: 0.1210, Validation MAE: 54.2075, Validation RMSE: 278.7658, Time: 0.14 seconds
----------------------------------------------------------------------------------------------------
Epoch 6/50


Training: 100%|██████████████████████████████████████████████████████████████████| 561/561 [00:03<00:00, 163.18batch/s]


Training Loss: 0.0074, Train MAE: 42.7206, Train RMSE: 69.2564, Time: 3.47 seconds


Validation: 100%|██████████████████████████████████████████████████████████████████| 72/72 [00:00<00:00, 491.39batch/s]


Validation Loss: 0.1200, Validation MAE: 52.2260, Validation RMSE: 278.5433, Time: 0.15 seconds
Best model saved with validation MAE: 52.2260
----------------------------------------------------------------------------------------------------
Epoch 7/50


Training: 100%|██████████████████████████████████████████████████████████████████| 561/561 [00:03<00:00, 147.99batch/s]


Training Loss: 0.0074, Train MAE: 42.8565, Train RMSE: 69.1845, Time: 3.83 seconds


Validation: 100%|██████████████████████████████████████████████████████████████████| 72/72 [00:00<00:00, 543.31batch/s]


Validation Loss: 0.1200, Validation MAE: 51.7789, Validation RMSE: 279.0835, Time: 0.14 seconds
Best model saved with validation MAE: 51.7789
----------------------------------------------------------------------------------------------------
Epoch 8/50


Training: 100%|██████████████████████████████████████████████████████████████████| 561/561 [00:03<00:00, 166.17batch/s]


Training Loss: 0.0072, Train MAE: 42.1274, Train RMSE: 68.3610, Time: 3.41 seconds


Validation: 100%|██████████████████████████████████████████████████████████████████| 72/72 [00:00<00:00, 547.42batch/s]


Validation Loss: 0.1187, Validation MAE: 52.1607, Validation RMSE: 277.6465, Time: 0.14 seconds
----------------------------------------------------------------------------------------------------
Epoch 9/50


Training: 100%|██████████████████████████████████████████████████████████████████| 561/561 [00:03<00:00, 179.28batch/s]


Training Loss: 0.0072, Train MAE: 42.3126, Train RMSE: 68.2864, Time: 3.16 seconds


Validation: 100%|██████████████████████████████████████████████████████████████████| 72/72 [00:00<00:00, 527.39batch/s]


Validation Loss: 0.1189, Validation MAE: 52.7686, Validation RMSE: 277.8707, Time: 0.14 seconds
----------------------------------------------------------------------------------------------------
Epoch 10/50


Training: 100%|██████████████████████████████████████████████████████████████████| 561/561 [00:04<00:00, 134.51batch/s]


Training Loss: 0.0070, Train MAE: 41.3743, Train RMSE: 67.1162, Time: 4.20 seconds


Validation: 100%|██████████████████████████████████████████████████████████████████| 72/72 [00:00<00:00, 491.40batch/s]


Validation Loss: 0.1183, Validation MAE: 51.3325, Validation RMSE: 277.2315, Time: 0.15 seconds
Best model saved with validation MAE: 51.3325
----------------------------------------------------------------------------------------------------
Epoch 11/50


Training: 100%|██████████████████████████████████████████████████████████████████| 561/561 [00:04<00:00, 127.77batch/s]


Training Loss: 0.0069, Train MAE: 41.1639, Train RMSE: 66.8182, Time: 4.42 seconds


Validation: 100%|██████████████████████████████████████████████████████████████████| 72/72 [00:00<00:00, 523.43batch/s]


Validation Loss: 0.1188, Validation MAE: 51.1509, Validation RMSE: 277.6438, Time: 0.15 seconds
Best model saved with validation MAE: 51.1509
----------------------------------------------------------------------------------------------------
Epoch 12/50


Training: 100%|██████████████████████████████████████████████████████████████████| 561/561 [00:03<00:00, 174.69batch/s]


Training Loss: 0.0067, Train MAE: 40.8414, Train RMSE: 65.8548, Time: 3.24 seconds


Validation: 100%|██████████████████████████████████████████████████████████████████| 72/72 [00:00<00:00, 557.99batch/s]


Validation Loss: 0.1182, Validation MAE: 53.0851, Validation RMSE: 277.0104, Time: 0.14 seconds
----------------------------------------------------------------------------------------------------
Epoch 13/50


Training: 100%|██████████████████████████████████████████████████████████████████| 561/561 [00:03<00:00, 149.52batch/s]


Training Loss: 0.0066, Train MAE: 40.5346, Train RMSE: 65.3054, Time: 3.78 seconds


Validation: 100%|██████████████████████████████████████████████████████████████████| 72/72 [00:00<00:00, 501.69batch/s]


Validation Loss: 0.1180, Validation MAE: 51.1446, Validation RMSE: 276.6667, Time: 0.15 seconds
Best model saved with validation MAE: 51.1446
----------------------------------------------------------------------------------------------------
Epoch 14/50


Training: 100%|██████████████████████████████████████████████████████████████████| 561/561 [00:04<00:00, 139.20batch/s]


Training Loss: 0.0064, Train MAE: 40.1439, Train RMSE: 64.3792, Time: 4.06 seconds


Validation: 100%|██████████████████████████████████████████████████████████████████| 72/72 [00:00<00:00, 535.26batch/s]


Validation Loss: 0.1184, Validation MAE: 50.9377, Validation RMSE: 277.1622, Time: 0.14 seconds
Best model saved with validation MAE: 50.9377
----------------------------------------------------------------------------------------------------
Epoch 15/50


Training: 100%|██████████████████████████████████████████████████████████████████| 561/561 [00:03<00:00, 171.04batch/s]


Training Loss: 0.0064, Train MAE: 39.9626, Train RMSE: 64.3559, Time: 3.31 seconds


Validation: 100%|██████████████████████████████████████████████████████████████████| 72/72 [00:00<00:00, 545.30batch/s]


Validation Loss: 0.1186, Validation MAE: 50.2738, Validation RMSE: 277.4606, Time: 0.14 seconds
Best model saved with validation MAE: 50.2738
----------------------------------------------------------------------------------------------------
Epoch 16/50


Training: 100%|██████████████████████████████████████████████████████████████████| 561/561 [00:03<00:00, 181.24batch/s]


Training Loss: 0.0065, Train MAE: 40.2525, Train RMSE: 64.5906, Time: 3.13 seconds


Validation: 100%|██████████████████████████████████████████████████████████████████| 72/72 [00:00<00:00, 519.79batch/s]


Validation Loss: 0.1189, Validation MAE: 51.6383, Validation RMSE: 277.7991, Time: 0.15 seconds
----------------------------------------------------------------------------------------------------
Epoch 17/50


Training: 100%|██████████████████████████████████████████████████████████████████| 561/561 [00:04<00:00, 120.07batch/s]


Training Loss: 0.0062, Train MAE: 39.6263, Train RMSE: 63.4793, Time: 4.71 seconds


Validation: 100%|██████████████████████████████████████████████████████████████████| 72/72 [00:00<00:00, 519.10batch/s]


Validation Loss: 0.1184, Validation MAE: 50.8234, Validation RMSE: 277.2400, Time: 0.15 seconds
----------------------------------------------------------------------------------------------------
Epoch 18/50


Training: 100%|██████████████████████████████████████████████████████████████████| 561/561 [00:04<00:00, 137.60batch/s]


Training Loss: 0.0060, Train MAE: 38.9443, Train RMSE: 62.4231, Time: 4.11 seconds


Validation: 100%|██████████████████████████████████████████████████████████████████| 72/72 [00:00<00:00, 531.27batch/s]


Validation Loss: 0.1179, Validation MAE: 50.7314, Validation RMSE: 276.7747, Time: 0.14 seconds
----------------------------------------------------------------------------------------------------
Epoch 19/50


Training: 100%|██████████████████████████████████████████████████████████████████| 561/561 [00:03<00:00, 175.76batch/s]


Training Loss: 0.0061, Train MAE: 39.3935, Train RMSE: 62.6768, Time: 3.22 seconds


Validation: 100%|██████████████████████████████████████████████████████████████████| 72/72 [00:00<00:00, 527.43batch/s]


Validation Loss: 0.1188, Validation MAE: 52.0653, Validation RMSE: 277.7837, Time: 0.14 seconds
----------------------------------------------------------------------------------------------------
Epoch 20/50


Training: 100%|██████████████████████████████████████████████████████████████████| 561/561 [00:03<00:00, 153.34batch/s]


Training Loss: 0.0061, Train MAE: 39.3546, Train RMSE: 62.7950, Time: 3.69 seconds


Validation: 100%|██████████████████████████████████████████████████████████████████| 72/72 [00:00<00:00, 557.62batch/s]


Validation Loss: 0.1178, Validation MAE: 50.6947, Validation RMSE: 276.5936, Time: 0.14 seconds
----------------------------------------------------------------------------------------------------
Epoch 21/50


Training: 100%|██████████████████████████████████████████████████████████████████| 561/561 [00:04<00:00, 137.19batch/s]


Training Loss: 0.0059, Train MAE: 38.8256, Train RMSE: 61.5774, Time: 4.12 seconds


Validation: 100%|██████████████████████████████████████████████████████████████████| 72/72 [00:00<00:00, 551.69batch/s]


Validation Loss: 0.1175, Validation MAE: 51.3237, Validation RMSE: 276.2944, Time: 0.14 seconds
----------------------------------------------------------------------------------------------------
Epoch 22/50


Training: 100%|██████████████████████████████████████████████████████████████████| 561/561 [00:03<00:00, 165.98batch/s]


Training Loss: 0.0059, Train MAE: 38.8025, Train RMSE: 61.8768, Time: 3.41 seconds


Validation: 100%|██████████████████████████████████████████████████████████████████| 72/72 [00:00<00:00, 539.24batch/s]


Validation Loss: 0.1179, Validation MAE: 53.6375, Validation RMSE: 276.6250, Time: 0.14 seconds
----------------------------------------------------------------------------------------------------
Epoch 23/50


Training: 100%|██████████████████████████████████████████████████████████████████| 561/561 [00:03<00:00, 185.70batch/s]


Training Loss: 0.0059, Train MAE: 39.0472, Train RMSE: 61.9343, Time: 3.05 seconds


Validation: 100%|██████████████████████████████████████████████████████████████████| 72/72 [00:00<00:00, 508.75batch/s]


Validation Loss: 0.1183, Validation MAE: 52.8624, Validation RMSE: 277.1772, Time: 0.15 seconds
----------------------------------------------------------------------------------------------------
Epoch 24/50


Training: 100%|██████████████████████████████████████████████████████████████████| 561/561 [00:03<00:00, 147.75batch/s]


Training Loss: 0.0057, Train MAE: 38.5273, Train RMSE: 60.6630, Time: 3.83 seconds


Validation: 100%|██████████████████████████████████████████████████████████████████| 72/72 [00:00<00:00, 531.30batch/s]


Validation Loss: 0.1176, Validation MAE: 51.7838, Validation RMSE: 276.4161, Time: 0.14 seconds
----------------------------------------------------------------------------------------------------
Epoch 25/50


Training: 100%|██████████████████████████████████████████████████████████████████| 561/561 [00:03<00:00, 147.79batch/s]


Training Loss: 0.0056, Train MAE: 38.0998, Train RMSE: 60.0866, Time: 3.83 seconds


Validation: 100%|██████████████████████████████████████████████████████████████████| 72/72 [00:00<00:00, 547.44batch/s]


Validation Loss: 0.1171, Validation MAE: 52.3527, Validation RMSE: 275.7460, Time: 0.14 seconds
----------------------------------------------------------------------------------------------------
Epoch 26/50


Training: 100%|██████████████████████████████████████████████████████████████████| 561/561 [00:03<00:00, 176.65batch/s]


Training Loss: 0.0057, Train MAE: 38.5418, Train RMSE: 60.4929, Time: 3.21 seconds


Validation: 100%|██████████████████████████████████████████████████████████████████| 72/72 [00:00<00:00, 555.90batch/s]


Validation Loss: 0.1176, Validation MAE: 51.3783, Validation RMSE: 276.3436, Time: 0.14 seconds
----------------------------------------------------------------------------------------------------
Epoch 27/50


Training: 100%|██████████████████████████████████████████████████████████████████| 561/561 [00:03<00:00, 172.19batch/s]


Training Loss: 0.0056, Train MAE: 38.0819, Train RMSE: 60.0211, Time: 3.29 seconds


Validation: 100%|██████████████████████████████████████████████████████████████████| 72/72 [00:00<00:00, 504.20batch/s]


Validation Loss: 0.1192, Validation MAE: 51.3862, Validation RMSE: 278.2957, Time: 0.15 seconds
----------------------------------------------------------------------------------------------------
Epoch 28/50


Training: 100%|██████████████████████████████████████████████████████████████████| 561/561 [00:03<00:00, 163.60batch/s]


Training Loss: 0.0054, Train MAE: 37.2941, Train RMSE: 58.8140, Time: 3.46 seconds


Validation: 100%|██████████████████████████████████████████████████████████████████| 72/72 [00:00<00:00, 529.29batch/s]


Validation Loss: 0.1182, Validation MAE: 51.4588, Validation RMSE: 277.0825, Time: 0.14 seconds
----------------------------------------------------------------------------------------------------
Epoch 29/50


Training: 100%|██████████████████████████████████████████████████████████████████| 561/561 [00:03<00:00, 169.81batch/s]


Training Loss: 0.0056, Train MAE: 38.0063, Train RMSE: 60.3610, Time: 3.34 seconds


Validation: 100%|██████████████████████████████████████████████████████████████████| 72/72 [00:00<00:00, 533.19batch/s]


Validation Loss: 0.1180, Validation MAE: 52.0319, Validation RMSE: 276.8269, Time: 0.14 seconds
----------------------------------------------------------------------------------------------------
Epoch 30/50


Training: 100%|██████████████████████████████████████████████████████████████████| 561/561 [00:03<00:00, 179.91batch/s]


Training Loss: 0.0055, Train MAE: 37.8996, Train RMSE: 59.6829, Time: 3.15 seconds


Validation: 100%|██████████████████████████████████████████████████████████████████| 72/72 [00:00<00:00, 555.91batch/s]


Validation Loss: 0.1173, Validation MAE: 51.1603, Validation RMSE: 276.0247, Time: 0.14 seconds
----------------------------------------------------------------------------------------------------
Epoch 31/50


Training: 100%|██████████████████████████████████████████████████████████████████| 561/561 [00:03<00:00, 168.48batch/s]


Training Loss: 0.0053, Train MAE: 37.3026, Train RMSE: 58.5227, Time: 3.36 seconds


Validation: 100%|██████████████████████████████████████████████████████████████████| 72/72 [00:00<00:00, 521.82batch/s]


Validation Loss: 0.1174, Validation MAE: 51.5893, Validation RMSE: 276.1093, Time: 0.14 seconds
----------------------------------------------------------------------------------------------------
Epoch 32/50


Training: 100%|██████████████████████████████████████████████████████████████████| 561/561 [00:04<00:00, 136.27batch/s]


Training Loss: 0.0054, Train MAE: 37.5227, Train RMSE: 58.7955, Time: 4.15 seconds


Validation: 100%|██████████████████████████████████████████████████████████████████| 72/72 [00:00<00:00, 535.25batch/s]


Validation Loss: 0.1185, Validation MAE: 52.2091, Validation RMSE: 277.2760, Time: 0.14 seconds
----------------------------------------------------------------------------------------------------
Epoch 33/50


Training: 100%|██████████████████████████████████████████████████████████████████| 561/561 [00:03<00:00, 159.64batch/s]


Training Loss: 0.0053, Train MAE: 37.2903, Train RMSE: 58.4489, Time: 3.55 seconds


Validation: 100%|██████████████████████████████████████████████████████████████████| 72/72 [00:00<00:00, 535.26batch/s]


Validation Loss: 0.1179, Validation MAE: 53.3705, Validation RMSE: 276.5889, Time: 0.14 seconds
----------------------------------------------------------------------------------------------------
Epoch 34/50


Training: 100%|██████████████████████████████████████████████████████████████████| 561/561 [00:03<00:00, 176.80batch/s]


Training Loss: 0.0053, Train MAE: 37.0449, Train RMSE: 58.2713, Time: 3.21 seconds


Validation: 100%|██████████████████████████████████████████████████████████████████| 72/72 [00:00<00:00, 580.51batch/s]


Validation Loss: 0.1185, Validation MAE: 55.4305, Validation RMSE: 277.4016, Time: 0.13 seconds
----------------------------------------------------------------------------------------------------
Epoch 35/50


Training: 100%|██████████████████████████████████████████████████████████████████| 561/561 [00:02<00:00, 191.45batch/s]


Training Loss: 0.0052, Train MAE: 36.9528, Train RMSE: 57.8889, Time: 2.96 seconds


Validation: 100%|██████████████████████████████████████████████████████████████████| 72/72 [00:00<00:00, 533.20batch/s]


Validation Loss: 0.1176, Validation MAE: 52.7476, Validation RMSE: 276.4343, Time: 0.14 seconds
----------------------------------------------------------------------------------------------------
Epoch 36/50


Training: 100%|██████████████████████████████████████████████████████████████████| 561/561 [00:03<00:00, 185.06batch/s]


Training Loss: 0.0051, Train MAE: 36.8316, Train RMSE: 57.6105, Time: 3.06 seconds


Validation: 100%|██████████████████████████████████████████████████████████████████| 72/72 [00:00<00:00, 529.25batch/s]


Validation Loss: 0.1182, Validation MAE: 50.4375, Validation RMSE: 277.0663, Time: 0.14 seconds
----------------------------------------------------------------------------------------------------
Epoch 37/50


Training: 100%|██████████████████████████████████████████████████████████████████| 561/561 [00:02<00:00, 189.71batch/s]


Training Loss: 0.0051, Train MAE: 36.7943, Train RMSE: 57.4993, Time: 2.99 seconds


Validation: 100%|██████████████████████████████████████████████████████████████████| 72/72 [00:00<00:00, 501.68batch/s]


Validation Loss: 0.1181, Validation MAE: 53.0529, Validation RMSE: 276.9668, Time: 0.15 seconds
----------------------------------------------------------------------------------------------------
Epoch 38/50


Training: 100%|██████████████████████████████████████████████████████████████████| 561/561 [00:02<00:00, 198.62batch/s]


Training Loss: 0.0050, Train MAE: 36.5258, Train RMSE: 56.8315, Time: 2.86 seconds


Validation: 100%|██████████████████████████████████████████████████████████████████| 72/72 [00:00<00:00, 531.31batch/s]


Validation Loss: 0.1179, Validation MAE: 50.2266, Validation RMSE: 276.7844, Time: 0.14 seconds
Best model saved with validation MAE: 50.2266
----------------------------------------------------------------------------------------------------
Epoch 39/50


Training: 100%|██████████████████████████████████████████████████████████████████| 561/561 [00:03<00:00, 164.96batch/s]


Training Loss: 0.0051, Train MAE: 36.8780, Train RMSE: 57.5428, Time: 3.44 seconds


Validation: 100%|██████████████████████████████████████████████████████████████████| 72/72 [00:00<00:00, 512.42batch/s]


Validation Loss: 0.1185, Validation MAE: 50.8972, Validation RMSE: 277.4789, Time: 0.15 seconds
----------------------------------------------------------------------------------------------------
Epoch 40/50


Training: 100%|██████████████████████████████████████████████████████████████████| 561/561 [00:03<00:00, 160.71batch/s]


Training Loss: 0.0049, Train MAE: 36.1142, Train RMSE: 56.0536, Time: 3.52 seconds


Validation: 100%|██████████████████████████████████████████████████████████████████| 72/72 [00:00<00:00, 494.64batch/s]


Validation Loss: 0.1181, Validation MAE: 51.4134, Validation RMSE: 276.7336, Time: 0.15 seconds
----------------------------------------------------------------------------------------------------
Epoch 41/50


Training: 100%|██████████████████████████████████████████████████████████████████| 561/561 [00:03<00:00, 184.08batch/s]


Training Loss: 0.0051, Train MAE: 36.8139, Train RMSE: 57.4192, Time: 3.08 seconds


Validation: 100%|██████████████████████████████████████████████████████████████████| 72/72 [00:00<00:00, 543.34batch/s]


Validation Loss: 0.1181, Validation MAE: 51.3090, Validation RMSE: 276.9925, Time: 0.14 seconds
----------------------------------------------------------------------------------------------------
Epoch 42/50


Training: 100%|██████████████████████████████████████████████████████████████████| 561/561 [00:03<00:00, 177.32batch/s]


Training Loss: 0.0051, Train MAE: 36.9016, Train RMSE: 57.3595, Time: 3.20 seconds


Validation: 100%|██████████████████████████████████████████████████████████████████| 72/72 [00:00<00:00, 483.29batch/s]


Validation Loss: 0.1181, Validation MAE: 51.1930, Validation RMSE: 276.9369, Time: 0.15 seconds
----------------------------------------------------------------------------------------------------
Epoch 43/50


Training: 100%|██████████████████████████████████████████████████████████████████| 561/561 [00:03<00:00, 140.70batch/s]


Training Loss: 0.0051, Train MAE: 36.9107, Train RMSE: 57.5276, Time: 4.02 seconds


Validation: 100%|██████████████████████████████████████████████████████████████████| 72/72 [00:00<00:00, 527.43batch/s]


Validation Loss: 0.1180, Validation MAE: 50.5864, Validation RMSE: 276.7210, Time: 0.14 seconds
----------------------------------------------------------------------------------------------------
Epoch 44/50


Training: 100%|██████████████████████████████████████████████████████████████████| 561/561 [00:03<00:00, 151.40batch/s]


Training Loss: 0.0050, Train MAE: 36.3493, Train RMSE: 56.5331, Time: 3.74 seconds


Validation: 100%|██████████████████████████████████████████████████████████████████| 72/72 [00:00<00:00, 543.35batch/s]


Validation Loss: 0.1182, Validation MAE: 51.6552, Validation RMSE: 277.0395, Time: 0.14 seconds
----------------------------------------------------------------------------------------------------
Epoch 45/50


Training: 100%|██████████████████████████████████████████████████████████████████| 561/561 [00:03<00:00, 179.40batch/s]


Training Loss: 0.0048, Train MAE: 35.6793, Train RMSE: 55.5809, Time: 3.16 seconds


Validation: 100%|██████████████████████████████████████████████████████████████████| 72/72 [00:00<00:00, 528.80batch/s]


Validation Loss: 0.1174, Validation MAE: 50.6805, Validation RMSE: 276.1785, Time: 0.14 seconds
----------------------------------------------------------------------------------------------------
Epoch 46/50


Training: 100%|██████████████████████████████████████████████████████████████████| 561/561 [00:03<00:00, 156.85batch/s]


Training Loss: 0.0050, Train MAE: 36.1223, Train RMSE: 56.6628, Time: 3.61 seconds


Validation: 100%|██████████████████████████████████████████████████████████████████| 72/72 [00:00<00:00, 489.70batch/s]


Validation Loss: 0.1182, Validation MAE: 51.3571, Validation RMSE: 277.0837, Time: 0.15 seconds
----------------------------------------------------------------------------------------------------
Epoch 47/50


Training: 100%|██████████████████████████████████████████████████████████████████| 561/561 [00:04<00:00, 125.08batch/s]


Training Loss: 0.0049, Train MAE: 36.0795, Train RMSE: 55.9723, Time: 4.52 seconds


Validation: 100%|██████████████████████████████████████████████████████████████████| 72/72 [00:00<00:00, 506.71batch/s]


Validation Loss: 0.1191, Validation MAE: 50.6475, Validation RMSE: 276.8963, Time: 0.15 seconds
----------------------------------------------------------------------------------------------------
Epoch 48/50


Training: 100%|██████████████████████████████████████████████████████████████████| 561/561 [00:03<00:00, 163.29batch/s]


Training Loss: 0.0049, Train MAE: 36.1648, Train RMSE: 56.1244, Time: 3.47 seconds


Validation: 100%|██████████████████████████████████████████████████████████████████| 72/72 [00:00<00:00, 467.41batch/s]


Validation Loss: 0.1178, Validation MAE: 51.7616, Validation RMSE: 276.6138, Time: 0.16 seconds
----------------------------------------------------------------------------------------------------
Epoch 49/50


Training: 100%|██████████████████████████████████████████████████████████████████| 561/561 [00:03<00:00, 167.54batch/s]


Training Loss: 0.0048, Train MAE: 35.7612, Train RMSE: 55.7362, Time: 3.38 seconds


Validation: 100%|██████████████████████████████████████████████████████████████████| 72/72 [00:00<00:00, 506.90batch/s]


Validation Loss: 0.1184, Validation MAE: 51.6026, Validation RMSE: 277.2987, Time: 0.15 seconds
----------------------------------------------------------------------------------------------------
Epoch 50/50


Training: 100%|██████████████████████████████████████████████████████████████████| 561/561 [00:03<00:00, 149.69batch/s]


Training Loss: 0.0050, Train MAE: 36.4263, Train RMSE: 56.5243, Time: 3.78 seconds


Validation: 100%|██████████████████████████████████████████████████████████████████| 72/72 [00:00<00:00, 516.08batch/s]

Validation Loss: 0.1180, Validation MAE: 51.8092, Validation RMSE: 276.8065, Time: 0.15 seconds
----------------------------------------------------------------------------------------------------





In [25]:
model1 = MLPRegressionModel(input_dim=input_dim)
model1.to(device)

model1.load_state_dict(torch.load(model_save_path))
model1.to(device)
all_labels = []
all_preds = []

start_time = time.time()

with torch.no_grad():
    for batch in tqdm(test_loader, desc="Testing", unit="batch"):

        inputs = batch['combined_features'].to(device)
        norm_target = batch['norm_target'].to(device)
        targets = batch['target'].to(device)

        outputs = model(inputs)

        loss = criterion(outputs, norm_target.unsqueeze(1))
        
        outputs = denormalize(outputs)
        all_preds.extend(outputs.cpu().tolist())
        all_labels.extend(targets.cpu().tolist())
        
all_labels = np.array(all_labels).flatten()
all_preds = np.array(all_preds).flatten()
        
test_mae = mean_absolute_error(all_labels, all_preds)
test_rmse = rmse(all_labels, all_preds)

end_time = time.time()  
epoch_duration = end_time - start_time

print(f'Testing MAE: {test_mae:.4f}, Testing RMSE: {test_rmse:.4f}, Time: {epoch_duration:.2f} seconds')

Testing: 100%|█████████████████████████████████████████████████████████████████████| 72/72 [00:00<00:00, 523.33batch/s]

Testing MAE: 46.3191, Testing RMSE: 137.0980, Time: 0.14 seconds



