In [15]:
import torch
import torch.nn as nn
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.model_selection import LeaveOneOut
from transformers import DistilBertModel, DistilBertTokenizer
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
import pandas as pd
from tqdm import tqdm
from sklearn.model_selection import KFold


data = pd.read_csv('mohler_dataset_edited.csv')

# Load DistilBERT tokenizer and model
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')

# Define device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Tokenize and prepare data
def tokenize_data(data, max_seq_length=128):
    input_ids = []
    attention_masks = []
    scores_list = []

    for i in range(len(data)):
        answer1_tokens = tokenizer.encode(data['desired_answer'].iloc[i], add_special_tokens=True)
        answer2_tokens = tokenizer.encode(data['student_answer'].iloc[i], add_special_tokens=True)

        # Truncate or pad the tokens to the specified max sequence length
        answer1_tokens = answer1_tokens[:max_seq_length-2]  # -2 for [CLS] and [SEP] tokens
        answer2_tokens = answer2_tokens[:max_seq_length-2]  # -2 for [CLS] and [SEP] tokens

        # Add [CLS] and [SEP] tokens
        answer1_tokens = [101] + answer1_tokens + [102]
        answer2_tokens = [101] + answer2_tokens + [102]

        # Padding
        padding_length = max_seq_length - len(answer1_tokens)
        padded_answer1 = answer1_tokens + [0] * padding_length
        padding_length = max_seq_length - len(answer2_tokens)
        padded_answer2 = answer2_tokens + [0] * padding_length

        input_ids.append(padded_answer1)
        attention_masks.append([1] * len(padded_answer1))

        input_ids.append(padded_answer2)
        attention_masks.append([1] * len(padded_answer2))

        # Append the score twice for both answer1 and answer2
        scores_list.append(data['score_avg'].iloc[i])
        scores_list.append(data['score_avg'].iloc[i])

    return torch.tensor(input_ids), torch.tensor(attention_masks), torch.tensor(scores_list, dtype=torch.float32)

In [16]:
class DistilBertRegression(nn.Module):
    def __init__(self):
        super(DistilBertRegression, self).__init__()
        self.distilbert = DistilBertModel.from_pretrained('distilbert-base-uncased')
        self.linear = nn.Linear(self.distilbert.config.hidden_size, 1)

    def forward(self, input_ids, attention_mask):
        outputs = self.distilbert(input_ids=input_ids, attention_mask=attention_mask)
        pooled_output = outputs[0][:, 0, :]  
        return self.linear(pooled_output)

In [18]:
learning_rate = 2e-5
batch_size = 16
epochs = 12
weight_decay = 0.01  

kf = KFold(n_splits=10, shuffle=True)

models = {
    'DistilBERT': DistilBertRegression,
    # Add RoBERTa and BERT models here
}

evaluation_results = []

shuffled_data = data.sample(frac=1).reset_index(drop=True)

for model_name, ModelClass in models.items():  
    print(f"Evaluating using {model_name} model...")

    total_rmse = 0
    total_mae = 0
    total_nrmse = 0

    fold_rmse = []
    fold_mae = []
    fold_nrmse = []

    for fold_idx, (train_index, val_index) in enumerate(kf.split(shuffled_data), 1):
        train_data = shuffled_data.iloc[train_index]
        val_data = shuffled_data.iloc[val_index]

        train_input_ids, train_attention_masks, train_scores = tokenize_data(train_data)
        val_input_ids, val_attention_masks, val_scores = tokenize_data(val_data)

        train_dataset = TensorDataset(train_input_ids, train_attention_masks, train_scores)
        train_sampler = RandomSampler(train_dataset)
        train_dataloader = DataLoader(train_dataset, sampler=train_sampler, batch_size=16)

        val_dataset = TensorDataset(val_input_ids, val_attention_masks, val_scores)
        val_sampler = SequentialSampler(val_dataset)
        val_dataloader = DataLoader(val_dataset, sampler=val_sampler, batch_size=16)

        model = ModelClass().to(device)
        criterion = nn.MSELoss()
        optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)  

        for epoch in tqdm(range(epochs), desc=f"Fold {fold_idx}/{kf.get_n_splits()}"):  
            model.train()
            total_loss = 0

            for batch in train_dataloader:
                batch = tuple(t.to(device) for t in batch)
                inputs, masks, labels = batch

                optimizer.zero_grad()
                outputs = model(inputs, masks)
                loss = criterion(outputs.squeeze(-1), labels)
                loss.backward()
                optimizer.step()

                total_loss += loss.item()

            avg_train_loss = total_loss / len(train_dataloader)

            model.eval()
            all_predictions = []
            all_labels = []
            with torch.no_grad():
                for val_batch in val_dataloader:
                    val_batch = tuple(t.to(device) for t in val_batch)
                    val_inputs, val_masks, val_labels = val_batch
                    val_outputs = model(val_inputs, val_masks)
                    val_predictions = val_outputs.squeeze(-1).detach().cpu().numpy()
                    all_predictions.extend(val_predictions)
                    all_labels.extend(val_labels.detach().cpu().numpy())

                val_rmse = np.sqrt(mean_squared_error(all_labels, all_predictions))
                val_mae = mean_absolute_error(all_labels, all_predictions)
                val_nrmse = val_rmse / (max(all_labels) - min(all_labels))  

                print(f"Fold {fold_idx}/{kf.get_n_splits()}: Epoch {epoch + 1}/{epochs}, RMSE: {val_rmse:.4f}, MAE: {val_mae:.4f}, NRMSE: {val_nrmse:.4f}")

            fold_rmse.append(val_rmse)
            fold_mae.append(val_mae)
            fold_nrmse.append(val_nrmse)

    avg_rmse = np.mean(fold_rmse)
    avg_mae = np.mean(fold_mae)
    avg_nrmse = np.mean(fold_nrmse)

    model_eval_results = {
        f'{model_name}_RMSE': avg_rmse,
        f'{model_name}_MAE': avg_mae,
        f'{model_name}_NRMSE': avg_nrmse
    }
    evaluation_results.append(model_eval_results)

df = pd.DataFrame(evaluation_results)
print(df)


Evaluating using DistilBERT model...


Fold 1/10:   8%|▊         | 1/12 [00:53<09:43, 53.02s/it]

Fold 1/10: Epoch 1/12, RMSE: 1.2094, MAE: 1.0254, NRMSE: 0.2419


Fold 1/10:  17%|█▋        | 2/12 [01:44<08:40, 52.05s/it]

Fold 1/10: Epoch 2/12, RMSE: 1.1849, MAE: 0.9639, NRMSE: 0.2370


Fold 1/10:  25%|██▌       | 3/12 [02:36<07:47, 51.98s/it]

Fold 1/10: Epoch 3/12, RMSE: 1.2058, MAE: 0.9122, NRMSE: 0.2412


Fold 1/10:  33%|███▎      | 4/12 [03:28<06:55, 51.92s/it]

Fold 1/10: Epoch 4/12, RMSE: 1.1844, MAE: 0.9511, NRMSE: 0.2369


Fold 1/10:  42%|████▏     | 5/12 [04:20<06:03, 51.95s/it]

Fold 1/10: Epoch 5/12, RMSE: 1.1869, MAE: 0.9315, NRMSE: 0.2374


Fold 1/10:  50%|█████     | 6/12 [05:11<05:11, 51.87s/it]

Fold 1/10: Epoch 6/12, RMSE: 1.1815, MAE: 0.9423, NRMSE: 0.2363


Fold 1/10:  58%|█████▊    | 7/12 [06:03<04:18, 51.79s/it]

Fold 1/10: Epoch 7/12, RMSE: 1.1839, MAE: 0.9340, NRMSE: 0.2368


Fold 1/10:  67%|██████▋   | 8/12 [06:55<03:27, 51.81s/it]

Fold 1/10: Epoch 8/12, RMSE: 1.2071, MAE: 0.8972, NRMSE: 0.2414


Fold 1/10:  75%|███████▌  | 9/12 [07:47<02:35, 51.82s/it]

Fold 1/10: Epoch 9/12, RMSE: 1.1819, MAE: 0.8607, NRMSE: 0.2364


Fold 1/10:  83%|████████▎ | 10/12 [08:39<01:43, 51.86s/it]

Fold 1/10: Epoch 10/12, RMSE: 1.1430, MAE: 0.9424, NRMSE: 0.2286


Fold 1/10:  92%|█████████▏| 11/12 [09:31<00:51, 51.90s/it]

Fold 1/10: Epoch 11/12, RMSE: 1.1411, MAE: 0.9098, NRMSE: 0.2282


Fold 1/10: 100%|██████████| 12/12 [10:23<00:00, 51.92s/it]

Fold 1/10: Epoch 12/12, RMSE: 1.1365, MAE: 0.8609, NRMSE: 0.2273



Fold 2/10:   8%|▊         | 1/12 [00:52<09:32, 52.01s/it]

Fold 2/10: Epoch 1/12, RMSE: 1.1291, MAE: 0.8767, NRMSE: 0.2258


Fold 2/10:  17%|█▋        | 2/12 [01:43<08:38, 51.87s/it]

Fold 2/10: Epoch 2/12, RMSE: 1.1266, MAE: 0.8862, NRMSE: 0.2253


Fold 2/10:  25%|██▌       | 3/12 [02:35<07:46, 51.81s/it]

Fold 2/10: Epoch 3/12, RMSE: 1.1338, MAE: 0.8652, NRMSE: 0.2268


Fold 2/10:  33%|███▎      | 4/12 [03:27<06:54, 51.83s/it]

Fold 2/10: Epoch 4/12, RMSE: 1.1290, MAE: 0.8779, NRMSE: 0.2258


Fold 2/10:  42%|████▏     | 5/12 [04:19<06:03, 51.88s/it]

Fold 2/10: Epoch 5/12, RMSE: 1.1458, MAE: 0.8467, NRMSE: 0.2292


Fold 2/10:  50%|█████     | 6/12 [05:11<05:11, 51.90s/it]

Fold 2/10: Epoch 6/12, RMSE: 1.1266, MAE: 0.8846, NRMSE: 0.2253


Fold 2/10:  58%|█████▊    | 7/12 [06:03<04:19, 51.91s/it]

Fold 2/10: Epoch 7/12, RMSE: 1.1253, MAE: 0.8936, NRMSE: 0.2251


Fold 2/10:  67%|██████▋   | 8/12 [06:55<03:27, 51.91s/it]

Fold 2/10: Epoch 8/12, RMSE: 1.1274, MAE: 0.9223, NRMSE: 0.2255


Fold 2/10:  75%|███████▌  | 9/12 [07:47<02:35, 51.90s/it]

Fold 2/10: Epoch 9/12, RMSE: 1.1258, MAE: 0.8914, NRMSE: 0.2252


Fold 2/10:  83%|████████▎ | 10/12 [08:38<01:43, 51.89s/it]

Fold 2/10: Epoch 10/12, RMSE: 1.1291, MAE: 0.8770, NRMSE: 0.2258


Fold 2/10:  92%|█████████▏| 11/12 [09:30<00:51, 51.88s/it]

Fold 2/10: Epoch 11/12, RMSE: 1.1293, MAE: 0.8751, NRMSE: 0.2259


Fold 2/10: 100%|██████████| 12/12 [10:22<00:00, 51.88s/it]

Fold 2/10: Epoch 12/12, RMSE: 1.1518, MAE: 0.8398, NRMSE: 0.2304



Fold 3/10:   8%|▊         | 1/12 [00:52<09:32, 52.00s/it]

Fold 3/10: Epoch 1/12, RMSE: 0.9768, MAE: 0.7843, NRMSE: 0.2442


Fold 3/10:  17%|█▋        | 2/12 [01:43<08:38, 51.81s/it]

Fold 3/10: Epoch 2/12, RMSE: 0.9830, MAE: 0.8384, NRMSE: 0.2458


Fold 3/10:  25%|██▌       | 3/12 [02:35<07:47, 51.90s/it]

Fold 3/10: Epoch 3/12, RMSE: 0.9953, MAE: 0.8706, NRMSE: 0.2488


Fold 3/10:  33%|███▎      | 4/12 [03:27<06:55, 51.99s/it]

Fold 3/10: Epoch 4/12, RMSE: 0.9423, MAE: 0.7931, NRMSE: 0.2356


Fold 3/10:  42%|████▏     | 5/12 [04:19<06:04, 52.01s/it]

Fold 3/10: Epoch 5/12, RMSE: 0.9647, MAE: 0.8339, NRMSE: 0.2412


Fold 3/10:  50%|█████     | 6/12 [05:11<05:12, 52.02s/it]

Fold 3/10: Epoch 6/12, RMSE: 0.8721, MAE: 0.6853, NRMSE: 0.2180


Fold 3/10:  58%|█████▊    | 7/12 [06:03<04:20, 52.00s/it]

Fold 3/10: Epoch 7/12, RMSE: 0.8603, MAE: 0.6601, NRMSE: 0.2151


Fold 3/10:  67%|██████▋   | 8/12 [06:55<03:27, 52.00s/it]

Fold 3/10: Epoch 8/12, RMSE: 0.8531, MAE: 0.6820, NRMSE: 0.2133


Fold 3/10:  75%|███████▌  | 9/12 [07:47<02:35, 51.98s/it]

Fold 3/10: Epoch 9/12, RMSE: 0.8653, MAE: 0.6918, NRMSE: 0.2163


Fold 3/10:  83%|████████▎ | 10/12 [08:39<01:43, 51.97s/it]

Fold 3/10: Epoch 10/12, RMSE: 0.8271, MAE: 0.6217, NRMSE: 0.2068


Fold 3/10:  92%|█████████▏| 11/12 [09:31<00:51, 51.96s/it]

Fold 3/10: Epoch 11/12, RMSE: 0.9528, MAE: 0.8247, NRMSE: 0.2382


Fold 3/10: 100%|██████████| 12/12 [10:23<00:00, 51.97s/it]

Fold 3/10: Epoch 12/12, RMSE: 0.8643, MAE: 0.6293, NRMSE: 0.2161



Fold 4/10:   8%|▊         | 1/12 [00:52<09:32, 52.02s/it]

Fold 4/10: Epoch 1/12, RMSE: 1.0243, MAE: 0.9013, NRMSE: 0.2927


Fold 4/10:  17%|█▋        | 2/12 [01:43<08:37, 51.79s/it]

Fold 4/10: Epoch 2/12, RMSE: 1.0345, MAE: 0.7872, NRMSE: 0.2956


Fold 4/10:  25%|██▌       | 3/12 [02:35<07:46, 51.81s/it]

Fold 4/10: Epoch 3/12, RMSE: 1.0335, MAE: 0.7447, NRMSE: 0.2953


Fold 4/10:  33%|███▎      | 4/12 [03:27<06:54, 51.87s/it]

Fold 4/10: Epoch 4/12, RMSE: 0.9644, MAE: 0.7899, NRMSE: 0.2755


Fold 4/10:  42%|████▏     | 5/12 [04:19<06:03, 51.93s/it]

Fold 4/10: Epoch 5/12, RMSE: 0.9544, MAE: 0.7047, NRMSE: 0.2727


Fold 4/10:  50%|█████     | 6/12 [05:11<05:11, 51.99s/it]

Fold 4/10: Epoch 6/12, RMSE: 0.9713, MAE: 0.7393, NRMSE: 0.2775


Fold 4/10:  58%|█████▊    | 7/12 [06:03<04:20, 52.04s/it]

Fold 4/10: Epoch 7/12, RMSE: 1.0658, MAE: 0.7487, NRMSE: 0.3045


Fold 4/10:  67%|██████▋   | 8/12 [06:55<03:28, 52.07s/it]

Fold 4/10: Epoch 8/12, RMSE: 0.9207, MAE: 0.7322, NRMSE: 0.2631


Fold 4/10:  75%|███████▌  | 9/12 [07:47<02:36, 52.06s/it]

Fold 4/10: Epoch 9/12, RMSE: 0.9901, MAE: 0.7586, NRMSE: 0.2829


Fold 4/10:  83%|████████▎ | 10/12 [08:39<01:44, 52.05s/it]

Fold 4/10: Epoch 10/12, RMSE: 0.9585, MAE: 0.7071, NRMSE: 0.2739


Fold 4/10:  92%|█████████▏| 11/12 [09:31<00:52, 52.03s/it]

Fold 4/10: Epoch 11/12, RMSE: 1.0451, MAE: 0.8433, NRMSE: 0.2986


Fold 4/10: 100%|██████████| 12/12 [10:23<00:00, 52.00s/it]

Fold 4/10: Epoch 12/12, RMSE: 0.9611, MAE: 0.7278, NRMSE: 0.2746



Fold 5/10:   8%|▊         | 1/12 [00:52<09:32, 52.00s/it]

Fold 5/10: Epoch 1/12, RMSE: 1.0644, MAE: 0.8924, NRMSE: 0.2129


Fold 5/10:  17%|█▋        | 2/12 [01:43<08:37, 51.80s/it]

Fold 5/10: Epoch 2/12, RMSE: 1.0624, MAE: 0.8883, NRMSE: 0.2125


Fold 5/10:  25%|██▌       | 3/12 [02:35<07:45, 51.75s/it]

Fold 5/10: Epoch 3/12, RMSE: 1.0631, MAE: 0.8841, NRMSE: 0.2126


Fold 5/10:  33%|███▎      | 4/12 [03:27<06:54, 51.83s/it]

Fold 5/10: Epoch 4/12, RMSE: 1.0700, MAE: 0.8678, NRMSE: 0.2140


Fold 5/10:  42%|████▏     | 5/12 [04:19<06:03, 51.91s/it]

Fold 5/10: Epoch 5/12, RMSE: 1.0709, MAE: 0.8580, NRMSE: 0.2142


Fold 5/10:  50%|█████     | 6/12 [05:11<05:11, 51.94s/it]

Fold 5/10: Epoch 6/12, RMSE: 1.1035, MAE: 0.8033, NRMSE: 0.2207


Fold 5/10:  58%|█████▊    | 7/12 [06:03<04:19, 51.95s/it]

Fold 5/10: Epoch 7/12, RMSE: 0.9953, MAE: 0.8093, NRMSE: 0.1991


Fold 5/10:  67%|██████▋   | 8/12 [06:55<03:27, 51.98s/it]

Fold 5/10: Epoch 8/12, RMSE: 1.0172, MAE: 0.6803, NRMSE: 0.2034


Fold 5/10:  75%|███████▌  | 9/12 [07:47<02:35, 51.99s/it]

Fold 5/10: Epoch 9/12, RMSE: 0.9341, MAE: 0.6795, NRMSE: 0.1868


Fold 5/10:  83%|████████▎ | 10/12 [08:39<01:43, 51.99s/it]

Fold 5/10: Epoch 10/12, RMSE: 0.9295, MAE: 0.6814, NRMSE: 0.1859


Fold 5/10:  92%|█████████▏| 11/12 [09:31<00:51, 52.00s/it]

Fold 5/10: Epoch 11/12, RMSE: 0.9420, MAE: 0.6820, NRMSE: 0.1884


Fold 5/10: 100%|██████████| 12/12 [10:23<00:00, 51.95s/it]

Fold 5/10: Epoch 12/12, RMSE: 0.9451, MAE: 0.6776, NRMSE: 0.1890



Fold 6/10:   8%|▊         | 1/12 [00:52<09:32, 52.05s/it]

Fold 6/10: Epoch 1/12, RMSE: 1.0910, MAE: 0.7975, NRMSE: 0.2182


Fold 6/10:  17%|█▋        | 2/12 [01:44<08:40, 52.03s/it]

Fold 6/10: Epoch 2/12, RMSE: 1.0692, MAE: 0.8434, NRMSE: 0.2138


Fold 6/10:  25%|██▌       | 3/12 [02:35<07:47, 51.90s/it]

Fold 6/10: Epoch 3/12, RMSE: 1.0805, MAE: 0.8131, NRMSE: 0.2161


Fold 6/10:  33%|███▎      | 4/12 [03:27<06:54, 51.82s/it]

Fold 6/10: Epoch 4/12, RMSE: 1.0755, MAE: 0.9002, NRMSE: 0.2151


Fold 6/10:  42%|████▏     | 5/12 [04:19<06:02, 51.83s/it]

Fold 6/10: Epoch 5/12, RMSE: 1.0579, MAE: 0.8532, NRMSE: 0.2116


Fold 6/10:  50%|█████     | 6/12 [05:11<05:11, 51.88s/it]

Fold 6/10: Epoch 6/12, RMSE: 1.0154, MAE: 0.8134, NRMSE: 0.2031


Fold 6/10:  58%|█████▊    | 7/12 [06:03<04:19, 51.90s/it]

Fold 6/10: Epoch 7/12, RMSE: 1.0619, MAE: 0.7033, NRMSE: 0.2124


Fold 6/10:  67%|██████▋   | 8/12 [06:55<03:27, 51.91s/it]

Fold 6/10: Epoch 8/12, RMSE: 0.9873, MAE: 0.6819, NRMSE: 0.1975


Fold 6/10:  75%|███████▌  | 9/12 [07:47<02:35, 51.93s/it]

Fold 6/10: Epoch 9/12, RMSE: 0.9533, MAE: 0.6615, NRMSE: 0.1907


Fold 6/10:  83%|████████▎ | 10/12 [08:39<01:43, 51.93s/it]

Fold 6/10: Epoch 10/12, RMSE: 0.9803, MAE: 0.7585, NRMSE: 0.1961


Fold 6/10:  92%|█████████▏| 11/12 [09:31<00:51, 51.94s/it]

Fold 6/10: Epoch 11/12, RMSE: 1.0823, MAE: 0.8416, NRMSE: 0.2165


Fold 6/10: 100%|██████████| 12/12 [10:23<00:00, 51.92s/it]

Fold 6/10: Epoch 12/12, RMSE: 0.9393, MAE: 0.7223, NRMSE: 0.1879



Fold 7/10:   8%|▊         | 1/12 [00:51<09:31, 51.98s/it]

Fold 7/10: Epoch 1/12, RMSE: 1.2045, MAE: 0.8758, NRMSE: 0.2409


Fold 7/10:  17%|█▋        | 2/12 [01:43<08:38, 51.87s/it]

Fold 7/10: Epoch 2/12, RMSE: 1.1898, MAE: 0.8863, NRMSE: 0.2380


Fold 7/10:  25%|██▌       | 3/12 [02:35<07:45, 51.78s/it]

Fold 7/10: Epoch 3/12, RMSE: 1.1636, MAE: 0.9264, NRMSE: 0.2327


Fold 7/10:  33%|███▎      | 4/12 [03:27<06:54, 51.79s/it]

Fold 7/10: Epoch 4/12, RMSE: 1.1550, MAE: 0.9207, NRMSE: 0.2310


Fold 7/10:  42%|████▏     | 5/12 [04:19<06:02, 51.82s/it]

Fold 7/10: Epoch 5/12, RMSE: 1.1284, MAE: 0.7978, NRMSE: 0.2257


Fold 7/10:  50%|█████     | 6/12 [05:11<05:11, 51.86s/it]

Fold 7/10: Epoch 6/12, RMSE: 1.0450, MAE: 0.7806, NRMSE: 0.2090


Fold 7/10:  58%|█████▊    | 7/12 [06:02<04:19, 51.86s/it]

Fold 7/10: Epoch 7/12, RMSE: 1.0185, MAE: 0.7743, NRMSE: 0.2037


Fold 7/10:  67%|██████▋   | 8/12 [06:54<03:27, 51.87s/it]

Fold 7/10: Epoch 8/12, RMSE: 1.0054, MAE: 0.7483, NRMSE: 0.2011


Fold 7/10:  75%|███████▌  | 9/12 [07:46<02:35, 51.86s/it]

Fold 7/10: Epoch 9/12, RMSE: 1.0479, MAE: 0.7046, NRMSE: 0.2096


Fold 7/10:  83%|████████▎ | 10/12 [08:38<01:43, 51.87s/it]

Fold 7/10: Epoch 10/12, RMSE: 1.0034, MAE: 0.7524, NRMSE: 0.2007


Fold 7/10:  92%|█████████▏| 11/12 [09:30<00:51, 51.88s/it]

Fold 7/10: Epoch 11/12, RMSE: 1.0222, MAE: 0.7015, NRMSE: 0.2044


Fold 7/10: 100%|██████████| 12/12 [10:22<00:00, 51.86s/it]

Fold 7/10: Epoch 12/12, RMSE: 1.0182, MAE: 0.7228, NRMSE: 0.2036



Fold 8/10:   8%|▊         | 1/12 [00:51<09:31, 52.00s/it]

Fold 8/10: Epoch 1/12, RMSE: 1.1968, MAE: 0.9425, NRMSE: 0.2394


Fold 8/10:  17%|█▋        | 2/12 [01:43<08:38, 51.81s/it]

Fold 8/10: Epoch 2/12, RMSE: 1.2930, MAE: 0.9118, NRMSE: 0.2586


Fold 8/10:  25%|██▌       | 3/12 [02:35<07:45, 51.73s/it]

Fold 8/10: Epoch 3/12, RMSE: 1.2089, MAE: 0.9354, NRMSE: 0.2418


Fold 8/10:  33%|███▎      | 4/12 [03:26<06:53, 51.70s/it]

Fold 8/10: Epoch 4/12, RMSE: 1.2005, MAE: 0.9428, NRMSE: 0.2401


Fold 8/10:  42%|████▏     | 5/12 [04:18<06:01, 51.70s/it]

Fold 8/10: Epoch 5/12, RMSE: 1.2021, MAE: 0.9409, NRMSE: 0.2404


Fold 8/10:  50%|█████     | 6/12 [05:10<05:10, 51.71s/it]

Fold 8/10: Epoch 6/12, RMSE: 1.2675, MAE: 0.9090, NRMSE: 0.2535


Fold 8/10:  58%|█████▊    | 7/12 [06:02<04:18, 51.70s/it]

Fold 8/10: Epoch 7/12, RMSE: 1.1413, MAE: 0.8702, NRMSE: 0.2283


Fold 8/10:  67%|██████▋   | 8/12 [06:53<03:26, 51.74s/it]

Fold 8/10: Epoch 8/12, RMSE: 1.1656, MAE: 0.8099, NRMSE: 0.2331


Fold 8/10:  75%|███████▌  | 9/12 [07:45<02:35, 51.77s/it]

Fold 8/10: Epoch 9/12, RMSE: 1.0466, MAE: 0.8386, NRMSE: 0.2093


Fold 8/10:  83%|████████▎ | 10/12 [08:37<01:43, 51.79s/it]

Fold 8/10: Epoch 10/12, RMSE: 1.0382, MAE: 0.8321, NRMSE: 0.2076


Fold 8/10:  92%|█████████▏| 11/12 [09:29<00:51, 51.80s/it]

Fold 8/10: Epoch 11/12, RMSE: 1.0627, MAE: 0.7554, NRMSE: 0.2125


Fold 8/10: 100%|██████████| 12/12 [10:21<00:00, 51.77s/it]

Fold 8/10: Epoch 12/12, RMSE: 1.0826, MAE: 0.7425, NRMSE: 0.2165



Fold 9/10:   8%|▊         | 1/12 [00:51<09:31, 51.99s/it]

Fold 9/10: Epoch 1/12, RMSE: 1.2121, MAE: 0.8433, NRMSE: 0.2424


Fold 9/10:  17%|█▋        | 2/12 [01:43<08:39, 51.92s/it]

Fold 9/10: Epoch 2/12, RMSE: 1.1292, MAE: 0.8397, NRMSE: 0.2258


Fold 9/10:  25%|██▌       | 3/12 [02:35<07:46, 51.81s/it]

Fold 9/10: Epoch 3/12, RMSE: 1.0849, MAE: 0.8299, NRMSE: 0.2170


Fold 9/10:  33%|███▎      | 4/12 [03:27<06:54, 51.82s/it]

Fold 9/10: Epoch 4/12, RMSE: 1.0467, MAE: 0.7946, NRMSE: 0.2093


Fold 9/10:  42%|████▏     | 5/12 [04:19<06:02, 51.85s/it]

Fold 9/10: Epoch 5/12, RMSE: 1.0662, MAE: 0.8146, NRMSE: 0.2132


Fold 9/10:  50%|█████     | 6/12 [05:11<05:11, 51.88s/it]

Fold 9/10: Epoch 6/12, RMSE: 1.0362, MAE: 0.7123, NRMSE: 0.2072


Fold 9/10:  58%|█████▊    | 7/12 [06:03<04:19, 51.91s/it]

Fold 9/10: Epoch 7/12, RMSE: 1.0161, MAE: 0.7290, NRMSE: 0.2032


Fold 9/10:  67%|██████▋   | 8/12 [06:55<03:27, 51.93s/it]

Fold 9/10: Epoch 8/12, RMSE: 1.0130, MAE: 0.6765, NRMSE: 0.2026


Fold 9/10:  75%|███████▌  | 9/12 [07:47<02:35, 51.95s/it]

Fold 9/10: Epoch 9/12, RMSE: 1.0209, MAE: 0.6689, NRMSE: 0.2042


Fold 9/10:  83%|████████▎ | 10/12 [08:39<01:43, 51.97s/it]

Fold 9/10: Epoch 10/12, RMSE: 1.0353, MAE: 0.6699, NRMSE: 0.2071


Fold 9/10:  92%|█████████▏| 11/12 [09:31<00:51, 51.97s/it]

Fold 9/10: Epoch 11/12, RMSE: 1.0407, MAE: 0.6665, NRMSE: 0.2081


Fold 9/10: 100%|██████████| 12/12 [10:23<00:00, 51.93s/it]

Fold 9/10: Epoch 12/12, RMSE: 1.0219, MAE: 0.7287, NRMSE: 0.2044



Fold 10/10:   8%|▊         | 1/12 [00:52<09:32, 52.07s/it]

Fold 10/10: Epoch 1/12, RMSE: 1.0941, MAE: 0.9100, NRMSE: 0.2188


Fold 10/10:  17%|█▋        | 2/12 [01:43<08:38, 51.81s/it]

Fold 10/10: Epoch 2/12, RMSE: 1.0903, MAE: 0.8895, NRMSE: 0.2181


Fold 10/10:  25%|██▌       | 3/12 [02:35<07:46, 51.82s/it]

Fold 10/10: Epoch 3/12, RMSE: 1.1127, MAE: 0.9703, NRMSE: 0.2225


Fold 10/10:  33%|███▎      | 4/12 [03:27<06:55, 51.90s/it]

Fold 10/10: Epoch 4/12, RMSE: 1.0621, MAE: 0.8877, NRMSE: 0.2124


Fold 10/10:  42%|████▏     | 5/12 [04:19<06:03, 51.99s/it]

Fold 10/10: Epoch 5/12, RMSE: 0.9966, MAE: 0.6963, NRMSE: 0.1993


Fold 10/10:  50%|█████     | 6/12 [05:11<05:12, 52.06s/it]

Fold 10/10: Epoch 6/12, RMSE: 1.0272, MAE: 0.7104, NRMSE: 0.2054


Fold 10/10:  58%|█████▊    | 7/12 [06:03<04:20, 52.06s/it]

Fold 10/10: Epoch 7/12, RMSE: 1.0042, MAE: 0.7496, NRMSE: 0.2008


Fold 10/10:  67%|██████▋   | 8/12 [06:56<03:28, 52.06s/it]

Fold 10/10: Epoch 8/12, RMSE: 1.0040, MAE: 0.7911, NRMSE: 0.2008


Fold 10/10:  75%|███████▌  | 9/12 [07:48<02:36, 52.07s/it]

Fold 10/10: Epoch 9/12, RMSE: 1.0223, MAE: 0.6971, NRMSE: 0.2045


Fold 10/10:  83%|████████▎ | 10/12 [08:40<01:44, 52.07s/it]

Fold 10/10: Epoch 10/12, RMSE: 1.0570, MAE: 0.7730, NRMSE: 0.2114


Fold 10/10:  92%|█████████▏| 11/12 [09:32<00:52, 52.07s/it]

Fold 10/10: Epoch 11/12, RMSE: 0.9931, MAE: 0.6722, NRMSE: 0.1986


Fold 10/10: 100%|██████████| 12/12 [10:24<00:00, 52.03s/it]

Fold 10/10: Epoch 12/12, RMSE: 1.0042, MAE: 0.6846, NRMSE: 0.2008
   DistilBERT_RMSE  DistilBERT_MAE  DistilBERT_NRMSE
0         1.060811        0.808247          0.225245



