<a href="https://colab.research.google.com/github/shmuhammadd/semantic_relatedness/blob/main/Simple_English_Baseline_v2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Package Imports

In [103]:
import pandas as pd
import re
from scipy.stats import spearmanr, pearsonr, linregress
import Levenshtein
import matplotlib.pyplot as plt
import torch
import torch.nn.functional as F
from sklearn.preprocessing import StandardScaler
from transformers import AutoTokenizer, AutoModel
plt.style.use('ggplot')

# Data Import / Format / Export

Functions for importing, formatting, and exporting data

In [104]:
# Load data from csv, format into proper split
def load_data(filepath):
    data = pd.read_csv(filepath)
    data['Split_Text'] = data['Text'].apply(lambda x: x.split("\n"))
    data['Pred_Score'] = 0.0
    return data

# Preprocessing

In [105]:
def jaccard_similarity(s1, s2):
    set1, set2 = set(s1), set(s2)
    return len(set1.intersection(set2)) / len(set1.union(set2))

In [106]:
def word_overlap(s1, s2):
    set1, set2 = set(s1), set(s2)
    return len(set1.intersection(set2)) / len(set1)

In [107]:
def dice_score(s1,s2):
  s1 = s1.lower()
  s1_split = re.findall(r"\w+|[^\w\s]", s1, re.UNICODE)

  s2 = s2.lower()
  s2_split = re.findall(r"\w+|[^\w\s]", s2, re.UNICODE)

  dice_coef = len(set(s1_split).intersection(set(s2_split))) / (len(set(s1_split)) + len(set(s2_split)))
  return round(dice_coef, 2)

In [108]:
# Additional features added to RoBERTa embeddings
def compute_custom_metrics(row):
    metrics = {}
    cosine_sim = F.cosine_similarity(row["Embedding1"].unsqueeze(0), row["Embedding2"].unsqueeze(0))
    metrics["Cosine_Similarity"] = cosine_sim.item()

    set1 = set(row["Sentence1"].split())
    set2 = set(row["Sentence2"].split())
    jaccard_sim = len(set1.intersection(set2)) / len(set1.union(set2)) if len(set1.union(set2)) > 0 else 0
    metrics["Jaccard_Similarity"] = jaccard_sim

    metrics["Length_Diff"] = abs(len(row["Sentence1"].split()) - len(row["Sentence2"].split()))

    metrics['Levenshtein_Distance'] = Levenshtein.distance(row['Sentence1'], row['Sentence2'])

    word_overlap_score = word_overlap(row["Sentence1"].split(), row["Sentence2"].split())
    metrics['Word_Overlap'] = word_overlap_score
    
    dice = dice_score(row["Sentence1"], row["Sentence2"])
    metrics['Dice_Score'] = dice

    return metrics

In [109]:
tokenizer = AutoTokenizer.from_pretrained("roberta-base")
model = AutoModel.from_pretrained("roberta-base")

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [110]:
# Needed batch sizes due to memory issues
def get_roberta_embeddings(sentences, batch_size=32):
    embeddings_list = []
    for i in range(0, len(sentences), batch_size):
        batch_sentences = sentences[i:i+batch_size]
        inputs = tokenizer(batch_sentences, padding=True, truncation=True, return_tensors="pt")
        with torch.no_grad():
            outputs = model(**inputs)
            embeddings = outputs.last_hidden_state[:, 0, :]
            embeddings_list.append(embeddings)
    return torch.cat(embeddings_list, dim=0)

In [111]:
def preprocess_with_roberta(data, batch_size=32):
    # Split into two sentences
    data[['Sentence1', 'Sentence2']] = pd.DataFrame(data['Split_Text'].tolist(), index=data.index)
    
    # Lowercase sentences, strip whitespace
    data["Sentence1"] = data["Sentence1"].str.lower().str.strip()
    data["Sentence2"] = data["Sentence2"].str.lower().str.strip()

    # Generate RoBERTa embeddings in batches (keeping everything as tensors)
    embeddings1 = get_roberta_embeddings(data["Sentence1"].tolist(), batch_size)
    embeddings2 = get_roberta_embeddings(data["Sentence2"].tolist(), batch_size)
    
    # Save embeddings for custom metrics
    data["Embedding1"] = list(embeddings1)
    data["Embedding2"] = list(embeddings2)

    # Compute custom metrics for each row
    metrics = data.apply(compute_custom_metrics, axis=1, result_type="expand")

    # Convert metrics to tensor
    metrics_tensor = torch.tensor(metrics.values, dtype=torch.float32)

    # Standardize custom metrics
    scaler = StandardScaler()
    standardized_metrics = scaler.fit_transform(metrics_tensor.numpy())
    standardized_metrics_tensor = torch.tensor(standardized_metrics, dtype=torch.float32)

    # Combine embeddings and metrics
    features = torch.cat([
        embeddings1,
        embeddings2,
        standardized_metrics_tensor
    ], dim=1)

    # Returned processed features as tensors
    return features, data

# Tools

In [112]:
def calculate_metrics(preds, scores):
    pearson_corr, _ = pearsonr(scores, preds)
    spearman_corr, _ = spearmanr(scores, preds)
    _, _, r, _, _ = linregress(scores, preds) # probably a better way of doing this, fix later
    r2 = r**2
    mse = ((scores - preds)**2).mean() # Scikit's mean_squared_error complained about being deprecated, so this is my temp fix
    return (pearson_corr, spearman_corr, r2, mse)

In [113]:
# Make sure these match the metrics above
def display_metrics(metrics, title="Metrics:"):
    print(title)
    print("Pearson Corr:", metrics[0])
    print("Spearman Corr:", metrics[1])
    print("R^2:", metrics[2])
    print("MSE:", metrics[3])

# Load data

In [114]:
train_data = load_data("./Semantic_Relatedness_SemEval2024/Track A/eng/eng_train.csv")
train_data.head()

Unnamed: 0,PairID,Text,Score,Split_Text,Pred_Score
0,ENG-train-0000,"It that happens, just pull the plug.\nif that ...",1.0,"[It that happens, just pull the plug., if that...",0.0
1,ENG-train-0001,A black dog running through water.\nA black do...,1.0,"[A black dog running through water., A black d...",0.0
2,ENG-train-0002,I've been searchingthe entire abbey for you.\n...,1.0,"[I've been searchingthe entire abbey for you.,...",0.0
3,ENG-train-0003,If he is good looking and has a good personali...,1.0,[If he is good looking and has a good personal...,0.0
4,ENG-train-0004,"She does not hate you, she is just annoyed wit...",1.0,"[She does not hate you, she is just annoyed wi...",0.0


In [115]:
test_data = load_data("./Semantic_Relatedness_SemEval2024/Track A/eng/eng_test_with_labels.csv")
test_data.head()

Unnamed: 0,PairID,Text,Score,Split_Text,Pred_Score
0,ENG-test-0000,Egypt's Brotherhood stands ground after killin...,0.7,[Egypt's Brotherhood stands ground after killi...,0.0
1,ENG-test-0001,install it for fre and get to know what all u ...,0.71,[install it for fre and get to know what all u...,0.0
2,ENG-test-0002,"Also, it was one of the debut novels that I wa...",0.49,"[Also, it was one of the debut novels that I w...",0.0
3,ENG-test-0003,"Therefore, you can use the code BRAIL, BASIL, ...",0.27,"[Therefore, you can use the code BRAIL, BASIL,...",0.0
4,ENG-test-0004,Solid YA novel with a funky take on zombies an...,0.32,[Solid YA novel with a funky take on zombies a...,0.0


In [116]:
train_features, train_data = preprocess_with_roberta(train_data)
print(train_data.shape)
train_data.head()

(5500, 9)


Unnamed: 0,PairID,Text,Score,Split_Text,Pred_Score,Sentence1,Sentence2,Embedding1,Embedding2
0,ENG-train-0000,"It that happens, just pull the plug.\nif that ...",1.0,"[It that happens, just pull the plug., if that...",0.0,"it that happens, just pull the plug.","if that ever happens, just pull the plug.","[tensor(-0.1094), tensor(0.1345), tensor(-0.04...","[tensor(-0.1166), tensor(0.1211), tensor(-0.04..."
1,ENG-train-0001,A black dog running through water.\nA black do...,1.0,"[A black dog running through water., A black d...",0.0,a black dog running through water.,a black dog is running through some water.,"[tensor(-0.1038), tensor(0.0925), tensor(-0.00...","[tensor(-0.0920), tensor(0.0753), tensor(-0.00..."
2,ENG-train-0002,I've been searchingthe entire abbey for you.\n...,1.0,"[I've been searchingthe entire abbey for you.,...",0.0,i've been searchingthe entire abbey for you.,i'm looking for you all over the abbey.,"[tensor(-0.1287), tensor(0.0527), tensor(-0.01...","[tensor(-0.1227), tensor(0.0650), tensor(0.013..."
3,ENG-train-0003,If he is good looking and has a good personali...,1.0,[If he is good looking and has a good personal...,0.0,if he is good looking and has a good personali...,"if he's good looking, and a good personality, ...","[tensor(-0.0881), tensor(0.0848), tensor(-0.01...","[tensor(-0.1034), tensor(0.0648), tensor(-0.02..."
4,ENG-train-0004,"She does not hate you, she is just annoyed wit...",1.0,"[She does not hate you, she is just annoyed wi...",0.0,"she does not hate you, she is just annoyed wit...","she doesn't hate you, she is just annoyed.","[tensor(-0.0909), tensor(0.1275), tensor(0.007...","[tensor(-0.1069), tensor(0.1247), tensor(0.013..."


In [117]:
print(train_features.shape)
print(train_features)
torch.save(train_features, "train_features.pt")

torch.Size([5500, 1542])
tensor([[-1.0935e-01,  1.3450e-01, -4.0140e-02,  ..., -1.7812e+00,
          3.0215e+00,  2.7462e+00],
        [-1.0378e-01,  9.2506e-02, -3.3523e-03,  ..., -1.7041e+00,
          3.7620e+00,  2.9718e+00],
        [-1.2873e-01,  5.2712e-02, -1.4779e-02,  ..., -7.4082e-01,
         -6.8096e-01,  1.2792e+00],
        ...,
        [-1.2664e-01,  6.4262e-02,  4.8498e-03,  ...,  4.5370e-01,
         -8.4551e-01, -6.3912e-01],
        [-9.2601e-02,  1.0219e-01,  2.0890e-03,  ..., -5.8669e-01,
         -9.0311e-01, -8.6480e-01],
        [-9.2274e-02,  1.2332e-01, -5.6928e-03,  ..., -5.4815e-01,
         -7.7821e-03,  3.7936e-02]])


In [118]:
train_labels = torch.tensor(train_data['Score'], dtype=torch.float32)
print(train_labels.shape)
print(train_labels)
torch.save(train_labels, "train_labels.pt")

torch.Size([5500])
tensor([1., 1., 1.,  ..., 0., 0., 0.])


In [119]:
test_features, test_data = preprocess_with_roberta(test_data)
print(test_data.shape)
test_data.head()

(2600, 9)


Unnamed: 0,PairID,Text,Score,Split_Text,Pred_Score,Sentence1,Sentence2,Embedding1,Embedding2
0,ENG-test-0000,Egypt's Brotherhood stands ground after killin...,0.7,[Egypt's Brotherhood stands ground after killi...,0.0,egypt's brotherhood stands ground after killings,egypt: muslim brotherhood stands behind morsi,"[tensor(-0.0477), tensor(0.0616), tensor(0.005...","[tensor(-0.0468), tensor(0.0575), tensor(0.008..."
1,ENG-test-0001,install it for fre and get to know what all u ...,0.71,[install it for fre and get to know what all u...,0.0,install it for fre and get to know what all u ...,"install the program, which is free to download...","[tensor(-0.0327), tensor(0.0544), tensor(-0.03...","[tensor(-0.0901), tensor(0.1307), tensor(-0.04..."
2,ENG-test-0002,"Also, it was one of the debut novels that I wa...",0.49,"[Also, it was one of the debut novels that I w...",0.0,"also, it was one of the debut novels that i wa...",pretty much the first thing people mentioned w...,"[tensor(-0.1055), tensor(0.0924), tensor(0.001...","[tensor(-0.1187), tensor(0.0733), tensor(-0.01..."
3,ENG-test-0003,"Therefore, you can use the code BRAIL, BASIL, ...",0.27,"[Therefore, you can use the code BRAIL, BASIL,...",0.0,"therefore, you can use the code brail, basil, ...",you can watch the wiggles every day on nick jr.,"[tensor(-0.0907), tensor(0.1206), tensor(-0.03...","[tensor(-0.0965), tensor(0.0422), tensor(0.012..."
4,ENG-test-0004,Solid YA novel with a funky take on zombies an...,0.32,[Solid YA novel with a funky take on zombies a...,0.0,solid ya novel with a funky take on zombies an...,my 13-year-old son recommended this book to me...,"[tensor(-0.1368), tensor(0.0987), tensor(-0.00...","[tensor(-0.0597), tensor(0.0636), tensor(-0.06..."


In [120]:
print(test_features.shape)
print(test_features)
torch.save(test_features, "test_features.pt")

torch.Size([2600, 1542])
tensor([[-4.7668e-02,  6.1642e-02,  5.0457e-03,  ..., -8.5190e-01,
         -1.4553e-01, -5.6737e-02],
        [-3.2685e-02,  5.4365e-02, -3.8078e-02,  ...,  4.3447e-01,
          3.7257e-01,  3.4154e-02],
        [-1.0550e-01,  9.2436e-02,  1.2523e-03,  ...,  6.6836e-01,
         -9.1528e-01, -6.9298e-01],
        ...,
        [-1.4587e-01,  1.0338e-01,  1.2481e-02,  ..., -6.1801e-01,
         -1.4926e+00, -1.1474e+00],
        [-1.1664e-01,  8.5891e-02,  1.1200e-02,  ..., -1.1126e-01,
         -1.4926e+00, -1.5110e+00],
        [-1.0706e-01,  9.1530e-02,  1.7318e-03,  ...,  8.6326e-01,
         -7.7944e-01, -6.9298e-01]])


In [121]:
test_labels = torch.tensor(test_data['Score'], dtype=torch.float32)
print(test_labels.shape)
print(test_labels)
torch.save(test_labels, "test_labels.pt")

torch.Size([2600])
tensor([0.7000, 0.7100, 0.4900,  ..., 0.4500, 0.4500, 0.2200])


# Model

In [122]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
from torch.utils.data import TensorDataset, random_split, DataLoader
from transformers import RobertaTokenizer, RobertaForSequenceClassification, Trainer, TrainingArguments

In [123]:
# Update data for torch usage
X_train = train_features
y_train = train_labels
X_test = test_features
y_test = test_labels
print("X_train:", X_train.shape)
print("y_train:", y_train.shape)
print("X_test:", X_test.shape)
print("y_test:", y_test.shape)

X_train: torch.Size([5500, 1542])
y_train: torch.Size([5500])
X_test: torch.Size([2600, 1542])
y_test: torch.Size([2600])


In [124]:
# Needed assistance with getting a differentiable spearman correlation for loss function
# https://forum.numer.ai/t/differentiable-spearman-in-pytorch-optimize-for-corr-directly/2287/26
import torchsort

def corrcoef(target, pred):
    pred_n = pred - pred.mean()
    target_n = target - target.mean()
    pred_n = pred_n / pred_n.norm()
    target_n = target_n / target_n.norm()
    return (pred_n * target_n).sum()

def spearman_loss(pred, target, x=1e-2):
    pred = torchsort.soft_rank(pred.reshape(1,-1),regularization_strength=x)
    target = torchsort.soft_rank(target.reshape(1,-1),regularization_strength=x)
    pred = pred - pred.mean()
    pred = pred / pred.norm()
    target = target - target.mean()
    target = target / target.norm()
    return 1 - (pred * target).sum()

In [125]:
class Model(nn.Module):
    def __init__(self, input_size=1539, hidden_size=128, num_layers=1):
        super(Model, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers=num_layers, 
                            batch_first=True, bidirectional=True)
        self.dropout = nn.Dropout(p=0.3)
        self.fc = nn.Linear(hidden_size * 2, 1)

    def forward(self, x):
        x = x.unsqueeze(1)
        output, _ = self.lstm(x)
        output = self.dropout(output)
        output = self.fc(output[:, -1, :])
        return output
    
    def split(self, X, y, s = 0.8):
        dataset = TensorDataset(torch.tensor(X, dtype=torch.float32),
                        torch.tensor(y, dtype=torch.float32))
        train_size = int(s * len(dataset))
        val_size = len(dataset) - train_size
        train_dataset, val_dataset = random_split(dataset, [train_size, val_size])
        
        X_train, y_train = zip(*train_dataset)
        X_train = torch.stack(X_train)
        y_train = torch.stack(y_train)

        X_val, y_val = zip(*val_dataset)
        X_val = torch.stack(X_val)
        y_val = torch.stack(y_val)
        
        return X_train, y_train, X_val, y_val
    
    def fit(self, X, y, num_epochs=20, lr=1e-4, weight_decay=1e-4):
        X_train, y_train, X_val, y_val = self.split(X, y)
        optimizer = optim.Adam(self.parameters(), lr=lr, weight_decay=weight_decay)
        best_val_loss = float('inf')
        for epoch in range(num_epochs):
            self.train()
            optimizer.zero_grad()
            y_pred = self(X_train)
            loss = spearman_loss(y_train, y_pred)
            loss.backward()
            optimizer.step()

            self.eval()
            with torch.no_grad():
                val_pred = self(X_val)
                val_loss = spearman_loss(y_val, val_pred).item()

            if epoch % (num_epochs // 10) == 0 or epoch == num_epochs - 1:
                print(f"Epoch {epoch+1}/{num_epochs}, Spearman Loss: {loss.item():.4f}, Val Loss: {val_loss:.4f}")

            if val_loss < best_val_loss:
                best_val_loss = val_loss
                torch.save(self.state_dict(), './best_model.pth')
                
    def predict(self, x):
        self.eval()
        with torch.no_grad():
            return self(x)

In [126]:
class Transformation(nn.Module):
    def __init__(self):
        super(Transformation, self).__init__()
        self.scale = nn.Parameter(torch.ones(1))
        self.shift = nn.Parameter(torch.zeros(1))

    def forward(self, x):
        if x.ndim == 1:
            x = x.unsqueeze(-1)
        return x * self.scale + self.shift
    
    def fit(self, X_train, y_train, num_epochs=1000, lr=0.01):
        optimizer = optim.Adam(self.parameters(), lr)
        for epoch in range(num_epochs):
            optimizer.zero_grad()
            y_pred = self(X_train)
            loss = nn.functional.mse_loss(y_train.squeeze(), y_pred.squeeze())
            loss.backward()
            optimizer.step()
            if epoch % (num_epochs // 10) == 0 or epoch == num_epochs - 1:
                print(f"Epoch {epoch+1}/{num_epochs}, MSE Loss: {loss.item():.4f}")
                
    def transform(self, x):
        self.eval()
        with torch.no_grad():
            return self(x)
    

In [127]:
X = X_train
y = y_train
model = Model(input_size=X_train.shape[1], hidden_size=128, num_layers=2)
model.fit(X, y, 200, 0.1, 0.0001)

raw_pred = model.predict(X)
print()

trans = Transformation()
trans.fit(raw_pred, y)


  dataset = TensorDataset(torch.tensor(X, dtype=torch.float32),
  torch.tensor(y, dtype=torch.float32))


Epoch 1/200, Spearman Loss: 1.0387, Val Loss: 0.6414
Epoch 21/200, Spearman Loss: 0.3782, Val Loss: 0.3344
Epoch 41/200, Spearman Loss: 0.3725, Val Loss: 0.3052
Epoch 61/200, Spearman Loss: 0.3382, Val Loss: 0.2998
Epoch 81/200, Spearman Loss: 0.2714, Val Loss: 0.2803
Epoch 101/200, Spearman Loss: 0.2755, Val Loss: 0.2869
Epoch 121/200, Spearman Loss: 0.2520, Val Loss: 0.2866
Epoch 141/200, Spearman Loss: 0.2291, Val Loss: 0.2793
Epoch 161/200, Spearman Loss: 0.2296, Val Loss: 0.2827
Epoch 181/200, Spearman Loss: 0.2792, Val Loss: 0.2911
Epoch 200/200, Spearman Loss: 0.2269, Val Loss: 0.2746

Epoch 1/1000, MSE Loss: 44.1342
Epoch 101/1000, MSE Loss: 2.2861
Epoch 201/1000, MSE Loss: 0.0818
Epoch 301/1000, MSE Loss: 0.0196
Epoch 401/1000, MSE Loss: 0.0190
Epoch 501/1000, MSE Loss: 0.0190
Epoch 601/1000, MSE Loss: 0.0190
Epoch 701/1000, MSE Loss: 0.0190
Epoch 801/1000, MSE Loss: 0.0190
Epoch 901/1000, MSE Loss: 0.0190
Epoch 1000/1000, MSE Loss: 0.0190


# Evaluate Model

In [132]:
train_preds = model.predict(X_train)
test_preds = model.predict(X_test)

train_preds_np = np.array(train_preds).flatten()
train_y_np = np.array(y_train).flatten()
test_preds_np = np.array(test_preds).flatten()
test_y_np = np.array(y_test).flatten()

train_metrics = calculate_metrics(train_y_np, train_preds_np)
test_metrics = calculate_metrics(test_y_np, test_preds_np)
full_metrics = calculate_metrics(np.concatenate((train_y_np, test_y_np), axis=0),
                                 np.concatenate((train_preds_np, test_preds_np), axis=0))

display_metrics(train_metrics, "Training Metrics:")
print()
display_metrics(test_metrics, "Testing Metrics:")
print()
display_metrics(full_metrics, "Full data Metrics:")

Training Metrics:
Pearson Corr: 0.7828682234700208
Spearman Corr: 0.77112076735343
R^2: 0.6128826505847913
MSE: 44.134193

Testing Metrics:
Pearson Corr: 0.7005200838752899
Spearman Corr: 0.6931057695369068
R^2: 0.4907283319260672
MSE: 45.105404

Full data Metrics:
Pearson Corr: 0.7509701610619879
Spearman Corr: 0.7439210166020174
R^2: 0.5639562351164263
MSE: 44.445934


In [129]:
print("Pred vs True for training data")
for i in range(10):
    print(f"{train_preds_np[i]:.4f}, {train_labels[i]:.4f}")
print()
print("Pred vs True for testing data")
for i in range(10):
    print(f"{test_preds_np[i]:.4f}, {test_labels[i]:.4f}")

Pred vs True for training data
18.0153, 1.0000
13.4034, 1.0000
12.8838, 1.0000
15.3110, 1.0000
15.8661, 1.0000
13.2623, 1.0000
16.9125, 1.0000
13.0393, 1.0000
17.2727, 1.0000
17.6072, 1.0000

Pred vs True for testing data
6.2189, 0.7000
2.2515, 0.7100
3.6112, 0.4900
-3.2972, 0.2700
-4.6490, 0.3200
2.4305, 0.4300
-4.8544, 0.3100
-4.2192, 0.3200
0.8545, 0.7700
-6.9278, 0.3400


In [130]:
train_preds_trans = trans.transform(train_preds)
test_preds_trans = trans.transform(test_preds)

train_preds_trans_np = np.array(train_preds_trans).flatten()
train_y_trans_np = np.array(y_train).flatten()
test_preds_trans_np = np.array(test_preds_trans).flatten()
test_y_trans_np = np.array(y_test).flatten()

train_metrics_trans = calculate_metrics(train_y_trans_np, train_preds_trans_np)
test_metrics_trans = calculate_metrics(test_y_trans_np, test_preds_trans_np)

display_metrics(train_metrics_trans, "Training Metrics:")
print()
display_metrics(test_metrics_trans, "Testing Metrics:")

Training Metrics:
Pearson Corr: 0.782868201051803
Spearman Corr: 0.77112076735343
R^2: 0.6128826510301096
MSE: 0.01895909

Testing Metrics:
Pearson Corr: 0.7005201116574824
Spearman Corr: 0.6931057695369068
R^2: 0.4907283305629485
MSE: 0.019378621


In [131]:
print("Pred vs True for training data")
for i in range(10):
    print(f"{train_preds_trans_np[i]:.4f}, {train_labels[i]:.4f}")
print()
print("Pred vs True for testing data")
for i in range(10):
    print(f"{test_preds_trans_np[i]:.4f}, {test_labels[i]:.4f}")

Pred vs True for training data
0.9180, 1.0000
0.7978, 1.0000
0.7842, 1.0000
0.8475, 1.0000
0.8620, 1.0000
0.7941, 1.0000
0.8892, 1.0000
0.7883, 1.0000
0.8986, 1.0000
0.9073, 1.0000

Pred vs True for testing data
0.6105, 0.7000
0.5071, 0.7100
0.5426, 0.4900
0.3625, 0.2700
0.3273, 0.3200
0.5118, 0.4300
0.3219, 0.3100
0.3385, 0.3200
0.4707, 0.7700
0.2679, 0.3400
