1. Load the CSV file into a DataFrame.
2. Parse the "Radiomics" column, as it contains JSON data.
3. Remove columns with the same values across all rows.

In [39]:
import pandas as pd
import json
from sklearn.model_selection import train_test_split
import os

# create random seed for reproducibility
ran_seed = 42

# Load the data from DF_Radiomics_noduls_with_diagnose.csv
file_path = "DF_Radiomics_noduls_with_diagnose.csv"
data = pd.read_csv(file_path)


# Convert the 'Labels' column to an integer
data['Labels'] = data['Labels'].astype(int)

# drop all rows where the label == 0
data = data[data.Labels != 0]

# Parse the JSON in the 'Radiomics' column
data['Radiomics'] = data['Radiomics'].apply(json.loads)

# Convert the 'Radiomics' column into separate columns
radiomics_data = pd.json_normalize(data['Radiomics'])


# Drop the original 'Radiomics' column
data = data.drop('Radiomics', axis=1)


# Reset the indices of both DataFrames
data = data.reset_index(drop=True)
radiomics_data = radiomics_data.reset_index(drop=True)

# Combine the data with the new radiomics columns
data = pd.concat([data, radiomics_data], axis=1)

# Remove columns with the same value across all rows
data = data.loc[:, (data != data.iloc[0]).any()]

#remove columns with all NaN values
data = data.dropna(axis=1, how='all')

print(data.shape)


(309, 103)


In [40]:
data.head()

Unnamed: 0,Patient,Node,Labels,diagnostics_Image-original_Hash,diagnostics_Image-original_Spacing,diagnostics_Image-original_Size,diagnostics_Image-original_Mean,diagnostics_Image-original_Minimum,diagnostics_Image-original_Maximum,diagnostics_Mask-original_Hash,...,original_gldm_GrayLevelNonUniformity,original_gldm_GrayLevelVariance,original_gldm_HighGrayLevelEmphasis,original_gldm_LargeDependenceEmphasis,original_gldm_LargeDependenceHighGrayLevelEmphasis,original_gldm_LargeDependenceLowGrayLevelEmphasis,original_gldm_LowGrayLevelEmphasis,original_gldm_SmallDependenceEmphasis,original_gldm_SmallDependenceHighGrayLevelEmphasis,original_gldm_SmallDependenceLowGrayLevelEmphasis
0,LIDC-IDRI-0068,Node_N1,3,bea2c9750ea59a0bebb6d3bd63ffacc40fcf6a28,"[0.683594, 0.683594, 1.25]","[512, 512, 261]",-1026.065264,-3024.0,3071.0,0506d1d0d6522eddd1640c8ea75c2fc5a9266270,...,7.355556,60.706173,469.644444,23.444444,16578.377778,0.053875,0.021012,0.488461,152.929922,0.019809
1,LIDC-IDRI-0068,Node_N1,3,bea2c9750ea59a0bebb6d3bd63ffacc40fcf6a28,"[0.683594, 0.683594, 1.25]","[512, 512, 261]",-1026.065264,-3024.0,3071.0,9d7da356d43e2f7ad7f374f6c193e97f6088d7c7,...,7.467153,72.801002,471.051095,17.49635,13573.328467,0.11065,0.024328,0.494688,165.356306,0.010062
2,LIDC-IDRI-0068,Node_N1,3,bea2c9750ea59a0bebb6d3bd63ffacc40fcf6a28,"[0.683594, 0.683594, 1.25]","[512, 512, 261]",-1026.065264,-3024.0,3071.0,c0a43747a23d26b107e21614525f2fd8870ffefc,...,7.685185,43.527006,277.787037,20.37037,9310.490741,0.084481,0.031811,0.463956,84.174037,0.027819
3,LIDC-IDRI-0068,Node_N1,3,bea2c9750ea59a0bebb6d3bd63ffacc40fcf6a28,"[0.683594, 0.683594, 1.25]","[512, 512, 261]",-1026.065264,-3024.0,3071.0,72a09dc3f5d5d146b13402b8ef109422cc3f38a5,...,6.78022,35.367709,229.21978,18.78022,7065.923077,0.084783,0.026368,0.465301,67.725183,0.021973
4,LIDC-IDRI-0072,Node_N1,1,54705f26f9320581c90452445aa820fe9630d5e9,"[0.732422, 0.732422, 1.25]","[512, 512, 305]",-871.93633,-3024.0,3071.0,05efcefff38c73903c3d7839bb987a49176f6068,...,629.334146,45.147393,1253.131545,28.918031,43475.541623,0.020967,0.001319,0.262518,254.476429,0.000632


In [41]:
#remove hash columns
data = data.drop(['diagnostics_Image-original_Hash', 'diagnostics_Mask-original_Hash'], axis=1)

# ok looks like all the objeckt columns except of "Patient" & "Node" are in this form [0.683594, 0.683594, 1.25] which is a list of multiple floats
# exploade them into multiple columns

object_columns = data.select_dtypes(include=['object']).columns.tolist()

# Remove 'Patient' and 'Node' from the list
object_columns.remove('Patient')
object_columns.remove('Node')

# Explode the lists in each object column into multiple columns
for column in object_columns:
    # Convert each list to a Series and expand it into multiple columns
    expanded_columns = data[column].apply(pd.Series)
    
    # Rename the expanded columns to have the original column name as a prefix
    expanded_columns = expanded_columns.rename(columns=lambda x: f"{column}_{x}")
    
    # Drop the original column from the DataFrame
    data = data.drop(column, axis=1)
    
    # Concatenate the expanded columns to the DataFrame
    data = pd.concat([data, expanded_columns], axis=1)

In [42]:
# Create a stratified split
train_data, test_data = train_test_split(data, test_size=0.2, stratify=data['Labels'], random_state=ran_seed)

# if the files already exist, skip this step
if os.path.isfile('DF_Radiomics_noduls_with_diagnose_train_data.csv') and os.path.isfile('DF_Radiomics_noduls_with_diagnose_test_data.csv'):
    print("Files already exist, skipping this step")
else:
    # Save the data to CSV files
    train_data.to_csv('DF_Radiomics_noduls_with_diagnose_train_data.csv', index=False)
    test_data.to_csv('DF_Radiomics_noduls_with_diagnose_test_data.csv', index=False)

Files already exist, skipping this step


In [43]:
print("Train data:", train_data.shape)
print("Test data:", test_data.shape)

Train data: (247, 118)
Test data: (62, 118)


In [44]:
train_data.head()

Unnamed: 0,Patient,Node,Labels,diagnostics_Image-original_Mean,diagnostics_Image-original_Minimum,diagnostics_Image-original_Maximum,diagnostics_Mask-original_VoxelNum,diagnostics_Mask-original_VolumeNum,original_firstorder_10Percentile,original_firstorder_90Percentile,...,diagnostics_Mask-original_BoundingBox_2,diagnostics_Mask-original_BoundingBox_3,diagnostics_Mask-original_BoundingBox_4,diagnostics_Mask-original_BoundingBox_5,diagnostics_Mask-original_CenterOfMassIndex_0,diagnostics_Mask-original_CenterOfMassIndex_1,diagnostics_Mask-original_CenterOfMassIndex_2,diagnostics_Mask-original_CenterOfMass_0,diagnostics_Mask-original_CenterOfMass_1,diagnostics_Mask-original_CenterOfMass_2
23,LIDC-IDRI-0137,Node_N1,3,-671.885608,-2048.0,3071.0,26,1,175.5,850.5,...,30,4,6,2,332.692308,389.538462,30.307692,53.215868,83.626926,-321.730769
263,LIDC-IDRI-0377,Node_N1,2,-882.321409,-3024.0,3071.0,2402,1,-307.0,61.0,...,169,29,24,9,382.402998,308.854288,173.03955,92.739302,28.898399,-68.460564
44,LIDC-IDRI-0167,Node_N1,1,-664.766231,-2048.0,3071.0,56,1,-444.5,-66.5,...,50,6,9,2,70.267857,174.964286,50.321429,-136.23778,-53.812866,-234.696429
219,LIDC-IDRI-0272,Node_N1,3,-824.358062,-2048.0,3071.0,51,1,-447.0,102.0,...,81,6,7,2,209.313725,390.941176,81.568627,-47.673652,80.722794,-109.078431
143,LIDC-IDRI-0234,Node_N1,1,-708.012378,-2048.0,3029.0,251,1,-569.0,82.0,...,41,11,14,3,367.756972,310.848606,41.689243,65.179121,43.765426,-236.276892


Scaling the data

In [45]:
# if DF_Radiomics_noduls_with_diagnose_train_data_scaled.csv and DF_Radiomics_noduls_with_diagnose_test_data_scaled.csv already exist, skip this step
# otherwise scale the data and save it to CSV files
if os.path.isfile('DF_Radiomics_noduls_with_diagnose_train_data_scaled.csv') and os.path.isfile('DF_Radiomics_noduls_with_diagnose_test_data_scaled.csv'):
    print("Scaled data already exists")
else:
    from sklearn.preprocessing import StandardScaler

    # Get all column names
    all_columns = train_data.columns.tolist()

    # Exclude the first three columns
    features = all_columns[3:]

    # Create a stratified split
    train_data, test_data = train_test_split(data, test_size=0.2, stratify=data['Labels'])

    # Create a scaler
    scaler = StandardScaler()

    # Fit the scaler on the training data and transform both training and test data
    train_data[features] = scaler.fit_transform(train_data[features])
    test_data[features] = scaler.transform(test_data[features])

    # Save the data to CSV files
    train_data.to_csv('DF_Radiomics_noduls_with_diagnose_train_data_scaled.csv', index=False)
    test_data.to_csv('DF_Radiomics_noduls_with_diagnose_test_data_scaled.csv', index=False)


Scaled data already exists


# Fully Connected Neural Network

In [46]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split

# Assuming 'data' is your pandas DataFrame
# Ensure the DataFrame only contains numeric values
data = pd.read_csv('DF_Radiomics_noduls_with_diagnose_train_data_scaled.csv')
#drop patient and node columns
data = data.drop(['Patient', 'Node'], axis=1)
# TODO maybe add the columns later to see if it helps

Trainset

In [47]:
# Split data into features and labels
X = data.drop('Labels', axis=1).values
y = data['Labels'].values

# Convert to PyTorch tensors
X_tensor = torch.tensor(X).float()
y_tensor = torch.tensor(y).float()

# Stratified split
X_train, X_test, y_train, y_test = train_test_split(X_tensor, y_tensor, test_size=0.2, stratify=y_tensor, random_state=ran_seed)

# Create TensorDatasets
train_dataset = TensorDataset(X_train, y_train)
test_dataset = TensorDataset(X_test, y_test)

In [48]:
print("Train data:", train_dataset.tensors[0].shape)

Train data: torch.Size([197, 115])


In [49]:
# Model
class FCNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(FCNN, self).__init__()
        self.net = nn.Sequential(
            nn.Linear(input_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, hidden_size),
            nn.ReLU(),
            nn.Dropout(0.5),  # Dropout for regularization
            nn.Linear(hidden_size, hidden_size*3),
            nn.ReLU(),
            nn.Dropout(0.5),  # Dropout for regularization
            nn.Linear(hidden_size*3, hidden_size),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden_size, output_size)
        )

    def forward(self, x):
        return self.net(x)

# Hyperparameters
input_size = train_dataset.tensors[0].shape[1]  # Get the number of features from your dataset
hidden_size = input_size*2  # You can tune this
output_size = 4   # 3 labels 
learning_rate = 0.001
batch_size = 32
epochs = 50  # Adjust based on your runtime requirement
early_stopping_factor = 10
clip_value = 1  # for gradient clipping

# Initialize model, loss function, and optimizer
model = FCNN(input_size, hidden_size, output_size).cuda()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
#optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-5)  # L2 regularization

# DataLoader
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

In [50]:
#check if cuda is available, print the gpu model name
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if device.type == "cuda":
    print(torch.cuda.get_device_name(0))
    # Move model to the device
    model = model.to(device)

NVIDIA GeForce RTX 3090


In [51]:
# Initialize best loss to infinity for comparison in the first epoch
best_loss = float('inf')

# Patience counter
patience_counter = 0

# Patience limit
patience_limit = 5

# Training loop
model.train()
for epoch in range(epochs):
    epoch_loss = 0
    for inputs, targets in train_loader:
    
        # Move inputs and targets to the device
        inputs = inputs.to(device)
        targets = targets.to(device)
        
        targets = targets.long()

        # Zero the gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)

        # Compute loss
        loss = criterion(outputs, targets)

        # Backward pass and optimize
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()

    # Average epoch loss
    epoch_loss = epoch_loss / len(train_loader)

    # If the training loss has improved, save the model and reset the patience counter
    if epoch_loss < best_loss:
        best_loss = epoch_loss
        patience_counter = 0
        torch.save(model.state_dict(), 'best_model.pth')
    else:
        # If the training loss has not improved, increment the patience counter
        patience_counter += 1
        if patience_counter >= patience_limit:
            print(f"Early stopping at epoch {epoch+1}/{epochs}, best loss: {best_loss}")
            break

    print(f"Epoch {epoch+1}/{epochs}, Loss: {epoch_loss}")

# Load the best model
model.load_state_dict(torch.load('best_model.pth'))

Epoch 1/50, Loss: 1.2985712460109167
Epoch 2/50, Loss: 1.1078111699649267


Epoch 3/50, Loss: 1.0728304641587394
Epoch 4/50, Loss: 0.959037286894662
Epoch 5/50, Loss: 0.9829844576971871
Epoch 6/50, Loss: 0.8095152718680245
Epoch 7/50, Loss: 0.7562763776097979
Epoch 8/50, Loss: 0.6779869198799133
Epoch 9/50, Loss: 0.6688331280435834
Epoch 10/50, Loss: 0.5295133377824511
Epoch 11/50, Loss: 0.45872377497809275
Epoch 12/50, Loss: 0.4910708112376077
Epoch 13/50, Loss: 0.39245015595640453
Epoch 14/50, Loss: 0.38358654507568907
Epoch 15/50, Loss: 0.29226075964314596
Epoch 16/50, Loss: 0.34072279717241016
Epoch 17/50, Loss: 0.3163018726876804
Epoch 18/50, Loss: 0.253993920981884
Epoch 19/50, Loss: 0.18849473127296992
Epoch 20/50, Loss: 0.15335129094975336
Epoch 21/50, Loss: 0.12616935106260435
Epoch 22/50, Loss: 0.08016080914863519
Epoch 23/50, Loss: 0.09280573257378169
Epoch 24/50, Loss: 0.1649495502434937
Epoch 25/50, Loss: 0.1516228589628424
Epoch 26/50, Loss: 0.1562061312475375
Epoch 27/50, Loss: 0.05456381419207901
Epoch 28/50, Loss: 0.07761004833238465
Epoch 29/

<All keys matched successfully>

# Evaluate

Since Confusion Matrix is 3x3 calculate Sensitivity & Specificity for each class by considering that class as the positive class and the other two as the negative class.

Sensitivity, also known as the true positive rate (TPR), measures the proportion of actual positives that are correctly identified as such. In other words, it measures the ability of the model to correctly identify positive instances.

Specificity, on the other hand, measures the proportion of actual negatives that are correctly identified as such. It measures the ability of the model to correctly identify negative instances.

The false positive rate (FPR) is the complement of specificity. It measures the proportion of actual negatives that are incorrectly identified as positives. In other words, it measures the rate at which the model makes false alarms.

Here's how they relate:

- TPR = Sensitivity = TP / (TP + FN)
- FPR = 1 - Specificity = FP / (FP + TN)
- Specificity = TN / (TN + FP)

Where:
- TP = True Positives
- FN = False Negatives
- FP = False Positives
- TN = True Negatives

In [52]:
from sklearn.metrics import confusion_matrix
import numpy as np

# Evaluation function
def evaluate(model, data_loader, device):
    model.eval()  # Set the model to evaluation mode
    correct_predictions = 0
    total_predictions = 0
    all_targets = []
    all_predictions = []

    with torch.no_grad():  # Disable gradient calculations
        for inputs, targets in data_loader:
            inputs = inputs.to(device)
            targets = targets.to(device)

            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)  # Get the index of the max log-probability

            total_predictions += targets.size(0)
            correct_predictions += (predicted == targets).sum().item()

            all_targets.extend(targets.cpu().numpy())
            all_predictions.extend(predicted.cpu().numpy())

    accuracy = correct_predictions / total_predictions

    # Calculate confusion matrix
    cm = confusion_matrix(all_targets, all_predictions)

    # Calculate sensitivity and specificity for each class
    sensitivity = np.diag(cm) / np.sum(cm, axis = 1)
    specificity = (np.sum(cm) - np.sum(cm, axis = 0) - np.sum(cm, axis = 1) + np.diag(cm)) / (np.sum(cm) - np.sum(cm, axis = 0))

    return accuracy, sensitivity, specificity

## Testset

In [53]:
# Use the function
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
accuracy, sensitivity, specificity = evaluate(model, test_loader, device)
print(f'Accuracy: {accuracy * 100}%')
for i, (sens, spec) in enumerate(zip(sensitivity, specificity)):
    print(f'Class {i}: Sensitivity: {sens * 100}%, Specificity: {spec * 100}%')

Accuracy: 80.0%
Class 0: Sensitivity: 87.5%, Specificity: 93.93939393939394%
Class 1: Sensitivity: 61.53846153846154%, Specificity: 87.5%
Class 2: Sensitivity: 85.71428571428571%, Specificity: 88.88888888888889%


## Validation Set

In [54]:
validation_data = pd.read_csv('DF_Radiomics_noduls_with_diagnose_test_data_scaled.csv')

#create the tensor dataset
X_test = validation_data.drop(['Patient', 'Node', 'Labels'], axis=1).values
y_test = validation_data['Labels'].values
validation_dataset = TensorDataset(torch.tensor(X_test).float(), torch.tensor(y_test).float())

# Use the function
validation_loader = DataLoader(validation_dataset, batch_size=batch_size, shuffle=False)
accuracy, sensitivity, specificity = evaluate(model, validation_loader, device)
print(f'Accuracy: {accuracy * 100}%')
for i, (sens, spec) in enumerate(zip(sensitivity, specificity)):
    print(f'Class {i}: Sensitivity: {sens * 100}%, Specificity: {spec * 100}%')

Accuracy: 77.41935483870968%
Class 0: Sensitivity: 95.0%, Specificity: 96.96969696969697%
Class 1: Sensitivity: 75.0%, Specificity: 91.83673469387756%
Class 2: Sensitivity: 65.38461538461539%, Specificity: 78.57142857142857%


# Gridsearch

In [55]:
# Hyperparameters
input_size = train_dataset.tensors[0].shape[1]  # Get the number of features from your dataset
output_size = 4   # 3 labels 
batch_size = 32
epochs = 50  # Adjust based on your runtime requirement
early_stopping_factor = 10
clip_value = 1  # for gradient clipping

# Hyperparameters to tune
hidden_sizes = [input_size*3, input_size*4, input_size*5]
learning_rates = [0.0001, 0.001, 0.01]
weight_decays = [0, 1e-5, 1e-4]  # L2 regularization

# Load validation data
validation_data = pd.read_csv('DF_Radiomics_noduls_with_diagnose_test_data_scaled.csv')

# Create the tensor dataset
X_test = validation_data.drop(['Patient', 'Node', 'Labels'], axis=1).values
y_test = validation_data['Labels'].values
validation_dataset = TensorDataset(torch.tensor(X_test).float(), torch.tensor(y_test).float())

# Use the function
validation_loader = DataLoader(validation_dataset, batch_size=batch_size, shuffle=False)

# Initialize DataFrame to store results
results = pd.DataFrame(columns=["epoch", "epoch_loss", "best_loss", "hidden_size", "learning_rate", "weight_decay", "accuracy", "sensitivity", "specificity"])

# Grid search
for hidden_size in hidden_sizes:
    for learning_rate in learning_rates:
        for weight_decay in weight_decays:
            # Initialize model, loss function, and optimizer
            model = FCNN(input_size, hidden_size, output_size).cuda()
            criterion = nn.CrossEntropyLoss()
            optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

            # DataLoader
            train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
            
                        # Initialize best loss to infinity for comparison in the first epoch
            best_loss = float('inf')

            # Patience counter
            patience_counter = 0

            # Patience limit
            patience_limit = 5
            
            # Training loop (as before)...
            model.train()
            for epoch in range(epochs):
                epoch_loss = 0
                for inputs, targets in train_loader:
                
                    # Move inputs and targets to the device
                    inputs = inputs.to(device)
                    targets = targets.to(device)
                    
                    targets = targets.long()

                    # Zero the gradients
                    optimizer.zero_grad()

                    # Forward pass
                    outputs = model(inputs)

                    # Compute loss
                    loss = criterion(outputs, targets)

                    # Backward pass and optimize
                    loss.backward()
                    optimizer.step()

                    epoch_loss += loss.item()

                # Average epoch loss
                epoch_loss = epoch_loss / len(train_loader)

                # If the training loss has improved, save the model and reset the patience counter
                if epoch_loss < best_loss:
                    best_loss = epoch_loss
                    patience_counter = 0
                    #torch.save(model.state_dict(), 'best_model.pth')
                    #change to realtive path according to the hidden size, learning rate and weight decay
                    torch.save(model.state_dict(), f'../models/best_model_{hidden_size}_{str(learning_rate).replace(".", "_")}_{weight_decay}.pth')

                    
                else:
                    # If the training loss has not improved, increment the patience counter
                    patience_counter += 1
                    if patience_counter >= patience_limit:
                        print(f"Early stopping at epoch {epoch+1}/{epochs}, best loss: {best_loss}")
                        break

                print(f"Epoch {epoch+1}/{epochs}, Loss: {epoch_loss}")

            # Load the best model
            model.load_state_dict(torch.load(f'../models/best_model_{hidden_size}_{str(learning_rate).replace(".", "_")}_{weight_decay}.pth'))

            # Evaluate the model
            accuracy, sensitivity, specificity = evaluate(model, validation_loader, device)

            # Write results to DataFrame
            # Check if results is a DataFrame with concat
            results = pd.concat([results, pd.DataFrame([[epoch, epoch_loss, best_loss, hidden_size, learning_rate, weight_decay, accuracy, sensitivity, specificity]], columns=results.columns)])


# Print the results
print(results)

Epoch 1/50, Loss: 1.3823916230882918


Epoch 2/50, Loss: 1.3525687456130981
Epoch 3/50, Loss: 1.319555129323687
Epoch 4/50, Loss: 1.293316398348127
Epoch 5/50, Loss: 1.2495623145784651
Epoch 6/50, Loss: 1.1964195626122611
Epoch 7/50, Loss: 1.170014466558184
Epoch 8/50, Loss: 1.1464959723608834
Epoch 9/50, Loss: 1.1096115963799613
Epoch 10/50, Loss: 1.0855580568313599
Epoch 11/50, Loss: 1.0560859186308724
Epoch 12/50, Loss: 1.017404317855835
Epoch 13/50, Loss: 0.9935423816953387
Epoch 14/50, Loss: 0.9936107822826931
Epoch 15/50, Loss: 0.9784697634833199
Epoch 16/50, Loss: 0.9678853494780404
Epoch 17/50, Loss: 0.9208238550594875
Epoch 18/50, Loss: 0.8790452310017177
Epoch 19/50, Loss: 0.9471218585968018
Epoch 20/50, Loss: 0.8959122385297503
Epoch 21/50, Loss: 0.8787334987095424
Epoch 22/50, Loss: 0.8780438133648464
Epoch 23/50, Loss: 0.7965034416743687
Epoch 24/50, Loss: 0.8067137598991394
Epoch 25/50, Loss: 0.7516182150159564
Epoch 26/50, Loss: 0.7507141743387494
Epoch 27/50, Loss: 0.7148684859275818
Epoch 28/50, Loss: 0.73

  results = pd.concat([results, pd.DataFrame([[epoch, epoch_loss, best_loss, hidden_size, learning_rate, weight_decay, accuracy, sensitivity, specificity]], columns=results.columns)])


Epoch 11/50, Loss: 1.0468641860144479
Epoch 12/50, Loss: 1.0722462620053972
Epoch 13/50, Loss: 1.019565028803689
Epoch 14/50, Loss: 1.0075282113892692
Epoch 15/50, Loss: 0.9623549836022514
Epoch 16/50, Loss: 0.9993742789540973
Epoch 17/50, Loss: 0.9398404444966998
Epoch 18/50, Loss: 0.9595872589520046
Epoch 19/50, Loss: 0.8555400967597961
Epoch 20/50, Loss: 0.8277589508465358
Epoch 21/50, Loss: 0.8686086876051766
Epoch 22/50, Loss: 0.8317681806428092
Epoch 23/50, Loss: 0.7823511958122253
Epoch 24/50, Loss: 0.804394509111132
Epoch 25/50, Loss: 0.7673937678337097
Epoch 26/50, Loss: 0.7206492338861737
Epoch 27/50, Loss: 0.7433012127876282
Epoch 28/50, Loss: 0.6779493859836033
Epoch 29/50, Loss: 0.7288133757455009
Epoch 30/50, Loss: 0.6762949909482684
Epoch 31/50, Loss: 0.6449400356837681
Epoch 32/50, Loss: 0.5943441433565957
Epoch 33/50, Loss: 0.6120719909667969
Epoch 34/50, Loss: 0.6483256646565029
Epoch 35/50, Loss: 0.5690533689090184
Epoch 36/50, Loss: 0.627948841878346
Epoch 37/50, Lo

In [56]:
#calculate a score from accuracy, sensitivity and specificity wher sensitivity and specificity are a list of 3 values
results['score'] = results['accuracy'] + results['sensitivity'].apply(lambda x: sum(x)) + results['specificity'].apply(lambda x: sum(x))
#sort by score
results = results.sort_values(by=['score'], ascending=False)
results

Unnamed: 0,epoch,epoch_loss,best_loss,hidden_size,learning_rate,weight_decay,accuracy,sensitivity,specificity,score
0,33,0.019185,0.009572,345,0.001,1e-05,0.887097,"[0.95, 0.8125, 0.8846153846153846]","[0.975609756097561, 0.9361702127659575, 0.9166...",6.362659
0,23,0.059413,0.034894,575,0.001,0.0001,0.870968,"[0.95, 0.8125, 0.8461538461538461]","[0.9736842105263158, 0.9375, 0.8947368421052632]",6.285543
0,49,0.070896,0.070896,575,0.0001,0.0001,0.854839,"[0.95, 0.8125, 0.8076923076923077]","[0.9736842105263158, 0.9375, 0.868421052631579]",6.204636
0,20,0.338307,0.06903,460,0.001,1e-05,0.854839,"[0.95, 0.8125, 0.8076923076923077]","[0.9743589743589743, 0.9361702127659575, 0.868...",6.203981
0,30,0.181804,0.048606,345,0.001,0.0001,0.854839,"[0.9, 0.8125, 0.8461538461538461]","[0.95, 0.9361702127659575, 0.8918918918918919]",6.191555
0,21,0.068989,0.067284,460,0.001,0.0001,0.83871,"[1.0, 0.8125, 0.7307692307692307]","[1.0, 0.9387755102040817, 0.825]",6.145754
0,29,0.030188,0.027156,345,0.001,0.0,0.83871,"[0.8, 0.8125, 0.8846153846153846]","[0.9090909090909091, 0.9347826086956522, 0.911...",6.091463
0,28,0.066078,0.029905,460,0.001,0.0,0.822581,"[0.9, 0.8125, 0.7692307692307693]","[0.9487179487179487, 0.9361702127659575, 0.842...",6.031305
0,49,0.218538,0.196044,460,0.0001,1e-05,0.822581,"[0.9, 0.8125, 0.7692307692307693]","[0.9473684210526315, 0.9375, 0.8421052631578947]",6.031285
0,16,0.230785,0.105816,575,0.001,0.0,0.806452,"[0.95, 0.8125, 0.6923076923076923]","[0.9714285714285714, 0.9361702127659575, 0.809...",5.978382
