In [1]:
import random
random.seed(42)

import torch
torch.manual_seed(42)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(42)

In [2]:
import numpy as np
import pandas as pd
import shap
import tqdm

from sklearn.base import clone
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import FunctionTransformer, StandardScaler
from sklearn.model_selection import GroupShuffleSplit
import sklearn.metrics as skmetrics
from sklearn.calibration import CalibrationDisplay
from sklearn.utils import resample

from skopt import BayesSearchCV
import matplotlib.pyplot as plt
import seaborn as sns

from process_fe import create_feature_engineering_datasets
from data import data_to_array_dict, get_data_date_split, get_data_date_id_split, get_feature_colnames
from utils import stratification
from plotting import paper_theme, ReliabilityDisplay, ShapDisplay, risk_feature_plot
import metrics
from shap_calculator import calc_shap_df

from tqdm_style import tqdm_style
from sklearn.metrics import precision_score, recall_score, f1_score, average_precision_score
import numpy as np
from sklearn.model_selection import StratifiedKFold, ParameterGrid
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from torch.utils.data import DataLoader, Subset
import torch
import torch.nn as nn
import torch.optim as optim

  from pandas.core import (


## Fixing the training set and predicting on the last year

In [21]:
N_DAYS = 3
THRESHOLDS = [0.3, 0.8]
DATES_SPLIT = {
    "date_train_start": "2021-06-28",
    "date_train_end": "2023-01-01",
    "date_test_end": "2024-01-01",
}

In [22]:
col_nice_names = {
    'awake_freq': 'Night time Awake Frequency', 
    'bathroom_daytime_freq': 'Daytime Bathroom Frequency', 
    'bathroom_daytime_freq_ma': 'Daytime Bathroom Frequency MA', 
    'bathroom_daytime_freq_ma_delta': 'Daytime Bathroom Frequency MA Delta', 
    'bathroom_freq': 'Bathroom Frequency',
    'bathroom_nighttime_freq': 'Night time Bathroom Frequency', 
    'bathroom_nighttime_freq_ma': 'Night time Bathroom Frequency MA', 
    'bathroom_nighttime_freq_ma_delta': 'Night time Bathroom Frequency MA Delta', 
    'bathroom_relative_transition_time_delta_mean': 'Mean Relative Bathroom Transition Time Delta',
    'bathroom_relative_transition_time_delta_std': 'STD Relative Bathroom Transition Time Delta',
    'bedroom_freq': 'Bedroom Frequency',
    'daily_entropy': 'Daily Entropy', 
    'hallway_freq': 'Hallway Frequency', 
    'heart_rate_mean': 'Mean Night Time Heart Rate',
    'heart_rate_std': 'STD Night Time Heart Rate', 
    'kitchen_freq': 'Kitchen Frequency', 
    'lounge_freq': 'Lounge Frequency', 
    'previous_uti': 'Number of Previous UTIs',
    'respiratory_rate_mean': 'Mean Night Time Respiratory Rate', 
    'respiratory_rate_std': 'STD Night Time Respiratory Rate',
}

## A MLP

In [23]:
fe_data = create_feature_engineering_datasets(reload=False)

In [24]:
print(fe_data.columns)

Index(['patient_id', 'date', 'bathroom_freq', 'bedroom_freq', 'hallway_freq',
       'kitchen_freq', 'lounge_freq', 'awake_freq', 'heart_rate_mean',
       'heart_rate_std', 'respiratory_rate_mean', 'respiratory_rate_std',
       'bathroom_nighttime_freq', 'bathroom_nighttime_freq_ma',
       'bathroom_nighttime_freq_ma_delta', 'bathroom_daytime_freq',
       'bathroom_daytime_freq_ma', 'bathroom_daytime_freq_ma_delta',
       'daily_entropy', 'bathroom_relative_transition_time_delta_mean',
       'bathroom_relative_transition_time_delta_std', 'previous_uti'],
      dtype='object')


In [25]:

data_train, data_test, _ = get_data_date_split(
    fe_data, dates_split=DATES_SPLIT, n_days=N_DAYS, impute=True
)


X_train, y_train, ids_train, sample_weight = (
    data_train['X'], data_train['y'], data_train["id"], data_train['sample_weight']
)

X_test, y_test, ids_test, dates_test = (
    data_test['X'], data_test['y'], data_test["id"], data_test['date']
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_labels[


In [26]:
# Convert IDs to sets
set_ids_train = set(ids_train)
set_ids_test = set(ids_test)

# Check for intersection
common_ids = set_ids_train.intersection(set_ids_test)

# Check if there are any common elements
if common_ids:
    print(f"There are {len(common_ids)} common IDs between train and test datasets.")
else:
    print("IDs in train and test datasets are unique to each other.")

There are 41 common IDs between train and test datasets.


In [27]:
# List of IDs to remove
ids_to_remove = ['AboZyUBeiQW3nVCcbXGpay', 'NZjrVTZQR1w9LPJMt26MbG', 'XVb8nztyc2LYPCAewZq11S', 'XdbAAiDw1vd3Bjbo9EVo1B']

# Create a boolean index where False indicates IDs that need to be removed
indices_to_keep = ~np.isin(data_test['id'], ids_to_remove)

# Use this index to filter all related arrays in data_test
data_test['X'] = data_test['X'][indices_to_keep]
data_test['y'] = data_test['y'][indices_to_keep]
data_test['id'] = data_test['id'][indices_to_keep]
data_test['date'] = data_test['date'][indices_to_keep] if 'date' in data_test else None

X_test, y_test, ids_test, dates_test = (
    data_test['X'], data_test['y'], data_test["id"], data_test['date']
)

In [28]:
print("id_train:", ids_train.shape)
print("id_test:", ids_test.shape)

id_train: (1839,)
id_test: (1145,)


#### Here unique participants in the test dataset but not in the training dataset were removed.

In [35]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import StandardScaler
import numpy as np

# Define flatten function
def flatten(x):
    return x.reshape(x.shape[0], -1)

# Apply flattening
X_train_flattened = flatten(X_train)
X_test_flattened = flatten(X_test)

# Scale the data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_flattened)
X_test_scaled = scaler.transform(X_test_flattened)

# Convert to PyTorch tensors
X_train_torch = torch.tensor(X_train_scaled, dtype=torch.float32)
y_train_torch = torch.tensor(y_train, dtype=torch.float32)
sample_weight_torch = torch.tensor(sample_weight, dtype=torch.float32)

In [36]:
X_test_torch = torch.tensor(X_test_scaled, dtype=torch.float32)

y_test_torch = torch.tensor(y_test, dtype=torch.float32)

In [37]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import StandardScaler
import numpy as np

In [41]:
from sklearn.metrics import precision_score, recall_score, f1_score, average_precision_score

In [39]:
gender_map['gender_encoded'] = gender_map['Gender PwD'].map({'Male': 0, 'Female': 1})


patient_gender_dict = dict(zip(gender_map['patient_id'], gender_map['gender_encoded']))

gender_train = [patient_gender_dict.get(patient_id, 0) for patient_id in ids_train] 

gender_test = [patient_gender_dict.get(patient_id, 0) for patient_id in ids_test] 

gender_train = np.array(gender_train).astype(int)
gender_test = np.array(gender_test).astype(int)

In [40]:
print("Gender distribution in training data:", np.unique(gender_train, return_counts=True))
print("Gender distribution in testing data:", np.unique(gender_test, return_counts=True))

Gender distribution in training data: (array([0, 1]), array([1183,  656]))
Gender distribution in testing data: (array([0, 1]), array([550, 387]))


In [41]:
# Create a mapping from patient IDs to numeric values
unique_ids = np.unique(ids_train)
id_to_numeric = {id_: i for i, id_ in enumerate(unique_ids)}

# Apply the mapping to train and test IDs
numeric_ids_train = np.array([id_to_numeric[id_] for id_ in ids_train])
numeric_ids_test = np.array([id_to_numeric[id_] for id_ in ids_test])

In [42]:
from torch.utils.data import Dataset
import pickle

class CustomDataset(Dataset):
    def __init__(self, features, labels, gender, patient_id, sample_weight=None):
        self.features = torch.tensor(features, dtype=torch.float32)
        self.labels = torch.tensor(labels, dtype=torch.long)
        self.gender = torch.tensor(gender, dtype=torch.long)
        self.patient_id = torch.tensor(patient_id, dtype=torch.long)  # Ensure patient_id is Long
        if sample_weight is not None:
            self.sample_weight = torch.tensor(sample_weight, dtype=torch.float32)
        else:
            self.sample_weight = None

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, index):
        if self.sample_weight is not None:
            return self.features[index], self.labels[index], self.gender[index], self.patient_id[index], self.sample_weight[index]
        return self.features[index], self.labels[index], self.gender[index], self.patient_id[index]


In [43]:

# Instantiate datasets
train_dataset = CustomDataset(X_train_scaled, y_train, gender_train, numeric_ids_train, sample_weight)
test_dataset = CustomDataset(X_test_scaled, y_test, gender_test, numeric_ids_test, sample_weight)

## Embedding by ID

In [46]:
class MLPWithEmbedding(nn.Module):
    def __init__(self, input_size, hidden_size1, hidden_size2, output_size, num_patient_ids, embedding_dim=8, dropout_rate=0.5):
        super(MLPWithEmbedding, self).__init__()
        self.embedding = nn.Embedding(num_embeddings=num_patient_ids, embedding_dim=embedding_dim)
        self.layer1 = nn.Linear(input_size + embedding_dim, hidden_size1)
        self.dropout1 = nn.Dropout(p=dropout_rate)
        self.relu1 = nn.ReLU()
        self.layer2 = nn.Linear(hidden_size1, hidden_size2)
        self.dropout2 = nn.Dropout(p=dropout_rate)
        self.relu2 = nn.ReLU()
        self.layer3 = nn.Linear(hidden_size2, output_size)

    def forward(self, x, patient_id):
        patient_id_embedded = self.embedding(patient_id)
        x = torch.cat((x, patient_id_embedded), dim=1)
        x = self.layer1(x)
        x = self.relu1(x)
        x = self.dropout1(x)
        x = self.layer2(x)
        x = self.relu2(x)
        x = self.dropout2(x)
        x = self.layer3(x)
        return x

In [47]:
def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=10):
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        for features, labels, gender, patient_id, sample_weight in train_loader:
            optimizer.zero_grad()
            outputs = model(features, patient_id)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

        train_loss = running_loss / len(train_loader)
        
        model.eval()
        val_running_loss = 0.0
        all_labels = []
        all_preds = []
        all_genders = []
        with torch.no_grad():
            for features, labels, gender, patient_id, sample_weight in val_loader:
                outputs = model(features, patient_id)
                loss = criterion(outputs, labels)
                val_running_loss += loss.item()
                _, predicted = torch.max(outputs.data, 1)
                all_labels.extend(labels.cpu().numpy())
                all_preds.extend(predicted.cpu().numpy())
                all_genders.extend(gender.cpu().numpy())
        
        val_loss = val_running_loss / len(val_loader)
        all_labels = np.array(all_labels)
        all_preds = np.array(all_preds)
        all_genders = np.array(all_genders)

        accuracy = accuracy_score(all_labels, all_preds)
        precision = precision_score(all_labels, all_preds, average='binary', zero_division=0)
        sensitivity = recall_score(all_labels, all_preds, average='binary', zero_division=0)

        male_indices = all_genders == 0
        female_indices = all_genders == 1

        male_accuracy = accuracy_score(all_labels[male_indices], all_preds[male_indices])
        male_precision = precision_score(all_labels[male_indices], all_preds[male_indices], average='binary', zero_division=0)
        male_sensitivity = recall_score(all_labels[male_indices], all_preds[male_indices], average='binary', zero_division=0)

        female_accuracy = accuracy_score(all_labels[female_indices], all_preds[female_indices])
        female_precision = precision_score(all_labels[female_indices], all_preds[female_indices], average='binary', zero_division=0)
        female_sensitivity = recall_score(all_labels[female_indices], all_preds[female_indices], average='binary', zero_division=0)

        print(f'Epoch {epoch+1}/{num_epochs}, '
              f'Val Loss: {val_loss:.4f}, '
              f'Overall - Accuracy: {accuracy:.4f}, Precision: {precision:.4f}, Sensitivity: {sensitivity:.4f}, '
              f'Male - Accuracy: {male_accuracy:.4f}, Precision: {male_precision:.4f}, Sensitivity: {male_sensitivity:.4f}, '
              f'Female - Accuracy: {female_accuracy:.4f}, Precision: {female_precision:.4f}, Sensitivity: {female_sensitivity:.4f}')
    
    return val_loss, accuracy, precision, sensitivity, male_accuracy, male_precision, male_sensitivity, female_accuracy, female_precision, female_sensitivity

In [48]:
def cross_validate_and_tune(train_dataset, num_unique_ids, param_grid, num_folds=10, num_epochs=10):
    best_hyperparams = None
    best_val_loss = float('inf')
    best_metrics = None

    skf = StratifiedKFold(n_splits=num_folds)
    
    for params in ParameterGrid(param_grid):
        print(f"Testing hyperparameters: {params}")
        
        fold_metrics = {
            'accuracy': [], 'precision': [], 'sensitivity': [],
            'male_accuracy': [], 'male_precision': [], 'male_sensitivity': [],
            'female_accuracy': [], 'female_precision': [], 'female_sensitivity': []
        }
        
        for fold, (train_index, val_index) in enumerate(skf.split(X_train_scaled, y_train)):
            print(f'Fold {fold+1}/{num_folds} with params: {params}')
            
            train_subset = Subset(train_dataset, train_index)
            val_subset = Subset(train_dataset, val_index)
            train_loader = DataLoader(train_subset, batch_size=128, shuffle=True)
            val_loader = DataLoader(val_subset, batch_size=128, shuffle=False)

            print(f"Train loader length: {len(train_loader)}, Validation loader length: {len(val_loader)}")
            
            model = MLPWithEmbedding(input_size=X_train_scaled.shape[1], hidden_size1=30, hidden_size2=10, output_size=2, num_patient_ids=num_unique_ids, embedding_dim=8, dropout_rate=params['dropout_rate'])
            criterion = nn.CrossEntropyLoss()
            optimizer = optim.Adam(model.parameters(), lr=params['lr'])
            
            val_loss, accuracy, precision, sensitivity, male_accuracy, male_precision, male_sensitivity, female_accuracy, female_precision, female_sensitivity = train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs)
            
            fold_metrics['accuracy'].append(accuracy)
            fold_metrics['precision'].append(precision)
            fold_metrics['sensitivity'].append(sensitivity)
            fold_metrics['male_accuracy'].append(male_accuracy)
            fold_metrics['male_precision'].append(male_precision)
            fold_metrics['male_sensitivity'].append(male_sensitivity)
            fold_metrics['female_accuracy'].append(female_accuracy)
            fold_metrics['female_precision'].append(female_precision)
            fold_metrics['female_sensitivity'].append(female_sensitivity)
        
        avg_val_loss = np.mean([val_loss])
        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            best_hyperparams = params
            best_metrics = fold_metrics

        # Print the metrics for each combination of hyperparameters at the end of 10 folds
        print(f'Hyperparameters: {params}')
        for metric in fold_metrics:
            print(f'{metric}: Mean = {np.mean(fold_metrics[metric]):.4f}, Std = {np.std(fold_metrics[metric]):.4f}')

    # Print the best hyperparameters and corresponding metrics
    print("Best Hyperparameters:", best_hyperparams)
    for metric in best_metrics:
        print(f'{metric}: Mean = {np.mean(best_metrics[metric]):.4f}, Std = {np.std(best_metrics[metric]):.4f}')

    return best_hyperparams

param_grid = {'lr': [0.001, 0.005, 0.01], 'dropout_rate': [0, 0.2, 0.5]}
num_unique_ids = len(np.unique(ids_train))

# Perform cross-validation and find the best hyperparameters
best_hyperparams = cross_validate_and_tune(train_dataset, num_unique_ids, param_grid, num_folds=10, num_epochs=10)
    

Testing hyperparameters: {'dropout_rate': 0, 'lr': 0.001}
Fold 1/10 with params: {'dropout_rate': 0, 'lr': 0.001}
Train loader length: 13, Validation loader length: 2
Epoch 1/10, Val Loss: 0.7019, Overall - Accuracy: 0.6522, Precision: 0.4583, Sensitivity: 0.5690, Male - Accuracy: 0.6780, Precision: 0.4583, Sensitivity: 0.6471, Female - Accuracy: 0.0000, Precision: 0.0000, Sensitivity: 0.0000
Epoch 2/10, Val Loss: 0.7050, Overall - Accuracy: 0.6467, Precision: 0.4000, Sensitivity: 0.2414, Male - Accuracy: 0.6723, Precision: 0.4000, Sensitivity: 0.2745, Female - Accuracy: 0.0000, Precision: 0.0000, Sensitivity: 0.0000
Epoch 3/10, Val Loss: 0.7261, Overall - Accuracy: 0.6413, Precision: 0.3333, Sensitivity: 0.1379, Male - Accuracy: 0.6667, Precision: 0.3333, Sensitivity: 0.1569, Female - Accuracy: 0.0000, Precision: 0.0000, Sensitivity: 0.0000
Epoch 4/10, Val Loss: 0.7568, Overall - Accuracy: 0.6685, Precision: 0.4286, Sensitivity: 0.1552, Male - Accuracy: 0.6949, Precision: 0.4286, Sens

In [49]:
# Define bootstrap sampling function
def bootstrap_sample(dataset, patient_ids, proportion=0.8):
    sampled_indices = []
    unique_patient_ids = np.unique(patient_ids)
    for pid in unique_patient_ids:
        pid_indices = np.where(patient_ids == pid)[0]
        sample_size = int(proportion * len(pid_indices))
        sampled_pid_indices = np.random.choice(pid_indices, size=sample_size, replace=True)
        sampled_indices.extend(sampled_pid_indices)
    return Subset(dataset, sampled_indices)

# Function to evaluate the model
def evaluate_model(embedding_mlp_model, dataloader):
    embedding_mlp_model.eval()
    all_labels = []
    all_preds = []
    all_genders = []

    with torch.no_grad():
        for features, labels, gender, patient_id, sample_weight in dataloader:
            outputs = embedding_mlp_model(features, patient_id)
            _, preds = torch.max(outputs, 1)
            all_labels.extend(labels.cpu().numpy())
            all_preds.extend(preds.cpu().numpy())
            all_genders.extend(gender.cpu().numpy())
    
    accuracy = accuracy_score(all_labels, all_preds)
    precision = precision_score(all_labels, all_preds, average='binary', zero_division=0)
    recall = recall_score(all_labels, all_preds, average='binary', zero_division=0)
    f1 = f1_score(all_labels, all_preds, average='binary', zero_division=0)

    males = [i for i, g in enumerate(all_genders) if g == 0]
    females = [i for i, g in enumerate(all_genders) if g == 1]

    male_labels = [all_labels[i] for i in males]
    male_preds = [all_preds[i] for i in males]
    female_labels = [all_labels[i] for i in females]
    female_preds = [all_preds[i] for i in females]

    male_accuracy = accuracy_score(male_labels, male_preds)
    male_precision = precision_score(male_labels, male_preds, average='binary', zero_division=0)
    male_recall = recall_score(male_labels, male_preds, average='binary', zero_division=0)
    male_f1 = f1_score(male_labels, male_preds, average='binary', zero_division=0)

    female_accuracy = accuracy_score(female_labels, female_preds)
    female_precision = precision_score(female_labels, female_preds, average='binary', zero_division=0)
    female_recall = recall_score(female_labels, female_preds, average='binary', zero_division=0)
    female_f1 = f1_score(female_labels, female_preds, average='binary', zero_division=0)

    return {
        'overall': {
            'accuracy': accuracy,
            'precision': precision,
            'recall': recall,
            'f1': f1
        },
        'male': {
            'accuracy': male_accuracy,
            'precision': male_precision,
            'recall': male_recall,
            'f1': male_f1
        },
        'female': {
            'accuracy': female_accuracy,
            'precision': female_precision,
            'recall': female_recall,
            'f1': female_f1
        }
    }

# Perform cross-validation and find the best hyperparameters
param_grid = {'lr': [0.001, 0.005, 0.01], 'dropout_rate': [0, 0.2, 0.5]}
num_unique_ids = len(np.unique(ids_train))
best_hyperparams = cross_validate_and_tune(train_dataset, num_unique_ids, param_grid, num_folds=10, num_epochs=10)

# Bootstrap sampling and training the best model
num_bootstrap_samples = 5
bootstrap_results = {
    'accuracy': [], 'precision': [], 'sensitivity': [],
    'male_accuracy': [], 'male_precision': [], 'male_sensitivity': [],
    'female_accuracy': [], 'female_precision': [], 'female_sensitivity': []
}

for _ in range(num_bootstrap_samples):
    # Bootstrap sampling 80% of data points for each patient
    bootstrap_subset = bootstrap_sample(train_dataset, ids_train, proportion=0.8)
    bootstrap_loader = DataLoader(bootstrap_subset, batch_size=128, shuffle=True)
    
    model = MLPWithEmbedding(input_size=X_train_scaled.shape[1], hidden_size1=30, hidden_size2=10, output_size=2, num_patient_ids=num_unique_ids, embedding_dim=8, dropout_rate=best_hyperparams['dropout_rate'])
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=best_hyperparams['lr'])
    
    # Train with the best hyperparameters
    train_model(model, bootstrap_loader, DataLoader(test_dataset, batch_size=128, shuffle=False), criterion, optimizer, num_epochs=10)
    
    # Evaluate on the full test dataset
    test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False)
    results = evaluate_model(model, test_loader)
    
    bootstrap_results['accuracy'].append(results['overall']['accuracy'])
    bootstrap_results['precision'].append(results['overall']['precision'])
    bootstrap_results['sensitivity'].append(results['overall']['recall'])
    bootstrap_results['male_accuracy'].append(results['male']['accuracy'])
    bootstrap_results['male_precision'].append(results['male']['precision'])
    bootstrap_results['male_sensitivity'].append(results['male']['recall'])
    bootstrap_results['female_accuracy'].append(results['female']['accuracy'])
    bootstrap_results['female_precision'].append(results['female']['precision'])
    bootstrap_results['female_sensitivity'].append(results['female']['recall'])

# Print bootstrap results
for metric in bootstrap_results:
    print(f'{metric.capitalize()}: Mean = {np.mean(bootstrap_results[metric]):.4f}, Std = {np.std(bootstrap_results[metric]):.4f}')

Testing hyperparameters: {'dropout_rate': 0, 'lr': 0.001}
Fold 1/10 with params: {'dropout_rate': 0, 'lr': 0.001}
Train loader length: 13, Validation loader length: 2
Epoch 1/10, Val Loss: 0.6800, Overall - Accuracy: 0.6902, Precision: 0.5238, Sensitivity: 0.1897, Male - Accuracy: 0.7062, Precision: 0.4737, Sensitivity: 0.1765, Female - Accuracy: 0.2857, Precision: 1.0000, Sensitivity: 0.2857
Epoch 2/10, Val Loss: 0.6656, Overall - Accuracy: 0.7120, Precision: 0.7778, Sensitivity: 0.1207, Male - Accuracy: 0.7401, Precision: 0.7778, Sensitivity: 0.1373, Female - Accuracy: 0.0000, Precision: 0.0000, Sensitivity: 0.0000
Epoch 3/10, Val Loss: 0.6519, Overall - Accuracy: 0.7120, Precision: 0.7778, Sensitivity: 0.1207, Male - Accuracy: 0.7401, Precision: 0.7778, Sensitivity: 0.1373, Female - Accuracy: 0.0000, Precision: 0.0000, Sensitivity: 0.0000
Epoch 4/10, Val Loss: 0.6490, Overall - Accuracy: 0.7065, Precision: 0.7500, Sensitivity: 0.1034, Male - Accuracy: 0.7345, Precision: 0.7500, Sens