In [None]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
import numpy as np

In [None]:
# Experiment configuration. These values select which input CSV files are loaded
# and are recorded alongside the metrics when the results are saved.
Data_periods = 3  # number of windows; appears as '<n>W' in the feature file name
Data_Days = 30  # appears as '<n>D' in the feature file name (presumably days per window -- confirm)
LDays = 548  # suffix of the label file 'CHF_Labels_<n>.csv' (presumably the prediction horizon in days -- confirm)

In [None]:
# Run descriptors derived from the configuration constants; they are stored as
# identifying columns in the results spreadsheet written at the end of the notebook.
model_type = "Centralized_FL"
windows = f'{Data_periods}W'  # e.g. '3W' when Data_periods is 3
label_days = f'{Data_Days}-Central'  # e.g. '30-Central' when Data_Days is 30
prediction_days = f'{LDays}'  # LDays rendered as a string

In [None]:
# Read the feature table and the label table selected by the configuration constants.
features_csv = f'CHF_Data_1/CHF_{Data_Days}D_{Data_periods}W.csv'
labels_csv = f'CHF_Data_1/CHF_Labels_{LDays}.csv'

CHF_data = pd.read_csv(features_csv)
CHF_Labels_2 = pd.read_csv(labels_csv)

In [None]:
# Keep only the client key and the class column, exposing 'ListNumber' as 'Label'.
CHF_Labels = pd.DataFrame({
    'ClientId': CHF_Labels_2['ClientId'],
    'Label': CHF_Labels_2['ListNumber'],
})

In [None]:
# The 'Agency' column is not used as a model feature; drop it before merging.
pivoted_data = CHF_data.drop(columns='Agency')

In [None]:
# # Assuming CHF_data is your DataFrame and it has columns named "ClientId" and "Sleep"
# # Create a helper column to count the Sleep occurrences per ClientId
# CHF_data['SleepCount'] = CHF_data.groupby('ClientId').cumcount()

# # Use pivot_table to pivot the DataFrame, creating a column for each Sleep occurrence
# pivoted_data = CHF_data.pivot_table(index='ClientId', columns='SleepCount', values='Sleep', aggfunc='first')

# # Flatten the MultiIndex in columns and create a new column naming convention
# pivoted_data.columns = [f'Sleep_{i}' for i in pivoted_data.columns]

# # Reset the index to turn the ClientIds back into a column
# pivoted_data.reset_index(inplace=True)


In [None]:
# Merge features with labels and encode the class names as integers.
# The default inner join keeps only clients present in both tables.
LABEL_TO_ID = {'Trn': 1, 'Epi': 2, 'Chr': 3}

data = pd.merge(pivoted_data, CHF_Labels, on='ClientId')

# Series.map yields NaN for any value missing from the mapping. Left alone, those
# NaNs flow into `y - 1` and the later integer cast, producing invalid class
# indices far from the real cause. Fail here with an explicit message instead.
encoded_labels = data['Label'].map(LABEL_TO_ID)
unmapped_rows = encoded_labels.isna()
if unmapped_rows.any():
    unknown_values = sorted(data.loc[unmapped_rows, 'Label'].astype(str).unique())
    raise ValueError(
        f"Unexpected label value(s) {unknown_values}; expected one of {sorted(LABEL_TO_ID)}"
    )
data['Label'] = encoded_labels.astype('int64')

In [None]:
# Split data
# Feature matrix: every column except the target and the client identifier.
X_raw = data.drop(['Label', 'ClientId'], axis=1).values
y = data['Label'].values

y_indices = y - 1  # Convert labels 1, 2, 3 to class indices 0, 1, 2

# Split BEFORE scaling. Fitting the scaler on the full matrix would let the
# test rows' mean/variance influence the features the model is trained on
# (data leakage), making the test metrics optimistic.
X_train2_raw, X_test_raw, y_train2, y_test = train_test_split(
    X_raw,
    y_indices,
    test_size=0.2,
    random_state=42,
    stratify=y_indices  # Preserve the class proportions in both partitions
)

# Normalize features using statistics from the training partition only,
# then apply the same transformation to the held-out test rows.
scaler = StandardScaler()
X_train2 = scaler.fit_transform(X_train2_raw)
X_test = scaler.transform(X_test_raw)

# `X` is kept as the full scaled matrix for any code that still refers to it;
# it now uses the train-fitted scaler rather than statistics from all rows.
X = scaler.transform(X_raw)

In [None]:
# Carve a stratified validation set (15%) out of the remaining training data.
train_val_parts = train_test_split(
    X_train2, y_train2,
    test_size=0.15,
    random_state=42,
    stratify=y_train2,
)
X_train, X_val, y_train, y_val = train_val_parts

In [None]:
from sklearn.utils.class_weight import compute_class_weight

# 'balanced' class weights computed from the training labels, one weight per
# distinct label value found in y_train.
train_classes = np.unique(y_train)
class_weights = compute_class_weight(class_weight='balanced', classes=train_classes, y=y_train)
class_weights_tensor_2 = torch.tensor(class_weights, dtype=torch.float32)

In [None]:
# Hand-set per-class loss weights (class 0, 1, 2); this is the tensor passed to the loss.
class_weights_tensor = torch.tensor([0.75, 3.0, 6.0], dtype=torch.float32)

In [None]:
# Convert each split to PyTorch tensors: float32 features, int64 class indices.
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)

X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)

X_val_tensor = torch.tensor(X_val, dtype=torch.float32)
y_val_tensor = torch.tensor(y_val, dtype=torch.long)

In [None]:
from torch.utils.data import DataLoader, TensorDataset

# Wrap each split in a TensorDataset of (features, label) pairs.
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
val_dataset = TensorDataset(X_val_tensor, y_val_tensor)

# Create DataLoaders
batch_size = 2048
# Training batches are reshuffled every epoch.
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
# Validation is evaluation only, so the order is fixed. The training loop
# averages loss and recall per batch; shuffling would change the batch
# composition between epochs and add noise to the early-stopping signal.
val_loader = DataLoader(dataset=val_dataset, batch_size=batch_size, shuffle=False)

In [None]:
import torch.optim as optim

class MLP(nn.Module):
    """Feed-forward classifier: input -> 256 -> 128 -> 8 -> ``num_classes`` logits.

    LeakyReLU follows every hidden layer; dropout (p=0.5) follows the second
    and third hidden activations. The output is raw logits (no softmax), as
    expected by ``nn.CrossEntropyLoss``.

    Args:
        input_dim: Number of input features. When omitted, it is read from the
            notebook-level ``X_train`` array so that ``MLP()`` keeps working.
        num_classes: Number of output logits. Defaults to 3.
    """

    def __init__(self, input_dim=None, num_classes=3):
        super().__init__()
        if input_dim is None:
            # Backward-compatible default: infer the width from the training matrix.
            input_dim = X_train.shape[1]
        # The layer order (and so the state_dict keys) matches the original definition.
        self.layers = nn.Sequential(
            nn.Linear(input_dim, 256),
            nn.LeakyReLU(negative_slope=0.01),
            # nn.Dropout(0.15),  # dropout after the first hidden layer is currently disabled
            nn.Linear(256, 128),
            nn.LeakyReLU(negative_slope=0.01),
            nn.Dropout(0.50),
            nn.Linear(128, 8),
            nn.LeakyReLU(negative_slope=0.01),
            nn.Dropout(0.50),
            nn.Linear(8, num_classes)
        )

    def forward(self, x):
        """Return the logits for a batch ``x`` whose last dimension is ``input_dim``."""
        return self.layers(x)

# Seed PyTorch's global RNG before building the model so that weight
# initialisation (and later torch-driven randomness such as dropout) is
# repeatable across Restart & Run All. 42 matches the random_state of the splits.
torch.manual_seed(42)

model = MLP()
# Weighted cross-entropy: the loss is weighted per class by class_weights_tensor.
criterion = nn.CrossEntropyLoss(weight=class_weights_tensor)
optimizer = optim.AdamW(model.parameters(), lr=0.0001, weight_decay=0.01)  # Using AdamW

In [None]:
def calculate_recall(outputs, labels, num_classes):
    """Compute the recall of each class for one batch of predictions.

    Args:
        outputs: Score tensor; the predicted class is the arg-max along dim 1.
        labels: Tensor of true class indices, one per row of ``outputs``.
        num_classes: Number of classes; recall is reported for ``0..num_classes-1``.

    Returns:
        A list of ``num_classes`` Python numbers. Entry ``c`` is the fraction of
        samples labelled ``c`` that were predicted as ``c``, or 0 when the batch
        contains no sample of class ``c``.
    """
    predicted = outputs.max(dim=1).indices
    is_hit = predicted.eq(labels.view_as(predicted))

    def _recall_for(class_id):
        # Restrict the hit mask to the samples whose true label is class_id.
        hits = is_hit[labels == class_id]
        if hits.numel() == 0:
            return 0
        return hits.float().mean().item()

    return [_recall_for(class_id) for class_id in range(num_classes)]


In [None]:
import copy
def early_stopping_check(epoch_val_loss, best_val_loss, best_model_weights, model, patience_counter, patience=10):
    """Update the early-stopping state after one epoch.

    Args:
        epoch_val_loss: Validation loss of the epoch that just finished.
        best_val_loss: Lowest validation loss seen so far.
        best_model_weights: Snapshot of the weights that achieved ``best_val_loss``.
        model: Object exposing ``state_dict()``; snapshotted when the loss improves.
        patience_counter: Number of consecutive epochs without improvement so far.
        patience: Number of non-improving epochs at which training should stop.

    Returns:
        ``(stop_training, best_val_loss, best_model_weights, patience_counter)``
        with the last three values updated for this epoch.
    """
    improved = epoch_val_loss < best_val_loss
    if not improved:
        patience_counter += 1
    else:
        # Strict improvement: record the new best and snapshot a deep copy of
        # the weights so later optimizer steps cannot modify the snapshot.
        best_val_loss = epoch_val_loss
        best_model_weights = copy.deepcopy(model.state_dict())
        patience_counter = 0

    stop_training = True if patience_counter >= patience else False
    return stop_training, best_val_loss, best_model_weights, patience_counter


In [None]:
import copy
import matplotlib.pyplot as plt
loss_values = []  # Mean training loss per epoch
val_loss_values = []  # Mean validation loss per epoch
train_recall_values = []  # Class-averaged training recall per epoch
val_recall_values = []  # Class-averaged validation recall per epoch

# The epoch budget and the early-stopping tolerance are separate settings.
# Previously one value (200) was used for both. The first epoch with a finite
# validation loss resets the counter, so within 200 epochs it reaches at most
# 199 and early stopping could not trigger.
num_epochs = 200  # Upper bound on training length
patience = 20  # Consecutive non-improving epochs tolerated before stopping (tunable)
best_val_loss = float('inf')
best_model_weights = copy.deepcopy(model.state_dict())
patience_counter = 0

num_classes = 3
for epoch in range(num_epochs):
    # ---- Training ----
    model.train()  # Enable dropout
    running_loss = 0.0
    running_recall = []

    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        # Per-class recall for this batch
        recall_batch = calculate_recall(outputs, labels, num_classes)
        running_recall.append(recall_batch)

    # Metrics are averaged over batches (every batch counts equally).
    epoch_loss = running_loss / len(train_loader)
    epoch_recall = np.mean(np.array(running_recall), axis=0)  # Recall for each class
    average_epoch_recall = np.mean(epoch_recall)  # Average recall across all classes
    train_recall_values.append(average_epoch_recall)
    loss_values.append(epoch_loss)

    # ---- Validation ----
    model.eval()  # Disable dropout
    running_val_loss = 0.0
    running_val_recall = []

    with torch.no_grad():  # No gradient computation for validation
        for val_inputs, val_labels in val_loader:
            val_outputs = model(val_inputs)
            val_loss = criterion(val_outputs, val_labels)
            running_val_loss += val_loss.item()

            # Per-class recall for this batch
            val_recall_batch = calculate_recall(val_outputs, val_labels, num_classes)
            running_val_recall.append(val_recall_batch)

    epoch_val_loss = running_val_loss / len(val_loader)
    epoch_val_recall = np.mean(np.array(running_val_recall), axis=0)  # Recall for each class
    average_epoch_val_recall = np.mean(epoch_val_recall)
    val_recall_values.append(average_epoch_val_recall)
    val_loss_values.append(epoch_val_loss)

    # ---- Early stopping check ----
    stop_training, best_val_loss, best_model_weights, patience_counter = early_stopping_check(
        epoch_val_loss, best_val_loss, best_model_weights, model, patience_counter, patience
    )

    if stop_training:
        print(f"Stopping early at epoch {epoch}. Restoring best model weights.")
        break

# Restore the weights from the epoch with the lowest validation loss,
# whether training stopped early or used the full epoch budget.
model.load_state_dict(best_model_weights)

In [None]:
# Loss curves: one point per epoch for the training and validation sets.
fig, ax = plt.subplots()
for series, series_label in ((loss_values, 'Training Loss'), (val_loss_values, 'Validation Loss')):
    ax.plot(series, label=series_label)
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.set_title('Training and Validation Loss vs. Epoch')
ax.legend()
plt.show()

In [None]:
def _as_scalar_recall(entry):
    """Collapse a list/array of recalls to its mean; pass any other value through."""
    if isinstance(entry, (list, np.ndarray)):
        return np.mean(entry)
    return entry


# One value per epoch, ready for plotting.
train_recall_avg = [_as_scalar_recall(entry) for entry in train_recall_values]
val_recall_avg = [_as_scalar_recall(entry) for entry in val_recall_values]


In [None]:
# Recall curves: class-averaged recall per epoch for training and validation.
fig, ax = plt.subplots()
for series, series_label in ((train_recall_avg, 'Training Recall'), (val_recall_avg, 'Validation Recall')):
    ax.plot(series, label=series_label)
ax.set_xlabel('Epoch')
ax.set_ylabel('Recall')
ax.set_title('Training and Validation Recall vs. Epoch')
ax.legend()
plt.show()

In [None]:
# Evaluate the model on the held-out test set.
# Switch to eval mode explicitly so dropout is disabled no matter which cell
# ran last; the result must not depend on leftover kernel state.
model.eval()
with torch.no_grad():
    y_pred1 = model(X_test_tensor)  # Raw logits, one row per test sample
    y_pred = torch.argmax(y_pred1, dim=1)  # Predicted class index per sample

# Pass a NumPy array to scikit-learn rather than a torch tensor.
accuracy = accuracy_score(y_test, y_pred.numpy())
print(f'Total Accuracy: {accuracy}')

In [None]:
from sklearn.metrics import confusion_matrix

# Work on NumPy arrays whether the inputs are tensors or arrays.
y_test_np = y_test.numpy() if isinstance(y_test, torch.Tensor) else y_test
y_pred_np = y_pred.numpy() if isinstance(y_pred, torch.Tensor) else y_pred

# Generate a confusion matrix (rows: true class, columns: predicted class)
cm = confusion_matrix(y_test_np, y_pred_np)

# Per-class accuracy = correctly predicted samples of a class / true samples of
# that class. A class with no true samples gets 0 rather than NaN, matching the
# zero-denominator convention of the precision/recall computation.
true_counts = cm.sum(axis=1)
per_class_accuracy = np.divide(
    cm.diagonal(),
    true_counts,
    out=np.zeros(len(true_counts), dtype=float),
    where=true_counts > 0,
)

# Print per-class accuracy. The loop variable is deliberately not named
# `accuracy`, so the overall test accuracy computed earlier is not overwritten.
for i, class_accuracy in enumerate(per_class_accuracy):
    print(f"Accuracy for class {i}: {class_accuracy * 100:.2f}%")

In [None]:
from sklearn.metrics import confusion_matrix

# Work on NumPy arrays whether the inputs are tensors or arrays.
y_test_np = y_test.numpy() if isinstance(y_test, torch.Tensor) else y_test
y_pred_np = y_pred.numpy() if isinstance(y_pred, torch.Tensor) else y_pred

# Confusion matrix (rows: true class, columns: predicted class)
cm = confusion_matrix(y_test_np, y_pred_np)

n_labels = cm.shape[0]
precision = np.zeros(n_labels)
recall = np.zeros(n_labels)

# Per-class precision and recall; a zero denominator leaves the metric at 0.
for i in range(n_labels):
    TP = cm[i, i]
    FP = cm[:, i].sum() - TP  # Predicted as class i but belonging to another class
    FN = cm[i, :].sum() - TP  # Belonging to class i but predicted as another class

    predicted_as_i = TP + FP
    actually_i = TP + FN
    if predicted_as_i > 0:
        precision[i] = TP / predicted_as_i
    else:
        precision[i] = 0
    if actually_i > 0:
        recall[i] = TP / actually_i
    else:
        recall[i] = 0

    print(f"Precision for class {i}: {precision[i] * 100:.2f}%")
    print(f"Recall for class {i}: {recall[i] * 100:.2f}%")

# Macro averages: unweighted mean over classes
macro_avg_precision = precision.mean()
macro_avg_recall = recall.mean()

print(f"\nMacro Average Precision: {macro_avg_precision * 100:.2f}%")
print(f"Macro Average Recall: {macro_avg_recall * 100:.2f}%")


In [None]:
# Build a one-row summary of this run and append it to the results workbook.
#
# The row is built under its own name instead of rebinding `data` (the merged
# feature/label DataFrame), so earlier cells can still be re-run afterwards.
# The scalar macro averages computed in the evaluation cell are not overwritten.
results_row = {
    "Model Type": [model_type],
    "Windows": [windows],
    "Label Days": [label_days],
    "Prediction Days": [prediction_days],
    # Mean over however many classes are present, not a hard-coded 3.
    "Average Precision": [float(np.mean(precision))],
    "Average Recall": [float(np.mean(recall))],
    # Despite the column names, these two hold the per-class arrays; names and
    # contents are unchanged so new rows stay consistent with existing ones.
    "Macro Average Precision": [precision],
    "Macro Average Recall": [recall],
}

new_data_df = pd.DataFrame(results_row)

# File name
excel_filename = "Model_Results_Central.xlsx"

# Append to the existing workbook if there is one; otherwise start a new one.
try:
    existing_data_df = pd.read_excel(excel_filename)
    combined_df = pd.concat([existing_data_df, new_data_df], ignore_index=True)
except FileNotFoundError:
    combined_df = new_data_df

# Save the combined data back to the Excel file
combined_df.to_excel(excel_filename, index=False)
