In [None]:
# Moi Teaching & Referral Hospital (MTRH) Federated Learning Notebook

This notebook runs a gender prediction experiment using the MIMIC-IV Clinical Database Demo. It will:

1. Load the diagnosis and patient tables from S3.
2. Preprocess it.
3. Train a model to predict patient gender based on diagnosis count.
4. Upload the trained model and training log back to the director.


In [None]:
!pip install pandas torch scikit-learn boto3

import pandas as pd
import numpy as np
import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import os
import requests


In [None]:
# Download the two MIMIC-IV demo tables used below into the working directory.
# `--no-sign-request` sends the requests unsigned, so no AWS credentials are needed.
# These lines shell out to the `aws` command-line tool, which must already be on PATH
# (installing boto3 alone does not provide it).
# NOTE(review): confirm that both object keys still exist in the bucket under these paths.
!aws s3 cp --no-sign-request s3://physionet-open/mimic-iv-demo/hosp/diagnoses_icd.csv ./diagnoses_icd.csv
!aws s3 cp --no-sign-request s3://physionet-open/mimic-iv-demo/core/patients.csv ./patients.csv

In [None]:
# Read the two CSV files from the working directory.
diag = pd.read_csv("diagnoses_icd.csv")
patients = pd.read_csv("patients.csv")

# Build one row per patient: the number of diagnosis rows, joined with the gender column.
label_encoder = LabelEncoder()
diagnosis_counts = diag.groupby("subject_id").size().reset_index(name="diagnosis_count")
gender_lookup = patients[["subject_id", "gender"]]
data = diagnosis_counts.merge(gender_lookup, on="subject_id")

# Swap the gender labels for integer class ids.
# LabelEncoder numbers the sorted classes, so with 'F'/'M' labels this gives 0=F, 1=M.
data["gender"] = label_encoder.fit_transform(data["gender"])

# Hold out 20% of the patients for evaluation; the fixed random_state keeps the split repeatable.
features = data[["diagnosis_count"]]
target = data["gender"]
X_train, X_test, y_train, y_test = train_test_split(
    features, target, test_size=0.2, random_state=42
)

class GenderDataset(Dataset):
    """Map-style dataset pairing a float32 feature tensor with int64 class labels.

    Args:
        X: pandas object exposing ``.values`` with the input features.
        y: pandas object exposing ``.values`` with the integer class labels.
    """

    def __init__(self, X, y):
        feature_array, label_array = X.values, y.values
        # Features become float32 for the linear layers; labels become int64 (torch.long).
        self.X = torch.tensor(feature_array, dtype=torch.float32)
        self.y = torch.tensor(label_array, dtype=torch.long)

    def __len__(self):
        """Number of samples, taken from the first dimension of the feature tensor."""
        return self.X.shape[0]

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair stored at position ``idx``."""
        sample = (self.X[idx], self.y[idx])
        return sample

# Wrap each split in a dataset, then batch it; only the training loader shuffles.
train_dataset = GenderDataset(X_train, y_train)
test_dataset = GenderDataset(X_test, y_test)
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=32)
test_loader = DataLoader(test_dataset, batch_size=32)


In [None]:
class GenderNet(nn.Module):
    """Small feed-forward classifier: 1 input feature -> 16 hidden units (ReLU) -> 2 logits."""

    def __init__(self):
        super().__init__()
        # Layers are listed in forward order; their positions in the Sequential
        # determine the parameter names in the saved state_dict.
        layers = [
            nn.Linear(1, 16),
            nn.ReLU(),
            nn.Linear(16, 2),
        ]
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Return the raw class scores produced by the layer stack for ``x``."""
        logits = self.net(x)
        return logits

# Seed PyTorch's global RNG before any weights are created so that parameter
# initialisation is repeatable on a fresh Restart & Run All. An unseeded shuffling
# DataLoader derives its order from the same global generator, so epoch ordering
# becomes repeatable too. The train/test split is already pinned with random_state=42.
torch.manual_seed(42)

model = GenderNet()
loss_fn = nn.CrossEntropyLoss()  # takes raw logits and integer class labels
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)


In [None]:
# Train for a fixed number of epochs, appending one summary line per epoch to `log`.
log = []
for epoch in range(5):
    model.train()
    total_loss = 0.0
    samples_seen = 0
    for X_batch, y_batch in train_loader:
        optimizer.zero_grad()
        pred = model(X_batch)
        loss = loss_fn(pred, y_batch)
        loss.backward()
        optimizer.step()
        # The loss is a per-batch mean (CrossEntropyLoss default reduction), so weight
        # it by the batch size. Otherwise a smaller final batch counts as much as a
        # full one and skews the reported epoch average.
        batch_size = y_batch.size(0)
        total_loss += loss.item() * batch_size
        samples_seen += batch_size
    # True mean loss per training sample for this epoch.
    # (An empty loader still raises ZeroDivisionError here, as before.)
    avg_loss = total_loss / samples_seen
    log.append(f"Epoch {epoch+1}, Loss: {avg_loss:.4f}")
    print(log[-1])


In [None]:
# Score the trained model on the held-out split; gradients are not tracked during evaluation.
model.eval()
correct, total = 0, 0
with torch.no_grad():
    for X_batch, y_batch in test_loader:
        logits = model(X_batch)
        # The predicted class is the index of the largest logit in each row.
        predicted = torch.max(logits, dim=1).indices
        hits = (predicted == y_batch).sum().item()
        correct += hits
        total += y_batch.size(0)

# Fraction of test samples classified correctly; printed and recorded in the training log.
accuracy = correct / total
accuracy_line = f"Test Accuracy: {accuracy:.2%}"
print(accuracy_line)
log.append(accuracy_line)


In [None]:
# Save model and logs locally so the upload below reads them back from disk.
os.makedirs("outputs", exist_ok=True)
torch.save(model.state_dict(), "outputs/model.pt")
with open("outputs/log.txt", "w") as f:
    f.write("\n".join(log))

# Director endpoint. It can be overridden through the DIRECTOR_UPLOAD_URL environment
# variable so the notebook does not need editing per deployment; the default is the
# original placeholder address.
upload_url = os.environ.get("DIRECTOR_UPLOAD_URL", "http://localhost:8000/upload_results")

# Upload to director
with open("outputs/model.pt", "rb") as model_file, open("outputs/log.txt", "rb") as log_file:
    response = requests.post(
        upload_url,
        files={"model": model_file, "log": log_file},
        data={"envoy_name": "Moi Teaching & Referral Hospital (MTRH)"},
        # Without a timeout requests waits indefinitely on an unresponsive server;
        # fail with an exception after 60 seconds instead of hanging the notebook.
        timeout=60,
    )

print("Upload status:", response.status_code)
