In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report
import pandas as pd

In [2]:
# Load the dataset
df = pd.read_csv("../../data/encoded_dataset.csv")
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100244 entries, 0 to 100243
Data columns (total 57 columns):
 #   Column                    Non-Null Count   Dtype  
---  ------                    --------------   -----  
 0   admission_type_id         100244 non-null  int64  
 1   discharge_disposition_id  100244 non-null  int64  
 2   admission_source_id       100244 non-null  int64  
 3   time_in_hospital          100244 non-null  int64  
 4   medical_specialty         100244 non-null  int64  
 5   num_lab_procedures        100244 non-null  int64  
 6   num_procedures            100244 non-null  int64  
 7   num_medications           100244 non-null  int64  
 8   number_outpatient         100244 non-null  int64  
 9   number_emergency          100244 non-null  int64  
 10  number_inpatient          100244 non-null  int64  
 11  diag_1                    100244 non-null  int64  
 12  diag_2                    100244 non-null  int64  
 13  diag_3                    100244 non-null  i

In [3]:
# Prepare features (X) and target variable (y)
X = df.drop(columns=["readmitted"])
y = df["readmitted"]

# Convert categorical features to numeric if necessary
X = pd.get_dummies(X)

# Standardize the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42
)

In [4]:
X

Unnamed: 0,admission_type_id,discharge_disposition_id,admission_source_id,time_in_hospital,medical_specialty,num_lab_procedures,num_procedures,num_medications,number_outpatient,number_emergency,...,age_[30-40),age_[40-50),age_[50-60),age_[60-70),age_[70-80),age_[80-90),age_[90-100),gender_Female,gender_Male,gender_other
0,1,1,7,3,0,59,0,18,0,0,...,False,False,False,False,False,False,False,True,False,False
1,1,1,7,2,0,11,5,13,2,0,...,False,False,False,False,False,False,False,True,False,False
2,1,1,7,2,0,44,1,16,0,0,...,True,False,False,False,False,False,False,False,True,False
3,1,1,7,1,0,51,0,8,0,0,...,False,True,False,False,False,False,False,False,True,False
4,2,1,2,3,0,31,6,16,0,0,...,False,False,True,False,False,False,False,False,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
100239,1,3,7,3,0,51,0,16,0,0,...,False,False,False,False,True,False,False,False,True,False
100240,1,4,5,5,0,33,3,18,0,0,...,False,False,False,False,False,True,False,True,False,False
100241,1,1,7,1,0,53,0,9,1,0,...,False,False,False,False,True,False,False,False,True,False
100242,2,3,7,10,4,45,2,21,0,0,...,False,False,False,False,False,True,False,True,False,False


In [5]:
X_scaled

array([[-0.70783402, -0.5168113 ,  0.30503934, ...,  0.92685256,
        -0.92679677, -0.00547064],
       [-0.70783402, -0.5168113 ,  0.30503934, ...,  0.92685256,
        -0.92679677, -0.00547064],
       [-0.70783402, -0.5168113 ,  0.30503934, ..., -1.07892025,
         1.0789852 , -0.00547064],
       ...,
       [-0.70783402, -0.5168113 ,  0.30503934, ..., -1.07892025,
         1.0789852 , -0.00547064],
       [-0.01574806, -0.13809516,  0.30503934, ...,  0.92685256,
        -0.92679677, -0.00547064],
       [-0.70783402, -0.5168113 ,  0.30503934, ..., -1.07892025,
         1.0789852 , -0.00547064]])

In [6]:
# Convert to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.long)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.long)

In [7]:
y_train_tensor

tensor([0, 1, 0,  ..., 0, 1, 0])

In [8]:
# deep neural network model
class SimpleNN(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

In [9]:
# Initialize model
input_dim = X_train.shape[1]
hidden_dim = 64
output_dim = len(y.unique())  # Number of classes
model = SimpleNN(input_dim, hidden_dim, output_dim)

# loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training
epochs = 1000
for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()

    outputs = model(X_train_tensor)
    loss = criterion(outputs, y_train_tensor)
    loss.backward()
    optimizer.step()

    if (epoch + 1) % 5 == 0:
        print(f"Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}")

Epoch [5/1000], Loss: 0.6901
Epoch [10/1000], Loss: 0.6795
Epoch [15/1000], Loss: 0.6721
Epoch [20/1000], Loss: 0.6664
Epoch [25/1000], Loss: 0.6619
Epoch [30/1000], Loss: 0.6583
Epoch [35/1000], Loss: 0.6554
Epoch [40/1000], Loss: 0.6529
Epoch [45/1000], Loss: 0.6509
Epoch [50/1000], Loss: 0.6491
Epoch [55/1000], Loss: 0.6477
Epoch [60/1000], Loss: 0.6464
Epoch [65/1000], Loss: 0.6454
Epoch [70/1000], Loss: 0.6444
Epoch [75/1000], Loss: 0.6436
Epoch [80/1000], Loss: 0.6428
Epoch [85/1000], Loss: 0.6421
Epoch [90/1000], Loss: 0.6415
Epoch [95/1000], Loss: 0.6409
Epoch [100/1000], Loss: 0.6404
Epoch [105/1000], Loss: 0.6399
Epoch [110/1000], Loss: 0.6394
Epoch [115/1000], Loss: 0.6389
Epoch [120/1000], Loss: 0.6384
Epoch [125/1000], Loss: 0.6380
Epoch [130/1000], Loss: 0.6376
Epoch [135/1000], Loss: 0.6371
Epoch [140/1000], Loss: 0.6367
Epoch [145/1000], Loss: 0.6363
Epoch [150/1000], Loss: 0.6359
Epoch [155/1000], Loss: 0.6356
Epoch [160/1000], Loss: 0.6352
Epoch [165/1000], Loss: 0.63

In [10]:
# evaluate model
model.eval()
with torch.no_grad():
    outputs = model(X_test_tensor)
    _, predicted = torch.max(outputs, 1)

    accuracy = accuracy_score(y_test_tensor, predicted)
    class_report = classification_report(y_test_tensor, predicted, output_dict=True)

    print(f"Accuracy: {accuracy:.4f}")
    print("Classification Report:")
    for label, metrics in class_report.items():
        print(f"{label}:")
        print(metrics)

Accuracy: 0.6171
Classification Report:
0:
{'precision': 0.6274066633182656, 'recall': 0.6990952336535771, 'f1-score': 0.6613138042087616, 'support': 10721.0}
1:
{'precision': 0.6018758484511909, 'recall': 0.5228344768439108, 'f1-score': 0.559577763754231, 'support': 9328.0}
accuracy:
0.6170881340715247
macro avg:
{'precision': 0.6146412558847283, 'recall': 0.6109648552487439, 'f1-score': 0.6104457839814963, 'support': 20049.0}
weighted avg:
{'precision': 0.6155281935152793, 'recall': 0.6170881340715247, 'f1-score': 0.6139800825588109, 'support': 20049.0}
