In [1]:
!pip install torch

Collecting torch
  Downloading torch-2.6.0-cp312-cp312-win_amd64.whl.metadata (28 kB)
Collecting sympy==1.13.1 (from torch)
  Downloading sympy-1.13.1-py3-none-any.whl.metadata (12 kB)
Downloading torch-2.6.0-cp312-cp312-win_amd64.whl (204.1 MB)
   ---------------------------------------- 0.0/204.1 MB ? eta -:--:--
   ---------------------------------------- 0.1/204.1 MB 1.7 MB/s eta 0:02:04
   ---------------------------------------- 0.1/204.1 MB 1.3 MB/s eta 0:02:37
   ---------------------------------------- 0.2/204.1 MB 1.7 MB/s eta 0:01:59
   ---------------------------------------- 0.3/204.1 MB 1.6 MB/s eta 0:02:07
   ---------------------------------------- 0.3/204.1 MB 1.4 MB/s eta 0:02:21
   ---------------------------------------- 0.4/204.1 MB 1.5 MB/s eta 0:02:20
   ---------------------------------------- 0.5/204.1 MB 1.5 MB/s eta 0:02:15
   ---------------------------------------- 0.6/204.1 MB 1.5 MB/s eta 0:02:13
   ---------------------------------------- 0.6/204.1 MB 1.

In [3]:
# Import libraries
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, TensorDataset

In [7]:
df=pd.read_csv("C://ML//Dataset//healthcare-dataset-stroke-data.csv")
df

Unnamed: 0,id,gender,age,hypertension,heart_disease,ever_married,work_type,Residence_type,avg_glucose_level,bmi,smoking_status,stroke
0,9046,Male,67.0,0,1,Yes,Private,Urban,228.69,36.6,formerly smoked,1
1,51676,Female,61.0,0,0,Yes,Self-employed,Rural,202.21,,never smoked,1
2,31112,Male,80.0,0,1,Yes,Private,Rural,105.92,32.5,never smoked,1
3,60182,Female,49.0,0,0,Yes,Private,Urban,171.23,34.4,smokes,1
4,1665,Female,79.0,1,0,Yes,Self-employed,Rural,174.12,24.0,never smoked,1
...,...,...,...,...,...,...,...,...,...,...,...,...
5105,18234,Female,80.0,1,0,Yes,Private,Urban,83.75,,never smoked,0
5106,44873,Female,81.0,0,0,Yes,Self-employed,Urban,125.20,40.0,never smoked,0
5107,19723,Female,35.0,0,0,Yes,Self-employed,Rural,82.99,30.6,never smoked,0
5108,37544,Male,51.0,0,0,Yes,Private,Rural,166.29,25.6,formerly smoked,0


In [11]:
df.isnull().sum()

id                     0
gender                 0
age                    0
hypertension           0
heart_disease          0
ever_married           0
work_type              0
Residence_type         0
avg_glucose_level      0
bmi                  201
smoking_status         0
stroke                 0
dtype: int64

In [13]:
# Fill missing BMI values with median
df['bmi'].fillna(df['bmi'].median(), inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['bmi'].fillna(df['bmi'].median(), inplace=True)


In [15]:
df.isnull().sum()

id                   0
gender               0
age                  0
hypertension         0
heart_disease        0
ever_married         0
work_type            0
Residence_type       0
avg_glucose_level    0
bmi                  0
smoking_status       0
stroke               0
dtype: int64

In [18]:
# Encode categorical variables using LabelEncoder
categorical_cols = ['gender', 'ever_married', 'work_type', 'Residence_type', 'smoking_status']
label_encoders = {}
for col in categorical_cols:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

In [20]:
# Scale numerical features
scaler = MinMaxScaler()
numeric_cols = ['age', 'avg_glucose_level', 'bmi']
df[numeric_cols] = scaler.fit_transform(df[numeric_cols])

In [22]:
# Separate features and target
X = df.drop(columns=['id', 'stroke']).values
y = df['stroke'].values

In [24]:
# Reshape input to match LSTM's expected input (samples, time steps, features)
X = np.array(X).reshape((X.shape[0], 1, X.shape[1]))
y = torch.tensor(y, dtype=torch.float32)

In [26]:
# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [28]:
# Convert to PyTorch tensors and create DataLoader objects
X_train = torch.tensor(X_train, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)

In [30]:
train_dataset = TensorDataset(X_train, y_train)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

In [32]:
# Define the LSTM model in PyTorch
class StrokeLSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super(StrokeLSTM, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)
    
    def forward(self, x):
        _, (hn, _) = self.lstm(x)
        out = self.fc(hn[-1])  # Use the last hidden state
        return torch.sigmoid(out)

In [34]:
# Hyperparameters
input_size = X_train.shape[2]
hidden_size = 128
num_layers = 2
output_size = 1
learning_rate = 0.001
num_epochs = 20

In [36]:
# Instantiate model, define loss function and optimizer
model = StrokeLSTM(input_size, hidden_size, num_layers, output_size)
criterion = nn.BCELoss()  # Binary cross-entropy loss
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [37]:
# Training loop
for epoch in range(num_epochs):
    for batch_X, batch_y in train_loader:
        # Forward pass
        outputs = model(batch_X).squeeze()
        loss = criterion(outputs, batch_y)
        
        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

Epoch [1/20], Loss: 0.1893
Epoch [2/20], Loss: 0.0574
Epoch [3/20], Loss: 0.1656
Epoch [4/20], Loss: 0.1358
Epoch [5/20], Loss: 0.1431
Epoch [6/20], Loss: 0.0541
Epoch [7/20], Loss: 0.0397
Epoch [8/20], Loss: 0.1881
Epoch [9/20], Loss: 0.1071
Epoch [10/20], Loss: 0.0603
Epoch [11/20], Loss: 0.1183
Epoch [12/20], Loss: 0.1840
Epoch [13/20], Loss: 0.1420
Epoch [14/20], Loss: 0.1615
Epoch [15/20], Loss: 0.0244
Epoch [16/20], Loss: 0.0397
Epoch [17/20], Loss: 0.0549
Epoch [18/20], Loss: 0.0306
Epoch [19/20], Loss: 0.2171
Epoch [20/20], Loss: 0.1129


In [38]:
# Evaluate on test data
with torch.no_grad():
    test_outputs = model(X_test).squeeze()
    predicted = (test_outputs > 0.5).float()
    accuracy = (predicted == y_test).sum() / y_test.size(0)
    print(f'Test Accuracy: {accuracy * 100:.2f}%')

Test Accuracy: 93.93%
