In [82]:
import pandas as pd
import numpy as np

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

from sklearn.model_selection import train_test_split

import matplotlib as mpl
import matplotlib.pyplot as plt

from pathlib import Path

import warnings

# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Show every column when a DataFrame is displayed.
pd.set_option('display.max_columns', None)
# Ignore UserWarnings whose originating module name matches 'openpyxl'.
warnings.filterwarnings('ignore', category=UserWarning, module='openpyxl')

# Load only the spreadsheet column ranges B:W, AL:AQ and AS:AY.
ckd_df = pd.read_excel("Data/ckd_dataset.xlsx", usecols="B:W,AL:AQ,AS:AY")

#filtering
def ckd_stage_clf(eGFR):
    """Map an eGFR value to an integer CKD stage code.

    The code is the position of the first lower bound that ``eGFR`` reaches:

        >= 90 -> 0 (stage 1)
        >= 60 -> 1 (stage 2)
        >= 45 -> 2 (stage 3a)
        >= 30 -> 3 (stage 3b)
        >= 15 -> 4 (stage 4)
        otherwise -> 5

    Any value that reaches none of the bounds (including NaN, for which every
    comparison is False) is mapped to 5.
    """
    # Lower bounds in descending order; the index of each bound is its stage code.
    lower_bounds = (90, 60, 45, 30, 15)
    for stage_code, bound in enumerate(lower_bounds):
        if eGFR >= bound:
            return stage_code
    return 5


# Keep only rows with no missing values. Rebinding (instead of inplace=True) avoids
# mutating a frame that other references may still point at.
ckd_df = ckd_df.dropna()

last_visit_egfr = ckd_df["eGFR(last visit)"]
eGFR_final_mean = last_visit_egfr.mean()
eGFR_final_std = last_visit_egfr.std()

# Row filters:
#   1. last-visit eGFR is below mean + 4 standard deviations (drops extreme high values)
#   2. last-visit eGFR is below 95% of the "eGFR" column
below_outlier_cutoff = last_visit_egfr < (eGFR_final_mean + 4 * eGFR_final_std)
below_95pct_of_egfr = last_visit_egfr < (.95 * ckd_df["eGFR"])

# .copy() makes the filtered frame an independent object, so the column assignment
# below is unambiguous and does not raise SettingWithCopyWarning.
filtered_ckd_df = ckd_df[below_outlier_cutoff & below_95pct_of_egfr].copy()

# Stage code (0-5) derived from the "eGFR" column.
filtered_ckd_df["CKD_stage"] = filtered_ckd_df["eGFR"].apply(ckd_stage_clf)

filtered_ckd_df

Unnamed: 0,gender,age,SBP,BMI,etiology of CKD,Hb,Alb,Cr,eGFR,CKD_stage,CKD category,dip-stick proteinuria,proteinuria,urinary occult blood,UPCR,UPCR category,hypertension,prevalence of CVD,diabetes,use of RAASi,use of CCB,use of diuretics,eGFR(last visit),observational duration,50%eGFR_reached,50%eGFR_duration,RRT,RRT_duration,CKD progression,CKD progression_duration,death,death_duration,development of CVD,development of CVD_duration,≥6M followed
0,2,74,120.0,23.137669,2,12.0,4.0,1.20,34.146986,3,6.0,2.0,1.0,0.0,1.253731,3.0,1,0,0,1,1,0,18.495328,37,0,37,0,37,0,37,0,37,0,37,1
2,1,74,143.0,24.554564,2,10.9,3.8,4.95,9.804696,5,12.0,3.0,1.0,0.0,1.759615,3.0,1,0,1,1,1,1,5.578057,13,0,13,1,13,1,13,0,13,0,13,1
3,1,57,139.0,28.515625,2,15.9,4.8,0.84,73.570568,1,4.0,1.0,1.0,0.0,0.513514,3.0,1,0,0,0,0,0,67.225032,37,0,37,0,37,0,37,0,37,0,37,1
4,1,32,154.0,24.582701,4,14.4,4.4,0.87,83.558305,1,1.0,-1.0,0.0,0.0,0.025000,1.0,1,0,0,0,0,0,69.171408,36,0,36,0,36,0,36,0,36,0,36,1
7,1,66,148.0,22.532285,1,10.2,3.3,5.39,9.230697,5,12.0,3.0,1.0,0.0,3.535088,3.0,1,0,1,0,1,1,8.403298,6,0,6,1,8,1,8,0,8,0,6,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1123,1,75,120.0,26.015635,1,11.2,3.9,2.56,20.092993,4,8.0,-1.0,0.0,0.0,0.210210,2.0,1,0,1,1,1,0,15.511510,37,0,37,0,37,0,37,0,37,0,37,1
1128,2,72,169.0,22.494988,1,6.6,2.7,3.12,12.100035,5,12.0,3.0,1.0,0.0,17.298091,3.0,1,0,1,0,1,0,9.285258,7,0,7,1,10,1,10,0,10,0,7,1
1131,1,71,154.0,24.238775,2,15.4,4.2,1.08,52.471683,2,6.0,2.0,1.0,0.0,0.760234,3.0,1,0,0,1,0,0,47.952934,38,0,38,0,38,0,38,0,38,0,38,1
1132,1,81,154.0,19.596458,2,9.0,3.5,2.00,25.747776,4,9.0,2.0,1.0,0.0,1.038526,3.0,1,1,0,1,1,1,17.923736,37,0,37,0,37,0,37,0,37,0,37,1


In [83]:
#testing based off of video - timestamp: 8:28:00


In [125]:
#loading in the dataset (?)

# class ckdDataset(Dataset):
#     def __init__(self):
#         self.data = 

#     def __len__(self):
#         return len(self.data)

#     def __getitem__(self, idx):
#         return self.data[idx]


# NOTE(review): "CKD_stage" is used as the target below but is NOT in this drop list,
# so the label is also fed to the model as an input feature (target leakage).
# Other remaining columns may leak it as well (e.g. "CKD category", follow-up outcome
# columns) - TODO confirm. Removing any column changes the feature count, so the
# model's in_features (currently 30) must be updated at the same time.
features = filtered_ckd_df.drop(columns=["gender",
                                         "age",
                                         "Cr",
                                         "eGFR",
                                         "eGFR(last visit)"]).values
target = filtered_ckd_df.loc[:, "CKD_stage"].values

# Inputs and labels are data, not learnable parameters, so they must not require grad.
# (With requires_grad=True the split tensors carry an autograd graph back to these
# tensors, which wastes memory and forces backward(retain_graph=True) during training.)
features_tensor = torch.tensor(features, dtype=torch.float32, device=device)
target_tensor = torch.tensor(target, dtype=torch.float32, device=device)

# Fixed random_state so the train/test split is the same on every run.
x_train, x_test, y_train, y_test = train_test_split(
    features_tensor, target_tensor, test_size=0.2, random_state=42
)
# The loss used for training is nn.CrossEntropyLoss fed with class indices, which must be integer (long).
y_train = y_train.type(torch.long)
y_test = y_test.type(torch.long)

#create dataset later

In [128]:
class NeuralNetwork(nn.Module):
  """Single linear layer that maps a feature vector to one raw score per class.

  No activation is applied to the output; the scores are meant to be passed to a
  loss such as nn.CrossEntropyLoss, which works on unnormalised scores.

  Args:
    in_features: number of input columns per sample (default 30).
    out_features: number of output classes; must match the number of CKD stage
      codes being predicted (default 6).
  """

  def __init__(self, in_features=30, out_features=6):
    super().__init__()
    #change activation functions when you know more about it
    #final out features needs to match amount of classes needed for CKD stage
    self.linear_one = nn.Linear(in_features=in_features, out_features=out_features)

  def forward(self, x):
    """Return the per-class scores for x, whose last dimension must equal in_features."""
    return self.linear_one(x) #will change based on layers

# Seed BEFORE building the model: nn.Linear draws its initial weights/biases from the
# global torch RNG at construction time, so seeding afterwards does not make them reproducible.
torch.manual_seed(42)
ckd_model = NeuralNetwork().to(device)

# Multi-class classification loss; takes the raw per-class scores and integer class labels.
loss_func = nn.CrossEntropyLoss()

# AdamW = Adam with decoupled weight decay (weight_decay acts as regularization).
optimizer = optim.AdamW(
    ckd_model.parameters(),
    lr=1e-2,
    weight_decay=1e-2,
)

In [129]:
#training
# Seeding here only affects random draws made from this point on; the model weights
# were already initialised when ckd_model was constructed.
torch.manual_seed(42)
epoch_count = 100 #number of times running the model to get better weights

#can save training data (epochs, train/test loss values, etc) into lists for later use
#will need to loop through DataLoader here

# The inputs are data, not learnable parameters: detach them so autograd only tracks
# the model weights. Each iteration then builds a fresh graph that backward() can
# free, so retain_graph=True is no longer needed.
train_inputs = x_train.detach()

# The test cell calls ckd_model.eval(); switch back so re-running this cell trains in train mode.
ckd_model.train()

for epoch in range(epoch_count):
    optimizer.zero_grad()
    y_train_pred = ckd_model(train_inputs)
    loss = loss_func(y_train_pred, y_train) #y_train will become iterated value from DataLoader
    loss.backward()
    optimizer.step()
    if (epoch + 1) % 10 == 0:
        # .item() logs a plain Python float instead of the tensor.
        print(f"Epoch: {epoch}, Loss: {loss.item()}")

Epoch: 9, Loss: 4.20238733291626
Epoch: 19, Loss: 2.858206033706665
Epoch: 29, Loss: 1.9225938320159912
Epoch: 39, Loss: 1.3686838150024414
Epoch: 49, Loss: 1.103173851966858
Epoch: 59, Loss: 1.0050783157348633
Epoch: 69, Loss: 0.9399095177650452
Epoch: 79, Loss: 0.887002170085907
Epoch: 89, Loss: 0.8432281613349915
Epoch: 99, Loss: 0.8076149225234985


In [133]:
#testing - could be done in the training loop to redo test predictions and calculate loss each epoch
# Switch the model to evaluation mode before predicting.
ckd_model.eval()

# inference_mode turns off gradient tracking, which is not needed for predictions.
with torch.inference_mode():
    y_test_pred = ckd_model(x_test)
    test_loss = loss_func(y_test_pred, y_test)

print(f"Test loss: {test_loss}")
    


Test loss: 0.902309775352478


In [None]:
#saving the model
# NOTE: Path is already imported in the first cell; this import is redundant but harmless.
from pathlib import Path

# Sketch (kept commented out) for persisting the trained weights.
# MODEL_FOLDER = Path("models")
# MODEL_FOLDER.mkdir(parents=True, exist_ok=True) - exist_ok=True makes this a no-op when the directory is already present
# also prob don't need to make a directory for this

# MODEL_NAME = "ckd_nn_1st_attempt.pth"
# MODEL_SAVE_PATH = MODEL_FOLDER / MODEL_NAME

# nn.Module has no `save_state_dict`; the weights come from calling state_dict():
# torch.save(ckd_model.state_dict(), MODEL_SAVE_PATH)

# loading the model

# loaded_ckd_model = NeuralNetwork()
# loaded_ckd_model.load_state_dict(torch.load(MODEL_SAVE_PATH, map_location=device))
# loaded_ckd_model.to(device)