In [133]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import sklearn as sk
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import roc_auc_score


device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
print(device)

mps


In [151]:
train = pd.read_csv('Blood_samples_dataset_balanced_2(f).csv')

test = pd.read_csv('blood_samples_dataset_test.csv')
train.duplicated().sum(), len(train) - train.duplicated().sum()

(2286, 65)

In [152]:
train_ = train.drop_duplicates()
len(train_)

65

In [153]:
test.duplicated().sum(), len(test) - test.duplicated().sum()

(0, 486)

In [154]:
dataset_ = pd.concat([train_, test], axis=0, ignore_index=True)
len(dataset_)

551

In [155]:
dataset_.duplicated().sum(), len(dataset_) - dataset_.duplicated().sum()

(0, 551)

In [158]:
train.describe()

Unnamed: 0,Glucose,Cholesterol,Hemoglobin,Platelets,White Blood Cells,Red Blood Cells,Hematocrit,Mean Corpuscular Volume,Mean Corpuscular Hemoglobin,Mean Corpuscular Hemoglobin Concentration,...,Triglycerides,HbA1c,LDL Cholesterol,HDL Cholesterol,ALT,AST,Heart Rate,Creatinine,Troponin,C-reactive Protein
count,2351.0,2351.0,2351.0,2351.0,2351.0,2351.0,2351.0,2351.0,2351.0,2351.0,...,2351.0,2351.0,2351.0,2351.0,2351.0,2351.0,2351.0,2351.0,2351.0,2351.0
mean,0.362828,0.393648,0.58619,0.504027,0.511086,0.50659,0.507152,0.4922,0.484459,0.562273,...,0.374373,0.439112,0.421777,0.546079,0.434972,0.452138,0.582255,0.425075,0.454597,0.430308
std,0.251889,0.239449,0.271498,0.303347,0.27727,0.266565,0.285537,0.275735,0.315618,0.273281,...,0.256981,0.263779,0.252124,0.269511,0.267388,0.242075,0.250915,0.229298,0.251189,0.243034
min,0.010994,0.012139,0.003021,0.012594,0.010139,0.044565,0.011772,0.046942,0.000554,0.006947,...,0.005217,0.016256,0.033037,0.039505,0.007186,0.013013,0.11455,0.021239,0.00749,0.004867
25%,0.129198,0.195818,0.346092,0.200865,0.259467,0.263589,0.288132,0.287532,0.207938,0.355774,...,0.184604,0.18875,0.217757,0.307132,0.211078,0.239659,0.339125,0.213026,0.288961,0.196192
50%,0.351722,0.397083,0.609836,0.533962,0.527381,0.467431,0.493428,0.453052,0.420723,0.603635,...,0.317857,0.466375,0.413071,0.512941,0.373235,0.486317,0.61086,0.417295,0.426863,0.481601
75%,0.582278,0.582178,0.791215,0.754841,0.743164,0.74367,0.753657,0.722293,0.77816,0.741381,...,0.57233,0.652514,0.604753,0.779378,0.710319,0.616181,0.800666,0.606719,0.682164,0.631426
max,0.96846,0.905026,0.983306,0.999393,0.990786,1.0,0.97752,0.995263,0.963235,0.975586,...,0.973679,0.950218,0.983826,0.989411,0.942549,0.99446,0.996873,0.925924,0.972803,0.797906


In [162]:
train["ALT"]

0       0.064187
1       0.942549
2       0.007186
3       0.265415
4       0.015280
          ...   
2346    0.155866
2347    0.790341
2348    0.250535
2349    0.362012
2350    0.680462
Name: ALT, Length: 2351, dtype: float64

In [143]:
X_train = train.drop(columns = ["prognosis"])
y_train = pd.get_dummies(train["prognosis"]).astype(int)
X_test = test.drop(columns = ["prognosis"])
y_test = pd.get_dummies(test["prognosis"]).astype(int)

diseases = y_train.columns

In [144]:
class MyDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.Tensor(X.to_numpy()).to(device)
        self.y = torch.Tensor(y.to_numpy().argmax(axis =1)).to(device)

    def __len__(self):
        return len(self.X)
    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

train_set = MyDataset(X_train, y_train)
test_set = MyDataset(X_test, y_test)

train_loader = DataLoader(train_set, batch_size=16, shuffle=True)
test_loader = DataLoader(test_set, batch_size=16, shuffle=False)

In [145]:
class Net(nn.Module):
    def __init__(self, n_layers, n_neurons, input_size = 132, output_size = len(y_train.columns)):
        super(Net, self).__init__()

        layers = []
        if n_layers == 1:
            layers.append(nn.Linear(input_size, output_size))
            # layers.append(nn.Softmax(dim=1))
        else:
            layers.append(nn.Linear(input_size, n_neurons))
            layers.append(nn.ReLU())
            for i in range(n_layers-2):
                layers.append(nn.Linear(n_neurons, n_neurons))
                layers.append(nn.ReLU())
            layers.append(nn.Linear(n_neurons, output_size))
            # layers.append(nn.Softmax(dim=1))

        self.layers = nn.Sequential(*layers)


    def forward(self, x):
        return self.layers(x)



In [146]:
def train_model(model, train_loader = train_loader, test_loader = test_loader, epochs = 10, lr = 0.001):

    train_losses = []
    test_losses = []

    model.to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.AdamW(model.parameters(), lr=lr)


    for epoch in range(epochs):
        full_train_loss = 0
        full_test_loss = 0

        print("Epoch {}/{}:".format(epoch + 1, epochs), end=" ")

        for X, y in train_loader:
            model.train()
            optimizer.zero_grad()

            y_pred = model(X)
            loss = criterion(y_pred, y)
            loss.backward()
            optimizer.step()
            full_train_loss += loss.item()
        full_train_loss /= len(train_loader)
        train_losses.append(full_train_loss)

        print("Train loss: {:.4f}".format(full_train_loss), end='')

        for X, y in test_loader:
            model.eval()
            y_pred = model(X)
            loss = criterion(y_pred, y)
            full_test_loss += loss.item()
        full_test_loss /= len(test_loader)
        test_losses.append(full_test_loss)

        print("Test loss: {:.4f}".format(full_test_loss))

    return train_losses, test_losses



model = Net(3,10)
train_model(model)



Epoch 1/10: Train loss: 3.2602Test loss: 2.2974
Epoch 2/10: Train loss: 1.4031Test loss: 0.6298
Epoch 3/10: Train loss: 0.3245Test loss: 0.1377
Epoch 4/10: Train loss: 0.0842Test loss: 0.0706
Epoch 5/10: Train loss: 0.0365Test loss: 0.0578
Epoch 6/10: Train loss: 0.0205Test loss: 0.0562
Epoch 7/10: Train loss: 0.0131Test loss: 0.0566
Epoch 8/10: Train loss: 0.0090Test loss: 0.0589
Epoch 9/10: Train loss: 0.0066Test loss: 0.0622
Epoch 10/10: Train loss: 0.0050Test loss: 0.0674


([3.260214192139638,
  1.4030665182448052,
  0.32445440528454716,
  0.08420769844881514,
  0.03646702803187556,
  0.020460271863145876,
  0.013095274965230424,
  0.00904989969469123,
  0.006555885849097235,
  0.00496807359665475],
 [2.297384738922119,
  0.629833588997523,
  0.13768239319324493,
  0.07064247876405716,
  0.05776903157432874,
  0.05619792484988769,
  0.05656601317847768,
  0.05891837536667784,
  0.06217548126975695,
  0.06739005229125421])

41

11cdef


In [5]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import numpy as np

# Generowanie przykładowych danych
np.random.seed(42)
torch.manual_seed(42)
X = np.random.rand(1000, 2) * 10  # 1000 próbek, 2 cechy
y = (X[:, 0] + X[:, 1] > 10).astype(np.float32)  # Klasa 1 jeśli suma cech > 10

# Podział na zestawy treningowy i testowy
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standaryzacja danych
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Konwersja do tensora
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).view(-1, 1)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32).view(-1, 1)

# Definicja modelu
class BinaryClassifier(nn.Module):
    def __init__(self):
        super(BinaryClassifier, self).__init__()
        self.fc = nn.Linear(2, 1)  # Warstwa liniowa
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        return self.sigmoid(self.fc(x))

# Inicjalizacja modelu, funkcji straty i optymalizatora
model = BinaryClassifier()
criterion = nn.BCELoss()  # Binary Cross Entropy Loss
optimizer = optim.Adam(model.parameters(), lr=0.01)

# Trening modelu
epochs = 100
for epoch in range(epochs):
    optimizer.zero_grad()
    outputs = model(X_train_tensor)
    print(outputs.shape, y_train_tensor.shape)
    1+''
    loss = criterion(outputs, y_train_tensor)
    loss.backward()
    optimizer.step()

    if (epoch+1) % 10 == 0:
        print(f'Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}')

# Ewaluacja
with torch.no_grad():
    y_pred = model(X_test_tensor)
    y_pred_classes = (y_pred >= 0.5).float()
    accuracy = (y_pred_classes.eq(y_test_tensor).sum() / y_test_tensor.shape[0]).item()
    print(f'Accuracy: {accuracy:.4f}')


torch.Size([800, 1]) torch.Size([800, 1])


TypeError: unsupported operand type(s) for +: 'int' and 'str'