https://www.kaggle.com/datasets/fedesoriano/stroke-prediction-dataset?resource=download

Attribute Information:

1) id: unique identifier

2) gender: "Male", "Female" or "Other"

3) age: age of the patient

4) hypertension: 0 if the patient doesn't have hypertension, 1 if the patient has hypertension

5) heart_disease: 0 if the patient doesn't have any heart diseases, 1 if the patient has a heart disease

6) ever_married: "No" or "Yes"

7) work_type: "children", "Govt_jov", "Never_worked", "Private" or "Self-employed"

8) Residence_type: "Rural" or "Urban"

9) avg_glucose_level: average glucose level in blood

10) bmi: body mass index

11) smoking_status: "formerly smoked", "never smoked", "smokes" or "Unknown"*

12) stroke: 1 if the patient had a stroke or 0 if not

*Note: "Unknown" in smoking_status means that the information is unavailable for this patient

In [1]:
import numpy as np
import pandas as pd
import missingno as msno
import matplotlib.pyplot as plt

In [2]:
data = pd.read_csv('healthcare-dataset-stroke-data.csv')
# data.info()
data

Unnamed: 0,id,gender,age,hypertension,heart_disease,ever_married,work_type,Residence_type,avg_glucose_level,bmi,smoking_status,stroke
0,9046,Male,67.0,0,1,Yes,Private,Urban,228.69,36.6,formerly smoked,1
1,51676,Female,61.0,0,0,Yes,Self-employed,Rural,202.21,,never smoked,1
2,31112,Male,80.0,0,1,Yes,Private,Rural,105.92,32.5,never smoked,1
3,60182,Female,49.0,0,0,Yes,Private,Urban,171.23,34.4,smokes,1
4,1665,Female,79.0,1,0,Yes,Self-employed,Rural,174.12,24.0,never smoked,1
...,...,...,...,...,...,...,...,...,...,...,...,...
5105,18234,Female,80.0,1,0,Yes,Private,Urban,83.75,,never smoked,0
5106,44873,Female,81.0,0,0,Yes,Self-employed,Urban,125.20,40.0,never smoked,0
5107,19723,Female,35.0,0,0,Yes,Self-employed,Rural,82.99,30.6,never smoked,0
5108,37544,Male,51.0,0,0,Yes,Private,Rural,166.29,25.6,formerly smoked,0


In [3]:
#перетворюємо данні в числові для подальшої роботи
#2. data['gender'] стать перетворюємо жінки, інші = 0 ; чоловіки =1 
#(інші == жінки, адже жінок більше у вибірці, більша ймовірність що та особа саме тієї статі)
data['Int_Gender'] = np.where(data['gender'] == 'Male', 1, 0)

#6 data['ever_married'] Чи був колись одружений? перетворюєм в числові Так == 1; Ні == 0
data['Int_Ever_Married'] = np.where(data['ever_married'] == 'Yes', 1, 0) 

#8 data['Residence_type'] місце проживання місто == 0, село == 1
data['Int_Residence_type'] = np.where(data['Residence_type'] == 'Rural', 1, 0)

In [4]:
def func3_2(elem):
    if elem == 'Never_worked' or elem == 'children':
        return 'Never_worked'
    
    if elem == 'Private' or elem == 'Govt_job':
        return 'Company_worker'
    
    if elem == 'Self-employed':
        return 'Self-employed'


In [5]:
#7 data['work_type'] після аналізу впливу роботи на ризик інсульту, робюим 3 класи, в залежності від ризику інсульту
data['Alt_Work_type'] = data['work_type'].apply(func3_2)

In [6]:
#11 data['smoking_status'] Згідно данних ВООЗ в світі близько 13 % курців це відповідає тій кількості курців що є відомою,
# тому осіб що не вказали про себе можна важати - не курцями
# formerly smoked + smokes Не курець == 0; Курець == 1

def func5(elem):
    if elem == 'never smoked' or elem == 'Unknown':
        return 0
    else:
        return 1

In [7]:
data['Int_Smoking_status'] = data['smoking_status'].apply(func5)
data['Int_Smoking_status'].value_counts(dropna=False)

0    3436
1    1674
Name: Int_Smoking_status, dtype: int64

In [8]:
# data

In [9]:
# data.columns

In [10]:
#сгрупував данні/перетворив, та інше - вручну

data_mod = data[['Int_Gender', 'age', 'hypertension', 'heart_disease', 'Int_Ever_Married', 'Alt_Work_type', 'Int_Residence_type', 'avg_glucose_level', 'bmi', 'Int_Smoking_status',
             'stroke']].copy()
# data_mod.info()
data_mod

Unnamed: 0,Int_Gender,age,hypertension,heart_disease,Int_Ever_Married,Alt_Work_type,Int_Residence_type,avg_glucose_level,bmi,Int_Smoking_status,stroke
0,1,67.0,0,1,1,Company_worker,0,228.69,36.6,1,1
1,0,61.0,0,0,1,Self-employed,1,202.21,,0,1
2,1,80.0,0,1,1,Company_worker,1,105.92,32.5,0,1
3,0,49.0,0,0,1,Company_worker,0,171.23,34.4,1,1
4,0,79.0,1,0,1,Self-employed,1,174.12,24.0,0,1
...,...,...,...,...,...,...,...,...,...,...,...
5105,0,80.0,1,0,1,Company_worker,0,83.75,,0,0
5106,0,81.0,0,0,1,Self-employed,0,125.20,40.0,0,0
5107,0,35.0,0,0,1,Self-employed,1,82.99,30.6,0,0
5108,1,51.0,0,0,1,Company_worker,1,166.29,25.6,1,0


In [11]:
data_mod2 = data[['stroke','gender', 'age', 'hypertension', 'heart_disease', 'Int_Ever_Married',
       'work_type', 'Residence_type', 'avg_glucose_level', 'bmi',
       'smoking_status']].copy()
# data_mod2

In [12]:
# data.columns

In [13]:
# замінимо всі пропуски bmi на медіанні значення

data['bmi'].fillna(data['bmi'].median(), inplace = True)
data['bmi'].isna().sum()

0

In [14]:
# data.describe()

# Побудова Моделей

Беремо не змінені ніяк данні + лише заповнені пробіли

In [15]:
# data.columns

In [16]:
Data_1 = data[['gender', 'age', 'hypertension', 'heart_disease', 'ever_married',
       'work_type', 'Residence_type', 'avg_glucose_level', 'bmi',
       'smoking_status', 'stroke']].copy()
# Data_1

В нас присутній дисбаланс данних що негативно впливає на визначення пацієнтів з інсультом, штучно збільшимо кількість пацієнтів з інсультом

In [17]:
Data_1 = Data_1.sample(frac=1)

fraud_Data_1 = Data_1.loc[Data_1['stroke'] == 1]
non_fraud_Data_1 = Data_1.loc[Data_1['stroke'] == 0][:len(fraud_Data_1)]

normal_distributed_Data_1 = pd.concat([fraud_Data_1, non_fraud_Data_1])
new_Data_1 = normal_distributed_Data_1.sample(frac=1, random_state=42)

new_Data_1.shape

(498, 11)

Розділення данних на тренувальні та тестувальні

In [18]:
from sklearn.model_selection import train_test_split

train_data, test_data = train_test_split(new_Data_1, test_size=0.20, random_state=20)
print('The size of training data is: {} \nThe size of testing data is: {}'.format(len(train_data), len(test_data)))

The size of training data is: 398 
The size of testing data is: 100


In [19]:
# train_data.columns

Розділяєм данні, виділяєм таргет

In [20]:
X_train = train_data.drop(['stroke'], axis=1)
y_train = train_data['stroke']

X_test = test_data.drop(['stroke'], axis=1)
y_test = test_data['stroke']


Перетворення данних

In [21]:
# Перетворим категорійні данні за допомогою sklearn  LabelEncoder в циферкі
from sklearn import preprocessing

X_train_prepared=X_train.copy()
X_test_prepared=X_test.copy()

col = ['gender', 'ever_married', 'smoking_status', 'work_type','Residence_type']
LE = preprocessing.LabelEncoder()
for i in col:
    X_train_prepared[i]=X_train_prepared[[i]].apply(LE.fit_transform)
    X_test_prepared[i]=X_test_prepared[[i]].apply(LE.transform)
    
    print(LE.inverse_transform(list(set(X_train_prepared[i]))),'==', list(set(X_train_prepared[i]))) 
    print(X_train_prepared[i].value_counts(dropna=False))               
              


['Female' 'Male'] == [0, 1]
0    226
1    172
Name: gender, dtype: int64
['No' 'Yes'] == [0, 1]
1    296
0    102
Name: ever_married, dtype: int64
['Unknown' 'formerly smoked' 'never smoked' 'smokes'] == [0, 1, 2, 3]
2    135
0    116
1     85
3     62
Name: smoking_status, dtype: int64
['Govt_job' 'Never_worked' 'Private' 'Self-employed' 'children'] == [0, 1, 2, 3, 4]
2    219
3     86
0     58
4     34
1      1
Name: work_type, dtype: int64
['Rural' 'Urban'] == [0, 1]
1    216
0    182
Name: Residence_type, dtype: int64


Стандартизація набору даних

In [22]:
# маєм викиди тому використаємо RobustScaler
from sklearn.preprocessing import RobustScaler
rob_scaler = RobustScaler()

X_train_rob_scaler=rob_scaler.fit_transform(X_train_prepared)
X_test_rob_scaler=rob_scaler.transform(X_test_prepared)



# Піднімаєм ставки, використаєм нейронну мережу!

In [23]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

In [24]:
X_train = X_train_rob_scaler
X_test = X_test_rob_scaler

In [25]:
class BinaryClassification(nn.Module):
    def __init__(self):
        super(BinaryClassification, self).__init__()
        self.layer_1 = nn.Linear(10, 64)
        self.layer_out = nn.Linear(64, 1)

        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

    def forward(self, inputs):
        x = self.relu(self.layer_1(inputs))
        x = self.layer_out(x)

        x = self.sigmoid(x)
        return x

In [26]:
device = torch.device("cpu")
print(device)

cpu


In [27]:
model = BinaryClassification()
model.to(device)

print(model)

BinaryClassification(
  (layer_1): Linear(in_features=10, out_features=64, bias=True)
  (layer_out): Linear(in_features=64, out_features=1, bias=True)
  (relu): ReLU()
  (sigmoid): Sigmoid()
)


In [28]:
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [29]:
def binary_acc(y_pred, y_test):
    y_pred_tag = torch.round(y_pred)

    correct_results_sum = (y_pred_tag == y_test).sum().float()
    acc = correct_results_sum/y_test.shape[0]
    acc = torch.round(acc * 100)

    return acc

In [30]:
# тренуєм 
X_train = torch.FloatTensor(X_train)
y_train = torch.FloatTensor(y_train)

X_train, y_train = X_train.to(device), y_train.to(device)

# ValueError: could not determine the shape of object type 'Series'

In [31]:
model.train()

for e in range(1, 51):
    epoch_loss = 0
    epoch_acc = 0

    optimizer.zero_grad()

    y_pred = model(X_train)
    loss = criterion(y_pred, y_train.unsqueeze(1))
    acc = binary_acc(y_pred, y_train.unsqueeze(1))

    loss.backward()
    optimizer.step()

    epoch_loss = loss.item()
    epoch_acc = acc.item()

    print(f'Epoch {e+0:03}: | Loss: {epoch_loss:.5f} | Acc: {epoch_acc:.3f}')

Epoch 001: | Loss: 0.69655 | Acc: 51.000
Epoch 002: | Loss: 0.69407 | Acc: 52.000
Epoch 003: | Loss: 0.69162 | Acc: 53.000
Epoch 004: | Loss: 0.68921 | Acc: 54.000
Epoch 005: | Loss: 0.68682 | Acc: 54.000
Epoch 006: | Loss: 0.68446 | Acc: 55.000
Epoch 007: | Loss: 0.68213 | Acc: 56.000
Epoch 008: | Loss: 0.67984 | Acc: 57.000
Epoch 009: | Loss: 0.67757 | Acc: 59.000
Epoch 010: | Loss: 0.67534 | Acc: 60.000
Epoch 011: | Loss: 0.67312 | Acc: 60.000
Epoch 012: | Loss: 0.67092 | Acc: 62.000
Epoch 013: | Loss: 0.66875 | Acc: 62.000
Epoch 014: | Loss: 0.66660 | Acc: 63.000
Epoch 015: | Loss: 0.66447 | Acc: 63.000
Epoch 016: | Loss: 0.66236 | Acc: 63.000
Epoch 017: | Loss: 0.66026 | Acc: 63.000
Epoch 018: | Loss: 0.65819 | Acc: 65.000
Epoch 019: | Loss: 0.65612 | Acc: 65.000
Epoch 020: | Loss: 0.65408 | Acc: 66.000
Epoch 021: | Loss: 0.65205 | Acc: 67.000
Epoch 022: | Loss: 0.65002 | Acc: 68.000
Epoch 023: | Loss: 0.64802 | Acc: 68.000
Epoch 024: | Loss: 0.64602 | Acc: 67.000
Epoch 025: | Los

Результати

In [32]:
X_test = torch.FloatTensor(X_test)
y_test = torch.FloatTensor(y_test.to_numpy())

X_test = X_test.to(device)


y_pred_list = []
model.eval()

with torch.no_grad():
    y_test_pred = model(X_test)
    y_pred_tag = torch.round(y_test_pred)
    y_pred_list.append(y_pred_tag.cpu().numpy())

y_pred_list = [a.squeeze().tolist() for a in y_pred_list][0]
y_pred_list

[1.0,
 0.0,
 1.0,
 0.0,
 0.0,
 1.0,
 0.0,
 0.0,
 0.0,
 1.0,
 1.0,
 0.0,
 0.0,
 0.0,
 0.0,
 1.0,
 1.0,
 1.0,
 1.0,
 0.0,
 0.0,
 0.0,
 0.0,
 1.0,
 1.0,
 0.0,
 1.0,
 0.0,
 0.0,
 0.0,
 1.0,
 1.0,
 0.0,
 1.0,
 1.0,
 0.0,
 1.0,
 1.0,
 0.0,
 0.0,
 1.0,
 0.0,
 1.0,
 0.0,
 1.0,
 0.0,
 1.0,
 1.0,
 0.0,
 0.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 0.0,
 1.0,
 0.0,
 1.0,
 1.0,
 1.0,
 1.0,
 0.0,
 0.0,
 0.0,
 0.0,
 1.0,
 1.0,
 0.0,
 0.0,
 1.0,
 0.0,
 1.0,
 1.0,
 0.0,
 1.0,
 0.0,
 1.0,
 0.0,
 0.0,
 1.0,
 1.0,
 0.0,
 1.0,
 1.0,
 0.0,
 1.0,
 0.0,
 0.0,
 1.0,
 0.0,
 0.0,
 1.0,
 1.0,
 1.0,
 0.0,
 1.0,
 0.0,
 0.0,
 0.0]

In [33]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

cm = confusion_matrix(y_test, y_pred_list)
acc = accuracy_score(y_test, y_pred_list)
report = classification_report(y_test, y_pred_list)

print('Accuracy = ', acc * 100, '%')
print(cm)
print(report)

Accuracy =  71.0 %
[[35 14]
 [15 36]]
              precision    recall  f1-score   support

         0.0       0.70      0.71      0.71        49
         1.0       0.72      0.71      0.71        51

    accuracy                           0.71       100
   macro avg       0.71      0.71      0.71       100
weighted avg       0.71      0.71      0.71       100



# Тренування за допомогою batch size

In [34]:
class TrainData(Dataset):

    def __init__(self, X_data, y_data):
        self.X_data = X_data
        self.y_data = y_data

    def __getitem__(self, index):
        return self.X_data[index], self.y_data[index]

    def __len__(self):
        return len(self.X_data)


train_data = TrainData(torch.FloatTensor(X_train),
                       torch.FloatTensor(y_train))


class TestData(Dataset):

    def __init__(self, X_data):
        self.X_data = X_data

    def __getitem__(self, index):
        return self.X_data[index]

    def __len__(self):
        return len(self.X_data)


test_data = TestData(torch.FloatTensor(X_test))

train_loader = DataLoader(dataset=train_data, batch_size=64, shuffle=True)
test_loader = DataLoader(dataset=test_data, batch_size=1)

In [35]:
model = BinaryClassification()
model.to(device)

criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [36]:
model.train()

for e in range(1, 51):
    epoch_loss = 0
    epoch_acc = 0
    for X_batch, y_batch in train_loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        optimizer.zero_grad()

        y_pred = model(X_batch)

        loss = criterion(y_pred, y_batch.unsqueeze(1))
        acc = binary_acc(y_pred, y_batch.unsqueeze(1))

        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()
        epoch_acc += acc.item()

    print(f'Epoch {e+0:03}: | Loss: {epoch_loss/len(train_loader):.5f} | Acc: {epoch_acc/len(train_loader):.3f}')

Epoch 001: | Loss: 0.73153 | Acc: 33.000
Epoch 002: | Loss: 0.74098 | Acc: 40.571
Epoch 003: | Loss: 0.72112 | Acc: 44.286
Epoch 004: | Loss: 0.73270 | Acc: 53.286
Epoch 005: | Loss: 0.72208 | Acc: 52.857
Epoch 006: | Loss: 0.70705 | Acc: 51.429
Epoch 007: | Loss: 0.70912 | Acc: 52.000
Epoch 008: | Loss: 0.69856 | Acc: 51.571
Epoch 009: | Loss: 0.70334 | Acc: 53.000
Epoch 010: | Loss: 0.69849 | Acc: 52.000
Epoch 011: | Loss: 0.69629 | Acc: 54.000
Epoch 012: | Loss: 0.69008 | Acc: 53.571
Epoch 013: | Loss: 0.67824 | Acc: 52.429
Epoch 014: | Loss: 0.67852 | Acc: 56.714
Epoch 015: | Loss: 0.66850 | Acc: 56.429
Epoch 016: | Loss: 0.67730 | Acc: 60.857
Epoch 017: | Loss: 0.67773 | Acc: 64.286
Epoch 018: | Loss: 0.65909 | Acc: 59.286
Epoch 019: | Loss: 0.66207 | Acc: 64.000
Epoch 020: | Loss: 0.66277 | Acc: 66.429
Epoch 021: | Loss: 0.66615 | Acc: 67.857
Epoch 022: | Loss: 0.65568 | Acc: 72.429
Epoch 023: | Loss: 0.65776 | Acc: 71.571
Epoch 024: | Loss: 0.65824 | Acc: 71.000
Epoch 025: | Los

In [37]:
y_pred_list = []
model.eval()
with torch.no_grad():
    for X_batch in test_loader:
        X_batch = X_batch.to(device)
        y_test_pred = model(X_batch)
        y_pred_tag = torch.round(y_test_pred)
        y_pred_list.append(y_pred_tag.cpu().numpy())

y_pred_list = [a.squeeze().tolist() for a in y_pred_list]
# confusion_matrix(y_test, y_pred_list)

In [38]:
cm = confusion_matrix(y_test, y_pred_list)
acc = accuracy_score(y_test, y_pred_list)
report = classification_report(y_test, y_pred_list)

print('Accuracy = ', acc * 100, '%')
print(cm)
print(report)

Accuracy =  72.0 %
[[39 10]
 [18 33]]
              precision    recall  f1-score   support

         0.0       0.68      0.80      0.74        49
         1.0       0.77      0.65      0.70        51

    accuracy                           0.72       100
   macro avg       0.73      0.72      0.72       100
weighted avg       0.73      0.72      0.72       100



Задля того щоб розпізнавати всіх пацієнтів з інсультом, треба дуже сильно змістити алгоритм 0.5 -> 0.25

# Спробуємо отримати кращі результати, застосуєм данні погруповані

In [69]:
data_mod

Unnamed: 0,Int_Gender,age,hypertension,heart_disease,Int_Ever_Married,Alt_Work_type,Int_Residence_type,avg_glucose_level,bmi,Int_Smoking_status,stroke
0,1,67.0,0,1,1,Company_worker,0,228.69,36.6,1,1
1,0,61.0,0,0,1,Self-employed,1,202.21,,0,1
2,1,80.0,0,1,1,Company_worker,1,105.92,32.5,0,1
3,0,49.0,0,0,1,Company_worker,0,171.23,34.4,1,1
4,0,79.0,1,0,1,Self-employed,1,174.12,24.0,0,1
...,...,...,...,...,...,...,...,...,...,...,...
5105,0,80.0,1,0,1,Company_worker,0,83.75,,0,0
5106,0,81.0,0,0,1,Self-employed,0,125.20,40.0,0,0
5107,0,35.0,0,0,1,Self-employed,1,82.99,30.6,0,0
5108,1,51.0,0,0,1,Company_worker,1,166.29,25.6,1,0


In [70]:
data_mod['bmi'].fillna(data_mod['bmi'].median(), inplace = True)
data_mod['bmi'].isna().sum()

0

In [71]:
# from sklearn.utils import resample

# train_data, test_data = train_test_split(data_mod, test_size=0.20, random_state=20,
#                                          stratify=data_mod['stroke'])

# X_test = test_data.drop(['stroke'], axis=1)
# y_test = test_data['stroke']

# fraud_df = train_data.loc[train_data['stroke'] == 1]
# non_fraud_df = train_data.loc[train_data['stroke'] == 0]

# fraud_upsampled = resample(fraud_df,
#                            replace=True, # sample with replacement
#                            n_samples=len(non_fraud_df), # match number in majority class
#                            random_state=27)

# new_train_data = pd.concat([non_fraud_df, fraud_upsampled])

# X_train = new_train_data.drop(['stroke'], axis=1)
# y_train = new_train_data['stroke']

# print('Shape of train data = ', X_train.shape)
# print('Shape of test data = ', X_test.shape)

# train_unique_label, train_counts_label = np.unique(y_train, return_counts=True)
# test_unique_label, test_counts_label = np.unique(y_test, return_counts=True)

# print('Label Distributions: \n')
# print(train_counts_label/ len(y_train))
# print(test_counts_label/ len(y_test))

In [72]:
data_mod = data_mod.sample(frac=1)

fraud_data_mod = data_mod.loc[data_mod['stroke'] == 1]
non_fraud_data_mod = data_mod.loc[data_mod['stroke'] == 0][:len(fraud_data_mod)]

normal_distributed_data_mod = pd.concat([fraud_data_mod, non_fraud_data_mod])
new_data_mod = normal_distributed_data_mod.sample(frac=1, random_state=42)

new_data_mod.shape

(498, 11)

In [73]:
from sklearn.model_selection import train_test_split

train_data, test_data = train_test_split(new_data_mod, test_size=0.20, random_state=20)
print('The size of training data is: {} \nThe size of testing data is: {}'.format(len(train_data), len(test_data)))

The size of training data is: 398 
The size of testing data is: 100


In [74]:
X_train = train_data.drop(['stroke'], axis=1)
y_train = train_data['stroke']

X_test = test_data.drop(['stroke'], axis=1)
y_test = test_data['stroke']

In [75]:
#Перетворення категорійного стовпчика в цифровий за допомогою LabelEncoder()
from sklearn import preprocessing

X_train_prepared=X_train.copy()
X_test_prepared=X_test.copy()

col = ['Alt_Work_type']
LE = preprocessing.LabelEncoder()
for i in col:
    X_train_prepared[i]=X_train_prepared[[i]].apply(LE.fit_transform)
    X_test_prepared[i]=X_test_prepared[[i]].apply(LE.transform)
    
    print(LE.inverse_transform(list(set(X_train_prepared[i]))),'==', list(set(X_train_prepared[i]))) 
    print(X_train_prepared[i].value_counts(dropna=False)) 

['Company_worker' 'Never_worked' 'Self-employed'] == [0, 1, 2]
0    284
2     79
1     35
Name: Alt_Work_type, dtype: int64


In [76]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()

X_train_scaler=scaler.fit_transform(X_train_prepared)
X_test_scaler=scaler.transform(X_test_prepared)

In [77]:
X_train_scaler=X_train
X_test_scaler=X_test

In [78]:
class BinaryClassification(nn.Module):
    def __init__(self):
        super(BinaryClassification, self).__init__()
        self.layer_1 = nn.Linear(11, 64)
        self.layer_out = nn.Linear(64, 1)

        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

    def forward(self, inputs):
        x = self.relu(self.layer_1(inputs))
        x = self.layer_out(x)

        x = self.sigmoid(x)
        return x

In [79]:
device = torch.device("cpu")
print(device)

cpu


In [80]:
model = BinaryClassification()
model.to(device)

print(model)

BinaryClassification(
  (layer_1): Linear(in_features=11, out_features=64, bias=True)
  (layer_out): Linear(in_features=64, out_features=1, bias=True)
  (relu): ReLU()
  (sigmoid): Sigmoid()
)


In [81]:
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [82]:
def binary_acc(y_pred, y_test):
    y_pred_tag = torch.round(y_pred)

    correct_results_sum = (y_pred_tag == y_test).sum().float()
    acc = correct_results_sum/y_test.shape[0]
    acc = torch.round(acc * 100)

    return acc

In [83]:
X_train = torch.FloatTensor(X_train)
y_train = torch.FloatTensor(y_train)

X_train, y_train = X_train.to(device), y_train.to(device)

ValueError: could not determine the shape of object type 'DataFrame'