In [2]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import torch 
import torch.nn as nn 
from tqdm import tqdm_notebook as tqdm
from torch.utils.data import DataLoader,Dataset
from sklearn.model_selection import train_test_split

from sklearn.metrics import confusion_matrix, classification_report
new_df= pd.read_csv('numaric_data.csv')


class custom_csv_dataset(Dataset):
    def __init__(self,csv_file):
        self.data = csv_file
    def __len__(self):
        return len(self.data)
    def __getitem__(self,idx):
        # all collum expect 'HeartDisease' collum
        columns_to_exclude = ['HeartDisease']

        # Get the column names excluding 'HeartDisease'
        train_column_names = [col for col in self.data.columns if col not in columns_to_exclude]
        #get idx based train and target values
        train_data = self.data[train_column_names].iloc[idx].values
        target_data = self.data['HeartDisease'].iloc[idx]
        #convert to tensor
        train_data_tensor = torch.tensor(train_data,dtype=torch.float32)
        target_data_tensor = torch.tensor(target_data,dtype=torch.float32)
        return train_data_tensor,target_data_tensor
    

train_df, test_df = train_test_split(new_df, test_size=0.1, random_state=42)
        
#reset index on both dataframes
train_df.reset_index(drop=True, inplace=True)
test_df.reset_index(drop=True, inplace=True)

test_dataset = custom_csv_dataset(test_df)
test_loader = DataLoader(test_dataset,batch_size=1,shuffle=True)

#evalute model

model = nn.Sequential(
    nn.Linear(17, 64),
    nn.ReLU(),
    nn.Linear(64, 64),
    nn.ReLU(),
    nn.Linear(64, 1),
    nn.Sigmoid()
)

# Move model to the specified device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)


#load model
model.load_state_dict(torch.load('model_epoch12.pt'))

#evalute model
model.eval()
y_pred_list = []
y_true_list = []
with torch.no_grad():
    for X_batch, y_batch in test_loader:
        X_batch = X_batch.to(device)
        y_batch = y_batch.to(device)
        y_test_pred = model(X_batch)
        y_test_pred = torch.round(y_test_pred)
        y_pred_list.append(y_test_pred.cpu().numpy())
        y_true_list.append(y_batch.cpu().numpy())

y_pred_list = [a.squeeze().tolist() for a in y_pred_list]
y_true_list = [a.squeeze().tolist() for a in y_true_list]

print(classification_report(y_true_list, y_pred_list))
print(confusion_matrix(y_true_list, y_pred_list))



              precision    recall  f1-score   support

         0.0       0.92      0.93      0.93     29112
         1.0       0.25      0.22      0.24      2868

    accuracy                           0.87     31980
   macro avg       0.59      0.58      0.58     31980
weighted avg       0.86      0.87      0.87     31980

[[27165  1947]
 [ 2224   644]]


In [12]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import torch 
import torch.nn as nn 
from tqdm import tqdm_notebook as tqdm
from torch.utils.data import DataLoader,Dataset
from sklearn.model_selection import train_test_split

from sklearn.metrics import confusion_matrix, classification_report
new_df= pd.read_csv('numaric_data.csv')


class custom_csv_dataset(Dataset):
    def __init__(self,csv_file):
        self.data = csv_file
    def __len__(self):
        return len(self.data)
    def __getitem__(self,idx):
        # all collum expect 'HeartDisease' collum
        columns_to_exclude = ['HeartDisease']

        # Get the column names excluding 'HeartDisease'
        train_column_names = [col for col in self.data.columns if col not in columns_to_exclude]
        #get idx based train and target values
        train_data = self.data[train_column_names].iloc[idx].values
        target_data = self.data['HeartDisease'].iloc[idx]
        #convert to tensor
        train_data_tensor = torch.tensor(train_data,dtype=torch.float32)
        target_data_tensor = torch.tensor(target_data,dtype=torch.float32)
        return train_data_tensor,target_data_tensor
    

train_df, test_df = train_test_split(new_df, test_size=0.1, random_state=42)
        
#reset index on both dataframes
train_df.reset_index(drop=True, inplace=True)
test_df.reset_index(drop=True, inplace=True)

test_dataset = custom_csv_dataset(test_df)
test_loader = DataLoader(test_dataset,batch_size=1,shuffle=True)

#evalute model

model = nn.Sequential(
    nn.Linear(17, 64),
    nn.ReLU(),
    nn.Linear(64, 64),
    nn.ReLU(),
    nn.Linear(64, 1),
    nn.Sigmoid()
)

# Move model to the specified device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)


#load model
model.load_state_dict(torch.load('model_epoch12.pt'))

#evalute model
model.eval()
y_pred_list = []
y_true_list = []
with torch.no_grad():
    for X_batch, y_batch in test_loader:
        X_batch = X_batch.to(device)
        y_batch = y_batch.to(device)
        y_test_pred = model(X_batch)
        y_test_pred = torch.round(y_test_pred)
        y_pred_list.append(y_test_pred.cpu().numpy())
        y_true_list.append(y_batch.cpu().numpy())

y_pred_list = [a.squeeze().tolist() for a in y_pred_list]
y_true_list = [a.squeeze().tolist() for a in y_true_list]

print(classification_report(y_true_list, y_pred_list))
print(confusion_matrix(y_true_list, y_pred_list))



              precision    recall  f1-score   support

         0.0       0.93      0.93      0.93     29112
         1.0       0.25      0.24      0.24      2868

    accuracy                           0.87     31980
   macro avg       0.59      0.58      0.58     31980
weighted avg       0.86      0.87      0.87     31980

[[27023  2089]
 [ 2182   686]]


In [7]:
from collections import Counter

# Count the occurrences of unique values
value_counts = Counter(y_pred_list)
value_counts

Counter({0.0: 29389, 1.0: 2591})

In [13]:
from collections import Counter

# Count the occurrences of unique values
value_counts = Counter(y_pred_list)
value_counts

Counter({1.0: 2775, 0.0: 29205})

In [None]:
from collections import Counter

# Count the occurrences of unique values
value_counts = Counter(y_true_list)
value_counts

In [11]:
def count_value_changes(list1, list2):
    change_count = {}

    for val1, val2 in zip(list1, list2):
        if val1 != val2:
            change_count[val1] = change_count.get(val1, 0) + 1
            change_count[val2] = change_count.get(val2, 0) + 1

    # Add values that appear only in one of the lists
    remaining_values = set(list1) ^ set(list2)
    for val in remaining_values:
        change_count[val] = change_count.get(val, 0) + 1

    return change_count


# Count the changes and their occurrences
changes = count_value_changes(y_true_list, y_pred_list)

for value, count in changes.items():
    print(f"{value} changed {count} times")


0.0 changed 4171 times
1.0 changed 4171 times


In [9]:
def count_different_indices(list1, list2):
    count = 0
    min_length = min(len(list1), len(list2))

    for i in range(min_length):
        if list1[i] != list2[i]:
            count += 1
    
    # Add the remaining elements if the lists have different lengths
    count += abs(len(list1) - len(list2))

    return count
# Count the different indices
different_count = count_different_indices(y_true_list, y_pred_list)
different_count


4171

In [3]:
y_true_list


[0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 1.0,
 0.0,
 1.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 1.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 1.0,
 0.0,
 0.0,
 0.0,
 0.0,
 1.0,
 1.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 1.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 1.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 1.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 1.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 1.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0