<a href="https://colab.research.google.com/github/NikhilMamilla/NikhilMamilla/blob/main/Heart_Failure_Prediction_Clinical_Records.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

***Heart Failure Prediction - Clinical Records***

In [None]:
# Importing necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.svm import SVC
import warnings

In [None]:
# Suppressing warnings
warnings.filterwarnings('ignore')

In [None]:
# Importing the dataset
df = pd.read_csv('heart_failure_clinical_records.csv')

In [None]:
# Displaying the first few rows of the DataFrame
print("First few rows of the DataFrame:")
print(df.head())

First few rows of the DataFrame:
    age  anaemia  creatinine_phosphokinase  diabetes  ejection_fraction  \
0  55.0        0                       748         0                 45   
1  65.0        0                        56         0                 25   
2  45.0        0                       582         1                 38   
3  60.0        1                       754         1                 40   
4  95.0        1                       582         0                 30   

   high_blood_pressure  platelets  serum_creatinine  serum_sodium  sex  \
0                    0  263358.03               1.3           137    1   
1                    0  305000.00               5.0           130    1   
2                    0  319000.00               0.9           140    0   
3                    1  328000.00               1.2           126    1   
4                    0  461000.00               2.0           132    1   

   smoking  time  DEATH_EVENT  
0        1    88            0  
1      

In [None]:
# Displaying the last few rows of the DataFrame
print("\nLast few rows of the DataFrame:")
print(df.tail())


Last few rows of the DataFrame:
       age  anaemia  creatinine_phosphokinase  diabetes  ejection_fraction  \
4995  45.0        0                       582         1                 55   
4996  60.0        1                       582         0                 30   
4997  95.0        1                       112         0                 40   
4998  65.0        1                       160         1                 20   
4999  40.0        0                       244         0                 45   

      high_blood_pressure  platelets  serum_creatinine  serum_sodium  sex  \
4995                    0   543000.0               1.0           132    0   
4996                    1   127000.0               0.9           145    0   
4997                    1   196000.0               1.0           138    0   
4998                    0   327000.0               2.7           116    0   
4999                    1   275000.0               0.9           140    0   

      smoking  time  DEATH_EVENT  


In [None]:
# Displaying the shape of the DataFrame
print("\nShape of the DataFrame:")
print(df.shape)


Shape of the DataFrame:
(5000, 13)


In [None]:
# Displaying the columns of the DataFrame
print("\nColumns of the DataFrame:")
print(df.columns)


Columns of the DataFrame:
Index(['age', 'anaemia', 'creatinine_phosphokinase', 'diabetes',
       'ejection_fraction', 'high_blood_pressure', 'platelets',
       'serum_creatinine', 'serum_sodium', 'sex', 'smoking', 'time',
       'DEATH_EVENT'],
      dtype='object')


In [None]:
# Checking for duplicate rows in the DataFrame
print("\nNumber of duplicated rows in the DataFrame:")
print(df.duplicated().sum())


Number of duplicated rows in the DataFrame:
3680


In [None]:
# Dropping duplicates if any
df = df.drop_duplicates()

In [None]:
# Handling missing values if any
# (This step is not included as the provided data snippet does not contain missing values)

# Checking for missing values
print("\nMissing values in the DataFrame:")
print(df.isnull().sum())


Missing values in the DataFrame:
age                         0
anaemia                     0
creatinine_phosphokinase    0
diabetes                    0
ejection_fraction           0
high_blood_pressure         0
platelets                   0
serum_creatinine            0
serum_sodium                0
sex                         0
smoking                     0
time                        0
DEATH_EVENT                 0
dtype: int64


In [None]:
# Information about the DataFrame
print("\nInformation about the DataFrame:")
print(df.info())


Information about the DataFrame:
<class 'pandas.core.frame.DataFrame'>
Index: 1320 entries, 0 to 4972
Data columns (total 13 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   age                       1320 non-null   float64
 1   anaemia                   1320 non-null   int64  
 2   creatinine_phosphokinase  1320 non-null   int64  
 3   diabetes                  1320 non-null   int64  
 4   ejection_fraction         1320 non-null   int64  
 5   high_blood_pressure       1320 non-null   int64  
 6   platelets                 1320 non-null   float64
 7   serum_creatinine          1320 non-null   float64
 8   serum_sodium              1320 non-null   int64  
 9   sex                       1320 non-null   int64  
 10  smoking                   1320 non-null   int64  
 11  time                      1320 non-null   int64  
 12  DEATH_EVENT               1320 non-null   int64  
dtypes: float64(3), int64(10)
memory us

In [None]:
# Summary statistics of the DataFrame
print("\nSummary statistics of the DataFrame:")
print(df.describe())


Summary statistics of the DataFrame:
               age      anaemia  creatinine_phosphokinase     diabetes  \
count  1320.000000  1320.000000               1320.000000  1320.000000   
mean     60.587377     0.485606                576.135606     0.446970   
std      11.913538     0.499982                970.630878     0.497368   
min      40.000000     0.000000                 23.000000     0.000000   
25%      50.000000     0.000000                115.000000     0.000000   
50%      60.000000     0.000000                249.000000     0.000000   
75%      69.000000     1.000000                582.000000     1.000000   
max      95.000000     1.000000               7861.000000     1.000000   

       ejection_fraction  high_blood_pressure      platelets  \
count        1320.000000          1320.000000    1320.000000   
mean           37.881818             0.369697  263751.982189   
std            11.572547             0.482906  106345.010143   
min            14.000000             0.

In [None]:
# Encoding the target variable
label_encoder = LabelEncoder()
df['DEATH_EVENT'] = label_encoder.fit_transform(df['DEATH_EVENT'])

In [27]:
# Print DataFrame after LabelEncoder
print("\nDataFrame after LabelEncoder:")
print(df.head())


DataFrame after LabelEncoder:
    age  anaemia  creatinine_phosphokinase  diabetes  ejection_fraction  \
0  55.0        0                       748         0                 45   
1  65.0        0                        56         0                 25   
2  45.0        0                       582         1                 38   
3  60.0        1                       754         1                 40   
4  95.0        1                       582         0                 30   

   high_blood_pressure  platelets  serum_creatinine  serum_sodium  sex  \
0                    0  263358.03               1.3           137    1   
1                    0  305000.00               5.0           130    1   
2                    0  319000.00               0.9           140    0   
3                    1  328000.00               1.2           126    1   
4                    0  461000.00               2.0           132    1   

   smoking  time  DEATH_EVENT  
0        1    88            0  
1        

In [28]:
# Print DataFrame after LabelEncoder
print("\nDataFrame after LabelEncoder:")
print(df.tail())


DataFrame after LabelEncoder:
       age  anaemia  creatinine_phosphokinase  diabetes  ejection_fraction  \
4731  75.0        0                       582         1                 30   
4855  73.0        0                       582         0                 20   
4862  45.0        0                       220         0                 35   
4965  40.0        1                       129         1                 35   
4972  70.0        0                       835         0                 35   

      high_blood_pressure  platelets  serum_creatinine  serum_sodium  sex  \
4731                    1  263358.03              1.83           134    1   
4855                    0  263358.03              1.83           134    1   
4862                    0  174000.00              0.80           139    1   
4965                    0  255000.00              0.90           137    1   
4972                    0  327000.00              1.10           142    0   

      smoking  time  DEATH_EVENT  
47

In [26]:
# Displaying the encoded 'DEATH_EVENT' data
print("\nEncoded 'DEATH_EVENT' data:")
print(df['DEATH_EVENT'])


Encoded 'DEATH_EVENT' data:
0       0
1       0
2       0
3       0
4       1
       ..
4731    1
4855    1
4862    0
4965    0
4972    0
Name: DEATH_EVENT, Length: 1320, dtype: int64


In [None]:
# Splitting the dataset into train and test sets
X = df.drop('DEATH_EVENT', axis=1)
y = df['DEATH_EVENT']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)

In [None]:
# Training and evaluating a Logistic Regression model
log_reg_model = LogisticRegression()
log_reg_model.fit(X_train, y_train)
y_pred_log_reg = log_reg_model.predict(X_test)

In [None]:
# Training and evaluating a Decision Tree Classifier
dt_model = DecisionTreeClassifier()
dt_model.fit(X_train, y_train)
y_pred_dt = dt_model.predict(X_test)

In [None]:
# Training and evaluating a Support Vector Classifier
sv_model = SVC()
sv_model.fit(X_train, y_train)
y_pred_sv = sv_model.predict(X_test)

In [None]:
# Evaluating Logistic Regression model
accuracy_log_reg = accuracy_score(y_test, y_pred_log_reg)
report_log_reg = classification_report(y_test, y_pred_log_reg)
print("\nLogistic Regression Classifier Results:")
print(f"Accuracy: {accuracy_log_reg}")
print(f"Classification Report:\n{report_log_reg}")


Logistic Regression Classifier Results:
Accuracy: 0.8143939393939394
Classification Report:
              precision    recall  f1-score   support

           0       0.84      0.91      0.87       185
           1       0.73      0.59      0.66        79

    accuracy                           0.81       264
   macro avg       0.79      0.75      0.77       264
weighted avg       0.81      0.81      0.81       264



In [None]:
# Evaluating Decision Tree Classifier
accuracy_dt = accuracy_score(y_test, y_pred_dt)
report_dt = classification_report(y_test, y_pred_dt)
print("\nDecision Tree Classifier Results:")
print(f"Accuracy: {accuracy_dt}")
print(f"Classification Report:\n{report_dt}")


Decision Tree Classifier Results:
Accuracy: 0.9393939393939394
Classification Report:
              precision    recall  f1-score   support

           0       0.95      0.96      0.96       185
           1       0.91      0.89      0.90        79

    accuracy                           0.94       264
   macro avg       0.93      0.92      0.93       264
weighted avg       0.94      0.94      0.94       264



In [None]:
# Evaluating Support Vector Classifier
accuracy_sv = accuracy_score(y_test, y_pred_sv)
report_sv = classification_report(y_test, y_pred_sv)
print("\nSupport Vector Classifier Results:")
print(f"Accuracy: {accuracy_sv}")
print(f"Classification Report:\n{report_sv}")


Support Vector Classifier Results:
Accuracy: 0.7007575757575758
Classification Report:
              precision    recall  f1-score   support

           0       0.70      1.00      0.82       185
           1       0.00      0.00      0.00        79

    accuracy                           0.70       264
   macro avg       0.35      0.50      0.41       264
weighted avg       0.49      0.70      0.58       264

