<a href="https://colab.research.google.com/github/NithyaDugyala/Projects/blob/main/Heart_Failure_Predction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

 D.Nithya

# Logistic Regression

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Load the heart-disease table and preview the first rows.
file_path = '/content/heart.csv'
data = pd.read_csv(file_path)
print(data.head())

# Turn the string-valued columns into integer codes. One encoder object is
# refitted for each column in turn, so after the loop it only remembers the
# mapping of the last column processed.
label_encoder = LabelEncoder()
categorical_columns = ['ChestPainType', 'RestingECG', 'ExerciseAngina', 'ST_Slope', 'Sex']
for col in categorical_columns:
    data[col] = label_encoder.fit_transform(data[col])

# Separate the target from the predictors and hold out 30% for testing.
y = data['HeartDisease']
X = data.drop(columns='HeartDisease')
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Standardise with statistics learned from the training split only, then
# apply the same transformation to both splits.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Train the classifier and predict the held-out labels.
logistic_regression_model = LogisticRegression(max_iter=1000, random_state=42).fit(X_train, y_train)
y_pred = logistic_regression_model.predict(X_test)

# Report accuracy, per-class metrics and the confusion matrix.
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy * 100:.2f}%')
print('Classification Report:')
print(classification_report(y_test, y_pred))
print('Confusion Matrix:')
print(confusion_matrix(y_test, y_pred))


   Age Sex ChestPainType  RestingBP  Cholesterol  FastingBS RestingECG  MaxHR  \
0   40   M           ATA        140          289          0     Normal    172   
1   49   F           NAP        160          180          0     Normal    156   
2   37   M           ATA        130          283          0         ST     98   
3   48   F           ASY        138          214          0     Normal    108   
4   54   M           NAP        150          195          0     Normal    122   

  ExerciseAngina  Oldpeak ST_Slope  HeartDisease  
0              N      0.0       Up             0  
1              N      1.0     Flat             1  
2              N      0.0       Up             0  
3              Y      1.5     Flat             1  
4              N      0.0       Up             0  
Accuracy: 87.68%
Classification Report:
              precision    recall  f1-score   support

           0       0.82      0.89      0.85       112
           1       0.92      0.87      0.89       164

   

# KNN with Euclidean distance

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

# Load the heart-disease table.
file_path = '/content/heart.csv'
data = pd.read_csv(file_path)


# Only the numeric columns are used, so no categorical encoding is needed.
numerical_features = ['Age', 'RestingBP', 'Cholesterol', 'FastingBS', 'MaxHR', 'Oldpeak']
X = data[numerical_features].values
y = data['HeartDisease'].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Euclidean distance is dominated by whichever feature has the largest
# numeric range, so put every feature on a comparable scale first.
# The mean and standard deviation come from the training split only, so no
# information from the test split leaks into the preprocessing.
feature_mean = X_train.mean(axis=0)
feature_std = X_train.std(axis=0)
feature_std[feature_std == 0] = 1.0  # constant column: avoid division by zero
X_train = (X_train - feature_mean) / feature_std
X_test = (X_test - feature_mean) / feature_std

def euclidean_distance(point1, point2):
    """Return the straight-line (L2) distance between two points.

    The inputs must support element-wise subtraction (e.g. NumPy arrays of
    the same shape). The result is the square root of the sum of the
    squared coordinate differences.
    """
    delta = point1 - point2
    squared_total = np.sum(np.square(delta))
    return np.sqrt(squared_total)

def knn(X_train, y_train, query_point, k=5):
    """Classify ``query_point`` by majority vote among its k nearest training samples.

    Distances are Euclidean and are computed against every training sample
    in one vectorised NumPy operation.

    Parameters
    ----------
    X_train : array-like
        Training samples, one per row.
    y_train : array-like
        Labels aligned with the rows of ``X_train``.
    query_point : array-like
        Feature vector to classify.
    k : int, default 5
        Number of neighbours that vote. If ``k`` exceeds the number of
        training samples, every training sample votes.

    Returns
    -------
    The most frequent label among the k nearest neighbours. When several
    labels are tied, the smallest one in sorted order is returned, so the
    result is deterministic.

    Raises
    ------
    ValueError
        If ``k`` is smaller than 1 or the training set is empty.
    """
    if k < 1:
        # A negative k would otherwise slice "all but the last |k|" neighbours
        # and silently vote over the wrong set.
        raise ValueError(f"k must be at least 1, got {k}")

    features = np.asarray(X_train, dtype=float)
    labels = np.asarray(y_train)
    if len(features) == 0:
        raise ValueError("X_train must contain at least one sample")

    # Flatten each sample so that scalar and multi-dimensional samples are
    # handled the same way as plain feature vectors.
    features = features.reshape(len(features), -1)
    query = np.asarray(query_point, dtype=float).reshape(-1)

    # One broadcast subtraction replaces the per-row Python loop.
    distances = np.sqrt(np.sum((features - query) ** 2, axis=1))

    # A stable sort keeps equally distant samples in training order.
    nearest_indices = np.argsort(distances, kind="stable")[:k]
    nearest_labels = labels[nearest_indices]

    # np.unique returns the labels sorted, and argmax picks the first
    # maximum, so a tied vote resolves to the smallest label.
    candidates, votes = np.unique(nearest_labels, return_counts=True)
    return candidates[np.argmax(votes)]


def calculate_accuracy(X_train, y_train, X_test, y_test, k=5):
    """Return the fraction of test samples that ``knn`` labels correctly.

    Each sample ``X_test[i]`` is classified against the training data with
    ``k`` neighbours and compared to ``y_test[i]``. The number of matches is
    divided by ``len(y_test)``.
    """
    hits = sum(
        1
        for idx in range(len(X_test))
        if knn(X_train, y_train, X_test[idx], k) == y_test[idx]
    )
    return hits / len(y_test)


# Score the hand-written classifier on the held-out split using k neighbours.
k = 5
accuracy = calculate_accuracy(X_train, y_train, X_test, y_test, k)
# calculate_accuracy returns a fraction, so scale it by 100 for display.
print(f"Accuracy of KNN with k={k}: {accuracy * 100:.2f}%")


Accuracy of KNN with k=5: 71.74%


# Decision Tree

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score


# Load the heart-disease table.
data = pd.read_csv('/content/heart.csv')

# Replace every object-typed column with integer codes, using a fresh
# encoder for each column.
for column in data.select_dtypes(include='object').columns:
    data[column] = LabelEncoder().fit_transform(data[column])

# Separate the target from the predictors and hold out 20% for testing.
y = data["HeartDisease"]
X = data.drop(columns="HeartDisease")
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardise with statistics learned from the training split only, then
# apply the same transformation to both splits.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Train the tree with a fixed seed and predict the held-out labels.
clf = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)
y_pred = clf.predict(X_test)

# accuracy_score returns a fraction, which is printed as-is.
accuracy = accuracy_score(y_test, y_pred)
print("Decision Tree accuracy:", accuracy)

Decision Tree accuracy: 0.7880434782608695


# SVM

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Load the heart-disease table.
data = pd.read_csv('/content/heart.csv')


# Encode each string-valued column with its own encoder, kept in a dict so
# every column's mapping stays available after the loop.
categorical_cols = ['Sex', 'ChestPainType', 'RestingECG', 'ExerciseAngina', 'ST_Slope']
label_encoders = {col: LabelEncoder() for col in categorical_cols}

for col in categorical_cols:
    data[col] = label_encoders[col].fit_transform(data[col])

# Separate the target from the predictors and hold out 30% for testing.
X = data.drop(columns='HeartDisease')
y = data['HeartDisease']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# SVC's default RBF kernel is distance-based, so features with large numeric
# ranges would otherwise dominate the decision function. Standardise using
# statistics from the training split only, as the other model cells do.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

svm_model = SVC()

svm_model.fit(X_train, y_train)

y_pred = svm_model.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)

print(f'Accuracy of SVM model: {accuracy * 100:.2f}%')


Accuracy of SVM model: 72.46%
