<a href="https://colab.research.google.com/github/pavankishore-AIDS/disease-prediction/blob/main/Mini_Project_Final.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Import libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

In [None]:
# Read CSV file
data = pd.read_csv('heart.csv')

# Pre-process the data
data.dropna(inplace=True)

In [None]:
# Separate features and target variable
X = data.drop('target', axis=1)
y = data['target']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
# Train KNN model
knn_model = KNeighborsClassifier(n_neighbors=5)
knn_model.fit(X_train_scaled, y_train)

# Predictions
knn_predictions = knn_model.predict(X_test_scaled)

# Evaluate the model
knn_accuracy = accuracy_score(y_test, knn_predictions)
print(f'KNN Accuracy: {knn_accuracy}')
print('Classification Report:\n', classification_report(y_test, knn_predictions))

KNN Accuracy: 0.8341463414634146
Classification Report:
               precision    recall  f1-score   support

           0       0.88      0.77      0.82       102
           1       0.80      0.89      0.84       103

    accuracy                           0.83       205
   macro avg       0.84      0.83      0.83       205
weighted avg       0.84      0.83      0.83       205



In [None]:
# Train Decision Tree model
dt_model = DecisionTreeClassifier(random_state=42)
dt_model.fit(X_train, y_train)

# Predictions
dt_predictions = dt_model.predict(X_test)

# Evaluate the model
dt_accuracy = accuracy_score(y_test, dt_predictions)
print(f'Decision Tree Accuracy: {dt_accuracy}')
print('Classification Report:\n', classification_report(y_test, dt_predictions))

Decision Tree Accuracy: 0.9853658536585366
Classification Report:
               precision    recall  f1-score   support

           0       0.97      1.00      0.99       102
           1       1.00      0.97      0.99       103

    accuracy                           0.99       205
   macro avg       0.99      0.99      0.99       205
weighted avg       0.99      0.99      0.99       205



In [None]:
# Compare Models
print(f'KNN Accuracy: {knn_accuracy}')
print(f'Decision Tree Accuracy: {dt_accuracy}')

KNN Accuracy: 0.8341463414634146
Decision Tree Accuracy: 0.9853658536585366


In [None]:
# Create a Random Forest using KNN and Decision Tree as base estimators
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

# Predictions
rf_predictions = rf_model.predict(X_test)

# Evaluate the model
rf_accuracy = accuracy_score(y_test, rf_predictions)
print(f'Random Forest Accuracy: {rf_accuracy}')
print('Classification Report:\n', classification_report(y_test, rf_predictions))

Random Forest Accuracy: 0.9853658536585366
Classification Report:
               precision    recall  f1-score   support

           0       0.97      1.00      0.99       102
           1       1.00      0.97      0.99       103

    accuracy                           0.99       205
   macro avg       0.99      0.99      0.99       205
weighted avg       0.99      0.99      0.99       205



In [None]:
# Function to take user input for feature values
def get_user_input():
    age = float(input("Enter age: "))
    sex = float(input("Enter sex (0 for female, 1 for male): "))
    cp = float(input("Enter chest pain type: "))
    trestbps = float(input("Enter resting blood pressure: "))
    chol = float(input("Enter serum cholesterol: "))
    fbs = float(input("Enter fasting blood sugar (0 if < 120 mg/dl, 1 if >= 120 mg/dl): "))
    restecg = float(input("Enter resting electrocardiographic results: "))
    thalach = float(input("Enter maximum heart rate achieved: "))
    exang = float(input("Enter exercise-induced angina (0 for no, 1 for yes): "))
    oldpeak = float(input("Enter ST depression induced by exercise relative to rest: "))
    slope = float(input("Enter the slope of the peak exercise ST segment: "))
    ca = float(input("Enter number of major vessels colored by fluoroscopy: "))
    thal = float(input("Enter thalassemia type: "))

    user_data = np.array([age, sex, cp, trestbps, chol, fbs, restecg, thalach, exang, oldpeak, slope, ca, thal]).reshape(1, -1)

    return user_data

# Get user input
user_data = get_user_input()

Enter age: 56
Enter sex (0 for female, 1 for male): 0
Enter chest pain type: 1
Enter resting blood pressure: 140
Enter serum cholesterol: 204
Enter fasting blood sugar (0 if < 120 mg/dl, 1 if >= 120 mg/dl): 0
Enter resting electrocardiographic results: 0
Enter maximum heart rate achieved: 109
Enter exercise-induced angina (0 for no, 1 for yes): 0
Enter ST depression induced by exercise relative to rest: 0
Enter the slope of the peak exercise ST segment: 1
Enter number of major vessels colored by fluoroscopy: 2
Enter thalassemia type: 2


In [None]:
# Make knn predictions
knn_prediction = knn_model.predict(user_data)
print(knn_prediction)

[1]


In [None]:
# Print the knn result
if knn_prediction == 1:
  print("Sorry, You have heart disease")
else:
  print("Congratulations, You do not have heart disease")

Sorry, You have heart disease


In [None]:
# Make dt predictions
dt_prediction = dt_model.predict(user_data)
print(dt_prediction)

[0]




In [None]:
# Print the dt result
if dt_prediction == 1:
  print("Sorry, You have heart disease")
else:
  print("Congratulations, You do not have heart disease")

Congratulations, You do not have heart disease
