<a href="https://colab.research.google.com/github/Charanalp/Heart_Disease_data/blob/main/heart_diesase.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Data Preprocessing

In [None]:
import pandas as pd

# Load the dataset from the working directory and preview the first rows.
data = pd.read_csv('heart.csv')  # Replace 'heart.csv' with your dataset name
print(data.head())

# Report the number of missing values per column.
print(data.isnull().sum())

# Handle missing data if any.
# `fillna(method='ffill')` is deprecated since pandas 2.1 and no longer
# accepted by newer releases; `DataFrame.ffill()` is the supported equivalent.
# Forward-fill cannot reach NaNs in the first row(s), so a back-fill follows
# to cover them.
data = data.ffill().bfill()


## Model Building (Classification Algorithm)

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Features and Target
X = data.drop('target', axis=1)  # Replace 'target' with your label column
y = data['target']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split stable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Model Training
# Random forests are stochastic (bootstrap sampling and feature subsampling).
# Seeding the estimator makes the metrics below, and the feature importances
# read from `model` later, reproducible across Restart & Run All.
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Predictions on the held-out rows
predictions = model.predict(X_test)

# Accuracy and per-class precision/recall/F1
print(f"Accuracy: {accuracy_score(y_test, predictions) * 100:.2f}%")
print(classification_report(y_test, predictions))


## Predicting Disease Based on New Data



In [None]:
# Example input for prediction: one row, values in the same order as X's columns.
new_data = [[63, 1, 3, 145, 233, 1, 0, 150, 0, 2.3, 0, 0, 1]]  # Adjust as per your dataset

# The model was fitted on a DataFrame, so it remembers the feature names.
# Passing a bare list triggers scikit-learn's "X does not have valid feature
# names" warning and hides column-order mistakes. Wrapping the sample in a
# DataFrame with X's columns keeps names aligned and fails loudly if the
# number of values does not match the number of features.
new_sample = pd.DataFrame(new_data, columns=X.columns)
prediction = model.predict(new_sample)
print("Disease Prediction:", "Positive" if prediction[0] == 1 else "Negative")


In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Heatmap of the pairwise correlations between all columns of the dataset.
fig, ax = plt.subplots(figsize=(12, 8))
correlations = data.corr()
sns.heatmap(correlations, annot=True, cmap='coolwarm', linewidths=0.5, ax=ax)
ax.set_title('Feature Correlation Heatmap')
fig.savefig('correlation_heatmap.png')  # Written to the working directory
plt.show()


In [None]:
# Bar chart of how many rows fall in each target class.
# seaborn >= 0.13 deprecates `palette` without `hue`; assigning the x variable
# to `hue` and disabling the legend gives the same per-class colouring
# without the FutureWarning.
sns.countplot(x='target', hue='target', data=data, palette='Set2', legend=False)
plt.title('Disease Distribution (0 = No Disease, 1 = Disease)')
plt.xlabel('Disease Status')
plt.ylabel('Count')
plt.savefig('disease_distribution.png')
plt.show()


In [None]:
# Scatter of age against cholesterol, coloured by the target label.
fig, ax = plt.subplots(figsize=(10, 6))
sns.scatterplot(data=data, x='age', y='chol', hue='target', palette='coolwarm', ax=ax)
ax.set_title('Age vs Cholesterol with Disease Status')
fig.savefig('age_vs_cholesterol.png')
plt.show()


In [None]:
import numpy as np

# Feature importance from Random Forest
feature_importance = model.feature_importances_
features = np.array(X.columns)

# Rank features from most to least important so the chart reads top-down.
# The original arrays are left untouched.
ranking = np.argsort(feature_importance)[::-1]
ranked_importance = feature_importance[ranking]
ranked_features = features[ranking]

# Bar plot
# seaborn >= 0.13 deprecates `palette` without `hue`; assigning the y variable
# to `hue` and disabling the legend keeps one colour per bar without the
# FutureWarning.
plt.figure(figsize=(12, 6))
sns.barplot(
    x=ranked_importance,
    y=ranked_features,
    hue=ranked_features,
    palette='viridis',
    legend=False,
)
plt.title('Feature Importance in Disease Prediction')
plt.savefig('feature_importance.png')
plt.show()


In [None]:
from sklearn.metrics import roc_curve, auc

# ROC Curve
# A ROC curve sweeps a decision threshold over continuous scores. Feeding it
# the hard 0/1 output of `predict` collapses the curve to a single operating
# point and gives a misleading AUC, so the predicted probability of the
# positive class is used instead.
# Assumes a binary target whose positive label is the second entry of
# `model.classes_` (e.g. 1) -- confirm for your dataset.
positive_scores = model.predict_proba(X_test)[:, 1]
fpr, tpr, thresholds = roc_curve(y_test, positive_scores)
roc_auc = auc(fpr, tpr)

plt.figure(figsize=(8, 6))
plt.plot(fpr, tpr, color='darkorange', lw=2, label='ROC curve (area = %0.2f)' % roc_auc)
# Diagonal reference line: the expected curve of a random classifier.
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic (ROC) Curve')
plt.legend(loc='lower right')
plt.savefig('roc_curve.png')
plt.show()
