In [None]:
import math

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

age
age in years


sex
(1 = male; 0 = female)


cp
chest pain type


trestbps
resting blood pressure (in mm Hg on admission to the hospital)


chol
serum cholesterol in mg/dl


fbs
(fasting blood sugar > 120 mg/dl) (1 = true; 0 = false)


restecg
resting electrocardiographic results


thalach
maximum heart rate achieved


exang
exercise induced angina (1 = yes; 0 = no)


oldpeak
ST depression induced by exercise relative to rest

In [None]:
# Location of the heart-disease CSV; read it into the working DataFrame.
HEART_CSV_PATH = "/content/heart.csv"
df = pd.read_csv(HEART_CSV_PATH)

In [None]:
# Show the loaded DataFrame as this cell's output.
df

In [None]:
# Print the column names, non-null counts and dtypes of the DataFrame.
df.info()

In [None]:
# Count the missing values in each column.
df.isna().sum()

In [None]:
# Histogram (with KDE overlay) of every numeric column, one panel per column.
num_cols = df.select_dtypes(include=np.number).columns

# Size the grid from the number of columns: a fixed 3x3 layout raises
# ValueError as soon as there are more than nine numeric columns.
n_cols = 3
n_rows = max(1, math.ceil(len(num_cols) / n_cols))

fig, axes = plt.subplots(
    n_rows, n_cols, figsize=(12, 3.5 * n_rows), squeeze=False
)
panels = axes.ravel()

for ax, col in zip(panels, num_cols):
    sns.histplot(df[col], kde=True, bins=30, ax=ax)
    ax.set_title(f"Distribution of {col}", fontsize=12, fontweight="bold")
    ax.set_xlabel(col, fontsize=10)
    ax.set_ylabel("Frequency", fontsize=10)
    ax.grid(alpha=0.3)

# Hide the grid cells left over when the column count is not a multiple of n_cols.
for ax in panels[len(num_cols):]:
    ax.set_visible(False)

fig.suptitle("Distribution of Numerical Features", fontsize=16, fontweight="bold", y=1.02)
fig.tight_layout()
plt.show()




In [None]:
# Number of rows for each value of the target label.
target_counts = df["target"].value_counts()
target_counts


In [None]:
# One box per numeric column on a shared axis, to eyeball outliers.
fig, ax = plt.subplots(figsize=(16, 8))
sns.boxplot(data=df[num_cols], ax=ax)

# Tilt the column names so they do not overlap.
ax.tick_params(axis="x", labelrotation=45)
ax.set_title(
    "Boxplot of Numerical Features (Outlier Detection)",
    fontsize=16,
    fontweight="bold",
)

fig.tight_layout()
plt.show()

In [None]:

# Relationship between each numeric feature and the target label.
# The target column itself is excluded: plotting it against itself is noise.
feature_cols = [col for col in num_cols if col != "target"]

n_cols = 3
n_rows = max(1, math.ceil(len(feature_cols) / n_cols))

fig, axes = plt.subplots(
    n_rows, n_cols, figsize=(16, 5 * n_rows), squeeze=False
)
panels = axes.ravel()

for ax, col in zip(panels, feature_cols):
    # Features with only a handful of distinct values are treated as categorical.
    if df[col].nunique() <= 6:
        sns.countplot(x=col, hue="target", data=df, ax=ax)
        ax.set_xlabel(col)
        ax.set_ylabel("Count")
    else:
        # For box plots the x axis is the target and the y axis is the feature
        # value, so label them accordingly instead of "Count".
        sns.boxplot(x="target", y=col, data=df, ax=ax)
        ax.set_xlabel("target")
        ax.set_ylabel(col)

    ax.set_title(f"{col} vs Target", fontweight="bold")

# Hide the grid cells left over when the feature count is not a multiple of n_cols.
for ax in panels[len(feature_cols):]:
    ax.set_visible(False)

fig.tight_layout()
plt.show()

In [None]:
# Annotated heatmap of the pairwise correlations between all columns.
fig, ax = plt.subplots(figsize=(10, 10))
correlations = df.corr()
sns.heatmap(correlations, annot=True, ax=ax)

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [None]:
# Separate the label column from the predictor columns.
y = df["target"]
X = df.drop(columns="target")

# Hold out 40% of the rows for testing; the seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.4,
    random_state=42,
)

In [None]:
# Learn the scaling statistics from the training rows only, then apply the
# same transformation to both the training and the test rows.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [None]:

# Fit a 1-nearest-neighbour classifier on the scaled training rows and
# report its accuracy on the scaled test rows.
knn = KNeighborsClassifier(n_neighbors=1, algorithm="auto").fit(
    X_train_scaled, y_train
)
y_pred = knn.predict(X_test_scaled)

acc = accuracy_score(y_test, y_pred)
print(acc)



In [None]:
# Text summary of the test-set predictions: confusion matrix, then the
# per-class classification report.
cm = confusion_matrix(y_test, y_pred)
print("\nCONFUSION MATRIX:")
print(cm)

report_text = classification_report(y_test, y_pred)
print("\nCLASSIFICATION REPORT:")
print(report_text)

# The same confusion matrix drawn as an annotated heatmap.
fig, ax = plt.subplots(figsize=(5, 4))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", ax=ax)
ax.set_title("KNN Confusion Matrix")
ax.set_xlabel("Predicted")
ax.set_ylabel("Actual")
fig.tight_layout()
plt.show()


In [None]:
def _ask_number(prompt, cast=int, bounds=None):
    """Prompt repeatedly until the user enters a usable number.

    Parameters
    ----------
    prompt : str
        Text shown to the user.
    cast : callable
        Converter applied to the raw text (``int`` or ``float``).
    bounds : tuple or None
        Optional inclusive ``(low, high)`` range the value must fall in.

    Returns
    -------
    The converted value, guaranteed finite and inside ``bounds`` when given.
    """
    while True:
        raw = input(prompt)
        try:
            value = cast(raw)
        except ValueError:
            print(f"Invalid value {raw!r}; please enter a number.")
            continue
        # float() accepts "nan" and "inf", which the model cannot use.
        if not np.isfinite(value):
            print("Value must be a finite number.")
            continue
        if bounds is not None and not (bounds[0] <= value <= bounds[1]):
            print(f"Value must be between {bounds[0]} and {bounds[1]}.")
            continue
        return value


# Column order used to build the single-row frame passed to the scaler.
feature_names = ['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs',
                 'restecg', 'thalach', 'exang', 'oldpeak',
                 'slope', 'ca', 'thal']

# Prompt text, converter and allowed range for each feature. Ranges are given
# only where the prompt itself states the valid codes.
_field_prompts = {
    'age': ("Age: ", int, None),
    'sex': ("Sex (1 = Male, 0 = Female): ", int, (0, 1)),
    'cp': ("Chest Pain Type (0–3): ", int, (0, 3)),
    'trestbps': ("Resting Blood Pressure: ", int, None),
    'chol': ("Serum Cholesterol: ", int, None),
    'fbs': ("Fasting Blood Sugar > 120 mg/dl (1 = Yes, 0 = No): ", int, (0, 1)),
    'restecg': ("Resting ECG (0,1,2): ", int, (0, 2)),
    'thalach': ("Maximum Heart Rate Achieved: ", int, None),
    'exang': ("Exercise Induced Angina (1 = Yes, 0 = No): ", int, (0, 1)),
    'oldpeak': ("ST Depression (oldpeak): ", float, None),
    'slope': ("Slope (0,1,2): ", int, (0, 2)),
    'ca': ("Number of Major Vessels (0–4): ", int, (0, 4)),
    'thal': ("Thalassemia (0,1,2,3): ", int, (0, 3)),
}

# Ask for the features in the same order as feature_names.
user_inputs = {name: _ask_number(*_field_prompts[name]) for name in feature_names}

user_df = pd.DataFrame([user_inputs], columns=feature_names)
user_scaled = scaler.transform(user_df)
prediction = knn.predict(user_scaled)[0]
probability = knn.predict_proba(user_scaled)[0]

# predict_proba columns follow knn.classes_, so look up the column for label 1
# instead of assuming it sits at index 1.
_classes = list(knn.classes_)
risk = probability[_classes.index(1)] if 1 in _classes else 0.0

print("\n---------------------------")
if prediction == 1:
    print("Heart Disease Detected")
else:
    print(" No Heart Disease Detected")

print(f"Risk Probability: {risk*100:.2f}%")
if getattr(knn, "n_neighbors", None) == 1:
    # With a single neighbour the vote share is all-or-nothing.
    print("Note: with n_neighbors=1 the probability can only be 0% or 100%.")
print("---------------------------")
