In [7]:
# Step 1: Import necessary libraries
import io
import warnings

import joblib
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder, OrdinalEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report


In [8]:
# Step 2: Upload dataset manually
# Colab-only: files.upload() opens a browser file picker and returns a dict
# mapping each uploaded file name to its raw bytes.
from google.colab import files

uploaded = files.upload()
if not uploaded:
    raise ValueError("No file was uploaded; upload heart_dataset.csv and re-run this cell.")

# Parse the bytes that were just uploaded instead of a hard-coded path.
# When a file with the same name already exists, Colab stores the new upload
# under a different name (e.g. "heart_dataset (1).csv"), so reading
# "heart_dataset.csv" from disk would silently load the older, stale copy.
uploaded_name = next(iter(uploaded))
df = pd.read_csv(io.BytesIO(uploaded[uploaded_name]))

# Quick check
print("Loaded file:", uploaded_name)
print("Shape of dataset:", df.shape)
df.head()


Saving heart_dataset.csv to heart_dataset (1).csv
Shape of dataset: (5, 17)


Unnamed: 0,age,trestbps,chol,fbs,restecg,thalch,exang,oldpeak,slope,ca,thal,sex_Female,sex_Male,cp_asymptomatic,cp_atypical angina,cp_non-anginal,cp_typical angina
0,58,130,220,1,normal,150,False,1.4,flat,0,fixed defect,0,1,0,0,0,1
1,67,160,276,0,lv hypertrophy,108,True,1.5,flat,3,normal,0,1,1,0,0,0
2,42,120,230,0,normal,170,False,1.0,upsloping,0,reversable defect,1,0,0,0,1,0
3,50,130,210,0,lv hypertrophy,158,False,0.8,flat,0,normal,0,1,0,0,1,0
4,45,114,230,0,normal,165,False,1.1,downsloping,0,normal,1,0,0,1,0,0


In [12]:
# Step 3: Make sure a label column exists, then separate features and labels
print("Columns before adding label:", df.columns.tolist())

# If target column is missing, create a dummy one for now (just to proceed)
# ⚠️ Replace this later with the REAL labels from your dataset
if "target" not in df.columns:
    # Placeholder labels carry no information about the features, so every
    # metric computed downstream is meaningless. Warn loudly so the result is
    # never mistaken for a real evaluation.
    warnings.warn(
        "No 'target' column found: using RANDOM placeholder labels. "
        "Accuracy and predictions from this notebook are not meaningful "
        "until real labels are supplied."
    )
    # Seeded generator: the placeholder labels (and therefore everything
    # derived from them) are identical on every Restart & Run All.
    placeholder_rng = np.random.default_rng(42)
    df["target"] = placeholder_rng.integers(0, 2, size=len(df))  # random 0/1 labels

print("Columns after ensuring target:", df.columns.tolist())

# Split features and labels
X = df.drop(columns="target")
y = df["target"]

print("Features shape:", X.shape)
print("Labels shape:", y.shape)


Columns before adding label: ['age', 'trestbps', 'chol', 'fbs', 'restecg', 'thalch', 'exang', 'oldpeak', 'slope', 'ca', 'thal', 'sex_Female', 'sex_Male', 'cp_asymptomatic', 'cp_atypical angina', 'cp_non-anginal', 'cp_typical angina']
Columns after ensuring target: ['age', 'trestbps', 'chol', 'fbs', 'restecg', 'thalch', 'exang', 'oldpeak', 'slope', 'ca', 'thal', 'sex_Female', 'sex_Male', 'cp_asymptomatic', 'cp_atypical angina', 'cp_non-anginal', 'cp_typical angina', 'target']
Features shape: (5, 17)
Labels shape: (5,)


In [15]:
# Step 4: Encode categorical features, split, train and evaluate

# Convert all string/categorical columns into numbers.
# Work on a copy so the raw feature frame X stays untouched and this cell can
# be re-run safely.
X_encoded = X.copy()
categorical_cols = X_encoded.select_dtypes(include=["object", "category"]).columns.tolist()

# One OrdinalEncoder for all categorical feature columns. LabelEncoder is
# meant for the target y, and the per-column encoders used previously were
# overwritten on each loop pass, so new samples could not be encoded the same
# way. `feature_encoder` is kept so the same mapping can be applied later via
# feature_encoder.transform(new_rows[categorical_cols]).
# NOTE(review): integer codes impose an arbitrary order on nominal categories;
# consider one-hot encoding for a linear model.
feature_encoder = None
if categorical_cols:
    feature_encoder = OrdinalEncoder()
    encoded_values = np.asarray(feature_encoder.fit_transform(X_encoded[categorical_cols]))
    # Assign column by column so each column is replaced outright with the
    # numeric codes (and takes the numeric dtype).
    for position, col in enumerate(categorical_cols):
        X_encoded[col] = encoded_values[:, position]

# Also encode y if it's string labels like "normal", "disease"
if y.dtype == "object":
    le_y = LabelEncoder()
    y = le_y.fit_transform(y)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(
    X_encoded, y, test_size=0.2, random_state=42
)

# Train Logistic Regression
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

# Predictions
y_pred = model.predict(X_test)

# Evaluation
# NOTE: with only a handful of rows the test split can contain a single
# sample, so a reported accuracy of 1.0 says nothing about model quality.
print("✅ Model trained successfully")
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))


✅ Model trained successfully
Accuracy: 1.0

Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00         1

    accuracy                           1.00         1
   macro avg       1.00      1.00      1.00         1
weighted avg       1.00      1.00      1.00         1



In [16]:
import joblib

# Persist the fitted classifier to disk so it can be reloaded without retraining.
model_path = "disease_prediction_model.pkl"
joblib.dump(model, model_path)

print(f"✅ Model saved as {model_path}")


✅ Model saved as disease_prediction_model.pkl


In [17]:
# Load model
# NOTE: joblib.load unpickles the file, which can execute arbitrary code;
# only load model files you created yourself or otherwise trust.
loaded_model = joblib.load("disease_prediction_model.pkl")

# Test with new sample input (replace values with real patient data)
# Select the row with a list index so it stays a one-row DataFrame. The model
# was fitted on a DataFrame, so this keeps the column names and per-column
# dtypes. `.iloc[0].values.reshape(1, -1)` instead collapses the mixed-dtype
# row into an unnamed object array and triggers scikit-learn's
# "X does not have valid feature names" warning.
# Real patient data must be encoded exactly like the training features first.
sample = X_test.iloc[[0]]
prediction = loaded_model.predict(sample)

print("Predicted disease:", prediction)


Predicted disease: [0]


