# Task 5: Train-Test Split & Evaluation Metrics

# 1️⃣ Import Required Libraries

In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix, classification_report

# 2️⃣ Load the Dataset

In [6]:
# Read the heart dataset from a CSV file in the current working directory.
df = pd.read_csv("heart.csv")

# Preview the first five rows to sanity-check the columns and values.
print(df.head())

# DataFrame.info() writes its summary straight to stdout and returns None,
# so it must not be wrapped in print() (that would emit a stray "None" line).
df.info()

   age  sex  cp  trestbps  chol  fbs  restecg  thalach  exang  oldpeak  slope  \
0   67    1   1       140   238    1        1      109      0      5.7      1   
1   57    1   2       118   265    0        0       85      0      3.3      2   
2   43    1   3       159   172    1        1      189      0      5.2      1   
3   71    0   0       103   126    0        1       99      1      3.2      0   
4   36    1   0       149   215    0        2      145      0      3.9      1   

   ca  thal  target  
0   3     2       1  
1   3     3       0  
2   0     3       1  
3   4     2       0  
4   1     0       1  
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 300 entries, 0 to 299
Data columns (total 14 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   age       300 non-null    int64  
 1   sex       300 non-null    int64  
 2   cp        300 non-null    int64  
 3   trestbps  300 non-null    int64  
 4   chol      300 non-null    int64  
 5 

# 3️⃣ Split Features (X) and Target (y)

In [9]:
# The "target" column is the label to predict; every other column is a feature.
y = df["target"]
X = df.drop(columns="target")

# 4️⃣ Train–Test Split

In [12]:
# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# shuffle (and therefore the split) reproducible across runs.
# NOTE(review): the split is not stratified on y — consider stratify=y if the
# class balance between train and test matters.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Report how many rows ended up in each partition.
print("Training samples:", len(X_train))
print("Testing samples:", len(X_test))

Training samples: 240
Testing samples: 60


# 5️⃣ Train Logistic Regression Model

In [15]:
# Build the classifier and fit it on the training partition in one step;
# fit() returns the estimator itself, so `model` is the fitted instance.
# max_iter is raised to 1000 to give the solver more iterations to converge.
model = LogisticRegression(max_iter=1000).fit(X_train, y_train)

print("Model trained successfully!")

Model trained successfully!


# 6️⃣ Predict on Test Data

In [18]:
# Predict a class label for each row of the held-out test features.
y_pred = model.predict(X_test)

In [20]:
# Show the raw predicted labels for a quick visual sanity check.
print(y_pred)

[1 1 0 0 0 0 1 1 1 1 0 1 1 0 1 0 1 0 1 0 1 0 0 1 1 0 0 1 0 1 0 1 0 0 1 0 1
 0 0 1 1 0 0 0 1 1 0 0 0 1 0 1 1 1 1 0 1 0 0 0]


# 7️⃣ Evaluation Metrics

In [23]:
# Score the predictions against the true test labels.
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)

# Print each metric on its own line; the labels are padded so the colons align.
for label, value in (
    ("Accuracy :", accuracy),
    ("Precision:", precision),
    ("Recall   :", recall),
):
    print(label, value)

Accuracy : 0.5
Precision: 0.4827586206896552
Recall   : 0.4827586206896552


# 8️⃣ Confusion Matrix

In [26]:
# Rows correspond to the true labels and columns to the predicted labels.
cm = confusion_matrix(y_test, y_pred)

# The separator reproduces a newline followed by a single space between the
# heading and the matrix.
print("Confusion Matrix:", cm, sep="\n ")

Confusion Matrix:
 [[16 15]
 [15 14]]


# 9️⃣ Full Classification Report

In [28]:
# Build the text summary of per-class precision, recall, F1 and support.
report = classification_report(y_test, y_pred)

# The separator reproduces a newline followed by a single space between the
# heading and the report text.
print("Classification Report:", report, sep="\n ")

Classification Report:
               precision    recall  f1-score   support

           0       0.52      0.52      0.52        31
           1       0.48      0.48      0.48        29

    accuracy                           0.50        60
   macro avg       0.50      0.50      0.50        60
weighted avg       0.50      0.50      0.50        60

