In [7]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline

from sklearn.metrics import (
    confusion_matrix,
    precision_score,
    recall_score,
    f1_score,
    classification_report
)

In [8]:
# Small hand-written dataset: three numeric features plus a binary label.
data = dict(
    age=[22, 25, 30, 35, 40, 45, 50, 55, 60, 65],
    salary=[30000, 35000, 50000, 60000, 65000, 70000, 90000, 120000, 130000, 150000],
    experience=[1, 2, 5, 7, 10, 12, 18, 25, 30, 35],
    target=[0, 0, 0, 0, 1, 1, 1, 1, 1, 1],
)

df = pd.DataFrame(data)

# Every column except the label is a feature (age, salary, experience, in that order).
X = df.drop(columns="target")
y = df["target"]

# Hold out a quarter of the rows for testing. `stratify=y` keeps the class
# ratio in both splits and the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, stratify=y, random_state=42
)

In [9]:
# Exercise 1 — Binary Classification
#   * fit a logistic regression on standardized features
#   * report class probabilities and hard class labels for the test split

# Scaling lives inside the pipeline, so it is fitted on the training rows only
# and then re-applied automatically whenever the pipeline predicts.
pipeline = Pipeline(
    steps=[
        ("scaler", StandardScaler()),
        ("model", LogisticRegression()),
    ]
).fit(X_train, y_train)  # fit() returns the pipeline itself

# One row per test sample; y_prob holds one probability column per class,
# y_pred holds the class label the model picks for each row.
y_prob = pipeline.predict_proba(X_test)
y_pred = pipeline.predict(X_test)

print("Predicted probabilities: ", y_prob, sep="\n")
print("Predicted classes: ", y_pred, sep="\n")

Predicted probabilities: 
[[0.66743192 0.33256808]
 [0.03713094 0.96286906]
 [0.17020142 0.82979858]]
Predicted classes: 
[0 1 1]


In [10]:
# Exercise 2 — Decision Threshold
#   * turn the positive-class probabilities into labels with a custom cutoff
#   * change `threshold`, re-run, and observe precision/recall
#     (pass y_pred_custom to precision_score / recall_score to compare it
#     against the default predictions in y_pred)

threshold = 0.7

# Column 1 of y_prob is used as the positive-class score: rows whose score is
# at or above the cutoff are labelled 1, all others 0.
y_pred_custom = (y_prob[:, 1] >= threshold).astype(int)

# The message is built from `threshold` itself so it cannot go stale when the
# cutoff is edited (it previously hard-coded "0.7").
print(f"Predictions with threshold {threshold}: ")
print(y_pred_custom)

Predictions with threshold 0.7: 
[0 1 1]


In [11]:
# Exercise 3 — Evaluate Model
#   * confusion matrix
#   * precision, recall, F1 and the full per-class report
# All metrics here score the pipeline's own predictions (y_pred) against y_test.

cm = confusion_matrix(y_test, y_pred)
print("Confusion Matrix: ", cm, sep="\n")

# The scalar metrics share one call shape, so drive them from a single table.
for label, metric in (
    ("Precision: ", precision_score),
    ("Recall: ", recall_score),
    ("F1 score: ", f1_score),
):
    print(label, metric(y_test, y_pred))

print("Classification Report: ", classification_report(y_test, y_pred), sep="\n")

Confusion Matrix: 
[[1 0]
 [0 2]]
Precision:  1.0
Recall:  1.0
F1 score:  1.0
Classification Report: 
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         1
           1       1.00      1.00      1.00         2

    accuracy                           1.00         3
   macro avg       1.00      1.00      1.00         3
weighted avg       1.00      1.00      1.00         3



In [12]:
# Exercise 4 — Interpret Coefficients
#   * show the fitted weight of each feature to explain its impact

# Pull the fitted classifier back out of the pipeline by its step name.
model = pipeline.named_steps["model"]

# Label the first row of coef_ with the feature names (one weight per column of X).
# The model was trained behind the StandardScaler step, so these weights apply
# to the standardized features, not to the raw age / salary / experience values.
coefficients = pd.Series(data=model.coef_[0], index=X.columns)

print("Feature Coefficients: ", coefficients, sep="\n")

Feature Coefficients: 
age           0.803437
salary        0.524126
experience    0.576114
dtype: float64
