In [None]:
# 🔧 Step 1: Import Required Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# To show plots in Jupyter notebooks
%matplotlib inline
# 👉 These are widely used third-party Python libraries (not part of the standard library), used for:
# •	pandas, numpy → data handling
# •	matplotlib, seaborn → data visualization

# 🧱 Step 2: Create Synthetic Dataset for Binary Classification

# make_classification generates a synthetic (fake) dataset that is handy for
# training and testing machine learning models.
from sklearn.datasets import make_classification

# Build a binary-classification dataset: 1000 samples, 10 features, 2 classes.
# random_state=42 fixes the random seed, so the same dataset is produced on
# every run.
X, y = make_classification(
    n_samples=1000,
    n_features=10,
    n_classes=2,
    random_state=42,
)
# 👉 What comes back:
# •	X = independent features (inputs)
# •	y = target labels (0 or 1)

#📊 Step 3: Convert to DataFrame (Optional) 
# Wrap the feature matrix in a DataFrame with readable column names
# (Feature_0, Feature_1, ...) instead of the default integer labels, so the
# preview matches the table shown below.
df = pd.DataFrame(X, columns=[f"Feature_{i}" for i in range(X.shape[1])])
# Preview the first five rows (Jupyter renders this when it is the last
# expression in a cell).
df.head()

# | Row | Feature\_0 | Feature\_1 | Feature\_2 | Feature\_3 | Feature\_4 | Feature\_5 | Feature\_6 | Feature\_7 | Feature\_8 | Feature\_9 |
# | --- | ---------- | ---------- | ---------- | ---------- | ---------- | ---------- | ---------- | ---------- | ---------- | ---------- |
# | 0   | 0.9648     | -0.0664    | 0.9868     | -0.3581    | 0.9973     | 1.1819     | -1.6157    | -1.2102    | -0.6281    | 1.2273     |
# | 1   | -0.9165    | -0.5664    | -1.0086    | 0.8316     | -1.1770    | 1.8205     | 1.7524     | -0.9845    | 0.3639     | 0.2095     |
# | 2   | -0.1095    | -0.4328    | -0.4576    | 0.7938     | -0.2686    | -1.8364    | 1.2391     | -0.2464    | -1.0581    | -0.2974    |
# | 3   | 1.7504     | 2.0236     | 1.6882     | 0.0068     | -1.6077    | 0.1847     | -2.6194    | -0.3574    | -1.4731    | -0.1900    |
# | 4   | -0.2247    | -0.7113    | -0.2208    | 0.1171     | 1.5361     | 0.5975     | 0.3486     | -0.9392    | 0.1759     | 0.2362     |


#✂️ Step 4: Train-Test Split
from sklearn.model_selection import train_test_split

# Hold out 30% of the samples as a test set; the fixed random_state keeps the
# split identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.30,
    random_state=42,
)

#👉 Always split your data before training so the model can be evaluated on data it has never seen.

# 🤖 Step 5: Train Logistic Regression Model
from sklearn.linear_model import LogisticRegression

# Build the classifier with its default settings and train it on the training
# split in one step (fit() returns the fitted estimator itself).
model = LogisticRegression().fit(X_train, y_train)

# 🔮 Step 6: Make Predictions
# Hard class labels for the test set.
y_pred = model.predict(X_test)
# 👉 y_pred holds the predicted class (0 or 1) for each test sample.

#📈 Optional: Get Class Probabilities
# One row per test sample: column 0 is P(class 0), column 1 is P(class 1).
y_prob = model.predict_proba(X_test)
y_prob[:5]
# array([[0.77447791, 0.22552209],
#        [0.0336685 , 0.9663315 ],
#        [0.67068215, 0.32931785],
#        [0.0798668 , 0.9201332 ],
#        [0.97661665, 0.02338335]])

# 🧠 Simple Summary
# You're asking the model: "How sure are you?"
# Instead of giving you just 0 or 1, it gives you something like:
# ➤ [0.77, 0.23] → "77% chance of class 0, 23% chance of class 1" (the two values always add up to 1)

# 📏 Step 7: Evaluate the Model
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Accuracy: fraction of test samples whose predicted label matches the true one
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Confusion matrix: rows are the actual classes, columns the predicted classes
cm = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:\n", cm)

# Classification report: per-class precision, recall and F1-score
report = classification_report(y_test, y_pred)
print("Classification Report:\n", report)
# 👉 What each metric tells you:
# •	accuracy_score → share of predictions that were correct overall
# •	confusion_matrix → counts of TN, FP, FN and TP
# •	classification_report → precision, recall, and F1-score for each class


# Accuracy: 0.8466666666666667
# Confusion Matrix:
#  [[118  17]
#  [ 29 136]]
# Classification Report:
#                precision    recall  f1-score   support

#            0       0.80      0.87      0.84       135
#            1       0.89      0.82      0.86       165

#     accuracy                           0.85       300
#    macro avg       0.85      0.85      0.85       300
# weighted avg       0.85      0.85      0.85       300

# 118 (True Negatives): The model correctly predicted class 0 when it was actually class 0.

# 17 (False Positives): The model predicted class 1, but it was actually class 0 (wrong prediction).

# 29 (False Negatives): The model predicted class 0, but it was actually class 1 (wrong prediction).

# 136 (True Positives): The model correctly predicted class 1 when it was actually class 1.


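# Actual=1, Predicted=1 -> True Positive  (correct)
# Actual=0, Predicted=0 -> True Negative  (correct)
# Actual=0, Predicted=1 -> False Positive (error, also called a Type I error)
# Actual=1, Predicted=0 -> False Negative (error, also called a Type II error)
# Note: both kinds of error are mistakes; which one is more costly depends on the problem
# (e.g. a false negative in disease screening can be worse than a false positive).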


Accuracy: 0.8466666666666667
Confusion Matrix:
 [[118  17]
 [ 29 136]]
Classification Report:
               precision    recall  f1-score   support

           0       0.80      0.87      0.84       135
           1       0.89      0.82      0.86       165

    accuracy                           0.85       300
   macro avg       0.85      0.85      0.85       300
weighted avg       0.85      0.85      0.85       300

