In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [6]:
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score,classification_report

#===============Generate classification data with random sample==============
X, y = make_classification(
    n_samples=1000,   
    n_features=20,   
    n_informative=15,  
    n_redundant=5,   
    n_classes=2,    
    random_state=42
)

#================ Split into training and test sets==========================
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [3]:
# ================Create and train a logistic regression model==========
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

# ========== predictions ===================
y_pred = model.predict(X_test)

In [7]:
y_pred

array([0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0,
       1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1,
       0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1,
       1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0,
       1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0,
       1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1,
       0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0,
       0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0,
       0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1,
       0, 1])

In [5]:
model.predict_proba(X_test)

array([[0.98015042, 0.01984958],
       [0.03575693, 0.96424307],
       [0.66231476, 0.33768524],
       [0.89556584, 0.10443416],
       [0.011311  , 0.988689  ],
       [0.15391743, 0.84608257],
       [0.51575421, 0.48424579],
       [0.93409708, 0.06590292],
       [0.11457533, 0.88542467],
       [0.50357104, 0.49642896],
       [0.11050087, 0.88949913],
       [0.8953053 , 0.1046947 ],
       [0.02519696, 0.97480304],
       [0.04601005, 0.95398995],
       [0.12106464, 0.87893536],
       [0.02370392, 0.97629608],
       [0.92397742, 0.07602258],
       [0.01162396, 0.98837604],
       [0.48695542, 0.51304458],
       [0.02210227, 0.97789773],
       [0.14587681, 0.85412319],
       [0.76032545, 0.23967455],
       [0.19984035, 0.80015965],
       [0.05362629, 0.94637371],
       [0.5271698 , 0.4728302 ],
       [0.86512449, 0.13487551],
       [0.3509046 , 0.6490954 ],
       [0.42109268, 0.57890732],
       [0.98349101, 0.01650899],
       [0.67688761, 0.32311239],
       [0.

In [8]:
# ==================Evaluate the model===================
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Precision:", precision_score(y_test, y_pred))
print("Recall:", recall_score(y_test, y_pred))
print("F1 Score:", f1_score(y_test, y_pred))
print(classification_report(y_test,y_pred))

Accuracy: 0.825
Precision: 0.8172043010752689
Recall: 0.8085106382978723
F1 Score: 0.8128342245989305
              precision    recall  f1-score   support

           0       0.83      0.84      0.84       106
           1       0.82      0.81      0.81        94

    accuracy                           0.82       200
   macro avg       0.82      0.82      0.82       200
weighted avg       0.82      0.82      0.82       200



## Hyperparameter Tunning and Croos ValidatioN

In [10]:
from sklearn.model_selection import train_test_split, GridSearchCV


#===============Set up hyperparameter grid========================
param_grid = {
    'C': [0.01, 0.1, 1, 10],             
    'penalty': ['l1', 'l2'],           
    'solver': ['liblinear']           
}


In [11]:
# =========Set up GridSearch with 5-fold cross-validation ==============
grid = GridSearchCV(LogisticRegression(max_iter=1000), param_grid, cv=5, scoring='f1', n_jobs=-1)
grid.fit(X_train, y_train)

In [12]:
#============Get the best model==============
best_model = grid.best_estimator_
print("Best Hyperparameters:", grid.best_params_)

Best Hyperparameters: {'C': 1, 'penalty': 'l2', 'solver': 'liblinear'}


In [13]:

# ========== Evaluate on test set===========
y_pred = best_model.predict(X_test)

print("Accuracy:", accuracy_score(y_test, y_pred))
print("Precision:", precision_score(y_test, y_pred))
print("Recall:", recall_score(y_test, y_pred))
print("F1 Score:", f1_score(y_test, y_pred))

Accuracy: 0.825
Precision: 0.8172043010752689
Recall: 0.8085106382978723
F1 Score: 0.8128342245989305


## Randomize search CV

In [None]:
from sklearn.model_selection import train_test_split, RandomizedSearchCV

# ========== Randomized search with 20 iterations and 5-fold CV =============
random_search = RandomizedSearchCV(
    estimator=LogisticRegression(),
    param_distributions= param_grid ,
    n_iter=20,
    cv=5,
    scoring='f1',
    random_state=42,
    n_jobs=-1
)


In [None]:
#======= Fit model============
random_search.fit(X_train, y_train)

#========== Evaluate================
best_model = random_search.best_estimator_
print("Best Hyperparameters:", random_search.best_params_)

y_pred = best_model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Precision:", precision_score(y_test, y_pred))
print("Recall:", recall_score(y_test, y_pred))
print("F1 Score:", f1_score(y_test, y_pred))s
print(classification_report(y_pred,y_test))



Best Hyperparameters: {'solver': 'liblinear', 'penalty': 'l2', 'C': 1}
Accuracy: 0.825
Precision: 0.8172043010752689
Recall: 0.8085106382978723
F1 Score: 0.8128342245989305
              precision    recall  f1-score   support

           0       0.84      0.83      0.84       107
           1       0.81      0.82      0.81        93

    accuracy                           0.82       200
   macro avg       0.82      0.82      0.82       200
weighted avg       0.83      0.82      0.83       200



## Multiclass classification problem

In [23]:
# ============= Generate a multiclass classification dataset=============
X, y = make_classification(
    n_samples=1000,
    n_features=20,
    n_informative=15,
    n_classes=3,          # Multiclass (3 classes)
    n_clusters_per_class=1,
    random_state=42
)

# =============Split the data================
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

X_test
y

array([0, 1, 2, 2, 0, 1, 2, 2, 2, 1, 2, 0, 2, 2, 1, 1, 1, 2, 2, 1, 0, 0,
       1, 0, 2, 0, 0, 1, 1, 0, 0, 2, 0, 0, 1, 0, 0, 1, 2, 1, 1, 1, 2, 1,
       0, 0, 1, 1, 0, 0, 1, 2, 2, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1,
       0, 0, 2, 2, 2, 2, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 2, 1, 1, 1, 2, 1,
       1, 0, 2, 2, 1, 0, 2, 1, 2, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1,
       2, 0, 1, 2, 0, 2, 0, 2, 0, 1, 1, 1, 2, 0, 1, 0, 2, 2, 0, 0, 0, 0,
       0, 1, 2, 2, 1, 0, 0, 2, 0, 1, 2, 0, 2, 0, 2, 1, 0, 1, 2, 1, 0, 1,
       2, 1, 0, 2, 1, 0, 1, 2, 2, 2, 1, 1, 1, 1, 0, 0, 1, 0, 2, 2, 0, 0,
       1, 0, 1, 2, 2, 2, 0, 2, 2, 2, 0, 0, 2, 1, 2, 1, 0, 2, 1, 1, 0, 0,
       0, 2, 2, 1, 2, 1, 0, 1, 2, 2, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 2,
       2, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 2, 2, 2, 2, 0, 0, 2, 1, 0, 0, 0,
       2, 0, 0, 0, 1, 2, 0, 1, 2, 0, 2, 1, 0, 1, 0, 1, 2, 1, 1, 0, 1, 1,
       0, 1, 2, 2, 2, 2, 0, 2, 1, 0, 2, 1, 1, 0, 1, 2, 0, 0, 1, 1, 2, 1,
       0, 2, 0, 1, 0, 2, 1, 2, 0, 2, 2, 0, 2, 1, 1,

In [24]:
# ========Create and train a logistic regression model======
model = LogisticRegression(
    multi_class='multinomial',   
    solver='saga',              
    max_iter=1000
)
model.fit(X_train, y_train)

# ======== Predict and evaluate========================
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.93      0.86      0.90        65
           1       0.90      0.88      0.89        74
           2       0.85      0.95      0.90        61

    accuracy                           0.90       200
   macro avg       0.90      0.90      0.90       200
weighted avg       0.90      0.90      0.89       200



