## Kütüphaneler

In [None]:
import pandas as pd
import numpy as np

from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split, RandomizedSearchCV

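# RandomizedSearchCV and GridSearchCV work on the same principle (cross-validating
# candidate hyperparameter settings); grid search tries every combination, while
# randomized search evaluates only a sampled subset of them.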

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

## Veri Oluşturma

In [None]:
# Synthetic binary-classification dataset: 2000 samples with 50 features,
# 10 of which are informative. The fixed seed makes the data reproducible.
X, y = make_classification(
    n_samples=2000,
    n_features=50,
    n_informative=10,
    n_classes=2,
    random_state=812,
)
X

array([[ 1.23173717,  1.10532801,  0.22776564, ..., -0.78356596,
        -0.15711703, -1.65414508],
       [ 0.48439638, -0.14303632, -2.51423517, ...,  0.60566089,
        -0.13130922, -0.21336398],
       [ 1.43086386, -0.13222191, -0.97300061, ..., -1.24373168,
        -0.8441854 , -1.68326389],
       ...,
       [ 1.31133403,  0.03317187,  2.49820238, ..., -0.42860444,
         1.11198425, -0.37050314],
       [ 1.40062972, -0.98004073, -0.89407227, ...,  1.63212064,
         0.06716772,  1.21445157],
       [ 0.44062982, -0.17520526,  3.96299506, ...,  0.25240773,
        -0.095253  ,  0.83673848]])

In [None]:
# Sanity check: (n_samples, n_features) should match the values passed to make_classification.
X.shape

(2000, 50)

In [None]:
# Target vector; with n_classes = 2 the labels are 0 and 1.
y

array([0, 0, 0, ..., 0, 1, 0])

In [None]:
# Keep 80% of the rows for training and hold out the rest for testing;
# the fixed seed makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8, random_state=42)

In [None]:
# Confirm the sizes of the train and test partitions.
X_train.shape, X_test.shape

((1600, 50), (400, 50))

## Vanilla Model

In [None]:
# Baseline ("vanilla") model: logistic regression with no hyperparameters overridden.
vanilla_lr = LogisticRegression()

In [None]:
# Fit the baseline model on the training partition only.
vanilla_lr.fit(X_train, y_train)

In [None]:
# Class predictions of the baseline model on the held-out test set.
vanilla_preds = vanilla_lr.predict(X_test)
vanilla_preds

array([1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1,
       0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0,
       1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1,
       0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0,
       0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0,
       1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0,
       0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0,
       1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1,
       0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0,
       0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1,
       0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0,
       0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1,
       1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0,

In [None]:
# Training-set metrics of the baseline model; comparing them with the
# test-set metrics shows how large the train/test gap is.
vanilla_train_preds = vanilla_lr.predict(X_train)
print(classification_report(y_train, vanilla_train_preds, digits=3))

              precision    recall  f1-score   support

           0      0.781     0.790     0.785       815
           1      0.779     0.769     0.774       785

    accuracy                          0.780      1600
   macro avg      0.780     0.780     0.780      1600
weighted avg      0.780     0.780     0.780      1600



In [None]:
# Test-set metrics of the baseline model.
vanilla_test_report = classification_report(y_test, vanilla_preds, digits=3)
print(vanilla_test_report)

              precision    recall  f1-score   support

           0      0.693     0.759     0.724       187
           1      0.769     0.704     0.735       213

    accuracy                          0.730       400
   macro avg      0.731     0.732     0.730       400
weighted avg      0.733     0.730     0.730       400



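**Örnek 1 (varsayımsal):**

- Train accuracy: 0.90
- Test accuracy: 0.85
- Skorlar güzel; aradaki fark (0.05) çok fazla değil.
- Olması gerektiği gibi.

**Örnek 2 (varsayımsal):**

- Train accuracy: 0.95
- Test accuracy: 0.80
- Aradaki fark fazla; 0.15'lik bir fark istenmez.
- Bu durum overfitting'e (ezberleme) işaret eder.

Bu notebook'taki vanilla model: train accuracy 0.780, test accuracy 0.730 (fark 0.05).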

## Hyperparameter Tuning

In [None]:
# Hyperparameter search space for LogisticRegression.
#
# C reflects how far the training data is trusted to represent real life:
#   large C --> I trust my dataset
#   small C --> I do not trust my dataset much
param_dist = dict(
    C=[0.01, 0.1, 1, 10, 100],
    penalty=["l1", "l2"],
    solver=["saga", "liblinear"],
)

In [None]:
# Estimator whose hyperparameters will be tuned.
# random_state pins the data shuffling performed by the "saga" and "liblinear"
# solvers (both appear in the search space), so repeated runs produce the same fits.
tune_model = LogisticRegression(random_state = 42)

In [None]:
# Randomized hyperparameter search: n_iter = 5 candidates are sampled from
# param_dist and each one is scored with 5-fold cross-validation.
# random_state pins which candidates get sampled, so a fresh
# Restart & Run All evaluates the same candidates every time.
random_search = RandomizedSearchCV(
    tune_model,
    param_distributions = param_dist,
    n_iter = 5,
    cv = 5,
    random_state = 42,
    verbose = 1,
)

In [None]:
# Run the search on the training partition only; the test set stays untouched.
random_search.fit(X_train, y_train)

Fitting 5 folds for each of 5 candidates, totalling 25 fits


In [None]:
# Parameter combination that scored best among the sampled candidates.
random_search.best_params_

{'solver': 'liblinear', 'penalty': 'l1', 'C': 1}

In [None]:
# Estimator carrying the best parameter combination found by the search.
best_model = random_search.best_estimator_

In [None]:
# Class predictions of the tuned model on the held-out test set.
tuned_preds = best_model.predict(X_test)

In [None]:
# Test-set metrics of the tuned model.
tuned_test_report = classification_report(y_test, tuned_preds, digits=3)
print(tuned_test_report)

              precision    recall  f1-score   support

           0      0.695     0.754     0.723       187
           1      0.766     0.709     0.737       213

    accuracy                          0.730       400
   macro avg      0.731     0.731     0.730       400
weighted avg      0.733     0.730     0.730       400



In [None]:
# Keep only column 1 of predict_proba: the predicted probability of class 1.
tuned_probas = best_model.predict_proba(X_test)[:, 1]
tuned_probas

array([0.93803541, 0.66568809, 0.83210849, 0.42116224, 0.76028834,
       0.74799278, 0.37349751, 0.93862702, 0.93090321, 0.29050947,
       0.89824044, 0.7030545 , 0.61682318, 0.23543338, 0.91248393,
       0.48943698, 0.3259892 , 0.42641054, 0.95974525, 0.63065843,
       0.27495342, 0.6175912 , 0.04414837, 0.74360047, 0.08082391,
       0.3212452 , 0.04991041, 0.17757425, 0.96236156, 0.80263014,
       0.20163812, 0.6807709 , 0.22144041, 0.41982632, 0.73082875,
       0.07873054, 0.98307615, 0.63009332, 0.31596355, 0.87207481,
       0.23839502, 0.22867998, 0.69535162, 0.34109427, 0.57721295,
       0.19782651, 0.33509116, 0.99342467, 0.99397342, 0.86251663,
       0.50831425, 0.33994163, 0.21520026, 0.53228256, 0.21930383,
       0.23980119, 0.04204586, 0.22421952, 0.69954546, 0.16185768,
       0.78816656, 0.09190034, 0.40277723, 0.72116144, 0.43026313,
       0.60334302, 0.34618657, 0.69975864, 0.35005391, 0.0799653 ,
       0.04628539, 0.44669269, 0.30684306, 0.55294725, 0.09208

In [None]:
# Turn probabilities into hard 0/1 labels using a 0.5 cut-off
# (strictly greater than 0.5 -> 1, otherwise 0).
(tuned_probas > 0.5).astype(int)

array([1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1,
       0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0,
       1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1,
       0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0,
       0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0,
       1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0,
       0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0,
       1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1,
       0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0,
       0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1,
       0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0,
       0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1,
       1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0,