In [14]:
import numpy as np 
import matplotlib.pyplot as plt 
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

In [15]:
# Synthetic binary-classification dataset with a fixed seed so that every
# run produces the same samples. The 10 features are split into
# 4 informative and 6 redundant columns, with no repeated columns.
dataset_options = {
    "n_samples": 10000,
    "n_classes": 2,
    "n_features": 10,
    "n_informative": 4,
    "n_redundant": 6,
    "n_repeated": 0,
    "random_state": 42,
}
X, y = make_classification(**dataset_options)

In [16]:
# Sanity check: (number of samples, number of features).
np.shape(X)

(10000, 10)

In [17]:
# Preview the first two samples across all feature columns.
X[:2, :]

array([[ 1.16979605, -0.4445191 , -2.69869981,  1.4436044 ,  2.32932257,
         1.93798847,  0.46274299,  1.91282418, -0.0491593 ,  1.77838296],
       [ 1.66141529,  0.87461333, -3.73700449,  1.57542438,  3.48988236,
         3.91716162,  0.43868589,  1.99926476,  2.02078013,  3.55899475]])

In [18]:
# Labels that belong to the two samples previewed above.
y[0:2]

array([1, 1])

In [19]:
# Hold out 30% of the samples for evaluation. The fixed seed keeps the
# partition identical from one run to the next.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=32,
)

In [20]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

# Baseline: logistic regression with default hyper-parameters, trained on
# the training split only. `fit` returns the estimator, so it can be chained.
model = LogisticRegression().fit(X_train, y_train)

# Hard class labels from the model's own decision rule, scored against the
# held-out labels (per-class precision / recall / F1 and overall accuracy).
y_pred = model.predict(X_test)
report = classification_report(y_test, y_pred)
print(report)

              precision    recall  f1-score   support

           0       0.72      0.74      0.73      1495
           1       0.73      0.72      0.73      1505

    accuracy                           0.73      3000
   macro avg       0.73      0.73      0.73      3000
weighted avg       0.73      0.73      0.73      3000



In [24]:
# Probability estimates for every held-out sample: one row per sample and
# one column per class.
probablities = model.predict_proba(X=X_test)
probablities

array([[0.03979018, 0.96020982],
       [0.03713089, 0.96286911],
       [0.05019115, 0.94980885],
       ...,
       [0.93218353, 0.06781647],
       [0.10264978, 0.89735022],
       [0.18037401, 0.81962599]])

In [25]:
# Keep only column index 1 of the probability matrix.
# NOTE(review): presumably this is P(y = 1), assuming the columns follow
# `model.classes_` order -- confirm.
class_probs = model.predict_proba(X_test)
positive_col = 1
probablities = class_probs[:, positive_col]
probablities

array([0.96020982, 0.96286911, 0.94980885, ..., 0.06781647, 0.89735022,
       0.81962599])

In [30]:
# Preview the labels produced by a custom cut-off: a sample is labelled 1
# when its column-1 probability is strictly greater than 0.4, otherwise 0.
probablities = model.predict_proba(X_test)[:, 1]
np.greater(probablities, 0.4).astype(int)

array([1, 1, 1, ..., 0, 1, 1])

In [None]:
# NOTE(review): this cell has no execution count and repeats the first two
# statements of the cell that follows it; it looks like a leftover draft.
# Store the thresholded labels (cut-off 0.4) as the new predictions.
class_probs = model.predict_proba(X_test)
probablities = class_probs[:, 1]
above_cutoff = probablities > 0.4
y_pred = above_cutoff.astype(int)

In [31]:
# Re-score the classifier with a custom decision cut-off of 0.4 applied to
# the column-1 probabilities. A lower cut-off labels more samples as class 1.
cutoff = 0.4
probablities = model.predict_proba(X_test)[:, 1]
y_pred = (probablities > cutoff).astype(int)

# Same report as for the baseline, now based on the thresholded labels.
report = classification_report(y_test, y_pred)
print(report)

              precision    recall  f1-score   support

           0       0.79      0.64      0.71      1495
           1       0.70      0.84      0.76      1505

    accuracy                           0.74      3000
   macro avg       0.75      0.74      0.73      3000
weighted avg       0.75      0.74      0.73      3000



In [32]:
# Repeat the evaluation with an even lower cut-off (0.3) to see how the
# per-class precision and recall move as more samples are labelled 1.
cutoff = 0.3
class_probs = model.predict_proba(X_test)
probablities = class_probs[:, 1]
y_pred = np.greater(probablities, cutoff).astype(int)

report = classification_report(y_test, y_pred)
print(report)

              precision    recall  f1-score   support

           0       0.86      0.51      0.64      1495
           1       0.65      0.92      0.76      1505

    accuracy                           0.71      3000
   macro avg       0.76      0.71      0.70      3000
weighted avg       0.76      0.71      0.70      3000



In [34]:
from sklearn.metrics import roc_curve

# Recompute the column-1 scores inside this cell so the ROC curve does not
# depend on whichever thresholding cell happened to assign `probablities`
# last. Every other cell that uses the scores recomputes them the same way.
probablities = model.predict_proba(X_test)[:, 1]

# roc_curve is given the raw scores (not thresholded labels) and returns the
# false-positive rates, true-positive rates and the matching thresholds.
# The variable name `thresolds` is kept as-is in case later cells use it.
fpr, tpr, thresolds = roc_curve(y_test, probablities)

# Show the first five points of each array.
fpr[:5], tpr[:5], thresolds[:5]

(array([0.       , 0.       , 0.       , 0.0006689, 0.0006689]),
 array([0.        , 0.00066445, 0.09966777, 0.09966777, 0.12026578]),
 array([       inf, 0.9983289 , 0.96017363, 0.95896163, 0.94975229]))

In [35]:
from sklearn.metrics import auc

# Area under the ROC curve, computed from the (fpr, tpr) points obtained
# from roc_curve: fpr is the x-axis, tpr the y-axis.
area = auc(x=fpr, y=tpr)
area

np.float64(0.8199535550395004)