In [1]:
import numpy as np 
import matplotlib.pyplot as plt 
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

In [3]:
# Synthetic binary-classification dataset: 10,000 samples with 10 features
# (8 informative, 2 redundant, none repeated). The fixed seed keeps the
# generated data identical across kernel restarts.
dataset_params = dict(
    n_samples=10000,
    n_features=10,
    n_informative=8,
    n_redundant=2,
    n_repeated=0,
    n_classes=2,
    random_state=42,
)
X, y = make_classification(**dataset_params)

In [4]:
# Dimensions of the feature matrix as (rows, columns).
np.shape(X)

(10000, 10)

In [5]:
# Peek at the first two rows of the feature matrix (all columns).
X[:2, :]

array([[-2.27357673,  0.02513535, -0.098951  , -1.91095893, -0.49093005,
        -2.61211981, -2.34050704, -0.07246443,  2.67111486, -0.65841466],
       [-2.6336023 , -1.06453045,  0.59606849, -0.31590674,  3.62061908,
         0.24313071, -4.92494701,  1.3095509 , -0.44190887,  8.13493314]])

In [6]:
# Class labels of the first two samples.
y[0:2]

array([1, 1])

In [7]:
# Hold out 30% of the samples for evaluation; the fixed seed keeps the
# partition stable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=32,
)

In [8]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

# Baseline: logistic regression with default settings, fitted on the training
# split. fit() returns the estimator itself, so construction and fitting chain.
model = LogisticRegression().fit(X_train, y_train)

# Per-class precision / recall / F1 on the held-out split.
y_pred = model.predict(X_test)
report = classification_report(y_test, y_pred)
print(report)

              precision    recall  f1-score   support

           0       0.75      0.71      0.73      1539
           1       0.71      0.75      0.73      1461

    accuracy                           0.73      3000
   macro avg       0.73      0.73      0.73      3000
weighted avg       0.73      0.73      0.73      3000



In [11]:
from sklearn.model_selection import KFold

# Five shuffled folds with a fixed seed, so fold membership is repeatable.
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Toy illustration on a five-element list: print the train and test index
# arrays produced for every fold.
toy_samples = [50, 60, 70, 80, 90]
for train_index, test_index in kf.split(toy_samples):
    print(train_index, test_index)

[0 2 3 4] [1]
[0 1 2 3] [4]
[0 1 3 4] [2]
[1 2 3 4] [0]
[0 1 2 4] [3]


### Logistic regression

In [15]:
from sklearn.model_selection import cross_val_score

# One score per fold of `kf` for a default logistic regression, evaluated on
# the full dataset; the cell displays the mean across folds.
scores_logistic = cross_val_score(
    estimator=LogisticRegression(),
    X=X,
    y=y,
    cv=kf,
)
scores_logistic.mean()

np.float64(0.7238999999999999)

### Decision tree 

In [16]:
from sklearn.tree import DecisionTreeClassifier

# Cross-validate a decision tree over the folds of `kf` and display the mean
# fold score.
# The tree is seeded: without random_state the fitted tree (and therefore the
# score) can change from run to run, which makes the number shown for this
# cell impossible to reproduce after a kernel restart.
scores_decision = cross_val_score(
    DecisionTreeClassifier(random_state=42), X, y, cv=kf
)
np.average(scores_decision)

np.float64(0.8581)

### Random forest 

In [21]:
from sklearn.ensemble import RandomForestClassifier

# Cross-validate a random forest over the folds of `kf` and display the mean
# fold score.
# The forest is seeded: an unseeded forest draws different bootstrap samples
# and feature subsets on every run, so the reported score would drift between
# executions of this cell.
scores_rf = cross_val_score(
    RandomForestClassifier(random_state=42), X, y, cv=kf
)
np.average(scores_rf)

np.float64(0.9246000000000001)

In [18]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

# Fit a random forest on the training split. The seed makes the fitted forest,
# and therefore the report printed below, reproducible across runs; an
# unseeded forest yields slightly different metrics every time.
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Per-class precision / recall / F1 on the held-out split.
y_pred = model.predict(X_test)
report = classification_report(y_test, y_pred)
print(report)

              precision    recall  f1-score   support

           0       0.94      0.91      0.92      1539
           1       0.91      0.94      0.92      1461

    accuracy                           0.92      3000
   macro avg       0.92      0.92      0.92      3000
weighted avg       0.92      0.92      0.92      3000

