In [1]:
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression

X, y = make_classification(n_samples = 1000, n_features = 10, n_informative = 5, n_redundant = 5, n_classes = 2, random_state = 1)
classification = LogisticRegression()

In [2]:
from sklearn.model_selection import train_test_split as tts

X_train, X_test, y_train, y_test = tts(X, y, test_size = 0.20, random_state = 42)

## Grid Search CV

In [13]:
from sklearn.model_selection import GridSearchCV
import warnings
warnings.filterwarnings('ignore')

parameters = {
    'penalty' : ('l1', 'l2', 'elasticnet'),
    'C' : [1, 10, 20, 30],
    'solver' : ('lbfgs', 'liblinear', 'newton-cg', 'newton-cholesky', 'sag', 'saga')
}


gs =  GridSearchCV(LogisticRegression(), param_grid = parameters, cv = 5)

gs.fit(X_train, y_train)
print("Best Parameters:", gs.best_params_)
print("Best Accuracy:", gs.best_score_)

Best Parameters: {'C': 1, 'penalty': 'l1', 'solver': 'liblinear'}
Best Accuracy: 0.8087500000000001


In [14]:
classification = LogisticRegression(C = 1, penalty = 'l2')
classification.fit(X_train, y_train)
y_test_prediction = classification.predict(X_test)
# OR
# classification.predict_proba(X_test) # this will give probability. 
# Example --> [0.71649202, 0.28350798] --> 0.71 --> 0 and 0.28 --> 1. therefore 0 got selected

In [12]:
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

print(confusion_matrix(y_test, y_test_prediction))
print(accuracy_score(y_test, y_test_prediction))
print(classification_report(y_test, y_test_prediction))

[[78 13]
 [29 80]]
0.79
              precision    recall  f1-score   support

           0       0.73      0.86      0.79        91
           1       0.86      0.73      0.79       109

    accuracy                           0.79       200
   macro avg       0.79      0.80      0.79       200
weighted avg       0.80      0.79      0.79       200



## Randomized Search CV

In [16]:
from sklearn.model_selection import RandomizedSearchCV

parameters = {
    'penalty' : ('l1', 'l2', 'elasticnet'),
    'C' : [1, 10, 20, 30],
    'solver' : ('lbfgs', 'liblinear', 'newton-cg', 'newton-cholesky', 'sag', 'saga')
}

rs = RandomizedSearchCV(LogisticRegression(), param_distributions = parameters, cv = 5, n_iter = 20)

rs.fit(X_train, y_train)
print("Best Parameters:", rs.best_params_)
print("Best Accuracy:", rs.best_score_)

Best Parameters: {'solver': 'saga', 'penalty': 'l2', 'C': 10}
Best Accuracy: 0.8087500000000001


In [17]:
classification = LogisticRegression(C = 1, penalty = 'l2')
classification.fit(X_train, y_train)
y_test_prediction = classification.predict(X_test)
# OR
# classification.predict_proba(X_test) # this will give probability. 
# Example --> [0.71649202, 0.28350798] --> 0.71 --> 0 and 0.28 --> 1. therefore 0 got selected

In [18]:
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

print(confusion_matrix(y_test, y_test_prediction))
print(accuracy_score(y_test, y_test_prediction))
print(classification_report(y_test, y_test_prediction))

[[78 13]
 [29 80]]
0.79
              precision    recall  f1-score   support

           0       0.73      0.86      0.79        91
           1       0.86      0.73      0.79       109

    accuracy                           0.79       200
   macro avg       0.79      0.80      0.79       200
weighted avg       0.80      0.79      0.79       200



In [19]:
from sklearn.tree import DecisionTreeRegressor

dt = DecisionTreeRegressor()
dt.get_params()

{'ccp_alpha': 0.0,
 'criterion': 'squared_error',
 'max_depth': None,
 'max_features': None,
 'max_leaf_nodes': None,
 'min_impurity_decrease': 0.0,
 'min_samples_leaf': 1,
 'min_samples_split': 2,
 'min_weight_fraction_leaf': 0.0,
 'random_state': None,
 'splitter': 'best'}