## Initialization

In [39]:
import numpy as np 
from sklearn.datasets import load_breast_cancer
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score, confusion_matrix

## Data loading 

In [45]:
data = load_breast_cancer()
X = data.data
y = data.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3)
scaler_x = MinMaxScaler().fit(X_train)
X_train, X_test = scaler_x.transform(X_train), scaler_x.transform(X_test)

## Logistic Regression Model - Grid search

In [48]:
model = LogisticRegression()
hyperparameters = {'penalty':['l1', 'l2'], 'C':[0.001, 0.01, 0.1, 1, 10, 100]}
new_model = GridSearchCV(model, hyperparameters)
new_model.fit(X_train, y_train)

Traceback (most recent call last):
  File "C:\Users\Lenovo\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Lenovo\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 1304, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "C:\Users\Lenovo\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 443, in _check_solver
    "got %s penalty." % (solver, penalty))
ValueError: Solver lbfgs supports only 'l2' or 'none' penalties, got l1 penalty.

Traceback (most recent call last):
  File "C:\Users\Lenovo\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Lenovo\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 1304, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File

GridSearchCV(estimator=LogisticRegression(),
             param_grid={'C': [0.001, 0.01, 0.1, 1, 10, 100],
                         'penalty': ['l1', 'l2']})

## Evaluation

In [49]:
print('Best Penalty:', new_model.best_estimator_.get_params()['penalty'])
print('Best C:', new_model.best_estimator_.get_params()['C'])
y_pred_acc = new_model.predict(X_test)
print('Accuracy Score : ' + str(accuracy_score(y_test,y_pred_acc)))
print('Precision Score : ' + str(precision_score(y_test,y_pred_acc)))
print('Recall Score : ' + str(recall_score(y_test,y_pred_acc)))
print('F1 Score : ' + str(f1_score(y_test,y_pred_acc)))
print(confusion_matrix(y_pred_acc, y_test))

Best Penalty: l2
Best C: 10
Accuracy Score : 0.9649122807017544
Precision Score : 0.9565217391304348
Recall Score : 0.990990990990991
F1 Score : 0.9734513274336283
[[ 55   1]
 [  5 110]]


## Decision Tree - Grid Search 

In [50]:
model = DecisionTreeClassifier()
hyperparameters = {'criterion':['entropy', 'gini'], 'min_samples_split':[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]} 
new_model = GridSearchCV(model, hyperparameters)
new_model.fit(X_train, y_train)

Traceback (most recent call last):
  File "C:\Users\Lenovo\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Lenovo\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 894, in fit
    X_idx_sorted=X_idx_sorted)
  File "C:\Users\Lenovo\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 231, in fit
    % self.min_samples_split)
ValueError: min_samples_split must be an integer greater than 1 or a float in (0.0, 1.0]; got the integer 1

Traceback (most recent call last):
  File "C:\Users\Lenovo\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Lenovo\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 894, in fit
    X_idx_sorted=X_idx_sorted)
  File "C:\Users\Lenovo\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 231, in fit
   

GridSearchCV(estimator=DecisionTreeClassifier(),
             param_grid={'criterion': ['entropy', 'gini'],
                         'min_samples_split': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]})

## Evaluation

In [51]:
print('Best criterion:', new_model.best_estimator_.get_params()['criterion'])
print('Best spliting:', new_model.best_estimator_.get_params()['min_samples_split'])
y_pred_acc = new_model.predict(X_test)
print('Accuracy Score : ' + str(accuracy_score(y_test,y_pred_acc)))
print('Precision Score : ' + str(precision_score(y_test,y_pred_acc)))
print('Recall Score : ' + str(recall_score(y_test,y_pred_acc)))
print('F1 Score : ' + str(f1_score(y_test,y_pred_acc)))
print(confusion_matrix(y_pred_acc, y_test))

Best criterion: gini
Best spliting: 3
Accuracy Score : 0.9239766081871345
Precision Score : 0.9454545454545454
Recall Score : 0.9369369369369369
F1 Score : 0.9411764705882352
[[ 54   7]
 [  6 104]]
