# Parameter_Tunning_Grid_Search

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import numpy as np
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import cross_val_score, KFold

In [3]:
import pickle as pkl
with open('/content/drive/MyDrive/Datasets/credit.pkl', 'rb') as f:
  X_train, y_train, X_test, y_test = pkl.load(f)

In [4]:
X_train.shape, y_train.shape

((1500, 3), (1500,))

In [5]:
X_test.shape, y_test.shape

((500, 3), (500,))

In [6]:
# concat
X_credit = np.concatenate((X_train, X_test), axis=0) # axis=0 concatenate rows
y_credit = np.concatenate((y_train, y_test), axis=0)
X_credit.shape, y_credit.shape

((2000, 3), (2000,))

In [7]:
# function to run Grid Search# function to run Grid Search
def run_grid_search(estimators, param):
  grid = GridSearchCV(estimator=estimators, param_grid=param).fit(X_credit, y_credit)
  print(f"Best Parameter: {grid.best_params_}")
  print(f"Best Index: {grid.best_index_}")
  best_score = round(grid.best_score_,4)
  print(f"Best Score: {best_score}")


# Decision Tree Classifier

In [8]:
# parameters to classifier
parameters_D = {
      'criterion':['gini', 'entropy'],
      'splitter':['best', 'random'],
      'min_samples_split':[2,5,10],
      'min_samples_leaf':[2,6,10]}

In [9]:
run_grid_search(DecisionTreeClassifier(), parameters_D)

Best Parameter: {'criterion': 'entropy', 'min_samples_leaf': 2, 'min_samples_split': 2, 'splitter': 'best'}
Best Index: 18
Best Score: 0.984


# Random Forest

In [10]:
# parameters to classifier
parameters_R = {
      'criterion':['gini', 'entropy'],
      'n_estimators':[10, 50, 100, 200],
      'min_samples_split':[2,5,10],
      'min_samples_leaf':[2,6,10]}

In [11]:
result_RF = GridSearchCV(RandomForestClassifier(), param_grid = parameters_R).fit(X_credit, y_credit)
best_RF = result_RF.best_score_
best_RF

0.9855

# KNN

In [12]:
parameters_K = {
    'n_neighbors':[3, 5, 10, 20, 35],
    'p':[1,2]}

In [13]:
run_grid_search(KNeighborsClassifier(), parameters_K)

Best Parameter: {'n_neighbors': 35, 'p': 2}
Best Index: 9
Best Score: 0.9815


In [14]:
result_knn = GridSearchCV(KNeighborsClassifier(), param_grid = parameters_K).fit(X_credit, y_credit)
best_knn = result_knn.best_score_
best_knn

0.9814999999999999

# Logistic Regression

In [15]:
parameters_L ={
    'tol': [0.1, 0.01, 0.001, 0.0001, 0.00001],
    'C': [1.0, 1.5, 2.0, 2.5, 3.0],
    'solver': ['lbfgs', 'sag', 'saga']
}

In [16]:
run_grid_search(LogisticRegression(), parameters_L)

Best Parameter: {'C': 3.0, 'solver': 'sag', 'tol': 0.1}
Best Index: 65
Best Score: 0.9495


In [17]:
result_lr = GridSearchCV(LogisticRegression(), param_grid = parameters_L).fit(X_credit, y_credit)
best_lr = result_lr.best_score_
best_lr

0.9494999999999999

# SVM

In [18]:
parameters_S = {
    'tol': [0.1, 0.01, 0.001, 0.0001, 0.00001],
    'C': [1.0, 1.5, 2.0, 2.5, 3.0],
    'kernel':['rbf', 'linear', 'poly', 'sigmoid']}

In [19]:
run_grid_search(SVC(), parameters_S)

Best Parameter: {'C': 3.0, 'kernel': 'rbf', 'tol': 0.1}
Best Index: 80
Best Score: 0.984


In [20]:
result_svm = GridSearchCV(SVC(), param_grid = parameters_S).fit(X_credit, y_credit)
best_svm = result_svm.best_score_
best_svm

0.984

# Neural Netwoks

In [21]:
parameters_N = {
    'activation':['relu', 'logistic', 'tahn'],
    'solver':['adam', 'sgd'],
    'batch_size':[10, 42]}

In [22]:
run_grid_search(MLPClassifier(), parameters_N)

20 fits failed out of a total of 60.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
20 fits failed with the following error:
Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/sklearn/model_selection/_validation.py", line 680, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/neural_network/_multilayer_perceptron.py", line 752, in fit
    return self._fit(X, y, incremental=False)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/neural_network/_multilayer_perceptron.py", line 384, in _fit
    self._validate_hyperparameters()
  File "/usr/local/lib/python3.7/dist-packages/sklearn/neural_network/_multilayer_perceptron.py", line 495

Best Parameter: {'activation': 'relu', 'batch_size': 42, 'solver': 'adam'}
Best Index: 2
Best Score: 0.996




In [23]:
result_nn = GridSearchCV(MLPClassifier(), param_grid = parameters_N).fit(X_credit, y_credit)
best_nn = result_nn.best_score_
best_nn

20 fits failed out of a total of 60.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
20 fits failed with the following error:
Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/sklearn/model_selection/_validation.py", line 680, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/neural_network/_multilayer_perceptron.py", line 752, in fit
    return self._fit(X, y, incremental=False)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/neural_network/_multilayer_perceptron.py", line 384, in _fit
    self._validate_hyperparameters()
  File "/usr/local/lib/python3.7/dist-packages/sklearn/neural_network/_multilayer_perceptron.py", line 495

0.9960000000000001

# Cross Validation

In [24]:
def cross_validation(algorithm, X, y):
  list_result = []
  for i in range(30):
    kfold = KFold(n_splits=10, shuffle=True, random_state=i)
    
    clf = algorithm
    scores = cross_val_score(clf, X, y, cv = kfold)
    list_result.append(scores.mean())
  print(f'Mean of : {list_result}')

In [25]:
list_algorithm = [DecisionTreeClassifier(), RandomForestClassifier(),
                  LogisticRegression(), SVC(), MLPClassifier()]

In [26]:
for i in list_algorithm:
  print(i)
  cross_validation(i, X_credit, y_credit)

DecisionTreeClassifier()
Mean of : [0.983, 0.9855, 0.9854999999999998, 0.983, 0.986, 0.9880000000000001, 0.9864999999999998, 0.9835, 0.9844999999999999, 0.986, 0.9824999999999999, 0.982, 0.9855, 0.9835, 0.9865, 0.9829999999999999, 0.984, 0.9834999999999999, 0.982, 0.9879999999999999, 0.985, 0.9855, 0.984, 0.9864999999999998, 0.985, 0.9834999999999999, 0.9834999999999999, 0.9875, 0.9814999999999999, 0.982]
RandomForestClassifier()
Mean of : [0.9865, 0.9869999999999999, 0.9869999999999999, 0.9879999999999999, 0.9869999999999999, 0.9875, 0.9904999999999999, 0.9879999999999999, 0.9889999999999999, 0.9894999999999998, 0.9865, 0.9879999999999999, 0.9870000000000001, 0.9879999999999999, 0.9869999999999999, 0.984, 0.9855, 0.9889999999999999, 0.9899999999999999, 0.9890000000000001, 0.9875, 0.99, 0.9879999999999999, 0.9879999999999999, 0.9889999999999999, 0.9889999999999999, 0.9844999999999999, 0.9879999999999999, 0.9849999999999998, 0.9884999999999998]
LogisticRegression()
Mean of : [0.9475, 0.



Mean of : [0.9959999999999999, 0.9949999999999999, 0.9955, 0.9955, 0.9964999999999999, 0.9970000000000001, 0.9945, 0.9970000000000001, 0.9955, 0.9964999999999999, 0.9939999999999998, 0.9944999999999998, 0.9955, 0.9969999999999999, 0.9960000000000001, 0.9964999999999999, 0.9944999999999998, 0.9964999999999999, 0.9964999999999998, 0.9964999999999999, 0.9955, 0.9969999999999999, 0.9964999999999999, 0.9959999999999999, 0.994, 0.9945, 0.9944999999999998, 0.9960000000000001, 0.9955, 0.9955]




# Loading a trained classifier

In [27]:
neural = MLPClassifier(activation='relu', batch_size=56, solver='adam').fit(X_credit, y_credit)



In [28]:
decision_tree = DecisionTreeClassifier(criterion='entropy', min_samples_leaf=1,
                                       min_samples_split=5, splitter='best').fit(X_credit, y_credit)

In [49]:
svm = SVC(C=2.0, kernel='rbf', probability=True).fit(X_credit, y_credit)

In [50]:
# creating files
import pickle as pkl
pkl.dump(neural, open('neural_final.sav', 'wb'))
pkl.dump(decision_tree, open('tree_final.sav', 'wb'))
pkl.dump(svm, open('svm_final.sav', 'wb'))

In [51]:
# loading
neural = pkl.load(open('neural_final.sav', 'rb'))
tree = pkl.load(open('tree_final.sav', 'rb'))
svm = pkl.load(open('svm_final.sav', 'rb'))

In [32]:
new_register = X_credit[1999]

In [33]:
# prediction
new_register = new_register.reshape(1,-1)

In [34]:
# neural network
output = neural.predict(new_register)
if output == 0:
  print('Crédito Consedido!')
else:
  print('Crédito Negado!')

Crédito Negado!


In [35]:
# decision tree
output = tree.predict(new_register)
if output == 0:
  print('Crédito Consedido!')
else:
  print('Crédito Negado!')

Crédito Negado!


In [36]:
# svm
output = svm.predict(new_register)
if output == 0:
  print('Crédito Consedido!')
else:
  print('Crédito Negado!')

Crédito Negado!


# Classifier Combination

In [40]:
new_register = X_credit[0]
new_register = new_register.reshape(1,-1)
new_register, new_register.shape

(array([[-1.3754462 ,  0.50631087,  0.10980934]]), (1, 3))

In [41]:
neural_answer = neural.predict(new_register)
tree_answer = decision_tree.predict(new_register)
svm_answer = svm.predict(new_register)

In [42]:
print(f'Neural answer: {neural_answer}')
print(f'Decision Tree answer: {tree_answer}')
print(f'SVM answer: {svm_answer}')

Neural answer: [0]
Decision Tree answer: [0]
SVM answer: [0]


In [43]:
pay = 0
no_pay = 0

if neural_answer[0] == 1:
  no_pay += 1
else:
  pay += 1

if tree_answer[0] == 1:
  no_pay += 1
else:
  pay += 1

if svm_answer[0] == 1:
  no_pay += 1
else:
  pay += 1

if pay > no_pay:
  print('Customer will pay')
elif pay == no_pay:
  print('The values are equal')
else:
  print("Costumer won't pay the loan")

Customer will pay


# Rejection Classifier

In [44]:
neural_network_prob = result_nn.predict_proba(new_register)
neural_network_prob

array([[1.0000000e+00, 1.5814392e-19]])

In [45]:
neural_network_confidence = neural_network_prob.max()
neural_network_confidence

1.0

In [47]:
RF_prob = result_RF.predict_proba(new_register).max()
RF_prob

1.0

In [52]:
svm_prob = svm.predict_proba(new_register).max()
svm_prob

0.9999976803192171

In [53]:
pay = 0
no_pay = 0
min_confidence = 0.999999
algorithms = 0


if neural_network_confidence >= min_confidence:
  algorithms += 1
  if neural_answer[0] == 1:
    no_pay += 1
  else:
    pay += 1

if RF_prob >= min_confidence:
  algorithms += 1
  if tree_answer[0] == 1:
    no_pay += 1
  else:
    pay += 1

if svm_prob >= min_confidence:
  algorithms += 1
  if svm_answer[0] == 1:
    no_pay += 1
  else:
    pay += 1

if pay > no_pay:
  print(f'Customer will pay, based in {algorithms} algorithms')
elif pay == no_pay:
  print(f'The values are equal, based in {algorithms} algorithms')
else:
  print(f"Costumer won't pay the loan, based in {algorithms} algorithms")

Customer will pay, based in 2 algorithms
