In [3]:
import warnings
warnings.filterwarnings('ignore')

In [4]:
import pandas as pd
df=pd.read_csv('diabetes.csv')
df.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [5]:
import numpy as np
df['Glucose']=np.where(df['Glucose']==0,df['Glucose'].median(),df['Glucose'])
df['Insulin']=np.where(df['Insulin']==0,df['Insulin'].median(),df['Insulin'])
df.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148.0,72,35,30.5,33.6,0.627,50,1
1,1,85.0,66,29,30.5,26.6,0.351,31,0
2,8,183.0,64,0,30.5,23.3,0.672,32,1
3,1,89.0,66,23,94.0,28.1,0.167,21,0
4,0,137.0,40,35,168.0,43.1,2.288,33,1


In [6]:
#### Independent And Dependent features
X=df.drop('Outcome',axis=1)
y=df['Outcome']

In [7]:
print(X.head())
print(y.head())

   Pregnancies  Glucose  BloodPressure  SkinThickness  Insulin   BMI  \
0            6    148.0             72             35     30.5  33.6   
1            1     85.0             66             29     30.5  26.6   
2            8    183.0             64              0     30.5  23.3   
3            1     89.0             66             23     94.0  28.1   
4            0    137.0             40             35    168.0  43.1   

   DiabetesPedigreeFunction  Age  
0                     0.627   50  
1                     0.351   31  
2                     0.672   32  
3                     0.167   21  
4                     2.288   33  
0    1
1    0
2    1
3    0
4    1
Name: Outcome, dtype: int64


In [8]:
pd.DataFrame(X,columns=df.columns[:-1])

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age
0,6,148.0,72,35,30.5,33.6,0.627,50
1,1,85.0,66,29,30.5,26.6,0.351,31
2,8,183.0,64,0,30.5,23.3,0.672,32
3,1,89.0,66,23,94.0,28.1,0.167,21
4,0,137.0,40,35,168.0,43.1,2.288,33
...,...,...,...,...,...,...,...,...
763,10,101.0,76,48,180.0,32.9,0.171,63
764,2,122.0,70,27,30.5,36.8,0.340,27
765,5,121.0,72,23,112.0,26.2,0.245,30
766,1,126.0,60,0,30.5,30.1,0.349,47


In [9]:
#### Train Test Split
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.20,random_state=0)

In [10]:
from sklearn.ensemble import RandomForestClassifier
rf_classifier=RandomForestClassifier(n_estimators=10).fit(X_train,y_train)
prediction=rf_classifier.predict(X_test)

In [11]:
y.value_counts()

0    500
1    268
Name: Outcome, dtype: int64

In [12]:
from sklearn.metrics import confusion_matrix,classification_report,accuracy_score
print(confusion_matrix(y_test,prediction))
print(accuracy_score(y_test,prediction))
print(classification_report(y_test,prediction))

[[91 16]
 [20 27]]
0.7662337662337663
              precision    recall  f1-score   support

           0       0.82      0.85      0.83       107
           1       0.63      0.57      0.60        47

    accuracy                           0.77       154
   macro avg       0.72      0.71      0.72       154
weighted avg       0.76      0.77      0.76       154



In [13]:
### Manual Hyperparameter Tuning
model=RandomForestClassifier(n_estimators=300,criterion='entropy',
                             max_features='sqrt',min_samples_leaf=10,random_state=100).fit(X_train,y_train)
predictions=model.predict(X_test)
print(confusion_matrix(y_test,predictions))
print(accuracy_score(y_test,predictions))
print(classification_report(y_test,predictions))

[[98  9]
 [18 29]]
0.8246753246753247
              precision    recall  f1-score   support

           0       0.84      0.92      0.88       107
           1       0.76      0.62      0.68        47

    accuracy                           0.82       154
   macro avg       0.80      0.77      0.78       154
weighted avg       0.82      0.82      0.82       154



In [14]:
import numpy as np
from sklearn.model_selection import RandomizedSearchCV
# Number of trees in random forest
n_estimators = [int(x) for x in np.linspace(start = 200, stop = 2000, num = 10)]
# Number of features to consider at every split
max_features = ['auto', 'sqrt','log2']
# Maximum number of levels in tree
max_depth = [int(x) for x in np.linspace(10, 1000,10)]
# Minimum number of samples required to split a node
min_samples_split = [2, 5, 10,14]
# Minimum number of samples required at each leaf node
min_samples_leaf = [1, 2, 4,6,8]
# Create the random grid
random_grid = {'n_estimators': n_estimators,
               'max_features': max_features,
               'max_depth': max_depth,
               'min_samples_split': min_samples_split,
               'min_samples_leaf': min_samples_leaf,
              'criterion':['entropy','gini']}
print(random_grid)

{'n_estimators': [200, 400, 600, 800, 1000, 1200, 1400, 1600, 1800, 2000], 'max_features': ['auto', 'sqrt', 'log2'], 'max_depth': [10, 120, 230, 340, 450, 560, 670, 780, 890, 1000], 'min_samples_split': [2, 5, 10, 14], 'min_samples_leaf': [1, 2, 4, 6, 8], 'criterion': ['entropy', 'gini']}


In [15]:
rf=RandomForestClassifier()
rf_randomcv=RandomizedSearchCV(estimator=rf,param_distributions=random_grid,n_iter=100,cv=3,verbose=2,
                               random_state=100,n_jobs=-1)
### fit the randomized model
rf_randomcv.fit(X_train,y_train)

Fitting 3 folds for each of 100 candidates, totalling 300 fits


RandomizedSearchCV(cv=3, estimator=RandomForestClassifier(), n_iter=100,
                   n_jobs=-1,
                   param_distributions={'criterion': ['entropy', 'gini'],
                                        'max_depth': [10, 120, 230, 340, 450,
                                                      560, 670, 780, 890,
                                                      1000],
                                        'max_features': ['auto', 'sqrt',
                                                         'log2'],
                                        'min_samples_leaf': [1, 2, 4, 6, 8],
                                        'min_samples_split': [2, 5, 10, 14],
                                        'n_estimators': [200, 400, 600, 800,
                                                         1000, 1200, 1400, 1600,
                                                         1800, 2000]},
                   random_state=100, verbose=2)

In [16]:
rf_randomcv.best_params_

{'n_estimators': 1800,
 'min_samples_split': 5,
 'min_samples_leaf': 1,
 'max_features': 'auto',
 'max_depth': 450,
 'criterion': 'gini'}

In [17]:
rf_randomcv

RandomizedSearchCV(cv=3, estimator=RandomForestClassifier(), n_iter=100,
                   n_jobs=-1,
                   param_distributions={'criterion': ['entropy', 'gini'],
                                        'max_depth': [10, 120, 230, 340, 450,
                                                      560, 670, 780, 890,
                                                      1000],
                                        'max_features': ['auto', 'sqrt',
                                                         'log2'],
                                        'min_samples_leaf': [1, 2, 4, 6, 8],
                                        'min_samples_split': [2, 5, 10, 14],
                                        'n_estimators': [200, 400, 600, 800,
                                                         1000, 1200, 1400, 1600,
                                                         1800, 2000]},
                   random_state=100, verbose=2)

In [18]:
best_random_grid=rf_randomcv.best_estimator_

In [19]:
from sklearn.metrics import accuracy_score
y_pred=best_random_grid.predict(X_test)
print(confusion_matrix(y_test,y_pred))
print("Accuracy Score {}".format(accuracy_score(y_test,y_pred)))
print("Classification report: {}".format(classification_report(y_test,y_pred)))

[[94 13]
 [16 31]]
Accuracy Score 0.8116883116883117
Classification report:               precision    recall  f1-score   support

           0       0.85      0.88      0.87       107
           1       0.70      0.66      0.68        47

    accuracy                           0.81       154
   macro avg       0.78      0.77      0.77       154
weighted avg       0.81      0.81      0.81       154



In [20]:
##GridSearch CV##

In [21]:
rf_randomcv.best_params_

{'n_estimators': 1800,
 'min_samples_split': 5,
 'min_samples_leaf': 1,
 'max_features': 'auto',
 'max_depth': 450,
 'criterion': 'gini'}

In [22]:
from sklearn.model_selection import GridSearchCV

param_grid = {
    'criterion': [rf_randomcv.best_params_['criterion']],
    'max_depth': [rf_randomcv.best_params_['max_depth']],
    'max_features': [rf_randomcv.best_params_['max_features']],
    'min_samples_leaf': [rf_randomcv.best_params_['min_samples_leaf'], 
                         rf_randomcv.best_params_['min_samples_leaf']+2, 
                         rf_randomcv.best_params_['min_samples_leaf'] + 4],
    'min_samples_split': [rf_randomcv.best_params_['min_samples_split'] - 2,
                          rf_randomcv.best_params_['min_samples_split'] - 1,
                          rf_randomcv.best_params_['min_samples_split'], 
                          rf_randomcv.best_params_['min_samples_split'] +1,
                          rf_randomcv.best_params_['min_samples_split'] + 2],
    'n_estimators': [rf_randomcv.best_params_['n_estimators'] - 200, rf_randomcv.best_params_['n_estimators'] - 100, 
                     rf_randomcv.best_params_['n_estimators'], 
                     rf_randomcv.best_params_['n_estimators'] + 100, rf_randomcv.best_params_['n_estimators'] + 200]
}

print(param_grid)

{'criterion': ['gini'], 'max_depth': [450], 'max_features': ['auto'], 'min_samples_leaf': [1, 3, 5], 'min_samples_split': [3, 4, 5, 6, 7], 'n_estimators': [1600, 1700, 1800, 1900, 2000]}


In [23]:
#### Fit the grid_search to the data
rf=RandomForestClassifier()
grid_search=GridSearchCV(estimator=rf,param_grid=param_grid,cv=10,n_jobs=-1,verbose=2)
grid_search.fit(X_train,y_train)

Fitting 10 folds for each of 75 candidates, totalling 750 fits


GridSearchCV(cv=10, estimator=RandomForestClassifier(), n_jobs=-1,
             param_grid={'criterion': ['gini'], 'max_depth': [450],
                         'max_features': ['auto'],
                         'min_samples_leaf': [1, 3, 5],
                         'min_samples_split': [3, 4, 5, 6, 7],
                         'n_estimators': [1600, 1700, 1800, 1900, 2000]},
             verbose=2)

In [24]:
grid_search.best_estimator_

RandomForestClassifier(max_depth=450, min_samples_leaf=5, min_samples_split=7,
                       n_estimators=1800)

In [25]:
best_grid=grid_search.best_estimator_

In [26]:
best_grid

RandomForestClassifier(max_depth=450, min_samples_leaf=5, min_samples_split=7,
                       n_estimators=1800)

In [27]:
y_pred=best_grid.predict(X_test)
print(confusion_matrix(y_test,y_pred))
print("Accuracy Score {}".format(accuracy_score(y_test,y_pred)))
print("Classification report: {}".format(classification_report(y_test,y_pred)))

[[97 10]
 [18 29]]
Accuracy Score 0.8181818181818182
Classification report:               precision    recall  f1-score   support

           0       0.84      0.91      0.87       107
           1       0.74      0.62      0.67        47

    accuracy                           0.82       154
   macro avg       0.79      0.76      0.77       154
weighted avg       0.81      0.82      0.81       154



In [28]:
pip install hyperopt

Note: you may need to restart the kernel to use updated packages.


In [29]:
from hyperopt import hp,fmin,tpe,STATUS_OK,Trials

In [30]:
space = {'criterion': hp.choice('criterion', ['entropy', 'gini']),
        'max_depth': hp.quniform('max_depth', 10, 1200, 10),
        'max_features': hp.choice('max_features', ['auto', 'sqrt','log2', None]),
        'min_samples_leaf': hp.uniform('min_samples_leaf', 0, 0.5),
        'min_samples_split' : hp.uniform ('min_samples_split', 0, 1),
        'n_estimators' : hp.choice('n_estimators', [10, 50, 300, 750, 1200,1300,1500])
    }
space


{'criterion': <hyperopt.pyll.base.Apply at 0x23fa8af4760>,
 'max_depth': <hyperopt.pyll.base.Apply at 0x23fa8af4c70>,
 'max_features': <hyperopt.pyll.base.Apply at 0x23fa64f5310>,
 'min_samples_leaf': <hyperopt.pyll.base.Apply at 0x23fa649d670>,
 'min_samples_split': <hyperopt.pyll.base.Apply at 0x23fa649d610>,
 'n_estimators': <hyperopt.pyll.base.Apply at 0x23fa8ad26a0>}

In [31]:
def objective(space):
    model = RandomForestClassifier(criterion = space['criterion'], max_depth = space['max_depth'],
                                 max_features = space['max_features'],
                                 min_samples_leaf = space['min_samples_leaf'],
                                 min_samples_split = space['min_samples_split'],
                                 n_estimators = space['n_estimators'], 
                                 )
    
    accuracy = cross_val_score(model, X_train, y_train, cv = 5).mean()

    # We aim to maximize accuracy, therefore we return it as a negative value
    return {'loss': -accuracy, 'status': STATUS_OK }

In [32]:
from sklearn.model_selection import cross_val_score
trials = Trials()
best = fmin(fn= objective,
            space= space,
            algo= tpe.suggest,
            max_evals = 80,
            trials= trials)
best

100%|███████████████████████████████████████████████| 80/80 [07:57<00:00,  5.97s/trial, best loss: -0.7670931627349059]


{'criterion': 1,
 'max_depth': 710.0,
 'max_features': 2,
 'min_samples_leaf': 0.06728187432776261,
 'min_samples_split': 0.02895457766446234,
 'n_estimators': 6}

In [33]:
crit = {0: 'entropy', 1: 'gini'}
feat = {0: 'auto', 1: 'sqrt', 2: 'log2', 3: None}
est = {0: 10, 1: 50, 2: 300, 3: 750, 4: 1200,5:1300,6:1500}


print(crit[best['criterion']])
print(feat[best['max_features']])
print(est[best['n_estimators']])

gini
log2
1500


In [34]:
best['min_samples_leaf']

0.06728187432776261

In [35]:
trainedforest = RandomForestClassifier(criterion = crit[best['criterion']], max_depth = best['max_depth'], 
                                       max_features = feat[best['max_features']], 
                                       min_samples_leaf = best['min_samples_leaf'], 
                                       min_samples_split = best['min_samples_split'], 
                                       n_estimators = est[best['n_estimators']]).fit(X_train,y_train)
predictionforest = trainedforest.predict(X_test)
print(confusion_matrix(y_test,predictionforest))
print(accuracy_score(y_test,predictionforest))
print(classification_report(y_test,predictionforest))
acc5 = accuracy_score(y_test,predictionforest)

[[98  9]
 [21 26]]
0.8051948051948052
              precision    recall  f1-score   support

           0       0.82      0.92      0.87       107
           1       0.74      0.55      0.63        47

    accuracy                           0.81       154
   macro avg       0.78      0.73      0.75       154
weighted avg       0.80      0.81      0.80       154



In [36]:
import numpy as np
from sklearn.model_selection import RandomizedSearchCV
# Number of trees in random forest
n_estimators = [int(x) for x in np.linspace(start = 200, stop = 2000, num = 10)]
# Number of features to consider at every split
max_features = ['auto', 'sqrt','log2']
# Maximum number of levels in tree
max_depth = [int(x) for x in np.linspace(10, 1000,10)]
# Minimum number of samples required to split a node
min_samples_split = [2, 5, 10,14]
# Minimum number of samples required at each leaf node
min_samples_leaf = [1, 2, 4,6,8]
# Create the random grid
param = {'n_estimators': n_estimators,
               'max_features': max_features,
               'max_depth': max_depth,
               'min_samples_split': min_samples_split,
               'min_samples_leaf': min_samples_leaf,
              'criterion':['entropy','gini']}
print(param)

{'n_estimators': [200, 400, 600, 800, 1000, 1200, 1400, 1600, 1800, 2000], 'max_features': ['auto', 'sqrt', 'log2'], 'max_depth': [10, 120, 230, 340, 450, 560, 670, 780, 890, 1000], 'min_samples_split': [2, 5, 10, 14], 'min_samples_leaf': [1, 2, 4, 6, 8], 'criterion': ['entropy', 'gini']}


In [38]:
pip install tpot

Collecting tpot
  Downloading TPOT-0.11.7-py3-none-any.whl (87 kB)
Collecting deap>=1.2
  Downloading deap-1.3.3-cp39-cp39-win_amd64.whl (114 kB)
Collecting update-checker>=0.16
  Downloading update_checker-0.18.0-py3-none-any.whl (7.0 kB)
Collecting xgboost>=1.1.0
  Downloading xgboost-1.7.5-py3-none-win_amd64.whl (70.9 MB)
Collecting stopit>=1.1.1
  Downloading stopit-1.1.2.tar.gz (18 kB)
Building wheels for collected packages: stopit
  Building wheel for stopit (setup.py): started
  Building wheel for stopit (setup.py): finished with status 'done'
  Created wheel for stopit: filename=stopit-1.1.2-py3-none-any.whl size=11956 sha256=c103ec0998b28381d282c58086f3ba78f256f91188842678a71894878f1061c9
  Stored in directory: c:\users\sugam arora\appdata\local\pip\cache\wheels\48\8c\93\3afb1916772591fe6bcc25cdf8b1c5bdc362f0ec8e2f0fd413
Successfully built stopit
Installing collected packages: xgboost, update-checker, stopit, deap, tpot
Successfully installed deap-1.3.3 stopit-1.1.2 tpot-0.11.

In [39]:
from tpot import TPOTClassifier


tpot_classifier = TPOTClassifier(generations= 5, population_size= 24, offspring_size= 12,
                                 verbosity= 2, early_stop= 12,
                                 config_dict={'sklearn.ensemble.RandomForestClassifier': param}, 
                                 cv = 4, scoring = 'accuracy')
tpot_classifier.fit(X_train,y_train)

Optimization Progress:   0%|          | 0/84 [00:00<?, ?pipeline/s]


Generation 1 - Current best internal CV score: 0.7638358373652492

Generation 2 - Current best internal CV score: 0.7638358373652492

Generation 3 - Current best internal CV score: 0.7638358373652492

Generation 4 - Current best internal CV score: 0.7638358373652492

Generation 5 - Current best internal CV score: 0.7638358373652492

Best pipeline: RandomForestClassifier(RandomForestClassifier(input_matrix, criterion=gini, max_depth=560, max_features=log2, min_samples_leaf=6, min_samples_split=10, n_estimators=2000), criterion=gini, max_depth=560, max_features=auto, min_samples_leaf=1, min_samples_split=5, n_estimators=400)


TPOTClassifier(config_dict={'sklearn.ensemble.RandomForestClassifier': {'criterion': ['entropy',
                                                                                      'gini'],
                                                                        'max_depth': [10,
                                                                                      120,
                                                                                      230,
                                                                                      340,
                                                                                      450,
                                                                                      560,
                                                                                      670,
                                                                                      780,
                                                                                 

In [40]:
accuracy = tpot_classifier.score(X_test, y_test)
print(accuracy)

0.8246753246753247


In [41]:
pip install optuna

Collecting optuna
  Downloading optuna-3.1.1-py3-none-any.whl (365 kB)
Collecting cmaes>=0.9.1
  Downloading cmaes-0.9.1-py3-none-any.whl (21 kB)
Collecting alembic>=1.5.0
  Downloading alembic-1.10.3-py3-none-any.whl (212 kB)
Collecting colorlog
  Downloading colorlog-6.7.0-py2.py3-none-any.whl (11 kB)
Collecting Mako
  Downloading Mako-1.2.4-py3-none-any.whl (78 kB)
Installing collected packages: Mako, colorlog, cmaes, alembic, optuna
Successfully installed Mako-1.2.4 alembic-1.10.3 cmaes-0.9.1 colorlog-6.7.0 optuna-3.1.1
Note: you may need to restart the kernel to use updated packages.


In [42]:
import optuna
import sklearn.svm
def objective(trial):

    classifier = trial.suggest_categorical('classifier', ['RandomForest', 'SVC'])
    
    if classifier == 'RandomForest':
        n_estimators = trial.suggest_int('n_estimators', 200, 2000,10)
        max_depth = int(trial.suggest_float('max_depth', 10, 100, log=True))

        clf = sklearn.ensemble.RandomForestClassifier(
            n_estimators=n_estimators, max_depth=max_depth)
    else:
        c = trial.suggest_float('svc_c', 1e-10, 1e10, log=True)
        
        clf = sklearn.svm.SVC(C=c, gamma='auto')

    return sklearn.model_selection.cross_val_score(
        clf,X_train,y_train, n_jobs=-1, cv=3).mean()

In [43]:
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=100)

trial = study.best_trial

print('Accuracy: {}'.format(trial.value))
print("Best hyperparameters: {}".format(trial.params))

[32m[I 2023-04-20 19:12:34,850][0m A new study created in memory with name: no-name-31bf85e6-e496-4dc7-af60-88c8fbbb9a3f[0m
[32m[I 2023-04-20 19:12:38,735][0m Trial 0 finished with value: 0.640068547744301 and parameters: {'classifier': 'SVC', 'svc_c': 12.111658358752335}. Best is trial 0 with value: 0.640068547744301.[0m
[32m[I 2023-04-20 19:12:41,245][0m Trial 1 finished with value: 0.7508130081300813 and parameters: {'classifier': 'RandomForest', 'n_estimators': 970, 'max_depth': 24.828179974652866}. Best is trial 1 with value: 0.7508130081300813.[0m
[32m[I 2023-04-20 19:12:44,598][0m Trial 2 finished with value: 0.7524469950581859 and parameters: {'classifier': 'RandomForest', 'n_estimators': 1640, 'max_depth': 24.12377517033149}. Best is trial 2 with value: 0.7524469950581859.[0m
[32m[I 2023-04-20 19:12:46,764][0m Trial 3 finished with value: 0.7475609756097561 and parameters: {'classifier': 'RandomForest', 'n_estimators': 1280, 'max_depth': 67.33753565284957}. Best 

[32m[I 2023-04-20 19:13:44,814][0m Trial 35 finished with value: 0.7540650406504065 and parameters: {'classifier': 'RandomForest', 'n_estimators': 1800, 'max_depth': 18.383235739303654}. Best is trial 25 with value: 0.7589430894308943.[0m
[32m[I 2023-04-20 19:13:46,473][0m Trial 36 finished with value: 0.7540490993145226 and parameters: {'classifier': 'RandomForest', 'n_estimators': 1130, 'max_depth': 16.89341331359065}. Best is trial 25 with value: 0.7589430894308943.[0m
[32m[I 2023-04-20 19:13:46,510][0m Trial 37 finished with value: 0.640068547744301 and parameters: {'classifier': 'SVC', 'svc_c': 0.00024560755481018077}. Best is trial 25 with value: 0.7589430894308943.[0m
[32m[I 2023-04-20 19:13:48,877][0m Trial 38 finished with value: 0.7524310537223019 and parameters: {'classifier': 'RandomForest', 'n_estimators': 1570, 'max_depth': 24.075745541547096}. Best is trial 25 with value: 0.7589430894308943.[0m
[32m[I 2023-04-20 19:13:48,912][0m Trial 39 finished with value

[32m[I 2023-04-20 19:14:57,942][0m Trial 70 finished with value: 0.7524310537223019 and parameters: {'classifier': 'RandomForest', 'n_estimators': 1230, 'max_depth': 11.01282110860811}. Best is trial 25 with value: 0.7589430894308943.[0m
[32m[I 2023-04-20 19:15:00,046][0m Trial 71 finished with value: 0.7475370636059302 and parameters: {'classifier': 'RandomForest', 'n_estimators': 1400, 'max_depth': 16.86763119148031}. Best is trial 25 with value: 0.7589430894308943.[0m
[32m[I 2023-04-20 19:15:02,406][0m Trial 72 finished with value: 0.7507970667941973 and parameters: {'classifier': 'RandomForest', 'n_estimators': 1530, 'max_depth': 15.917449737134532}. Best is trial 25 with value: 0.7589430894308943.[0m
[32m[I 2023-04-20 19:15:04,562][0m Trial 73 finished with value: 0.7491710505340348 and parameters: {'classifier': 'RandomForest', 'n_estimators': 1400, 'max_depth': 20.33053970725848}. Best is trial 25 with value: 0.7589430894308943.[0m
[32m[I 2023-04-20 19:15:07,466][0

Accuracy: 0.7589430894308943
Best hyperparameters: {'classifier': 'RandomForest', 'n_estimators': 1770, 'max_depth': 19.330810275889245}


In [44]:
trial

FrozenTrial(number=25, state=TrialState.COMPLETE, values=[0.7589430894308943], datetime_start=datetime.datetime(2023, 4, 20, 19, 13, 20, 200114), datetime_complete=datetime.datetime(2023, 4, 20, 19, 13, 22, 799554), params={'classifier': 'RandomForest', 'n_estimators': 1770, 'max_depth': 19.330810275889245}, user_attrs={}, system_attrs={}, intermediate_values={}, distributions={'classifier': CategoricalDistribution(choices=('RandomForest', 'SVC')), 'n_estimators': IntDistribution(high=2000, log=False, low=200, step=10), 'max_depth': FloatDistribution(high=100.0, log=True, low=10.0, step=None)}, trial_id=25, value=None)

In [45]:
study.best_params


{'classifier': 'RandomForest',
 'n_estimators': 1770,
 'max_depth': 19.330810275889245}

In [46]:
rf=RandomForestClassifier(n_estimators=330,max_depth=30)
rf.fit(X_train,y_train)

RandomForestClassifier(max_depth=30, n_estimators=330)

In [47]:
y_pred=rf.predict(X_test)
print(confusion_matrix(y_test,y_pred))
print(accuracy_score(y_test,y_pred))
print(classification_report(y_test,y_pred))

[[95 12]
 [15 32]]
0.8246753246753247
              precision    recall  f1-score   support

           0       0.86      0.89      0.88       107
           1       0.73      0.68      0.70        47

    accuracy                           0.82       154
   macro avg       0.80      0.78      0.79       154
weighted avg       0.82      0.82      0.82       154

