In [18]:
from sklearn.model_selection import GridSearchCV,RandomizedSearchCV

from sklearn.ensemble import RandomForestRegressor
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression,LogisticRegression
from sklearn.model_selection import KFold,cross_val_score,cross_validate,cross_val_predict,StratifiedKFold
from tqdm import tqdm

<h1>Grid Search</h1>

In [12]:
cancer = pd.read_csv('../Cases/Wisconsin/BreastCancer.csv',index_col=0)

lr = LogisticRegression()
kfolds = StratifiedKFold(n_splits=5,shuffle=True,random_state=25)

X = cancer.drop('Class',axis = 1)
y = cancer['Class']

param = {'solver' : ['lbfgs','liblinear','newton-cg','sag','saga','newton-cholesky'],
         'C' : np.linspace(0.001,4,20)}

gcv = GridSearchCV(lr,param_grid=param,cv=kfolds,scoring='roc_auc')
gcv.fit(X,y)

print(gcv.best_params_)
pd.DataFrame(gcv.best_score_)




{'C': np.float64(0.2114736842105263), 'solver': 'lbfgs'}


ValueError: DataFrame constructor not properly called!

<h1>Randomised Search</h1>

In [6]:
df_cv = pd.DataFrame(gcv.cv_results_)           

In [15]:
cancer = pd.read_csv('../Cases/Wisconsin/BreastCancer.csv',index_col=0)

lr = LogisticRegression()
kfolds = StratifiedKFold(n_splits=5,shuffle=True,random_state=25)

X = cancer.drop('Class',axis = 1)
y = cancer['Class']

param = {'solver' : ['lbfgs','liblinear','newton-cg','sag','saga','newton-cholesky'],
         'C' : np.linspace(0.001,4,20)}

rgcv = RandomizedSearchCV(lr,param_distributions=param,cv=kfolds,scoring='roc_auc',n_iter=20,verbose=3)
rgcv.fit(X,y)

print(rgcv.best_params_)
print(rgcv.best_score_)
pd.DataFrame(rgcv.cv_results_)


Fitting 5 folds for each of 20 candidates, totalling 100 fits
[CV 1/5] END ..C=2.737157894736842, solver=saga;, score=0.988 total time=   0.0s
[CV 2/5] END ..C=2.737157894736842, solver=saga;, score=0.995 total time=   0.0s
[CV 3/5] END ..C=2.737157894736842, solver=saga;, score=0.994 total time=   0.0s
[CV 4/5] END ..C=2.737157894736842, solver=saga;, score=0.998 total time=   0.0s
[CV 5/5] END ..C=2.737157894736842, solver=saga;, score=0.993 total time=   0.0s
[CV 1/5] END C=1.2638421052631577, solver=newton-cholesky;, score=0.993 total time=   0.0s
[CV 2/5] END C=1.2638421052631577, solver=newton-cholesky;, score=0.997 total time=   0.0s
[CV 3/5] END C=1.2638421052631577, solver=newton-cholesky;, score=0.989 total time=   0.0s
[CV 4/5] END C=1.2638421052631577, solver=newton-cholesky;, score=0.999 total time=   0.0s
[CV 5/5] END C=1.2638421052631577, solver=newton-cholesky;, score=0.994 total time=   0.0s
[CV 1/5] END .C=1.0533684210526315, solver=saga;, score=0.989 total time=   0.



[CV 5/5] END .C=1.0533684210526315, solver=saga;, score=0.993 total time=   0.0s
[CV 1/5] END C=3.7895263157894736, solver=lbfgs;, score=0.993 total time=   0.0s
[CV 2/5] END C=3.7895263157894736, solver=lbfgs;, score=0.997 total time=   0.0s
[CV 3/5] END C=3.7895263157894736, solver=lbfgs;, score=0.989 total time=   0.0s
[CV 4/5] END C=3.7895263157894736, solver=lbfgs;, score=0.999 total time=   0.0s
[CV 5/5] END C=3.7895263157894736, solver=lbfgs;, score=0.995 total time=   0.0s
[CV 1/5] END C=3.368578947368421, solver=newton-cholesky;, score=0.993 total time=   0.0s
[CV 2/5] END C=3.368578947368421, solver=newton-cholesky;, score=0.997 total time=   0.0s
[CV 3/5] END C=3.368578947368421, solver=newton-cholesky;, score=0.989 total time=   0.0s
[CV 4/5] END C=3.368578947368421, solver=newton-cholesky;, score=0.999 total time=   0.0s
[CV 5/5] END C=3.368578947368421, solver=newton-cholesky;, score=0.995 total time=   0.0s
[CV 1/5] END C=2.5266842105263154, solver=newton-cg;, score=0.99



[CV 5/5] END ...C=2.947631578947368, solver=sag;, score=0.994 total time=   0.0s
[CV 1/5] END C=1.6847894736842104, solver=liblinear;, score=0.992 total time=   0.0s
[CV 2/5] END C=1.6847894736842104, solver=liblinear;, score=0.996 total time=   0.0s
[CV 3/5] END C=1.6847894736842104, solver=liblinear;, score=0.990 total time=   0.0s
[CV 4/5] END C=1.6847894736842104, solver=liblinear;, score=0.999 total time=   0.0s
[CV 5/5] END C=1.6847894736842104, solver=liblinear;, score=0.994 total time=   0.0s
[CV 1/5] END ..C=0.4219473684210526, solver=sag;, score=0.992 total time=   0.0s
[CV 2/5] END ..C=0.4219473684210526, solver=sag;, score=0.995 total time=   0.0s
[CV 3/5] END ..C=0.4219473684210526, solver=sag;, score=0.991 total time=   0.0s
[CV 4/5] END ..C=0.4219473684210526, solver=sag;, score=0.999 total time=   0.0s
[CV 5/5] END ..C=0.4219473684210526, solver=sag;, score=0.994 total time=   0.0s
[CV 1/5] END ..C=3.368578947368421, solver=saga;, score=0.988 total time=   0.0s
[CV 2/5]



[CV 1/5] END ...C=2.105736842105263, solver=sag;, score=0.992 total time=   0.0s
[CV 2/5] END ...C=2.105736842105263, solver=sag;, score=0.996 total time=   0.0s
[CV 3/5] END ...C=2.105736842105263, solver=sag;, score=0.990 total time=   0.0s
[CV 4/5] END ...C=2.105736842105263, solver=sag;, score=0.999 total time=   0.0s
[CV 5/5] END ...C=2.105736842105263, solver=sag;, score=0.994 total time=   0.0s
[CV 1/5] END .C=1.2638421052631577, solver=saga;, score=0.989 total time=   0.0s
[CV 2/5] END .C=1.2638421052631577, solver=saga;, score=0.995 total time=   0.0s
[CV 3/5] END .C=1.2638421052631577, solver=saga;, score=0.994 total time=   0.0s
[CV 4/5] END .C=1.2638421052631577, solver=saga;, score=0.998 total time=   0.0s
[CV 5/5] END .C=1.2638421052631577, solver=saga;, score=0.993 total time=   0.0s
[CV 1/5] END C=3.1581052631578945, solver=liblinear;, score=0.993 total time=   0.0s
[CV 2/5] END C=3.1581052631578945, solver=liblinear;, score=0.996 total time=   0.0s
[CV 3/5] END C=3.158



Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_solver,param_C,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.014015,0.000862,0.003483,0.000494,saga,2.737158,"{'solver': 'saga', 'C': 2.737157894736842}",0.988451,0.995018,0.993659,0.997982,0.993132,0.993648,0.003096,19
1,0.00655,0.000594,0.003466,0.000346,newton-cholesky,1.263842,"{'solver': 'newton-cholesky', 'C': 1.263842105...",0.99298,0.99683,0.98913,0.998879,0.994277,0.994419,0.003339,10
2,0.013852,0.003641,0.003284,0.000665,saga,1.053368,"{'solver': 'saga', 'C': 1.0533684210526315}",0.988678,0.995018,0.993659,0.997982,0.993132,0.993694,0.003021,16
3,0.013376,0.001452,0.003224,0.000257,lbfgs,3.789526,"{'solver': 'lbfgs', 'C': 3.7895263157894736}",0.992754,0.99683,0.98913,0.998879,0.994505,0.99442,0.00336,5
4,0.005565,0.000306,0.00289,0.000171,newton-cholesky,3.368579,"{'solver': 'newton-cholesky', 'C': 3.368578947...",0.992754,0.99683,0.98913,0.998879,0.994505,0.99442,0.00336,5
5,0.009606,0.000598,0.00301,0.000356,newton-cg,2.526684,"{'solver': 'newton-cg', 'C': 2.5266842105263154}",0.992754,0.99683,0.98913,0.998879,0.994505,0.99442,0.00336,5
6,0.007108,0.001214,0.003202,0.000356,newton-cholesky,3.158105,"{'solver': 'newton-cholesky', 'C': 3.158105263...",0.992754,0.99683,0.98913,0.998879,0.994505,0.99442,0.00336,5
7,0.009333,0.000275,0.002781,0.00026,newton-cg,1.474316,"{'solver': 'newton-cg', 'C': 1.474315789473684}",0.99298,0.99683,0.98913,0.998879,0.994505,0.994465,0.003339,3
8,0.009438,0.000677,0.002703,7.1e-05,newton-cg,2.737158,"{'solver': 'newton-cg', 'C': 2.737157894736842}",0.992754,0.99683,0.98913,0.998879,0.994505,0.99442,0.00336,5
9,0.011305,0.000773,0.002983,0.000437,sag,2.947632,"{'solver': 'sag', 'C': 2.947631578947368}",0.991621,0.995697,0.990489,0.998654,0.994048,0.994102,0.002915,15


<h1>Concrete</h1>

In [22]:
concrete = pd.read_csv('../Cases/Concrete_Strength/Concrete_Data.csv')
tst_concrete = pd.read_csv('../Cases/Concrete_Strength/testConcrete.csv')
kfold = KFold(n_splits=5,shuffle=True,random_state=25)
y = concrete['Strength']
X = concrete.drop('Strength',axis = 1)

rf = RandomForestRegressor()
param = {'max_features':[3,4,5,6,7],
         'max_depth':[3,4,None],
         'min_samples_split':[2,5,10,20],
         'min_samples_leaf':[1,5,10,20]}

gcv = GridSearchCV(rf,param_grid=param,cv =kfold,scoring='r2')
gcv.fit(X,y)

print(gcv.best_params_,gcv.best_score_)

{'max_depth': None, 'max_features': 4, 'min_samples_leaf': 1, 'min_samples_split': 2} 0.9147951654150491


In [23]:

bm = gcv.best_estimator_
bm.predict()


NameError: name 'tst' is not defined