In [31]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import RandomizedSearchCV

# using the previous demo grid (containing the hyperparameters with diff tunning)
grid = {"n_estimators":[10,100,200,500,1000,1200],
       "max_depth":[None,5,10,20,30],
       "max_features":["auto","sqrt"],
       "min_samples_split":[2,4,6],
       "min_samples_leaf":[1,2,4]}

#read the data 
heart_disease = pd.read_csv("../Data/heart-disease.csv")

# split the data into x and y
x = heart_disease.drop("target",axis= 1)
y = heart_disease["target"]

# now split into train and test
x_train,x_test,y_train,y_test = train_test_split(x,y,train_size=0.8)


# --------------- this below part will gonna save --------------------------------
# choosing the model
clf = RandomForestClassifier(n_jobs=-1)

# train the data
# clf.fit(x_train,y_train)

# find the score
# clf.score(x_test,y_test)

# we can also use hyperparameter for increasing the score
rs_clf = RandomizedSearchCV(estimator=clf, 
                            param_distributions=grid,
                            n_iter=10,
                            cv=5,
                            verbose=2                         
                         )

# traing(will see all the tuning based training of model) 
rs_clf.fit(x_train,y_train)



Fitting 5 folds for each of 10 candidates, totalling 50 fits
[CV] END max_depth=30, max_features=auto, min_samples_leaf=1, min_samples_split=2, n_estimators=1200; total time=   0.0s
[CV] END max_depth=30, max_features=auto, min_samples_leaf=1, min_samples_split=2, n_estimators=1200; total time=   0.0s
[CV] END max_depth=30, max_features=auto, min_samples_leaf=1, min_samples_split=2, n_estimators=1200; total time=   0.0s
[CV] END max_depth=30, max_features=auto, min_samples_leaf=1, min_samples_split=2, n_estimators=1200; total time=   0.0s
[CV] END max_depth=30, max_features=auto, min_samples_leaf=1, min_samples_split=2, n_estimators=1200; total time=   0.0s
[CV] END max_depth=5, max_features=sqrt, min_samples_leaf=2, min_samples_split=4, n_estimators=200; total time=   3.2s
[CV] END max_depth=5, max_features=sqrt, min_samples_leaf=2, min_samples_split=4, n_estimators=200; total time=   0.3s
[CV] END max_depth=5, max_features=sqrt, min_samples_leaf=2, min_samples_split=4, n_estimators=2

15 fits failed out of a total of 50.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
15 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\Riya\Desktop\machine-learning\env\lib\site-packages\sklearn\model_selection\_validation.py", line 732, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Riya\Desktop\machine-learning\env\lib\site-packages\sklearn\base.py", line 1144, in wrapper
    estimator._validate_params()
  File "C:\Users\Riya\Desktop\machine-learning\env\lib\site-packages\sklearn\base.py", line 637, in _validate_params
    validate_parameter_constraints(
  File "C:\Users\Riya\Desktop\machine-learning\env\lib\site-packages\sklearn\utils\_param_validation.py

In [32]:
# now let's check the best combination made by RandomizedSearchCV
rs_clf.best_params_

{'n_estimators': 200,
 'min_samples_split': 4,
 'min_samples_leaf': 2,
 'max_features': 'sqrt',
 'max_depth': 5}

In [33]:
# score finding
# takes the best tuning and for result
rs_clf.score(x_test,y_test)

0.7868852459016393

### Two ways to save and load machine learning models
* With python's pickle module
* With the joblib module(prefered)

### using joblib


In [34]:
from joblib import dump , load

# save model to file
dump(rs_clf , filename="saving-model-with-joblib-final.joblib")

['saving-model-with-joblib-final.joblib']

In [35]:
# import a saved joblib model
model = load(filename="saving-model-with-joblib-final.joblib")

In [36]:
model

In [37]:
model.score(x_test,y_test)

0.7868852459016393