Model Building

In [51]:
from sklearn.ensemble import RandomForestRegressor
regressor = RandomForestRegressor(n_estimators = 10, random_state = 0)
regressor.fit(x_train, y_train)
y_pred = regressor.predict(x_test)

In [52]:
from sklearn import metrics
print('MAE:',metrics.mean_absolute_error(y_test,y_pred))
print('MSE:',metrics.mean_squared_error(y_test,y_pred))
print('RMSE:',np.sqrt(metrics.mean_squared_error(y_test,y_pred)))

MAE: 0.17370689655172414
MSE: 0.11508620689655172
RMSE: 0.3392435804795011


In [59]:
#accuracy of the model
metrics.r2_score(y_test, y_pred)

0.794062302603145

In [60]:
#Predicting the test set result  
y_pred= regressor.predict(x_test)

In [61]:
y_pred

array([0.3, 2. , 1. , 0. , 1.1, 2. , 0. , 1.1, 0.8, 0. , 2. , 0.5, 1. ,
       0.3, 2. , 1.8, 1. , 1.1, 1.5, 1.7, 0. , 0. , 0. , 1.1, 1. , 1.1,
       1. , 1.5, 2. , 1. , 1. , 1. , 1. , 1.2, 0.4, 0.8, 0.4, 0.2, 0.8,
       0.6, 1.8, 0.7, 0.8, 1.8, 0.7, 0. , 0.1, 1.6, 0.1, 0.7, 0. , 0. ,
       0. , 0. , 2. , 0. , 0. , 1. , 0.6, 1. , 2. , 1. , 0.9, 2. , 0. ,
       0. , 0.3, 0. , 1. , 0. , 0.9, 1.1, 0.6, 0.6, 1. , 0. , 1. , 1.2,
       2.1, 1.8, 1.1, 0. , 0. , 0.6, 1.1, 0. , 0. , 1.2, 0.6, 0. , 1.1,
       0. , 1.1, 0.5, 1.7, 0.5, 0.6, 0. , 2. , 1.3, 1. , 0. , 1.1, 0. ,
       2.1, 1.5, 1.6, 0. , 0. , 1.7, 1.6, 1.4, 1. , 1. , 0.2, 0. , 0. ,
       0. , 0.9, 0. , 0.2, 1. , 1.5, 0. , 1. , 0. , 0.5, 0. , 1. , 1.1,
       2. , 1.3, 0.9, 1. , 1.4, 0.9, 0. , 1.2, 1.1, 1.6, 1.8, 1. , 1. ,
       0. , 0.9, 0.1, 1. , 0. , 1.5, 0. , 1.9, 0.7, 0. , 1. , 0. , 0. ,
       2. , 1. , 1. , 1.8, 0.4, 1.8, 0. , 0. , 0. , 2. , 0. , 0.4, 0.9,
       1.6, 1.3, 1.5, 0. , 1. , 0. , 0.9, 0. , 0. , 0. , 0.9, 0.

Hyperparameter tuning

In [63]:
from sklearn.model_selection import GridSearchCV

In [64]:
import numpy as np


In [65]:
max_features_range = np.arange(1,6,1)
print(max_features_range)

[1 2 3 4 5]


In [66]:
n_estimators_range = np.arange(10,210,10)
print(n_estimators_range)

[ 10  20  30  40  50  60  70  80  90 100 110 120 130 140 150 160 170 180
 190 200]


In [67]:
param_grid = dict(max_features=max_features_range, n_estimators=n_estimators_range)
print(param_grid)

{'max_features': array([1, 2, 3, 4, 5]), 'n_estimators': array([ 10,  20,  30,  40,  50,  60,  70,  80,  90, 100, 110, 120, 130,
       140, 150, 160, 170, 180, 190, 200])}


In [68]:
rf = RandomForestRegressor()

In [69]:
grid = GridSearchCV(estimator=rf, param_grid=param_grid, cv=2)
print(grid)

GridSearchCV(cv=2, estimator=RandomForestRegressor(),
             param_grid={'max_features': array([1, 2, 3, 4, 5]),
                         'n_estimators': array([ 10,  20,  30,  40,  50,  60,  70,  80,  90, 100, 110, 120, 130,
       140, 150, 160, 170, 180, 190, 200])})


In [70]:
grid.fit(x_train, y_train)

GridSearchCV(cv=2, estimator=RandomForestRegressor(),
             param_grid={'max_features': array([1, 2, 3, 4, 5]),
                         'n_estimators': array([ 10,  20,  30,  40,  50,  60,  70,  80,  90, 100, 110, 120, 130,
       140, 150, 160, 170, 180, 190, 200])})

In [71]:
print("Optimal parameters %s accuracy score of  %0.2f"
      % (grid.best_params_, grid.best_score_))

Optimal parameters {'max_features': 5, 'n_estimators': 190} accuracy score of  0.81


In [72]:
from sklearn import datasets
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import KFold, cross_val_score

X, y = datasets.load_iris(return_X_y=True)

clf = RandomForestRegressor(random_state=42)

k_folds = KFold(n_splits = 5)

scores = cross_val_score(clf, X, y, cv = k_folds)

print("Cross Validation Scores: ", scores)
print("Average CV Score: ", scores.mean())
print("Number of CV Scores used in Average: ", len(scores))

Cross Validation Scores:  [1.       0.984085 0.       0.77569  0.      ]
Average CV Score:  0.551955
Number of CV Scores used in Average:  5


In [77]:
import pickle 
pickle.dump(y_pred, open('wqi.pkl', 'wb')) 
model = pickle.load(open('wqi.pkl','rb'))

Training model on ibm 

In [78]:
from ibm_watson_machine_learning import APIClient

In [79]:
wml_credentials = {
    "apikey":"EGyk5k6320eQVTUSqS3piGoS3eAKWeZu5ywrJQHajo-3",
    "url":"https://us-south.ml.cloud.ibm.com"
}

In [80]:
wml_client = APIClient(wml_credentials)
wml_client.spaces.list()

Python 3.7 and 3.8 frameworks are deprecated and will be removed in a future release. Use Python 3.9 framework instead.
Note: 'limit' is not provided. Only first 50 records will be displayed if the number of records exceed 50
--  ----  -------
ID  NAME  CREATED
--  ----  -------


In [None]:
MODEL_NAME = 'Water-Quality'
DEPLOYMENT_NAME = 'water-quality-prediction'
DEPLOY_MODEL = regressor

In [None]:
software_spec_uid = wml_client.software_specifications.get_id_by_name('runtime-22.1-py3.9')

In [None]:
model_props = {
    wml_client.repository.ModelMetaNames.NAME: MODEL_NAME, 
    wml_client.repository.ModelMetaNames.TYPE: 'scikit-learn_1.0', 
    wml_client.repository.ModelMetaNames.SOFTWARE_SPEC_UID: software_spec_uid 

In [None]:
model_details = wml_client.repository.store_model(
    model=DEPLOY_MODEL, 
    meta_props=model_props, 
    training_data=x_train, 
    training_target=y_train
)