In [3]:
import joblib
import pandas as pd

from sklearn.datasets import load_breast_cancer, load_iris, load_boston
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.model_selection import GridSearchCV

# Process Dataset Breast Cancer

In [30]:
# Load the breast cancer dataset as a single DataFrame (features plus the
# 'target' column) and keep a CSV copy next to the notebook.
data_breast_cancer = load_breast_cancer(as_frame=True).frame
data_breast_cancer.to_csv('data_breast_cancer.csv', index=False)

In [21]:
# Features are every column except 'target'; the label is the 'target' column.
X_breast_cancer = data_breast_cancer.drop(columns='target')
y_breast_cancer = data_breast_cancer['target']

In [35]:
# Candidate hyper-parameters: tree depth and bootstrap sample fraction are
# searched; 'bootstrap' and 'oob_score' are pinned to True.
grid = {
    'max_depth': [2, 5],
    'bootstrap': [True],
    'oob_score': [True],
    'max_samples': [0.8, 0.9],
}
classifier = RandomForestClassifier(oob_score=True, random_state=42)

# 5-fold cross-validated search over the grid, then keep the best estimator.
grid_classifier = GridSearchCV(estimator=classifier, param_grid=grid, cv=5)
grid_classifier.fit(X_breast_cancer, y_breast_cancer)
rf = grid_classifier.best_estimator_

# Report the selected parameters and the out-of-bag score of the kept model.
print('Parameters of best prediction model:', grid_classifier.best_params_, sep='\n')
print('OOB accuracy of prediction model:', rf.oob_score_, sep='\n')

Parameters of best prediction model:
{'bootstrap': True, 'max_depth': 5, 'max_samples': 0.9, 'oob_score': True}
OOB accuracy of prediction model:
0.9595782073813708


In [36]:
# Persist the selected breast cancer model to disk.
# NOTE(review): the file name is spelled 'breat' (not 'breast'); it is kept
# unchanged here because other code may load the model from this exact path.
filename_model = 'random_forest_breat_cancer.joblib'

# Use a context manager so the file handle is flushed and closed
# deterministically instead of being left open after the dump.
with open(filename_model, 'wb') as model_file:
    joblib.dump(rf, model_file)

# Process Dataset Iris

In [31]:
# Load the iris dataset as a single DataFrame (features plus the 'target'
# column) and keep a CSV copy next to the notebook.
data_iris = load_iris(as_frame=True).frame
data_iris.to_csv('data_iris.csv', index=False)

In [32]:
# Features are every column except 'target'; the label is the 'target' column.
X_iris = data_iris.drop(columns='target')
y_iris = data_iris['target']

In [33]:
# Every entry lists a single value, so the search evaluates exactly one
# configuration (500 shallow trees, 2 features per split).
grid = {
    'n_estimators': [500],
    'max_features': [2],
    'max_depth': [2],
    'bootstrap': [True],
    'oob_score': [True],
}
classifier = RandomForestClassifier(oob_score=True, random_state=42)

# 5-fold cross-validated search over the grid, then keep the best estimator.
grid_classifier = GridSearchCV(estimator=classifier, param_grid=grid, cv=5)
grid_classifier.fit(X_iris, y_iris)
rf = grid_classifier.best_estimator_

# Report the selected parameters and the out-of-bag score of the kept model.
print('Parameters of best prediction model:', grid_classifier.best_params_, sep='\n')
print('OOB accuracy of prediction model:', rf.oob_score_, sep='\n')

Parameters of best prediction model:
{'bootstrap': True, 'max_depth': 2, 'max_features': 2, 'n_estimators': 500, 'oob_score': True}
OOB accuracy of prediction model:
0.94


In [34]:
# Persist the selected iris model to disk.
filename_model = 'random_forest_iris.joblib'

# Use a context manager so the file handle is flushed and closed
# deterministically instead of being left open after the dump.
with open(filename_model, 'wb') as model_file:
    joblib.dump(rf, model_file)

# Process Dataset Boston

In [17]:
# NOTE: load_boston was deprecated in scikit-learn 1.0 and removed in 1.2,
# so this cell only runs on an older scikit-learn release.
data = load_boston()

# Build the frame directly from the feature matrix. This avoids allocating an
# empty, all-NaN object-dtype frame and overwriting it afterwards, and gives
# every feature column a numeric dtype from the start.
data_boston = pd.DataFrame(data['data'], columns=data['feature_names'])
data_boston['target'] = data['target']

# Keep a CSV copy of the assembled dataset next to the notebook.
data_boston.to_csv('data_boston.csv', index=False)

In [18]:
# Features are every column except 'target'; the label is the 'target' column.
X_boston = data_boston.drop(columns='target')
y_boston = data_boston['target']

In [21]:
# Every entry lists a single value, so the search evaluates exactly one
# configuration (500 shallow trees, 2 features per split).
grid = {
    'n_estimators': [500],
    'max_features': [2],
    'max_depth': [2],
    'bootstrap': [True],
    'oob_score': [True],
}

# The names 'classifier' / 'grid_classifier' are kept for consistency with the
# other sections of the notebook, but this model is a regressor.
classifier = RandomForestRegressor(oob_score=True, random_state=42)
grid_classifier = GridSearchCV(classifier, grid, cv=5)
grid_classifier.fit(X_boston, y_boston)
rf = grid_classifier.best_estimator_

print('Parameters of best prediction model:')
print(grid_classifier.best_params_)
# RandomForestRegressor.oob_score_ is an R^2 score (higher is better), not a
# mean squared error, so label it accordingly.
print('OOB R^2 of prediction model:')
print(rf.oob_score_)

Parameters of best prediction model:
{'bootstrap': True, 'max_depth': 2, 'max_features': 2, 'n_estimators': 500, 'oob_score': True}
OOB MSE of prediction model:
0.5714019899313769


In [22]:
# Persist the selected Boston housing model to disk.
filename_model = 'random_forest_boston.joblib'

# Use a context manager so the file handle is flushed and closed
# deterministically instead of being left open after the dump.
with open(filename_model, 'wb') as model_file:
    joblib.dump(rf, model_file)