In [1]:
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt 
from sklearn import datasets

In [2]:
df = datasets.load_iris()
df.keys()

dict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names', 'filename'])

In [3]:
data = df['data']
target = df['target']
featureNames = df['feature_names']

In [4]:
data = pd.DataFrame(data, columns = [featureNames[0],featureNames[1],featureNames[2],featureNames[3]])
data.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [5]:
target = pd.DataFrame(target, columns = ['Target'])
target

Unnamed: 0,Target
0,0
1,0
2,0
3,0
4,0
...,...
145,2
146,2
147,2
148,2


In [6]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor

x_train, x_test, y_train, y_test = train_test_split(data,target, test_size = 0.2)

In [7]:
rf_reg = RandomForestRegressor()

In [8]:
rf_reg.fit(x_train, y_train.values.ravel())

RandomForestRegressor()

In [9]:
from sklearn.metrics import accuracy_score,mean_squared_error, r2_score
y_test_pred = rf_reg.predict(x_test)
mean_squared_error(y_test, y_test_pred),r2_score(y_test,y_test_pred),rf_reg.score(x_train, y_train)

(0.016723333333333326, 0.9812798507462687, 0.9911506276150628)

In [10]:
from sklearn.model_selection import GridSearchCV
param_grid = [{
    'n_estimators': [3,10,30,40,50,60,80,100], 'max_features':[2,4]},
    {'bootstrap':[False], 'n_estimators':[3,10],'max_features':[2,3,4]},
]
rf_reg = RandomForestRegressor()
grid_search = GridSearchCV(rf_reg, param_grid, cv = 5, scoring = 'neg_mean_squared_error', return_train_score = True )
grid_search.fit(x_train, y_train.values.ravel())

GridSearchCV(cv=5, estimator=RandomForestRegressor(),
             param_grid=[{'max_features': [2, 4],
                          'n_estimators': [3, 10, 30, 40, 50, 60, 80, 100]},
                         {'bootstrap': [False], 'max_features': [2, 3, 4],
                          'n_estimators': [3, 10]}],
             return_train_score=True, scoring='neg_mean_squared_error')

In [11]:
grid_search.best_estimator_, grid_search.best_params_, grid_search.best_score_

(RandomForestRegressor(max_features=4, n_estimators=30),
 {'max_features': 4, 'n_estimators': 30},
 -0.038907407407407404)

In [12]:
y_test_pred = grid_search.predict(x_test)
mean_squared_error(y_test, y_test_pred),r2_score(y_test,y_test_pred),grid_search.score(x_train, y_train)

(0.01648148148148148, 0.9815505804311775, -0.0060833333333333356)

In [13]:
x_train

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
18,5.7,3.8,1.7,0.3
48,5.3,3.7,1.5,0.2
73,6.1,2.8,4.7,1.2
90,5.5,2.6,4.4,1.2
47,4.6,3.2,1.4,0.2
...,...,...,...,...
22,4.6,3.6,1.0,0.2
45,4.8,3.0,1.4,0.3
71,6.1,2.8,4.0,1.3
34,4.9,3.1,1.5,0.2


In [14]:
from sklearn.model_selection import cross_val_score, RepeatedStratifiedKFold
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.pipeline import Pipeline

X = data.astype('float32')
y = target.astype('float32')
trans = MinMaxScaler()
model = RandomForestClassifier()
pipeline = Pipeline(steps=[('t', trans), ('m', model)])
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
n_scores = cross_val_score(pipeline, X, y, scoring='accuracy', cv=cv, n_jobs=-1, error_score='raise')
print('Accuracy: %.3f (%.3f)' % (np.mean(n_scores), np.std(n_scores)))

Accuracy: 0.949 (0.048)


In [15]:
trans_std = StandardScaler()
model = RandomForestClassifier()
pipeline = Pipeline(steps=[('t', trans_std), ('m', model)])
n_scores1 = cross_val_score(pipeline, X, y, scoring='accuracy', cv=cv, n_jobs=-1, error_score='raise')
print('Accuracy: %.3f (%.3f)' % (np.mean(n_scores1), np.std(n_scores1)))

Accuracy: 0.958 (0.040)


In [16]:
from sklearn.preprocessing import Normalizer
scaled_x = trans.fit_transform(X)
model1 = RandomForestRegressor().fit(X,y.values.ravel())
model2 = RandomForestRegressor().fit(scaled_x,y.values.ravel())
pred_y = model1.predict(X)
pred_y_scaled = model2.predict(scaled_x)
print("Model One Score : ",model1.score(X, y))
print("Model Two Score : ",model2.score(scaled_x,y))
print("------------------- ")
print("Model One MSE Score : ",mean_squared_error(pred_y,y))
print("Model Two MSE Score : ", mean_squared_error(pred_y_scaled,y))
print("------------------- ")
print("Model One RMSE Score : ",np.sqrt(mean_squared_error(pred_y,y)))
print("Model Two RMSE Score : ", np.sqrt(mean_squared_error(pred_y_scaled,y)))
print("------------------- ")
print("Model One R2 Score : ",r2_score(pred_y,y))
print("Model Two R2 Score : ",r2_score(pred_y_scaled,y))
print("------------------- ")
print("MinMax Scaler RFClassifier Accuracy: %.3f (%.3f)" % (np.mean(n_scores), np.std(n_scores)))
print("Standard Scaler RFClassifier'Accuracy: %.3f (%.3f)" % (np.mean(n_scores1), np.std(n_scores1)))

Model One Score :  0.992081
Model Two Score :  0.993468
------------------- 
Model One MSE Score :  0.005279333333333335
Model Two MSE Score :  0.004354666666666668
------------------- 
Model One RMSE Score :  0.07265902100450662
Model Two RMSE Score :  0.06598989821682306
------------------- 
Model One R2 Score :  0.9918402787020352
Model Two R2 Score :  0.9933027749942925
------------------- 
MinMax Scaler RFClassifier Accuracy: 0.949 (0.048)
Standard Scaler RFClassifier'Accuracy: 0.958 (0.040)
