# Random Forest Regression

## Importing the libraries

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
import pickle

## Importing the dataset

In [3]:
# Load the raw table from the working directory.
# Features are every column except the first and the last; the first column is
# skipped (presumably an ID/index column — TODO confirm) and the last column is
# the prediction target.
dataset = pd.read_csv('./test_data.csv')
x, y = dataset.iloc[:, 1:-1].values, dataset.iloc[:, -1].values

In [4]:
# Preview the feature matrix; NumPy abbreviates the middle rows of a large array with "...".
print(x)

[[1920  237   31    5   29]
 [1636  179   73   19    3]
 [1279  173   48    8   17]
 ...
 [1381  360   59   11   22]
 [2172   14   29    6   21]
 [ 497  237   22   18   18]]


In [5]:
# Preview the target vector taken from the last column of the dataset.
print(y)

[2 4 5 ... 1 5 3]


## Splitting the Dataset into Training and Test Set

In [6]:

from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation. random_state is fixed so the same
# rows land in each partition on every run.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.20,
    random_state=0,
)

In [7]:
# Preview the training features produced by the split (not yet scaled at this point).
print(x_train)

[[ 317  465   28    4   27]
 [ 990  177   91    3   20]
 [2213  109   13   19   13]
 ...
 [ 607  134   85    6   11]
 [2537  349   58   14   10]
 [ 448  203   83    2   18]]


In [8]:
# Preview the training targets produced by the split.
print(y_train)

[4 2 2 ... 2 4 1]


In [9]:
# Preview the held-out test features produced by the split (not yet scaled at this point).
print(x_test)

[[ 569  420   28    7   11]
 [2907  307   15    9   17]
 [2816  298   26    7   19]
 ...
 [1281   83   68    9   20]
 [ 774   65   54   18    2]
 [ 271  275   54    6    1]]


In [10]:
# Preview the held-out test targets produced by the split.
print(y_test)

[5 5 3 ... 2 3 5]


## Feature Scaling

In [11]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training rows only, then apply that
# same fitted scaler to both splits so the test rows never influence the scaling.
# NOTE: x_train and x_test are overwritten with their scaled versions, so this
# cell is not idempotent — restart and run from the top to recover the raw values.
sc = StandardScaler().fit(x_train)
x_train = sc.transform(x_train)
x_test = sc.transform(x_test)

In [12]:
# Preview the training features after standardization with `sc`.
print(x_train)

[[-1.36001218  1.49268789 -0.77619947 -1.10903821  1.32613252]
 [-0.58283192 -0.50759181  1.40808044 -1.28192024  0.51585646]
 [ 0.82948821 -0.97988007 -1.29626611  1.48419221 -0.29441959]
 ...
 [-1.02512024 -0.80624468  1.20005378 -0.76327416 -0.52592704]
 [ 1.20364334  0.68701968  0.26393382  0.61978207 -0.64168076]
 [-1.2087334  -0.327011    1.13071156 -1.45480227  0.28434902]]


In [13]:
# Preview the test features after applying the scaler that was fitted on the training split.
print(x_test)

[[-1.06900263  1.18014419 -0.77619947 -0.59039213 -0.52592704]
 [ 1.63091925  0.39531222 -1.22692389 -0.24462807  0.1685953 ]
 [ 1.52583247  0.33280348 -0.84554169 -0.59039213  0.40010274]
 ...
 [-0.24678519 -1.16046088  0.61064492 -0.24462807  0.51585646]
 [-0.83226868 -1.28547836  0.12524938  1.31131019 -1.56771053]
 [-1.41313297  0.17305892  0.12524938 -0.76327416 -1.68346426]]


## Training the Random Forest Regression model on the training set

In [14]:
from sklearn.ensemble import RandomForestRegressor

# A forest of 1000 trees; random_state is pinned so repeated fits give the same model.
regressor = RandomForestRegressor(random_state=0, n_estimators=1000)

# Fit on the scaled training split only. Left as a bare last expression so the
# notebook echoes the fitted estimator.
regressor.fit(x_train, y_train)

RandomForestRegressor(n_estimators=1000, random_state=0)

In [15]:
# Persist the fitted forest to the file 'score' in the working directory.
# `pickle` is already imported in the import cells at the top of the notebook,
# so it is not imported again here.
# Only the model is written: the scaler `sc` is not part of this file, so the
# prediction cell that reloads 'score' still depends on the in-memory `sc`.
with open('score', 'wb') as model_file:
    pickle.dump(regressor, model_file)

In [17]:
# Three hand-entered samples with five values each, one per feature column.
arr = [
    [857, 473, 29, 1, 4],
    [2600, 187, 33, 20, 17],
    [1829, 206, 79, 5, 8],
]

# Reload the model saved to 'score'. pickle.load can execute arbitrary code,
# so it should only ever be pointed at files this notebook wrote itself.
with open('score', 'rb') as model_file:
    regressor = pickle.load(model_file)

# The raw samples must go through the scaler fitted on the training split
# before prediction, because the model was trained on scaled features.
res = regressor.predict(sc.transform(arr))
print(res)

[3.564 1.6   2.755]


In [20]:
# Predict targets for the scaled, held-out test features.
y_pred = regressor.predict(x_test)

In [21]:
from sklearn.metrics import r2_score

# Coefficient of determination (R^2) of the predictions on the held-out test
# split. Left as a bare last expression so the notebook displays the value.
r2_score(y_true=y_test, y_pred=y_pred)

0.16325039342944403