# Random Forest Classification

## Importing the libraries

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Importing the dataset

In [2]:
dataset = pd.read_excel('Research data.xlsx')
y = dataset.iloc[:, :-1].values
X = dataset.iloc[:, [-1]].values

## Splitting the dataset into the Training set and Test set

In [3]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 0)

In [4]:
print(X_train)

[[188.13 ]
 [247.93 ]
 [154.39 ]
 ...
 [ 16.556]
 [137.73 ]
 [249.2  ]]


In [5]:
print(y_train)

[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]


In [6]:
print(X_test)

[[134.31 ]
 [126.88 ]
 [ 52.497]
 ...
 [ 99.893]
 [240.75 ]
 [121.61 ]]


In [7]:
print(y_test)

[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 1]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]


## Feature Scaling

In [8]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [9]:
print(X_train)

[[ 0.11024675]
 [ 0.76572709]
 [-0.25958447]
 ...
 [-1.77041186]
 [-0.44219823]
 [ 0.77964783]]


In [10]:
print(X_test)

[[-0.47968556]
 [-0.56112735]
 [-1.37645502]
 ...
 [-0.85693752]
 [ 0.6870256 ]
 [-0.61889293]]


## Training the Random Forest Classification model on the Training set

In [11]:
from sklearn.multioutput import MultiOutputClassifier
from sklearn.ensemble import RandomForestClassifier


RF = RandomForestClassifier(n_estimators = 10, criterion = 'entropy', random_state = 0)
classifier = MultiOutputClassifier(RF, n_jobs=-1)
classifier.fit(X_train,y_train)



MultiOutputClassifier(estimator=RandomForestClassifier(criterion='entropy',
                                                       n_estimators=10,
                                                       random_state=0),
                      n_jobs=-1)

## Predicting the Test set results

In [12]:
y_pred = classifier.predict(X_test)

In [13]:
X_test = sc.inverse_transform(X_test)

In [14]:
print(X_test[5000:5010])

[[155.06 ]
 [183.67 ]
 [135.1  ]
 [167.93 ]
 [130.78 ]
 [223.81 ]
 [156.01 ]
 [ 22.382]
 [189.78 ]
 [ 75.873]]


In [15]:
print(y_test[5000:5010])

[[0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]]


In [16]:
print(y_pred[5000:5010])

[[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]]


## Evaluation

In [17]:
classifier.score(X_test,y_test)

0.7616510150266709

In [21]:
from sklearn.metrics import accuracy_score, f1_score

In [22]:
accuracy_score(y_test.flatten(), y_pred.flatten())

0.9855878560712203

In [23]:
f1_score(y_test.flatten(), y_pred.flatten(), average = 'weighted')

0.9805340185282286

## Geometric Mean

In [24]:
from imblearn.metrics import geometric_mean_score

In [25]:
geometric_mean_score(y_test.flatten(), y_pred.flatten(), average='weighted')

0.16403547300555713

## Hamming Loss

In [26]:
np.sum(np.not_equal(y_test, y_pred))/float(y_test.size)

0.014412143928779621