# Importing Libraries

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# Restyle matplotlib through jupyterthemes so that the x and y axes remain visible:
# they are drawn in white instead of black (otherwise everything shows up in black).
from jupyterthemes import jtplot

jtplot.style(theme="monokai", context="notebook", ticks=True, grid=False)

# Importing the Dataset

In [2]:
# Read the raw table, then separate the "% Silica Concentrate" target column
# from the remaining columns, which serve as features.
mining_df = pd.read_csv("mining_data.csv")
y = mining_df.loc[:, "% Silica Concentrate"]
X = mining_df.drop("% Silica Concentrate", axis=1)

In [3]:
# Dimensions of the feature frame as (rows, columns).
X.shape

(245700, 22)

In [4]:
# Dimensions of the target column; a single-element tuple because y is one-dimensional here.
y.shape

(245700,)

In [5]:
# Swap the pandas containers for plain NumPy arrays.
X, y = np.array(X), np.array(y)

In [6]:
# Reshape y into a two-dimensional column array: one row per sample, one column.
y = np.reshape(y, (-1, 1))
y.shape

(245700, 1)

# SPLITTING THE DATASET INTO THE TRAINING SET AND TEST SET

In [7]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing.
# random_state pins the shuffle so that a fresh "Restart & Run All" reproduces the
# same partition (and therefore the same downstream scores) instead of a new random
# split on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [8]:
# Report the shape of each split, one per line.
print(
    f"X_train : {X_train.shape} ",
    f"X_test  : {X_test.shape}  ",
    f"y_train : {y_train.shape} ",
    f"y_test  : {y_test.shape}  ",
    sep="\n",
)

X_train : (196560, 22) 
X_test  : (49140, 22)  
y_train : (196560, 1) 
y_test  : (49140, 1)  


# FEATURE SCALING

In [9]:
from sklearn.preprocessing import StandardScaler, MinMaxScaler

# Standardize every feature column to zero mean and unit variance.
#
# fit_transform() returns a NEW array and leaves its input untouched, so the result
# has to be assigned; previously it was only displayed and the model kept training
# on unscaled data. The scaler is fitted on the training split only and then reused
# for the test split, so no test-set statistics leak into the preprocessing.
# (StandardScaler ignores y, so it is not passed.)
SC = StandardScaler()
X_train = SC.fit_transform(X_train)
X_test = SC.transform(X_test)

# Show the scaled training features.
X_train

array([[ 0.65606824, -0.4892553 ,  0.17200099, ...,  0.89382929,
         0.60865804, -0.54385344],
       [-0.01709948, -0.23941257, -0.0328783 , ...,  0.97166296,
         0.76684215, -0.15931918],
       [ 1.50189281, -1.2343745 , -0.79610592, ...,  0.81900471,
         1.80417406,  1.37881785],
       ...,
       [-1.40223422,  1.25376516,  1.5845373 , ..., -1.87169865,
        -0.42488048,  0.80648779],
       [-0.23049559,  0.18972906, -1.71470032, ..., -0.60772706,
        -0.19247641, -0.74953456],
       [ 0.12063801, -0.29966876, -0.54292083, ...,  0.40650619,
         0.73036507, -1.9057792 ]])

# EVALUATE RANDOM FOREST

In [10]:
from sklearn.ensemble import RandomForestRegressor

# random_state fixes the bootstrap sampling / feature draws so that the fitted
# forest, and the score below, are repeatable across runs.
RandomForest_model = RandomForestRegressor(
    n_estimators=100, max_depth=10, random_state=42
)
# y was reshaped into a column earlier; flatten it back to 1-D for the estimator.
RandomForest_model.fit(X_train, y_train.ravel())

# NOTE: for a regressor, .score() returns the coefficient of determination (R^2),
# not a classification accuracy. The variable name is kept unchanged because later
# cells may refer to it.
accuracy_RandomForest = RandomForest_model.score(X_test, y_test.ravel())
accuracy_RandomForest

0.8904087891375919