# Importing Libraries

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# Apply a jupyterthemes plot style so that the x and y axes are drawn in a light
# colour instead of black and therefore stay readable in the notebook.
from jupyterthemes import jtplot

jtplot.style(
    theme="monokai",
    context="notebook",
    ticks=True,
    grid=False,
)

# Importing Data Set

In [2]:
# Load the raw mining data, then separate the prediction target
# ("% Silica Concentrate") from the remaining feature columns.
mining_df = pd.read_csv("mining_data.csv")
y = mining_df["% Silica Concentrate"]
X = mining_df.drop("% Silica Concentrate", axis=1)

In [3]:
# Sanity check: (rows, feature columns) of the frame left after dropping the target column.
X.shape

(245700, 22)

In [4]:
# Sanity check: the target is one-dimensional, with one value per row of the data set.
y.shape

(245700,)

In [5]:
# Convert the pandas objects (feature frame and target series) into plain NumPy arrays.
X, y = np.array(X), np.array(y)

In [6]:
# Reshape y into a two-dimensional column vector: one row per sample, one column.
y = np.reshape(y, (-1, 1))
y.shape

(245700, 1)

# SPLITTING THE DATASET INTO THE TRAINING SET AND TEST SET

In [7]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows as the test set.
# random_state pins the shuffle so the split, and every metric computed from it,
# is identical after "Restart Kernel -> Run All".
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [8]:
# Report the shape of each split, one line per array. The label padding and the
# trailing blanks reproduce the fixed-width layout of the printed lines exactly.
splits = {"X_train": X_train, "X_test": X_test, "y_train": y_train, "y_test": y_test}
for label, split in splits.items():
    trailing = " " * (8 - len(label))
    print(f"{label:<7} : {split.shape}{trailing}")

X_train : (196560, 22) 
X_test  : (49140, 22)  
y_train : (196560, 1) 
y_test  : (49140, 1)  


# FEATURE SCALING

In [9]:
from sklearn.preprocessing import StandardScaler, MinMaxScaler

# Standardise every feature to zero mean and unit variance.
# fit_transform returns a NEW array and leaves its input untouched, so the result
# has to be assigned; otherwise the data stays unscaled.
# The scaling statistics are learned from the training split only and then reused
# on the test split, so nothing about the test set leaks into preprocessing.
SC = StandardScaler()
X_train = SC.fit_transform(X_train)  # the target is not needed to fit a StandardScaler
X_test = SC.transform(X_test)
X_train

array([[-0.26830757,  0.04965069,  0.37448908, ...,  1.22579252,
         1.00941353,  0.62584319],
       [ 1.49871073, -1.23225136,  0.2126441 , ...,  1.38984732,
         1.10022018, -0.66231383],
       [-0.71393719,  0.82055398,  0.81470844, ..., -0.85782808,
        -0.53795809, -0.56391295],
       ...,
       [ 0.6171391 , -0.81522939,  0.32177039, ..., -0.98813275,
        -2.1103941 ,  0.92999138],
       [ 0.09207116, -0.04138932, -1.22635964, ..., -0.92846544,
        -1.09539787,  1.01050119],
       [-0.68099935,  0.37710104,  0.69638503, ...,  0.25243378,
         0.46295733,  0.66162533]])

# TRAIN AND EVALUATE RANDOM FOREST

In [10]:
from sklearn.ensemble import RandomForestRegressor

# random_state pins the bootstrap sampling and feature sub-sampling, so the fitted
# forest and its score are reproducible from run to run.
RandomForest_model = RandomForestRegressor(
    n_estimators=100, max_depth=10, random_state=42
)
# y was reshaped into a column vector earlier; ravel() flattens it back to the
# 1-D target that the regressor expects.
RandomForest_model.fit(X_train, y_train.ravel())

# NOTE: for a regressor, .score() returns the coefficient of determination (R^2),
# not a classification accuracy. The variable name is kept unchanged so that any
# later cell referring to it keeps working.
accuracy_RandomForest = RandomForest_model.score(X_test, y_test.ravel())
accuracy_RandomForest

0.8934149656655873