# Importing Libraries

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# The lines below restyle the plots so that the x and y axes are visible (drawn in white) instead of everything being rendered in black.
from jupyterthemes import jtplot

# Apply the "monokai" plot theme in the "notebook" context, with axis ticks
# enabled and the background grid disabled.
jtplot.style(
    theme="monokai",
    context="notebook",
    ticks=True,
    grid=False,
)

# Importing Data Set

In [2]:
# Read the raw table from disk.
mining_df = pd.read_csv("mining_data.csv")

# Target: the "% Silica Concentrate" column.
y = mining_df["% Silica Concentrate"]

# Features: every remaining column (drop returns a new frame; mining_df is untouched).
X = mining_df.drop(columns=["% Silica Concentrate"])

In [3]:
# Show the feature table's dimensions as (rows, columns).
X.shape

(245700, 22)

In [4]:
# Show the target's dimensions; a single column selected from the frame is 1-D.
y.shape

(245700,)

In [5]:
# Convert the pandas feature frame and target series into NumPy arrays.
X, y = np.array(X), np.array(y)

In [6]:
# Reshape y into a 2-dimensional column array: one column, row count inferred (-1).
y = np.reshape(y, (-1, 1))
y.shape

(245700, 1)

# SPLITTING THE DATASET INTO THE TRAINING SET AND TEST SET

In [7]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing.
# random_state pins the shuffle so that Restart Kernel -> Run All reproduces the
# same split (and therefore the same scaler statistics and model score) every time.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [8]:
# Report the shape of each split. The third field is the trailing padding that
# keeps the printed lines the same width as before.
split_shapes = (
    ("X_train", X_train.shape, " "),
    ("X_test", X_test.shape, "  "),
    ("y_train", y_train.shape, " "),
    ("y_test", y_test.shape, "  "),
)
for label, shape, tail in split_shapes:
    # Left-justify the label to 7 characters so the colons line up.
    print(f"{label:<7} : {shape}{tail}")

X_train : (196560, 22) 
X_test  : (49140, 22)  
y_train : (196560, 1) 
y_test  : (49140, 1)  


# FEATURE SCALING

In [9]:
from sklearn.preprocessing import StandardScaler, MinMaxScaler

SC = StandardScaler()

# Fit the scaler on the training features only and KEEP the scaled result.
# Previously the return value of fit_transform was merely displayed and then
# discarded, so X_train stayed unscaled and this whole section had no effect
# on the model trained afterwards. (The target is not needed by the scaler.)
X_train = SC.fit_transform(X_train)

# Apply the statistics learned from the training set to the test set, so both
# are on the same scale and no information from the test set leaks into the fit.
X_test = SC.transform(X_test)

# Display the scaled training features.
X_train

array([[ 1.5000449 , -1.23232096, -2.22564902, ..., -0.84292953,
         0.92890882,  1.2886592 ],
       [-0.21510754,  0.31979972, -0.68770967, ...,  0.22453003,
        -0.29151316,  0.5289419 ],
       [-0.80687394,  0.53586197,  2.25490488, ..., -1.01949267,
         0.03191082, -0.07883194],
       ...,
       [ 0.84619154, -0.93982852,  0.21841144, ...,  1.38060753,
         1.40946095, -0.70448148],
       [-0.47897715,  0.92683176,  0.80822447, ...,  0.06754677,
         1.03177485,  0.36806059],
       [-0.51584128,  0.65197706, -0.29135656, ..., -0.92359583,
        -0.87879099, -0.40059456]])

# EVALUATE LINEAR REGRESSION MODEL

In [10]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, accuracy_score

# Build and train an ordinary least-squares model on the training split
# (fit returns the estimator itself, so the calls can be chained).
LinearRegression_model = LinearRegression().fit(X_train, y_train)

# NOTE: for a regressor, .score() returns the coefficient of determination (R^2)
# on the given data, not a classification accuracy, despite the variable name.
accuracy_LinearRegression = LinearRegression_model.score(X_test, y_test)
accuracy_LinearRegression

0.6772177587445032