In [15]:
# Importing the libraries 

import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt 
import seaborn as sns
%matplotlib inline

# Ignore harmless warnings 

import warnings 
warnings.filterwarnings("ignore")

# Set to display all the columns in dataset

pd.set_option("display.max_columns", None)

# Import psql to run queries 

import pandasql as psql

In [2]:
# load the Concrete dataset 

concrete = pd.read_csv(r"E:\win7\files\13,14-08-22\Concrete_Data_V1.0.csv", header=0)

# copy the file to back-up file

concrete_bk = concrete.copy()

# display first 5 records

concrete.head()

Unnamed: 0,Cement,Slag,Flyash,Water,SuperPlasticizer,CoarseAggregate,FineAggregate,Age,CSinMPa
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


In [3]:
# Display concrete data information

concrete.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1030 entries, 0 to 1029
Data columns (total 9 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Cement            1030 non-null   float64
 1   Slag              1030 non-null   float64
 2   Flyash            1030 non-null   float64
 3   Water             1030 non-null   float64
 4   SuperPlasticizer  1030 non-null   float64
 5   CoarseAggregate   1030 non-null   float64
 6   FineAggregate     1030 non-null   float64
 7   Age               1030 non-null   int64  
 8   CSinMPa           1030 non-null   float64
dtypes: float64(8), int64(1)
memory usage: 72.5 KB


In [4]:
# Display the columns in concrete dataset

concrete.columns

Index(['Cement', 'Slag', 'Flyash', 'Water', 'SuperPlasticizer',
       'CoarseAggregate', 'FineAggregate', 'Age', 'CSinMPa'],
      dtype='object')

In [5]:
# Prepare cols1 for scaling

cols1 = ['Cement', 'Slag', 'Flyash', 'Water', 'SuperPlasticizer', 'CoarseAggregate', 'FineAggregate', 'Age']

#cols1 = ['Cement', 'Slag', 'Flyash', 'Water', 'SuperPlasticizer', 'CoarseAggregate', 'FineAggregate', 'Age']

In [6]:
# Identify the independent and Target variables

IndepVar = []
for col in concrete.columns:
    if col != 'CSinMPa':
        IndepVar.append(col)

TargetVar = 'CSinMPa'

x = concrete[IndepVar]
y = concrete[TargetVar]

In [7]:
# Split the data into train and test

from sklearn.model_selection import train_test_split 

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=143)

# Display the shape of the train_data and test_data

x_train.shape, x_test.shape, y_train.shape, y_test.shape

((721, 8), (309, 8), (721,), (309,))

In [8]:
# Scaling the features by using MinMaxScaler

from sklearn.preprocessing import MinMaxScaler

mmscaler = MinMaxScaler(feature_range=(0, 1))

x_train[cols1] = mmscaler.fit_transform(x_train[cols1])
x_train = pd.DataFrame(x_train)

x_test[cols1] = mmscaler.fit_transform(x_test[cols1])
x_test = pd.DataFrame(x_test)

In [10]:
#train the algorithm and build the model with train dataset

from sklearn.ensemble import RandomForestRegressor

# Create model object

ModelRFR = RandomForestRegressor()



ModelRFR.fit(x_train, y_train)

y_pred=ModelRFR.predict(x_test)

# Evaluation metrics for Regression analysis

from sklearn import metrics

print('Mean Absolute Error (MAE):',  round(metrics.mean_absolute_error(y_test, y_pred),3))  
print('Mean Squared Error (MSE):', round(metrics.mean_squared_error(y_test, y_pred),3))  
print('Root Mean Squared Error (RMSE):', round(np.sqrt(metrics.mean_squared_error(y_test, y_pred)),3))
print('R2_score:', round(metrics.r2_score(y_test, y_pred),6))
print('Root Mean Squared Log Error (RMSLE):', round(np.log(np.sqrt(metrics.mean_squared_error(y_test,y_pred))),3))

# Define the function to calculate the MAPE - Mean Absolute Percentage Error

def MAPE (y_test, y_pred): 
    y_test, y_pred = np.array(y_test), np.array(y_pred)
    return np.mean(np.abs((y_test - y_pred) / y_test)) * 100

# Evaluation of MAPE 

result = MAPE(y_test, y_pred)
print('Mean Absolute Percentage Error (MAPE):', round(result, 3), '%')
#Sruthi G21:55
#print('Root Mean Squared Log Error (RMSLE):', round(np.log(np.sqrt(metrics.mean_squared_error(y_test,y_pred))),3))

# Calculate Adjusted R squared values 

r_squared = round(metrics.r2_score(y_test, y_pred),6)
adjusted_r_squared = round(1 - (1-r_squared)*(len(y)-1)/(len(y)-x.shape[1]-1),6)
print('Adj R Square: ', adjusted_r_squared)

Mean Absolute Error (MAE): 4.777
Mean Squared Error (MSE): 41.61
Root Mean Squared Error (RMSE): 6.451
R2_score: 0.841178
Root Mean Squared Log Error (RMSLE): 1.864
Mean Absolute Percentage Error (MAPE): 14.883 %
Adj R Square:  0.839934


In [11]:
# Predict the values with ET algorithm

y_predF = ModelRFR.predict(x_test)

In [12]:
Results = pd.DataFrame({'CSinMPa_A':y_test, 'CSinMPa_P_F':y_predF})

# Merge two Dataframes on index of both the dataframes

ResultsFinal = concrete_bk.merge(Results, left_index=True, right_index=True)
ResultsFinal.sample(10)

Unnamed: 0,Cement,Slag,Flyash,Water,SuperPlasticizer,CoarseAggregate,FineAggregate,Age,CSinMPa,CSinMPa_A,CSinMPa_P_F
582,170.3,155.5,0.0,185.7,0.0,1026.6,724.3,28,25.73,25.73,27.6256
609,236.0,0.0,0.0,193.0,0.0,968.0,885.0,180,24.1,24.1,28.5712
570,295.8,0.0,0.0,185.7,0.0,1091.4,769.3,28,25.22,25.22,22.7578
659,108.3,162.4,0.0,203.5,0.0,938.2,849.0,90,29.23,29.23,32.2135
544,289.0,0.0,0.0,192.0,0.0,913.2,895.3,7,14.6,14.6,15.8866
918,145.0,0.0,179.0,202.0,8.0,824.0,869.0,28,10.54,10.54,10.6794
1007,155.6,243.5,0.0,180.3,10.7,1022.0,697.7,28,37.36,37.36,31.3458
541,333.0,0.0,0.0,192.0,0.0,931.2,842.6,3,15.62,15.62,11.697
921,155.0,0.0,143.0,193.0,9.0,877.0,868.0,28,9.74,9.74,10.9024
690,288.0,192.0,0.0,192.0,0.0,932.0,717.8,7,23.52,23.52,22.603


In [13]:
# Calculate the %of Error

ResultsFinal['%Error'] = round(((ResultsFinal['CSinMPa_A']-ResultsFinal['CSinMPa_P_F'])/ResultsFinal['CSinMPa_A'])*100,3)

In [14]:
# Display the results

ResultsFinal.sample(5)

Unnamed: 0,Cement,Slag,Flyash,Water,SuperPlasticizer,CoarseAggregate,FineAggregate,Age,CSinMPa,CSinMPa_A,CSinMPa_P_F,%Error
222,166.1,0.0,163.3,176.5,4.5,1058.6,780.1,56,28.63,28.63,24.605,14.059
657,108.3,162.4,0.0,203.5,0.0,938.2,849.0,28,20.59,20.59,29.3987,-42.781
277,251.4,0.0,118.3,188.5,5.8,1028.4,757.7,56,36.97,36.97,32.7319,11.464
832,147.0,115.0,89.0,202.0,9.0,860.0,829.0,28,19.99,19.99,28.5095,-42.619
383,451.0,0.0,0.0,165.0,11.3,1030.0,745.0,28,78.8,78.8,58.469789,25.8
