In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

# Read the raw concrete-mixture table; the first row of the file is the header and
# blanks following a delimiter are skipped.
data1 = pd.read_csv("Concrete_Data.csv", header=0, skipinitialspace=True)
data1 = data1.dropna()

# Keep only the rows in which no column holds a zero.
# NOTE(review): this filter removes every row containing a zero in *any* column
# (225 rows remain) — confirm that a zero really means "invalid" for all columns.
rows_without_zero = data1.ne(0).all(axis=1)
data = data1[rows_without_zero]

# Sanity check: after filtering, every per-column zero count should be 0.
zero_counts = data.eq(0).sum()
print("Count of zero values in each column:")
print(zero_counts)

Count of zero values in each column:
Cement (component 1)(kg in a m^3 mixture)                0
Blast Furnace Slag (component 2)(kg in a m^3 mixture)    0
Fly Ash (component 3)(kg in a m^3 mixture)               0
Water  (component 4)(kg in a m^3 mixture)                0
Superplasticizer (component 5)(kg in a m^3 mixture)      0
Coarse Aggregate  (component 6)(kg in a m^3 mixture)     0
Fine Aggregate (component 7)(kg in a m^3 mixture)        0
Age (day)                                                0
Concrete compressive strength(MPa, megapascals)          0
dtype: int64


In [17]:
# Per-column summary statistics (count, mean, std, min, quartiles, max) of the filtered frame.
data.describe()

Unnamed: 0,Cement (component 1)(kg in a m^3 mixture),Blast Furnace Slag (component 2)(kg in a m^3 mixture),Fly Ash (component 3)(kg in a m^3 mixture),Water (component 4)(kg in a m^3 mixture),Superplasticizer (component 5)(kg in a m^3 mixture),Coarse Aggregate (component 6)(kg in a m^3 mixture),Fine Aggregate (component 7)(kg in a m^3 mixture),Age (day),"Concrete compressive strength(MPa, megapascals)"
count,225.0,225.0,225.0,225.0,225.0,225.0,225.0,225.0,225.0
mean,250.212,86.445778,117.048,176.358222,8.820444,946.156,755.316889,31.071111,38.302044
std,106.433393,58.340748,38.543678,21.302834,3.466515,78.637377,58.366609,23.754404,14.220164
min,132.0,11.0,24.5,121.8,1.7,814.0,612.0,3.0,7.32
25%,167.0,24.0,94.0,162.0,6.5,879.6,712.0,14.0,28.63
50%,213.8,97.0,122.0,175.1,8.4,942.0,764.4,28.0,36.44
75%,314.0,129.9,141.0,190.6,10.9,1006.3,793.5,28.0,48.67
max,491.0,214.0,195.0,247.0,22.1,1080.8,880.0,100.0,76.24


In [2]:
# Show the first five rows. The index labels are those of the unfiltered frame,
# because the zero-row filter did not reset the index.
data.head(5)


Unnamed: 0,Cement (component 1)(kg in a m^3 mixture),Blast Furnace Slag (component 2)(kg in a m^3 mixture),Fly Ash (component 3)(kg in a m^3 mixture),Water (component 4)(kg in a m^3 mixture),Superplasticizer (component 5)(kg in a m^3 mixture),Coarse Aggregate (component 6)(kg in a m^3 mixture),Fine Aggregate (component 7)(kg in a m^3 mixture),Age (day),"Concrete compressive strength(MPa, megapascals)"
224,168.0,42.1,163.8,121.8,5.7,1058.7,780.1,3,7.75
225,168.0,42.1,163.8,121.8,5.7,1058.7,780.1,14,17.82
226,168.0,42.1,163.8,121.8,5.7,1058.7,780.1,28,24.24
227,168.0,42.1,163.8,121.8,5.7,1058.7,780.1,56,32.85
228,168.0,42.1,163.8,121.8,5.7,1058.7,780.1,100,39.23


In [3]:
# Separate the regression target from the feature columns.
# The column label carries a trailing space, so it is kept in one place to avoid typos.
TARGET_COLUMN = 'Concrete compressive strength(MPa, megapascals) '

Y = data[TARGET_COLUMN]
X = data.drop(columns=[TARGET_COLUMN])

print(X.shape)
print(Y.shape)

(225, 8)
(225,)


In [18]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler


# Hold out 20% of the rows for evaluation; the fixed seed makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

# Fit the scaler on the training split only and reuse its statistics for the test
# split, so the held-out rows do not influence preprocessing.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# The `normalize` argument was deprecated in scikit-learn 1.0 and removed in 1.2, so
# passing it raises TypeError on current releases. It is also unnecessary here: the
# features are already standardized above, and for ordinary least squares the fitted
# coefficients do not depend on that internal rescaling.
lin_reg = LinearRegression()
lin_reg.fit(X_train_scaled, y_train)


LinearRegression(normalize=True)

In [5]:
# Predict on the standardized held-out features.
Y_pred = lin_reg.predict(X_test_scaled)

# Fitted parameters; the model was trained on standardized features, so the
# coefficients are expressed per standard deviation of each feature.
print("Coefficients: ", lin_reg.coef_)
print("Intercept: ", lin_reg.intercept_)

# Goodness of fit on the test split.
r2_value = r2_score(y_true=y_test, y_pred=Y_pred)
print('R-squared Coefficient of determination: %.2f' % r2_value)

Coefficients:  [11.55046312  7.22277124  1.50555734 -1.16393692  0.98132138  1.86181695
  2.95299484  8.98746715]
Intercept:  37.919388888888896
R-squared Coefficient of determination: 0.71


In [22]:

# Signed prediction error as a percentage of the observed value
# (negative means the model over-predicted).
relative_error_pct = (y_test - Y_pred) / y_test * 100

# Observed vs. predicted strength for each test row, with the relative error.
df = pd.DataFrame({
    'Actual': y_test,
    'Predicted': Y_pred,
    'Diff(%)': relative_error_pct,
})
print(df)

      Actual  Predicted     Diff(%)
233    53.90  61.508215  -14.115427
930    30.88  35.615348  -15.334677
509    72.10  46.057623   36.119802
987    33.76  40.962395  -21.334108
838    27.68  35.012469  -26.490133
1001   53.52  39.652449   25.910970
928    24.58  29.180117  -18.714878
475    35.36  40.015282  -13.165390
924    26.97  34.563351  -28.154806
920    29.07  34.738413  -19.499185
446    53.72  44.839937   16.530274
289     7.32  19.228637 -162.686299
970    46.23  46.012631    0.470190
354    30.45  34.289328  -12.608632
359    27.42  28.118024   -2.545674
400    67.87  49.392448   27.224918
426    29.55  27.323848    7.533511
922    33.80  35.260366   -4.320608
490    34.77  39.777308  -14.401230
889    33.72  35.921351   -6.528325
463    56.06  56.370021   -0.553017
292    43.50  39.078865   10.163529
894    29.72  30.433639   -2.401207
848    33.40  36.543893   -9.412852
485    50.24  49.895959    0.684794
471    57.03  49.378597   13.416453
948    26.23  34.773272  -32