In [23]:
import pandas as pd
import numpy as np
import keras
from keras.models import Sequential
from keras.layers import Dense
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

In [5]:
concrete_data = pd.read_csv('concrete_data.csv')
concrete_data.describe()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
count,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0
mean,281.167864,73.895825,54.18835,181.567282,6.20466,972.918932,773.580485,45.662136,35.817961
std,104.506364,86.279342,63.997004,21.354219,5.973841,77.753954,80.17598,63.169912,16.705742
min,102.0,0.0,0.0,121.8,0.0,801.0,594.0,1.0,2.33
25%,192.375,0.0,0.0,164.9,0.0,932.0,730.95,7.0,23.71
50%,272.9,22.0,0.0,185.0,6.4,968.0,779.5,28.0,34.445
75%,350.0,142.95,118.3,192.0,10.2,1029.4,824.0,56.0,46.135
max,540.0,359.4,200.1,247.0,32.2,1145.0,992.6,365.0,82.6


In [7]:
print(concrete_data.shape)
print(concrete_data.isnull().sum())

(1030, 9)
Cement                0
Blast Furnace Slag    0
Fly Ash               0
Water                 0
Superplasticizer      0
Coarse Aggregate      0
Fine Aggregate        0
Age                   0
Strength              0
dtype: int64


In [19]:
concrete_data_columns = concrete_data.columns
predictors = concrete_data[concrete_data_columns[concrete_data_columns != 'Strength']]
target = concrete_data['Strength']
n_cols = predictors.shape[1]

<b>FIRST TASK</b>

In [20]:
def regression_model():
    model = Sequential()
    model.add(Dense(10, activation='relu', input_shape=(n_cols,)))
    model.add(Dense(1))
    
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

In [26]:
MSE_list = list()
for i in range(0,50):
    X_train, X_test, y_train, y_test = train_test_split(predictors, target, test_size=0.3, random_state=42)
    model = regression_model()
    model.fit(X_train, y_train, validation_split=0.3, epochs=50, verbose=None)
    predition = model.predict(X_test)   
    MSE_list.append(mean_squared_error(y_test, predition))
print(MSE_list)

[227.80482083325325, 2333.4074088445705, 547.5996777693703, 2126.3784269511557, 525.3704402571547, 1387.9108681557004, 101.83875123391277, 237.65781327017064, 505.6752690360669, 324.84889577977884, 115.77875308342173, 495.00093668804226, 425.07255690429014, 177.81568219782562, 148.6635929434591, 1050.039260567705, 706.5531234152342, 761.5702497023066, 1298.323444837426, 358.206949931823, 490.7430414577923, 1891.719380395875, 1045.0845915063012, 549.0960610358802, 307.4546404571514, 1456.943379038083, 120.93280005221075, 112.78897712866095, 110.53568489324545, 108.8796534458692, 148.5197334362635, 161.79187944900679, 1122.6442527448917, 184.2216419538953, 116.45474971302109, 179.2454987547365, 119.71118533878636, 410.3882025651524, 165.07137602293486, 172.73897903521356, 1652.9698864257616, 272.76857155129215, 498.9344955300187, 211.05241721577292, 140.95710239491018, 240.35788951389324, 226.93885234267051, 141.65184430039076, 1103.963410701231, 1566.8533455290053]


In [32]:
print(f"Average mean squared error is : {np.average(MSE_list)}")

Average mean squared error is : 577.7386089266518


<b>SECOND TASK</b>

In [33]:
MSE_list_2 = list()
predictors_norm = (predictors - predictors.mean()) / predictors.std()
for i in range(0,50):
    X_train, X_test, y_train, y_test = train_test_split(predictors_norm, target, test_size=0.3, random_state=42)
    model = regression_model()
    model.fit(X_train, y_train, validation_split=0.3, epochs=50, verbose=None)
    predition = model.predict(X_test)   
    MSE_list_2.append(mean_squared_error(y_test, predition))
print(MSE_list_2)

[454.83119039856047, 789.569373405125, 764.924542413991, 522.9682292561955, 538.6696315459776, 707.9346372185004, 462.16817539496327, 675.1773696257821, 558.9001440139149, 542.7736852821569, 625.1629791373325, 560.591171957843, 496.0972472683416, 713.2532840951305, 871.9829290219045, 701.833696715101, 499.5301826141796, 545.923554734284, 553.158504522508, 557.1097218536694, 782.7901496460937, 590.253530489631, 745.3245337291843, 812.0000215516886, 645.9653223951318, 597.1660157641898, 542.3840407282919, 836.4178009412692, 630.6335945382309, 437.6833023238092, 555.5473737325719, 454.51216912262265, 892.7061184366553, 515.767375325599, 612.5911091861095, 742.9565402575533, 538.8716501603028, 855.5908325492327, 561.6727364606998, 487.5021123098637, 680.6994579832033, 508.50345131785184, 702.6254243957367, 1013.3597950485877, 483.51600060246915, 599.1175878828024, 917.5712743741348, 528.5832611767672, 527.2491846908894, 589.8187467458563]


In [43]:
print(f"Average mean squared error for standardized data is : {np.average(MSE_list_2)}")

Average mean squared error for standardized data is : 460.92834752297426


<b>THIRD TASK</b>

In [37]:
MSE_list_3 = list()
for i in range(0,50):
    X_train, X_test, y_train, y_test = train_test_split(predictors_norm, target, test_size=0.3, random_state=42)
    model = regression_model()
    model.fit(X_train, y_train, validation_split=0.3, epochs=100, verbose=None)
    predition = model.predict(X_test)   
    MSE_list_3.append(mean_squared_error(y_test, predition))
print(MSE_list_3)

[302.53859980135496, 197.07302596682422, 184.19558203330146, 262.3704452218192, 178.46480830620163, 209.91851874695394, 249.15549343622496, 182.1620006921027, 226.12415600330468, 179.2491777512071, 212.31977634275736, 230.0688165237667, 172.466245293051, 195.11422633599352, 227.39410552616627, 236.75333557967178, 204.30501504321393, 197.70918436217286, 178.39943365707887, 193.28491734133712, 214.1962648101789, 219.08772955654715, 185.49344406184076, 192.9740829494499, 179.135348428374, 165.64507357587564, 235.30219314062208, 323.4848404051216, 175.8824681511732, 183.00795878229073, 176.47222701108265, 187.07654714292119, 203.0211717137979, 193.6338676621514, 201.9350325337595, 188.26824375754754, 473.7942785590258, 216.10724718732567, 298.27802818513214, 173.8367905874648, 214.37609362014214, 196.53423916139187, 178.29121322260409, 161.21985638589604, 497.45639291119124, 226.27128416870033, 168.71856839015484, 196.9762978888651, 191.19835005427973, 199.7449278376951]


In [38]:
print(f"Average mean squared error for standardized data and 100 epochs is : {np.average(MSE_list_3)}")

Average mean squared error for standardized data and 100 epochs is : 216.72973851614213


<b> FOURTH TASK </b>

In [40]:
def regression_model_2():
    model = Sequential()
    model.add(Dense(10, activation='relu', input_shape=(n_cols,)))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(1))
    
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

In [41]:
MSE_list_4 = list()
for i in range(0,50):
    X_train, X_test, y_train, y_test = train_test_split(predictors_norm, target, test_size=0.3, random_state=42)
    model = regression_model_2()
    model.fit(X_train, y_train, validation_split=0.3, epochs=50, verbose=None)
    predition = model.predict(X_test)   
    MSE_list_4.append(mean_squared_error(y_test, predition))
print(MSE_list_4)

[125.19314219133616, 139.38007159230685, 139.65537663093826, 150.67187543877188, 142.32998195292356, 157.6256820794507, 143.44726196865136, 136.06814162734443, 146.23366485431208, 149.7134573262652, 145.95296423336262, 153.09664392061765, 153.25444698523296, 139.04446940930112, 122.22285127237267, 129.66079307347746, 153.39391480845143, 145.69590508050683, 152.68205958231906, 150.050538571318, 136.95613001023423, 138.129188525634, 138.89402013756603, 144.31752337495556, 146.66430169752277, 163.97415683162296, 150.74958976937643, 145.8045384944239, 122.60301389486095, 156.21361005075656, 136.5491087419695, 156.30757960372324, 161.65113037218768, 143.84462871694205, 119.27124975753708, 138.394347612793, 142.21587534342888, 137.54395893728247, 140.36691807036232, 165.1839717944515, 131.96092519038018, 149.3056988864381, 168.6908864099617, 127.7683830240198, 158.05250225137513, 140.42701464713497, 143.918410358887, 143.45575335386766, 139.34231991895854, 137.211529879273]


In [42]:
print(f"Average mean squared error for standardized data and 3 hidden layers is : {np.average(MSE_list_4)}")

Average mean squared error for standardized data and 3 hidden layers is : 144.02283016514374
