In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, LogisticRegression, Ridge, Lasso
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import SGDRegressor

In [4]:
# Read the concrete-mix dataset; the relative path is resolved against the
# kernel's current working directory.
data = pd.read_csv(filepath_or_buffer='Concrete_Data.csv')

In [5]:
# Preview the first five rows as a quick sanity check of the load.
data.head(n=5)

Unnamed: 0,Cement (component 1)(kg in a m^3 mixture),Blast Furnace Slag (component 2)(kg in a m^3 mixture),Fly Ash (component 3)(kg in a m^3 mixture),Water (component 4)(kg in a m^3 mixture),Superplasticizer (component 5)(kg in a m^3 mixture),Coarse Aggregate (component 6)(kg in a m^3 mixture),Fine Aggregate (component 7)(kg in a m^3 mixture),Age (day),"Concrete compressive strength(MPa, megapascals)"
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


In [6]:
# Show the exact column labels; the target label used in the next cell has to
# match one of these character for character (note the stray spaces).
data.columns

Index(['Cement (component 1)(kg in a m^3 mixture)',
       'Blast Furnace Slag (component 2)(kg in a m^3 mixture)',
       'Fly Ash (component 3)(kg in a m^3 mixture)',
       'Water  (component 4)(kg in a m^3 mixture)',
       'Superplasticizer (component 5)(kg in a m^3 mixture)',
       'Coarse Aggregate  (component 6)(kg in a m^3 mixture)',
       'Fine Aggregate (component 7)(kg in a m^3 mixture)', 'Age (day)',
       'Concrete compressive strength(MPa, megapascals) '],
      dtype='object')

In [7]:
# Separate the response column from the predictors.
# The label is kept verbatim, including its trailing space, because that is
# how it appears in data.columns.
target_column = 'Concrete compressive strength(MPa, megapascals) '
y = data[target_column]
X = data.drop(columns=[target_column])


In [8]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# repeatable from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Linear Regression

In [9]:
# Ordinary least squares: fit on the training split, score on the held-out split.
linear_reg = LinearRegression().fit(X_train, y_train)
linear_reg_predictions = linear_reg.predict(X_test)
linear_reg_mse = mean_squared_error(y_true=y_test, y_pred=linear_reg_predictions)
print("Linear Regression Predictions:", linear_reg_predictions)
print("Linear Regression MSE:", linear_reg_mse)

Linear Regression Predictions: [59.65804162 52.0412113  63.53513339 51.57545676 17.22001733 39.86561321
 26.6982471  44.73648869 29.36619785 38.18612542 27.76438338 19.81055461
 66.8940356  52.28562112 29.96928612 44.45926401 29.2284856  26.53340084
 31.98162681 32.03208356 36.82327256 31.77175527 38.07286862 25.12353728
 33.0993284  33.85670933 14.24140345 40.33250901 41.88282591 21.33261142
 35.71648095 30.96693001 43.87446203 45.52648807 30.7629146  29.59299935
 28.90650584 38.85005738 20.34631763 38.45267413 21.16537625 15.88280149
 31.06077771 50.70684031 20.26957631 57.64501168 50.90245564 60.96378987
 20.00393916 19.22241454 40.4816289  36.23627397 30.01525382 33.42739866
 46.66492833 51.47376399 27.73605695 15.69651113 29.69654891 18.12447629
 38.63227644 20.52725493 31.56412397 55.48969297 22.93752175 21.34689286
 32.26933604 16.70816961 25.40482587 25.56327425 17.83478081 18.51545189
 13.0590327  27.71461739 28.60182091 19.97722078 60.05545831 44.65212951
 54.60286692 23.5104

# Polynomial Regression

In [10]:
# Degree-2 polynomial expansion of the features, followed by a linear fit.
poly_features = PolynomialFeatures(degree=2)

# The expansion is fitted on the training split only and then reused as-is
# for the test split.
X_poly = poly_features.fit_transform(X_train)
X_test_poly = poly_features.transform(X_test)

poly_reg = LinearRegression().fit(X_poly, y_train)
poly_reg_predictions = poly_reg.predict(X_test_poly)
poly_reg_mse = mean_squared_error(y_true=y_test, y_pred=poly_reg_predictions)
print("Polynomial Regression Predictions:", poly_reg_predictions)
print("Polynomial Regression MSE:", poly_reg_mse)

Polynomial Regression Predictions: [38.32051572 49.51480123 63.19972932 47.95378188 16.12908377 43.39425269
 29.47721838 58.5734343  25.80900575 46.3237506  36.15004489  8.96543959
 43.8481675  43.30577896 28.73816612 32.55630044 32.15202739 22.25414428
 37.34998935 29.02613991 35.43208955 49.04427447 43.05669635 15.53692126
 30.36681015 29.99871182 10.73013848 44.79239078 55.62677304 21.438927
 39.54061096 33.15758329 43.97418029 63.20659602 24.79873173 31.26607408
 26.75404138 41.58089166 13.76772677 44.79050324 19.90703085  7.57849511
 38.00708217 46.33510481 14.57597492 66.50370076 43.79405246 31.97440978
 22.37154999 12.97751705 45.22875194 43.30443232 27.15867048 23.47443359
 45.15390917 47.38726146 20.75402822 14.75281173 34.05985295 19.37722861
 41.38081893 17.47345138 42.47223738 51.75187668 24.22808129 17.12020218
 37.59343422 16.47370325 28.82721689 22.30560887 12.31830386 20.36385586
 10.30762447 36.10164571 28.07828806 11.58602242 36.34300667 58.0856611
 61.81542061 13.718

# Ridge Regression

In [11]:
# L2-regularised linear model (alpha=0.5) trained on the raw, unscaled features.
ridge_reg = Ridge(alpha=0.5).fit(X_train, y_train)
ridge_reg_predictions = ridge_reg.predict(X_test)
ridge_reg_mse = mean_squared_error(y_true=y_test, y_pred=ridge_reg_predictions)
print("Ridge Regression Predictions:", ridge_reg_predictions)
print("Ridge Regression MSE:", ridge_reg_mse)

Ridge Regression Predictions: [59.65802105 52.04118748 63.53501943 51.57543298 17.22001451 39.86563789
 26.6982666  44.73647159 29.36617275 38.18618    27.76433362 19.81060007
 66.89382937 52.28558375 29.96933889 44.45935246 29.22848644 26.53342661
 31.98156807 32.03208589 36.82332746 31.77186926 38.07286066 25.1235639
 33.09935004 33.856665   14.24140213 40.33248547 41.88281008 21.33264181
 35.71652165 30.96698537 43.87450601 45.52647437 30.76291898 29.59299888
 28.90650244 38.85005268 20.34636696 38.45264574 21.16539176 15.88284787
 31.06080666 50.70680955 20.26955483 57.64500171 50.90238548 60.96382009
 20.00394293 19.22243131 40.48165517 36.236321   30.01525348 33.42745343
 46.66498372 51.47375469 27.73603202 15.69650974 29.69650448 18.12447243
 38.63227655 20.52732905 31.56410127 55.48960492 22.93753751 21.34682271
 32.26933103 16.70813515 25.40480896 25.56324793 17.83482579 18.51546361
 13.05904335 27.71456744 28.60184622 19.97721723 60.0554379  44.65209714
 54.6029464  23.510404

# Lasso Regression

In [12]:
# L1-regularised linear model (alpha=0.5) trained on the raw, unscaled features.
lasso_reg = Lasso(alpha=0.5).fit(X_train, y_train)
lasso_reg_predictions = lasso_reg.predict(X_test)
lasso_reg_mse = mean_squared_error(y_true=y_test, y_pred=lasso_reg_predictions)
print("Lasso Regression Predictions:", lasso_reg_predictions)
print("Lasso Regression MSE:", lasso_reg_mse)

Lasso Regression Predictions: [59.60737103 51.95278051 63.24206303 51.48804013 17.23801635 39.93807321
 26.75835187 44.65395124 29.2823025  38.34823055 27.69978983 19.98985511
 66.40471319 52.15629022 30.06076567 44.61955724 29.26020438 26.62564932
 31.8727102  32.00412061 36.93862732 31.98423588 38.02075622 25.20477895
 33.16256119 33.72913197 14.26216749 40.31029715 41.80956077 21.47061658
 35.82782784 31.14475461 43.96317296 45.46002302 30.74201066 29.65687033
 28.87190117 38.86108824 20.48109459 38.3629878  21.22734478 16.01595844
 31.11711877 50.57416175 20.20886049 57.57345838 50.74215578 61.02423629
 20.05239522 19.28832083 40.55568454 36.35709431 30.02221713 33.52884007
 46.75770918 51.41564829 27.65571115 15.73462257 29.57220708 18.14957972
 38.65494093 20.81307667 31.47881479 55.26889743 23.00714195 21.23389634
 32.27532187 16.67480688 25.34111606 25.48734268 17.99382843 18.58022923
 13.13828319 27.64967213 28.68244363 19.94837609 60.01282987 44.55692416
 54.74632159 23.46049

# Decision Tree Regression

In [13]:
# Regression tree with default hyperparameters, scored on the held-out split.
# random_state is pinned to the same seed as the train/test split: the tree
# permutes candidate features at every split, so an unseeded fit can produce
# a different tree (and a different MSE) each time the notebook is re-run.
dt_reg = DecisionTreeRegressor(random_state=42)
dt_reg.fit(X_train, y_train)
dt_reg_predictions = dt_reg.predict(X_test)
dt_reg_mse = mean_squared_error(y_test, dt_reg_predictions)
print("Decision Tree Regression Predictions:", dt_reg_predictions)
print("Decision Tree Regression MSE:", dt_reg_mse)

Decision Tree Regression Predictions: [51.73 39.4  71.3  35.3  10.54 44.28 23.7  46.23 33.94 38.89 44.39 17.2
 36.3  35.1  24.99 21.82 40.06 20.42 37.17 31.81 38.8  40.57 45.08  9.56
 33.73 33.94 12.47 41.05 59.3  14.99 41.05 31.35 42.33 47.82 19.42 36.45
 33.94 41.54 10.39 51.26 17.54  2.33 41.72 56.34 13.36 61.09 55.65 34.49
 27.53  9.69 57.22 47.71 24.34 17.2  43.94 35.3  33.73 11.48 39.15 19.69
 41.54 13.52 29.39 55.6  25.75 18.03 33.4  12.18 31.54 20.73 11.36 28.02
  6.94 44.39 26.15 13.82 51.73 51.06 59.59 10.39 39.36 49.2  38.89 31.35
 43.7  35.3  44.61 33.72 26.77 22.32 31.84 73.7   9.45 56.62 43.38 46.93
 21.06 40.23 19.2  34.49 30.14 43.8  33.12 24.07 69.3  12.05 55.02 30.12
 47.4  67.57 39.36 45.08 25.89 40.57 35.75 55.25 20.28 33.76 55.51 43.38
 17.24 27.63 46.64 27.34 24.58 48.67 52.61 49.2  42.13 32.4  11.48 28.68
 17.34 66.    6.47 41.15 31.45 43.58 22.95 37.36 28.47 31.18 18.02 32.33
 46.23 43.58 26.86 13.2   8.06 14.84 34.74 18.03 45.9  24.45 33.56 40.86
 23.84 69.66 3

# Random Forest Regression

In [14]:
# Random forest with default hyperparameters, scored on the held-out split.
# random_state is pinned to the same seed as the train/test split so that the
# bootstrap samples and per-split feature draws, and therefore the reported
# MSE, are identical on every Restart & Run All.
rf_reg = RandomForestRegressor(random_state=42)
rf_reg.fit(X_train, y_train)
rf_reg_predictions = rf_reg.predict(X_test)
rf_reg_mse = mean_squared_error(y_test, rf_reg_predictions)
print("Random Forest Regression Predictions:", rf_reg_predictions)
print("Random Forest Regression MSE:", rf_reg_mse)

Random Forest Regression Predictions: [51.4457     38.4689     70.8378     34.628      11.6662     44.6003
 24.4499     48.5326     37.4427     43.2228     42.1247     15.3276
 38.46863333 33.98843333 25.1126     22.4448     36.1257     17.3447
 38.0223     31.6877     36.063      36.7681     46.3143     11.5641
 32.7104     39.1322     11.4503     45.2049     53.1701     14.921
 60.27083333 33.5947     42.3787     48.73095    18.0496     40.9146
 36.1663     44.7368      9.5489     51.1682     16.004       4.9765
 41.082      48.373      13.1965     64.2991     53.573625   34.3644
 26.7685      8.4838     55.0291     41.5622     26.7353     18.1067
 45.5834     34.8914     27.3227     12.2289     35.3884     19.9209
 44.8529     14.8845     36.7252     51.40476667 32.0785     25.1827
 35.0004     13.4728     31.0442     24.4781     12.0659     29.3275
  9.5272     42.1295     25.9759     11.5954     51.9139     50.7357
 57.1588      9.801      37.9755     43.8331     37.5446     39.97

# Gradient Boosting Regression

In [15]:

# Gradient-boosted trees with default hyperparameters, scored on the held-out
# split. random_state is pinned to the same seed as the train/test split so
# the randomness inside the individual trees does not change the reported MSE
# between runs.
gb_reg = GradientBoostingRegressor(random_state=42)
gb_reg.fit(X_train, y_train)
gb_reg_predictions = gb_reg.predict(X_test)
gb_reg_mse = mean_squared_error(y_test, gb_reg_predictions)
print("Gradient Boosting Regression Predictions:", gb_reg_predictions)
print("Gradient Boosting Regression MSE:", gb_reg_mse)

Gradient Boosting Regression Predictions: [48.97709196 45.76321527 69.29974613 34.17165924 12.77409397 42.78513374
 25.79041357 50.55478217 30.7921554  41.47610681 40.01980445 15.52875237
 40.65048559 42.39599239 29.5259735  21.94430917 36.8486475  19.88376954
 37.89734327 29.74477933 39.2997397  38.69568425 46.33504461 10.95944287
 38.42206553 33.32891578 10.03105277 45.5796405  54.05249279 12.82847036
 49.94238443 35.19920787 46.83674447 54.69618672 17.39000746 35.31605575
 30.68173036 42.80178697 11.83869237 47.71032557 15.7795485   8.24559792
 37.58230776 51.75869738 11.42913182 75.79862669 52.22575223 34.53187655
 26.53566374  8.35007739 46.41337261 41.18517313 25.8584379  17.71344231
 41.89081029 33.73868877 26.90181879  9.29341699 37.69299106 23.60805537
 43.49241388 13.98646545 38.11599427 50.99401406 31.21076577 24.52173979
 31.94921772 15.47461954 33.17564059 23.84644052 12.62623441 28.42212757
  8.28673644 40.01980445 27.50173113 11.21105967 49.49416601 51.09433996
 52.54971

# Support Vector Regression

In [16]:
# Standardise the features with statistics learned from the training split
# only, then apply the same transformation to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Support vector regressor with default hyperparameters on the scaled features.
svr_reg = SVR().fit(X_train_scaled, y_train)
svr_reg_predictions = svr_reg.predict(X_test_scaled)
svr_reg_mse = mean_squared_error(y_true=y_test, y_pred=svr_reg_predictions)
print("Support Vector Regression Predictions:", svr_reg_predictions)
print("Support Vector Regression MSE:", svr_reg_mse)
     

Support Vector Regression Predictions: [42.02837189 53.07178175 55.4244467  52.25058179 27.15432674 41.22365799
 26.95118384 47.40291511 31.32277358 43.28318885 34.6409369  20.64832546
 46.85470404 44.67936286 26.7697804  34.53273197 31.99436603 25.82984409
 33.88688392 27.83015034 32.0766513  37.40398295 41.39953655 26.31465017
 31.75662963 35.14831839 16.7140779  45.70059883 44.53785265 22.78996674
 37.58345848 33.15714416 39.86020844 46.99425817 25.63722485 35.14087252
 30.44223862 41.21976612 21.01493474 41.87018131 19.44796208 19.65825568
 33.41468603 47.23060742 19.46405782 61.1972441  42.85999254 39.01843491
 22.32984782 21.63059085 42.85887622 40.46806868 30.82688732 28.15760928
 42.06606084 51.34657107 28.70469115 17.85449175 34.87772226 21.08940793
 40.98866094 26.54925385 40.0213611  55.33368625 23.88083109 26.88969139
 34.53063954 22.45847868 28.91575364 24.3361942  20.9310734  20.10855879
 15.28413169 34.59239126 28.7671655  18.49734528 42.45549697 48.17558871
 42.4804513 

# Stochastic Gradient Descent Regression

In [17]:
# Linear model trained by stochastic gradient descent. Scaling lives inside
# the pipeline, so it is fitted on the training split and re-applied
# automatically at predict time.
# random_state is pinned to the same seed as the train/test split: the
# regressor shuffles the training data between epochs, so an unseeded fit
# yields a slightly different model, and MSE, on every run.
gd_reg = make_pipeline(StandardScaler(), SGDRegressor(random_state=42))
gd_reg.fit(X_train, y_train)
gd_reg_predictions = gd_reg.predict(X_test)
gd_reg_mse = mean_squared_error(y_test, gd_reg_predictions)
print("Gradient Descent Regression Predictions:", gd_reg_predictions)
print("Gradient Descent Regression MSE:", gd_reg_mse)

Gradient Descent Regression Predictions: [59.52431467 51.76374771 63.5312954  51.30419734 17.11232721 40.05402653
 26.71512866 44.55544904 29.1331619  38.53683835 28.32711411 20.73926774
 67.01003743 51.75417743 29.76115429 43.95167059 29.21542973 26.70969383
 31.98539381 31.68857876 36.68852211 31.33439156 37.66015205 25.14812824
 33.56176525 33.65044947 14.43793136 40.83553775 41.76903035 21.91406187
 36.08641706 31.4138076  43.67357347 45.61201933 30.42976636 29.97122048
 28.67499976 39.16227525 20.37891531 38.23879387 21.38726235 15.91388986
 30.80260508 50.08165713 19.89148174 57.17333547 51.13464212 61.05976298
 20.13604053 19.27475556 40.49908502 36.35779047 29.751075   33.11017768
 46.16126771 51.08429306 27.52473561 16.1152108  29.583812   18.46718499
 38.95152049 21.72956515 31.03557824 55.49156299 23.08299651 21.96213284
 32.22382765 17.17732967 24.77705297 25.2345473  18.19731348 18.66057724
 13.37000537 28.27661493 28.92001205 19.4323534  60.01162808 44.75901035
 53.903875

# Performance (A lower MSE indicates better performance)

In [18]:

# Side-by-side recap of every model's test-set MSE, in training order.
for mse_label, mse_value in (
    ("Linear Regression MSE:", linear_reg_mse),
    ("Polynomial Regression MSE:", poly_reg_mse),
    ("Ridge Regression MSE:", ridge_reg_mse),
    ("Lasso Regression MSE:", lasso_reg_mse),
    ("Decision Tree Regression MSE:", dt_reg_mse),
    ("Random Forest Regression MSE:", rf_reg_mse),
    ("Gradient Boosting Regression MSE:", gb_reg_mse),
    ("Support Vector Regression MSE:", svr_reg_mse),
    ("Gradient Descent Regression MSE:", gd_reg_mse),
):
    print(mse_label, mse_value)

Linear Regression MSE: 95.97094009110677
Polynomial Regression MSE: 55.58245787949472
Ridge Regression MSE: 95.97091781212978
Lasso Regression MSE: 95.95234699192571
Decision Tree Regression MSE: 43.28373689320387
Random Forest Regression MSE: 31.536991748433035
Gradient Boosting Regression MSE: 30.01233397506631
Support Vector Regression MSE: 88.95941487206821
Gradient Descent Regression MSE: 95.68193787934574


In [19]:
# Pick the model with the lowest test-set MSE.
#
# Each display name is paired with its MSE once, in the order the models were
# trained, so adding or removing a model is a one-line change instead of
# another copy-pasted comparison.
mse_by_model = {
    "Linear Regression": linear_reg_mse,
    "Polynomial Regression": poly_reg_mse,
    "Ridge Regression": ridge_reg_mse,
    "Lasso Regression": lasso_reg_mse,
    "Decision Tree Regression": dt_reg_mse,
    "Random Forest Regression": rf_reg_mse,
    "Gradient Boosting Regression": gb_reg_mse,
    "Support Vector Regression": svr_reg_mse,
    "Gradient Descent Regression": gd_reg_mse,
}

# Start from +infinity so the first real MSE always replaces it.
best_mse = float('inf')
best_model = None

# Strict '<' keeps the earliest-listed model when two MSE values tie.
for model_name, model_mse in mse_by_model.items():
    if model_mse < best_mse:
        best_mse = model_mse
        best_model = model_name

print("The best model is:", best_model)


The best model is: Gradient Boosting Regression
