In [28]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, LogisticRegression, Ridge, Lasso
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import SGDRegressor

In [29]:
# Read the concrete-strength spreadsheet into a pandas DataFrame
DATA_FILE = 'Concrete_Data.xls'
data = pd.read_excel(DATA_FILE)

In [30]:
# Preview the first five rows to sanity-check the load
data.head(n=5)

Unnamed: 0,Cement (component 1)(kg in a m^3 mixture),Blast Furnace Slag (component 2)(kg in a m^3 mixture),Fly Ash (component 3)(kg in a m^3 mixture),Water (component 4)(kg in a m^3 mixture),Superplasticizer (component 5)(kg in a m^3 mixture),Coarse Aggregate (component 6)(kg in a m^3 mixture),Fine Aggregate (component 7)(kg in a m^3 mixture),Age (day),"Concrete compressive strength(MPa, megapascals)"
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.986111
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.887366
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.269535
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05278
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.296075


In [31]:
# Show the exact column labels. Some contain doubled or trailing spaces
# (the target label ends with a space), so copy them verbatim when indexing.
data.columns


Index(['Cement (component 1)(kg in a m^3 mixture)',
       'Blast Furnace Slag (component 2)(kg in a m^3 mixture)',
       'Fly Ash (component 3)(kg in a m^3 mixture)',
       'Water  (component 4)(kg in a m^3 mixture)',
       'Superplasticizer (component 5)(kg in a m^3 mixture)',
       'Coarse Aggregate  (component 6)(kg in a m^3 mixture)',
       'Fine Aggregate (component 7)(kg in a m^3 mixture)', 'Age (day)',
       'Concrete compressive strength(MPa, megapascals) '],
      dtype='object')

In [32]:
# Separate the predictors (X) from the target column (y).
# The label is copied verbatim, including its trailing space.
target_column = 'Concrete compressive strength(MPa, megapascals) '
y = data[target_column]
X = data.drop(columns=[target_column])


In [33]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
split_parts = train_test_split(X, y, random_state=42, test_size=0.2)
X_train, X_test, y_train, y_test = split_parts

**Linear Regression**

In [34]:
# Ordinary least squares on the raw features, scored by MSE on the test split
linear_reg = LinearRegression().fit(X_train, y_train)
linear_reg_predictions = linear_reg.predict(X_test)
linear_reg_mse = mean_squared_error(y_true=y_test, y_pred=linear_reg_predictions)
print("Linear Regression Predictions:", linear_reg_predictions)
print("Linear Regression MSE:", linear_reg_mse)

Linear Regression Predictions: [59.65716303 52.03714377 63.51983933 51.57136586 17.22016023 39.86885347
 26.7014459  44.72001905 29.35815964 38.19395083 27.75960429 19.81902805
 66.86570362 52.27861085 29.97488442 44.46971738 29.22747564 26.53666548
 31.97285179 32.03281435 36.83075422 31.78840646 38.06323109 25.12704389
 33.10262783 33.84686343 14.24402829 40.33017694 41.8734224  21.33722165
 35.72337032 30.97439331 43.87999877 45.52719553 30.76258029 29.59134147
 28.90854895 38.84992727 20.35373267 38.45367912 21.16914452 15.89005619
 31.06597728 50.70863565 20.27776785 57.64263729 50.89344091 60.97165671
 20.00500457 19.22515249 40.48409011 36.24365771 30.01371229 33.43484666
 46.67040038 51.47108005 27.72793697 15.6989103  29.68932338 18.12663446
 38.63280045 20.53807749 31.57288203 55.47763222 22.94017793 21.34046484
 32.26815463 16.70640658 25.3934816  25.54665879 17.84152097 18.51792287
 13.06162611 27.70981144 28.60696944 19.95922713 60.05454252 44.64528228
 54.61922419 23.5112

**Polynomial Regression**

In [35]:
# Expand the features into degree-2 polynomial terms (the expansion is fitted
# on the training rows only), then fit ordinary least squares on the result
poly_features = PolynomialFeatures(degree=2).fit(X_train)
X_poly = poly_features.transform(X_train)
X_test_poly = poly_features.transform(X_test)

poly_reg = LinearRegression().fit(X_poly, y_train)
poly_reg_predictions = poly_reg.predict(X_test_poly)
poly_reg_mse = mean_squared_error(y_true=y_test, y_pred=poly_reg_predictions)
print("Polynomial Regression Predictions:", poly_reg_predictions)
print("Polynomial Regression MSE:", poly_reg_mse)

Polynomial Regression Predictions: [38.32969507 49.51308903 63.18616195 47.95197497 16.11571166 43.40600477
 29.48518844 58.58563997 25.83642251 46.32049232 36.15828066  8.97066261
 43.82670028 43.29784748 28.7679566  32.56382018 32.16245727 22.25503995
 37.35611088 29.03799502 35.43882022 49.07796101 43.05263098 15.53518332
 30.37473263 29.97349607 10.72959499 44.7982238  55.63284466 21.44083011
 39.54188213 33.15632842 43.96615083 63.20446639 24.80242744 31.26560517
 26.73684731 41.5915874  13.76522645 44.77433289 19.90815185  7.57637458
 38.00729262 46.28828998 14.58065402 66.50499533 43.76712851 31.97353875
 22.37090201 12.97675312 45.22900671 43.30307221 27.15345537 23.48019505
 45.18976495 47.38791491 20.78185831 14.75408343 34.05114687 19.37825813
 41.39236124 17.47856314 42.48352897 51.74216665 24.22961477 17.11948244
 37.59936232 16.48592952 28.83525319 22.33042372 12.32217565 20.36582171
 10.31050357 36.10990482 28.09053384 11.59891373 36.35278194 58.11166926
 61.83318399 13.

**Ridge Regression**

In [36]:
# L2-regularised linear regression (alpha=0.5) on the raw features
ridge_reg = Ridge(alpha=0.5).fit(X_train, y_train)
ridge_reg_predictions = ridge_reg.predict(X_test)
ridge_reg_mse = mean_squared_error(y_true=y_test, y_pred=ridge_reg_predictions)
print("Ridge Regression Predictions:", ridge_reg_predictions)
print("Ridge Regression MSE:", ridge_reg_mse)

Ridge Regression Predictions: [59.65714253 52.0371201  63.51972603 51.57134225 17.22015745 39.86887796
 26.70146525 44.7200025  29.35813515 38.19400505 27.75955478 19.81907312
 66.86549866 52.27857377 29.97493726 44.4698053  29.22747654 26.53669112
 31.97279348 32.03281699 36.83080877 31.78851958 38.06322346 25.12707037
 33.10264897 33.84681928 14.24402692 40.33015349 41.87340704 21.33725186
 35.72341072 30.97444833 43.88004246 45.52718185 30.76258485 29.59134109
 28.9085453  38.84992254 20.35378167 38.45365062 21.16915991 15.89010226
 31.06600602 50.7086048  20.27774621 57.64262739 50.89337113 60.97168658
 20.00500836 19.22516916 40.48411625 36.24370439 30.01371202 33.43490107
 46.67045552 51.47107084 27.72791267 15.69890887 29.68927935 18.12663057
 38.63280051 20.5381511  31.57285915 55.4775447  22.9401936  21.34039501
 32.26814969 16.70637224 25.3934653  25.54663344 17.84156567 18.51793453
 13.06163671 27.70976176 28.60699449 19.95922421 60.05452218 44.64525049
 54.6193029  23.51126

**Lasso Regression**

In [37]:
# L1-regularised linear regression (alpha=0.5) on the raw features
lasso_reg = Lasso(alpha=0.5).fit(X_train, y_train)
lasso_reg_predictions = lasso_reg.predict(X_test)
lasso_reg_mse = mean_squared_error(y_true=y_test, y_pred=lasso_reg_predictions)
print("Lasso Regression Predictions:", lasso_reg_predictions)
print("Lasso Regression MSE:", lasso_reg_mse)

Lasso Regression Predictions: [59.60640005 51.94874612 63.22670111 51.48398379 17.23802425 39.94133849
 26.7614765  44.63855676 29.2754524  38.35612711 27.69487594 19.99826329
 66.37628538 52.14934189 30.06743561 44.63014675 29.25921944 26.62894217
 31.86389608 32.00567441 36.94614026 32.00063721 38.01153797 25.20828342
 33.16515981 33.71905888 14.26458471 40.30793549 41.80108598 21.47525167
 35.83470218 31.15231093 43.96876326 45.46077965 30.74220859 29.65533178
 28.87324133 38.86092436 20.48847259 38.36325606 21.23101275 16.0231511
 31.12223028 50.57524061 20.21618393 57.57113933 50.73305607 61.03197052
 20.0534147  19.2910041  40.5582506  36.36446647 30.0206672  33.53628907
 46.76332767 51.4130384  27.64878423 15.73685053 29.56521949 18.15155748
 38.655436   20.82400323 31.48667054 55.25680151 23.00976156 21.22717961
 32.2741345  16.67279246 25.3307668  25.47259676 18.00057309 18.58262931
 13.14079947 27.6447312  28.68747876 19.93167172 60.01183745 44.55129976
 54.76196102 23.461217

**Decision Tree Regression**

In [38]:
# Fit a single regression tree and score it on the held-out rows.
# random_state is fixed (same seed as the train/test split) so that re-running
# this cell reproduces the same tree and therefore the same MSE.
dt_reg = DecisionTreeRegressor(random_state=42)
dt_reg.fit(X_train, y_train)
dt_reg_predictions = dt_reg.predict(X_test)
dt_reg_mse = mean_squared_error(y_test, dt_reg_predictions)
print("Decision Tree Regression Predictions:", dt_reg_predictions)
print("Decision Tree Regression MSE:", dt_reg_mse)

Decision Tree Regression Predictions: [51.73276323 39.39665864 71.29871316 35.3011712  10.53588276 44.27814872
 23.69660064 42.919881   33.94290348 31.35047372 44.86834018 17.20035777
 36.3009114  42.79577532 17.8229546  21.81984697 40.06200298 20.41607384
 37.17103011 31.81242264 38.80439876 39.2311844  45.08483564  9.56165317
 33.72916592 33.94290348 12.73462172 41.05346947 59.30183076 14.98920824
 41.05346947 32.72253096 39.70002808 47.8151606  19.41564416 36.44776979
 33.72916592 42.13387836 10.3938507  47.401475   18.91232668  2.33180783
 41.71950328 46.68441996 13.35515012 61.09446836 55.64760796 34.48758952
 26.84819544  8.06342182 57.21823429 47.7144971  24.3385028  17.20035777
 44.13335876 35.3011712  26.31040416 11.48391226 39.14844728 19.69143456
 42.13387836 13.52062436 34.55653712 54.09628696 25.74503384 18.03393426
 33.39821744 12.18097249 31.53663224 22.31833812 11.36256448 28.02168359
  6.46728488 44.86834018 26.14768782 13.81709904 51.73276323 51.0556978
 59.59042572 1

**Random Forest Regression**

In [39]:
# Fit a random forest and score it on the held-out rows.
# random_state is fixed (same seed as the train/test split) so the forest,
# its predictions and its MSE are identical on every run.
rf_reg = RandomForestRegressor(random_state=42)
rf_reg.fit(X_train, y_train)
rf_reg_predictions = rf_reg.predict(X_test)
rf_reg_mse = mean_squared_error(y_test, rf_reg_predictions)
print("Random Forest Regression Predictions:", rf_reg_predictions)
print("Random Forest Regression MSE:", rf_reg_mse)

Random Forest Regression Predictions: [51.75948732 40.26480151 72.38105258 34.48897241 11.6107     44.03115773
 23.98724925 49.47590144 37.18118609 43.21571447 41.36602962 15.7080835
 38.44823515 34.20146486 23.01185445 22.08551586 37.46669811 17.35950262
 38.18939306 32.16888173 35.71581123 37.39490986 45.18120666 11.91838359
 31.86351281 39.46004217 11.57519888 44.79724141 52.95513523 14.89255848
 58.73986448 34.35184549 41.48670171 47.77568317 17.52470797 39.83846797
 35.81620977 43.9780405   9.54688459 51.19436127 16.70474568  4.96473052
 41.241986   49.23499853 12.93875488 65.75532612 52.44176339 34.55167631
 26.67775697  8.27455316 54.26409163 43.8655387  27.52437834 17.12500494
 44.3885752  34.9541205  27.17085051 12.41567702 35.28462378 20.23100469
 44.08872898 14.98644837 36.0889517  51.59243188 31.9843228  25.33371314
 33.46793036 13.32286885 30.09068878 23.30883934 12.32231507 30.44954233
  9.43840047 41.36602962 26.77958568 11.5229366  51.65629345 51.58783538
 56.80155045  

**Gradient Boosting Regression**

In [40]:
# Fit a gradient-boosted tree ensemble and score it on the held-out rows.
# random_state is fixed (same seed as the train/test split) so the fitted
# ensemble and its MSE are repeatable across runs.
gb_reg = GradientBoostingRegressor(random_state=42)
gb_reg.fit(X_train, y_train)
gb_reg_predictions = gb_reg.predict(X_test)
gb_reg_mse = mean_squared_error(y_test, gb_reg_predictions)
print("Gradient Boosting Regression Predictions:", gb_reg_predictions)
print("Gradient Boosting Regression MSE:", gb_reg_mse)

Gradient Boosting Regression Predictions: [48.8987815  45.47830899 69.37091533 33.9399045  13.31701684 42.66199282
 25.48962823 50.83636176 31.87762522 42.34713171 39.85792829 13.52437931
 37.852787   42.03266311 30.41250904 21.63029521 36.83505406 19.34241974
 37.1863369  30.082273   38.59934748 37.36352354 46.04758481 11.07202762
 37.91688683 33.5831423  10.18882565 45.68938442 53.91821196 13.52039955
 51.08253561 35.33193697 46.71410814 54.56036611 18.32819119 35.04565778
 30.99273072 42.76423449 12.36039104 47.8420023  15.38670196  7.88087353
 37.79975469 51.88965988 11.65798171 75.30653562 52.41000132 34.32620302
 26.42814247  8.57949647 47.34674116 42.42949205 25.0987211  18.21381045
 41.21828985 32.91084006 27.30571433  9.10972232 37.06039167 23.24683943
 43.17599149 14.86123237 37.3482922  51.02007633 31.23104501 22.77541548
 32.97673285 15.30597222 33.27326915 23.38335543 12.49020357 27.85462471
  9.48802288 39.85792829 27.0816892  10.90891339 49.23358392 51.55047307
 54.14169

**Support Vector Regression**

In [41]:
# Standardise the features using statistics from the training rows only,
# then fit a default SVR on the scaled data
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

svr_reg = SVR().fit(X_train_scaled, y_train)
svr_reg_predictions = svr_reg.predict(X_test_scaled)
svr_reg_mse = mean_squared_error(y_true=y_test, y_pred=svr_reg_predictions)
print("Support Vector Regression Predictions:", svr_reg_predictions)
print("Support Vector Regression MSE:", svr_reg_mse)

Support Vector Regression Predictions: [42.03139548 53.0686222  55.41209704 52.24747062 27.15705267 41.22530176
 26.95661374 47.39965278 31.320643   43.2880413  34.64045111 20.65307646
 46.85035782 44.68282126 26.77508431 34.53817218 32.0039248  25.8362313
 33.89445217 27.82583391 32.08121165 37.41921703 41.37440553 26.32420111
 31.75030464 35.14651652 16.71268529 45.70264808 44.52699398 22.79416313
 37.58637062 33.16311887 39.86714291 46.98753962 25.63578668 35.14050366
 30.45314456 41.2170668  21.01705402 41.86836866 19.44799367 19.65667477
 33.41536477 47.22131072 19.47422321 61.19770959 42.83778649 39.02253424
 22.33182812 21.63955798 42.86583148 40.47238901 30.83265194 28.16329678
 42.07347495 51.34693701 28.703185   17.85177742 34.86065419 21.08621996
 40.98687286 26.55513858 40.03659755 55.32099557 23.88654698 26.87749746
 34.53465832 22.45636655 28.90353798 24.31425759 20.93526186 20.11337601
 15.28760712 34.59190132 28.76695961 18.48732332 42.45892694 48.173001
 42.4966478  19

**Gradient Descent** (linear model trained with stochastic gradient descent, `SGDRegressor`)

In [42]:
# Linear model trained with stochastic gradient descent. The pipeline
# standardises the features first, fitting the scaler on the training rows only.
# random_state is fixed (same seed as the train/test split) so the fitted
# coefficients and the MSE are repeatable across runs.
gd_reg = make_pipeline(StandardScaler(), SGDRegressor(random_state=42))
gd_reg.fit(X_train, y_train)
gd_reg_predictions = gd_reg.predict(X_test)
gd_reg_mse = mean_squared_error(y_test, gd_reg_predictions)
print("Gradient Descent Regression Predictions:", gd_reg_predictions)
print("Gradient Descent Regression MSE:", gd_reg_mse)

Gradient Descent Regression Predictions: [59.81865181 51.78705538 63.65163348 51.32448022 17.43007311 40.27089221
 26.98654115 44.65916305 29.1953155  38.50880513 28.32242921 20.84447622
 67.20899396 51.79105331 29.84247329 44.0270154  29.29722924 26.71626478
 32.09866299 31.78451792 36.64751915 31.4312096  37.68242711 25.24693013
 33.67440122 33.71356274 14.27107093 40.86628026 41.85731932 21.83756302
 35.77664289 31.33889012 43.82732323 45.65669569 30.51070082 29.95344195
 28.72628833 39.35573987 20.58042125 38.20233339 21.19614558 16.14378337
 30.90048706 50.06697353 19.80365149 57.22218139 51.26301803 61.30032729
 20.06272161 19.39451052 40.62992142 36.46708615 30.02652036 33.15049165
 45.77601223 51.09306049 27.57630244 15.9383296  29.59074441 18.30017288
 39.14300311 21.84052889 31.02109917 55.52083994 23.14086535 22.10305274
 32.46081562 17.33286303 24.79626357 25.13830438 18.23095233 18.7592449
 13.35165656 28.27242202 29.18170495 19.39950919 60.26533278 44.93703688
 53.8525798

**Performance** (A lower MSE indicates better performance)

In [43]:
# Recap of every model's test-set MSE, in the order the models were trained
mse_report = [
    ("Linear Regression MSE:", linear_reg_mse),
    ("Polynomial Regression MSE:", poly_reg_mse),
    ("Ridge Regression MSE:", ridge_reg_mse),
    ("Lasso Regression MSE:", lasso_reg_mse),
    ("Decision Tree Regression MSE:", dt_reg_mse),
    ("Random Forest Regression MSE:", rf_reg_mse),
    ("Gradient Boosting Regression MSE:", gb_reg_mse),
    ("Support Vector Regression MSE:", svr_reg_mse),
    ("Gradient Descent Regression MSE:", gd_reg_mse),
]
for report_label, report_value in mse_report:
    print(report_label, report_value)

Linear Regression MSE: 95.97548435337708
Polynomial Regression MSE: 55.589926374568996
Ridge Regression MSE: 95.97546281139233
Lasso Regression MSE: 95.95809284240904
Decision Tree Regression MSE: 47.229535823041076
Random Forest Regression MSE: 32.6430545262479
Gradient Boosting Regression MSE: 30.388351338536868
Support Vector Regression MSE: 88.97832686422466
Gradient Descent Regression MSE: 96.13760013867262


In [44]:
# Track the lowest MSE seen so far. Starting from infinity guarantees that the
# first finite score replaces it.
best_mse = float('inf')
best_model = None

# Candidates are visited in the order the models were trained; the strict '<'
# means an earlier model keeps the title if a later one only ties it.
model_scores = [
    ("Linear Regression", linear_reg_mse),
    ("Polynomial Regression", poly_reg_mse),
    ("Ridge Regression", ridge_reg_mse),
    ("Lasso Regression", lasso_reg_mse),
    ("Decision Tree Regression", dt_reg_mse),
    ("Random Forest Regression", rf_reg_mse),
    ("Gradient Boosting Regression", gb_reg_mse),
    ("Support Vector Regression", svr_reg_mse),
    ("Gradient Descent Regression", gd_reg_mse),
]
for candidate_name, candidate_mse in model_scores:
    if candidate_mse < best_mse:
        best_mse = candidate_mse
        best_model = candidate_name

print("The best model is:", best_model)


The best model is: Gradient Boosting Regression
