In [72]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, LogisticRegression, Ridge, Lasso
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import SGDRegressor

In [73]:
# Load the data into a pandas DataFrame
data = pd.read_csv('housing.csv', delimiter=',')

In [74]:
data.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,MEDV
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222,18.7,396.9,5.33,36.2


In [75]:
data.isnull()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,MEDV
0,False,False,False,False,False,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
501,False,False,False,False,False,False,False,False,False,False,False,False,False,False
502,False,False,False,False,False,False,False,False,False,False,False,False,False,False
503,False,False,False,False,False,False,False,False,False,False,False,False,False,False
504,False,False,False,False,False,False,False,False,False,False,False,False,False,False


In [76]:
# Split the data into features (X) and target variable (y)
X = data.drop('MEDV', axis=1)
y = data['MEDV']

In [77]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

**Linear Regression**

In [78]:
linear_reg = LinearRegression()
linear_reg.fit(X_train, y_train)
linear_reg_predictions = linear_reg.predict(X_test)
linear_reg_mse = mean_squared_error(y_test, linear_reg_predictions)
print("Linear Regression Predictions:", linear_reg_predictions)
print("Linear Regression MSE:", linear_reg_mse)

Linear Regression Predictions: [28.99672362 36.02556534 14.81694405 25.03197915 18.76987992 23.25442929
 17.66253818 14.34119    23.01320703 20.63245597 24.90850512 18.63883645
 -6.08842184 21.75834668 19.23922576 26.19319733 20.64773313  5.79472718
 40.50033966 17.61289074 27.24909479 30.06625441 11.34179277 24.16077616
 17.86058499 15.83609765 22.78148106 14.57704449 22.43626052 19.19631835
 22.43383455 25.21979081 25.93909562 17.70162434 16.76911711 16.95125411
 31.23340153 20.13246729 23.76579011 24.6322925  13.94204955 32.25576301
 42.67251161 17.32745046 27.27618614 16.99310991 14.07009109 25.90341861
 20.29485982 29.95339638 21.28860173 34.34451856 16.04739105 26.22562412
 39.53939798 22.57950697 18.84531367 32.72531661 25.0673037  12.88628956
 22.68221908 30.48287757 31.52626806 15.90148607 20.22094826 16.71089812
 20.52384893 25.96356264 30.61607978 11.59783023 20.51232627 27.48111878
 11.01962332 15.68096344 23.79316251  6.19929359 21.6039073  41.41377225
 18.76548695  8.8793

**Polynomial Regression**

In [79]:
poly_features = PolynomialFeatures(degree=2)
X_poly = poly_features.fit_transform(X_train)
poly_reg = LinearRegression()
poly_reg.fit(X_poly, y_train)
X_test_poly = poly_features.transform(X_test)
poly_reg_predictions = poly_reg.predict(X_test_poly)
poly_reg_mse = mean_squared_error(y_test, poly_reg_predictions)
print("Polynomial Regression Predictions:", poly_reg_predictions)
print("Polynomial Regression MSE:", poly_reg_mse)

Polynomial Regression Predictions: [26.90787238 33.99321145 14.60586834 20.35111618 16.48628265 16.15109301
 16.87917262 14.10528731 22.79861605 19.1264137  18.98775983 21.46493548
 15.41342157 16.99985677 18.66087478 25.32738423 18.82889038  9.5750885
 49.36237282 13.443892   24.99897027 27.92221856 15.5907414  19.5574463
 18.04737997 18.40432703 17.22273475 13.58323312 17.25221348 18.99941391
 20.54168886 24.53950739 19.80340201 26.77886641 14.90328771 17.90461266
 31.35604066 20.35686773 21.20637548 21.755611   16.87336898 31.94806617
 51.88311136 15.0945335  26.38957614 17.47377324 15.49258131 23.91089153
 19.10858554 27.58299375 22.01054311 36.47226036 17.57782024 26.42493302
 45.72348571 24.57069391 15.71556365 32.38245469 25.46753371 16.26613021
 24.16392857 28.69587231 30.31194788 12.29357994 25.58260709 16.22718698
 15.58685547 24.825261   29.98003691 12.88967204 21.73140079 28.47777623
  9.37258518 18.78880852 18.63410825  7.7575773  21.01058221 48.40419328
 12.1368404  11.39

**Ridge Regression**

In [80]:
ridge_reg = Ridge(alpha=0.5)
ridge_reg.fit(X_train, y_train)
ridge_reg_predictions = ridge_reg.predict(X_test)
ridge_reg_mse = mean_squared_error(y_test, ridge_reg_predictions)
print("Ridge Regression Predictions:", ridge_reg_predictions)
print("Ridge Regression MSE:", ridge_reg_mse)

Ridge Regression Predictions: [28.79888253 35.82437262 14.38886358 24.86701456 18.9957324  22.67722701
 17.61478888 14.2651324  22.4415893  20.72164997 24.44407244 18.54412744
 -6.22085808 21.3265193  19.20293902 26.10032461 20.38231522  5.73466131
 40.46061246 17.80700034 27.36441758 29.97377697 11.37287323 24.44554063
 18.06371324 15.39049184 22.40369298 14.73889304 21.95497153 19.34532369
 22.1443304  25.31459181 25.95741602 18.21404619 16.30468225 17.81522012
 31.20719789 20.48329425 23.46121316 24.45624286 14.00080852 32.19188799
 42.57710681 17.11586158 27.19843697 17.0988167  14.10565942 25.73828982
 19.94862982 30.13342164 21.21747395 34.22076083 16.12269392 26.26265244
 39.72055693 22.49224645 19.15523057 32.7648417  24.9929007  12.71537737
 22.90351819 30.77256701 31.60844727 16.20508898 20.4959463  16.1322997
 20.59466833 25.93308783 30.7575546  11.75857458 20.3790273  27.04989293
 11.24804886 16.27725046 23.34428129  6.17520444 21.48074064 41.30110792
 18.69559761  8.368917

**Lasso Regression**

In [81]:
lasso_reg = Lasso(alpha=0.5)
lasso_reg.fit(X_train, y_train)
lasso_reg_predictions = lasso_reg.predict(X_test)
lasso_reg_mse = mean_squared_error(y_test, lasso_reg_predictions)
print("Lasso Regression Predictions:", lasso_reg_predictions)
print("Lasso Regression MSE:", lasso_reg_mse)

Lasso Regression Predictions: [28.45869672 32.70820858 13.68139983 24.92431114 19.44616744 21.70744012
 17.87763271 13.28155749 21.64843398 21.04584171 21.43104965 19.436508
 -5.72822161 20.60098957 20.44656584 24.99222268 20.48771681  6.03628312
 38.72768844 18.38568954 27.55188805 29.40586177 11.87083096 24.86078799
 19.0237276  15.42390009 22.17134113 14.65722702 19.07267788 20.10091305
 22.20349468 25.89221755 24.87342365 21.44203992 15.13373986 20.94526119
 31.08975263 22.22261669 20.61948484 23.99538265 14.34470506 31.52776098
 40.3980735  17.59704735 26.92026137 18.06777032 14.98448843 25.15769219
 19.28226556 30.88830048 22.40676615 33.60036106 16.41404203 26.01597227
 38.96200077 22.53096883 20.05393229 32.46765212 24.84582174 13.77597851
 23.77944381 31.19220714 31.41250642 17.70873522 22.57500142 13.38445719
 20.42401213 26.13602831 30.88161061 12.52888003 20.32744252 26.21755938
 11.75135798 19.64506729 22.50790389  6.0022919  21.42548226 39.25322329
 17.66554481  7.5926783

**Decision Tree Regression**

In [82]:
dt_reg = DecisionTreeRegressor()
dt_reg.fit(X_train, y_train)
dt_reg_predictions = dt_reg.predict(X_test)
dt_reg_mse = mean_squared_error(y_test, dt_reg_predictions)
print("Decision Tree Regression Predictions:", dt_reg_predictions)
print("Decision Tree Regression MSE:", dt_reg_mse)

Decision Tree Regression Predictions: [28.1 33.1 15.2 21.7 23.2 18.5 16.6 16.7 23.  20.7 18.8 27.1  7.4 20.6
 18.9 23.9 20.5 10.5 44.  13.  23.  24.4 13.6 22.8 14.5 14.6 21.  13.5
 19.4 20.7 18.3 23.1 50.  15.  13.3 15.6 33.4 18.5 20.4 24.8 19.8 30.1
 44.  18.2 22.  13.  14.3 23.7 17.7 32.  19.7 36.1 16.6 30.1 43.1 18.5
 15.6 27.9 22.  22.5 26.6 33.4 29.4 16.1 28.  13.4 13.  22.9 27.9 14.1
 21.8 28.7  8.8 18.6 21.4 10.5 19.8 50.  13.3  8.1 21.  12.  19.4 10.5
 14.5 28.4 14.3 23.  22.9 18.  22.9  8.8 18.4 17.5 16.2 18.4 50.  12.
 11.7 10.2 19.  26.4]
Decision Tree Regression MSE: 22.999215686274507


**Random Forest Regression**

In [83]:
rf_reg = RandomForestRegressor()
rf_reg.fit(X_train, y_train)
rf_reg_predictions = rf_reg.predict(X_test)
rf_reg_mse = mean_squared_error(y_test, rf_reg_predictions)
print("Random Forest Regression Predictions:", rf_reg_predictions)
print("Random Forest Regression MSE:", rf_reg_mse)

Random Forest Regression Predictions: [22.814 30.163 17.35  23.527 16.095 21.209 19.216 15.314 21.347 21.301
 19.764 19.654  9.065 21.628 19.377 26.888 19.378  8.294 46.03  14.381
 24.709 23.95  14.378 23.477 14.808 14.935 21.65  13.911 19.179 21.502
 19.488 23.213 30.064 20.449 14.522 16.08  34.223 19.245 20.878 24.412
 19.325 29.94  45.816 19.049 22.296 13.408 14.899 24.458 17.964 28.03
 20.839 33.929 15.807 26.441 43.977 21.823 14.951 32.573 22.478 20.769
 25.625 33.904 29.301 17.971 27.146 17.393 13.511 23.051 27.668 15.743
 20.73  28.486 10.299 21.401 22.303  6.975 20.071 46.301 11.222 12.957
 21.252 13.57  20.006  9.02  20.252 26.933 15.357 23.204 23.51  17.918
 21.522  8.162 19.398 18.784 23.318 19.392 40.872 12.445 12.205 13.896
 20.091 24.03 ]
Random Forest Regression MSE: 8.321780284313725


**Gradient Boosting Regression**

In [84]:
gb_reg = GradientBoostingRegressor()
gb_reg.fit(X_train, y_train)
gb_reg_predictions = gb_reg.predict(X_test)
gb_reg_mse = mean_squared_error(y_test, gb_reg_predictions)
print("Gradient Boosting Regression Predictions:", gb_reg_predictions)
print("Gradient Boosting Regression MSE:", gb_reg_mse)

Gradient Boosting Regression Predictions: [23.4497613  31.17243463 17.49155121 24.02257316 17.6811439  22.32005362
 18.14958762 13.83045241 20.61619342 21.04285745 19.93787694 17.84771967
  8.87847475 21.75820976 19.79454516 25.71887535 19.71027046  8.83726437
 45.07118382 15.96280089 24.1976327  25.26461031 12.51887607 20.89492763
 14.97826629 15.47812803 22.20367497 13.26656609 19.27766026 21.43650957
 19.72895052 23.47588201 19.38092692 19.15659237 14.50437732 16.94131142
 33.09630021 19.80432738 20.7291071  24.14928838 18.30166138 30.36297218
 45.15322353 20.84894217 22.61816613 14.61317262 15.71182991 24.14928838
 18.06929082 28.02569719 20.23520559 35.66832821 16.73377496 24.99108092
 47.7594972  21.39870322 16.51753536 32.24969813 22.03440415 18.08425056
 24.24590305 34.42819637 30.66211615 19.1946555  24.54464101 16.92131694
 14.50851517 23.66112578 28.04012929 15.40476214 21.27417609 24.96901377
 11.04430788 21.36306687 23.33304102  6.34609352 20.27445864 45.63872375
 11.79854

**Support Vector Regression**

In [85]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
svr_reg = SVR()
svr_reg.fit(X_train_scaled, y_train)
svr_reg_predictions = svr_reg.predict(X_test_scaled)
svr_reg_mse = mean_squared_error(y_test, svr_reg_predictions)
print("Support Vector Regression Predictions:", svr_reg_predictions)
print("Support Vector Regression MSE:", svr_reg_mse)

Support Vector Regression Predictions: [25.95624291 28.04561835 17.7362879  23.12477806 16.46106579 21.29654269
 17.3307001  15.82973949 20.00494947 19.27156708 21.49429129 19.28686501
 17.43930814 21.48783194 17.73471571 22.19083229 18.9894571  11.08631371
 33.75890121 15.13271114 25.34802666 27.90013276 14.58874089 22.77417453
 15.52855773 15.29260406 21.54905817 13.55309378 21.29896556 18.66248702
 19.32762599 24.03302498 18.98228382 18.66998107 14.77569959 17.66599856
 30.93214444 19.89511965 22.92251457 23.71900406 15.76069262 29.34922294
 32.32546277 18.2058077  25.50536742 15.42117754 15.21465145 24.69020859
 18.33996605 29.41260567 20.61628038 32.38466877 16.76049199 24.58174296
 36.12087235 22.25553964 16.23381206 28.44060534 24.14823144 15.96957461
 24.12990829 27.87759741 28.6011939  18.62236675 22.92003102 19.33125984
 16.21397525 24.53252211 27.7219972  13.577104   20.36787878 23.38979484
 10.69352134 20.43242723 22.48219365 13.32335982 19.38839185 32.22808749
 14.20683785

**Gradient Descent**

In [86]:
gd_reg = make_pipeline(StandardScaler(), SGDRegressor())
gd_reg.fit(X_train, y_train)
gd_reg_predictions = gd_reg.predict(X_test)
gd_reg_mse = mean_squared_error(y_test, gd_reg_predictions)
print("Gradient Descent Regression Predictions:", gd_reg_predictions)
print("Gradient Descent Regression MSE:", gd_reg_mse)

Gradient Descent Regression Predictions: [29.01655222 35.93972359 15.58521774 25.04701195 18.69120833 23.40301632
 17.68251137 14.49173232 22.86658522 20.78624971 24.74258189 18.66727964
 -6.12021304 21.94216381 19.20107209 26.12332233 20.04179616  5.66028
 40.41190681 17.53687339 27.10210462 29.88974357 11.42988793 24.22260412
 17.75376535 15.66514621 22.93120571 14.54999293 22.41967626 19.35070096
 22.41016598 25.22367599 25.91794431 17.55402827 16.63345622 16.99903487
 31.29044932 20.01007162 23.77825713 24.66503126 14.08414217 31.94288116
 42.3937144  17.52247925 27.41325082 16.86374309 14.13489208 25.94439789
 20.17417585 30.20817381 21.29988242 34.16652688 16.1478752  26.47091294
 39.47770357 22.49714588 18.75787882 32.46973418 25.1090033  12.90182254
 22.60209565 30.28471087 31.43346569 15.96835866 20.70238451 16.66561531
 20.43952593 25.97169056 30.48794268 11.51300248 20.44479621 27.37619553
 10.95165954 15.87016285 23.9463875   6.17831293 21.79930748 41.12504096
 18.70600784 

**Performance** (A lower MSE indicates better performance)

In [87]:
print("Linear Regression MSE:", linear_reg_mse)
print("Polynomial Regression MSE:", poly_reg_mse)
print("Ridge Regression MSE:", ridge_reg_mse)
print("Lasso Regression MSE:", lasso_reg_mse)
print("Decision Tree Regression MSE:", dt_reg_mse)
print("Random Forest Regression MSE:", rf_reg_mse)
print("Gradient Boosting Regression MSE:", gb_reg_mse)
print("Support Vector Regression MSE:", svr_reg_mse)
print("Gradient Descent Regression MSE:", gd_reg_mse)

Linear Regression MSE: 24.291119474973478
Polynomial Regression MSE: 14.183558083680547
Ridge Regression MSE: 24.377609808756354
Lasso Regression MSE: 24.315786841819875
Decision Tree Regression MSE: 22.999215686274507
Random Forest Regression MSE: 8.321780284313725
Gradient Boosting Regression MSE: 6.1777395585298
Support Vector Regression MSE: 25.668539678396044
Gradient Descent Regression MSE: 24.59534697168974


In [88]:
# Initialize the best MSE variable
best_mse = float('inf') # It should be an initial value that is higher than the maximum possible MSE to ensure that the first model's MSE is assigned correctly.
best_model = None

# Compare the MSE values and find the best model
if linear_reg_mse < best_mse:
    best_mse = linear_reg_mse
    best_model = "Linear Regression"

if poly_reg_mse < best_mse:
    best_mse = poly_reg_mse
    best_model = "Polynomial Regression"

if ridge_reg_mse < best_mse:
    best_mse = ridge_reg_mse
    best_model = "Ridge Regression"

if lasso_reg_mse < best_mse:
    best_mse = lasso_reg_mse
    best_model = "Lasso Regression"

if dt_reg_mse < best_mse:
    best_mse = dt_reg_mse
    best_model = "Decision Tree Regression"

if rf_reg_mse < best_mse:
    best_mse = rf_reg_mse
    best_model = "Random Forest Regression"

if gb_reg_mse < best_mse:
    best_mse = gb_reg_mse
    best_model = "Gradient Boosting Regression"

if svr_reg_mse < best_mse:
    best_mse = svr_reg_mse
    best_model = "Support Vector Regression"

if gd_reg_mse < best_mse:
    best_mse = gd_reg_mse
    best_model = "Gradient Descent Regression"

print("The best model is:", best_model)


The best model is: Gradient Boosting Regression
