In [68]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, LogisticRegression, Ridge, Lasso
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import SGDRegressor

In [69]:
# Read the red and white wine-quality CSV files (semicolon-separated),
# each into its own DataFrame: `dr` for red, `dw` for white.
dr, dw = (
    pd.read_csv(csv_path, sep=';')
    for csv_path in ('winequality-red.csv', 'winequality-white.csv')
)

In [70]:
# Preview the first five rows of the red-wine frame.
dr.head(n=5)

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5


In [71]:
# Preview the first five rows of the white-wine frame.
dw.head(n=5)

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.0,0.27,0.36,20.7,0.045,45.0,170.0,1.001,3.0,0.45,8.8,6
1,6.3,0.3,0.34,1.6,0.049,14.0,132.0,0.994,3.3,0.49,9.5,6
2,8.1,0.28,0.4,6.9,0.05,30.0,97.0,0.9951,3.26,0.44,10.1,6
3,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,9.9,6
4,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,9.9,6


In [72]:
# Combine the red and white wine datasets into a single DataFrame.
# ignore_index=True gives the result a fresh 0..n-1 index. Without it each
# frame keeps its own row labels (both start at 0), so the combined frame
# would carry duplicate index labels and a label lookup such as data.loc[0]
# would return more than one row.
data = pd.concat([dr, dw], ignore_index=True)


In [73]:
# Separate the target column ('quality') from the predictor columns:
# y holds the target, X holds every remaining column.
y = data['quality']
X = data.drop(columns='quality')

In [74]:
# Hold out 20% of the rows as a test set; the fixed random_state keeps the
# split identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

**Linear Regression**

In [75]:
# Fit ordinary least squares on the training split and score it on the
# held-out split with mean squared error.
linear_reg = LinearRegression().fit(X_train, y_train)
linear_reg_predictions = linear_reg.predict(X_test)
linear_reg_mse = mean_squared_error(y_true=y_test, y_pred=linear_reg_predictions)

# Report: one test-set prediction per line, then the MSE.
print("Linear Regression Predictions:")
for predicted_quality in linear_reg_predictions:
    print(predicted_quality)

print("Linear Regression MSE:", linear_reg_mse)

Linear Regression Predictions:
6.623020438293757
5.015841685201899
6.285817760243866
5.6995311741511685
5.39181504869488
6.195436870489658
6.054268982916362
6.089681247353646
5.113698089073168
6.323571304476189
5.6093322327728785
5.422146406011585
7.088931349709242
4.873261464995252
6.080911586670815
5.57787241611674
4.9477003286857695
5.5649022673314334
6.033165304671591
5.417537852857286
6.037013425950931
6.148485088557507
6.203034898840613
6.79775495536601
5.291470158744595
5.564547843066229
5.340481460380168
5.75271734741554
5.121811233923033
6.560891409271797
5.053541818712652
6.210175857897994
4.8894107306143795
6.25043811275345
5.030702068599361
6.8726643495529345
5.056886558832744
5.311834351175598
6.274594906848748
5.735468940393787
5.924194273890706
5.284725298557376
6.09369367098207
6.04186299429071
5.431234070253005
6.32858444694935
5.729034479096654
6.824030057542203
5.9786195297828115
6.070303995621849
4.929868152081177
6.378745549292262
6.2870624624237195
4.9970512788819

**Polynomial Regression**

In [76]:
# Expand the features with degree-2 polynomial terms. The expansion is
# fitted on the training split only and then applied to the test split.
poly_features = PolynomialFeatures(degree=2)
X_poly = poly_features.fit_transform(X_train)
X_test_poly = poly_features.transform(X_test)

# Linear regression on the expanded features, scored by test-set MSE.
poly_reg = LinearRegression().fit(X_poly, y_train)
poly_reg_predictions = poly_reg.predict(X_test_poly)
poly_reg_mse = mean_squared_error(y_true=y_test, y_pred=poly_reg_predictions)

# Report: one test-set prediction per line, then the MSE.
print("Polynomial Regression Predictions:")
for predicted_quality in poly_reg_predictions:
    print(predicted_quality)

print("Polynomial Regression MSE:", poly_reg_mse)

Polynomial Regression Predictions:
6.595871394520145
5.019744046749111
6.512856637935329
5.718735946920788
5.327070388624634
6.185046873624742
5.903395079250913
6.295473672445951
5.017264039190195
6.24572055588942
5.694438116064703
5.427867663624056
7.224186147344881
4.777837912008181
5.721354664119644
5.624094092565429
4.992780811522607
5.5523444897226
6.130411719996118
5.689606495145199
5.885425681130073
6.204285309100669
6.171161286285496
6.882305531842576
5.724384882538288
5.515597691783114
5.577438344429538
5.617763360254685
5.267663868371528
6.791139934130115
5.03379858415974
6.0721695758929854
4.863594091732011
6.260257903029924
4.902112465795653
7.285714164485398
5.018319537344723
5.319636122214433
6.3522010480355675
5.300975378928342
5.67877013848738
5.3113789254402946
6.100838272628607
5.90556997931526
5.462411331716794
6.5179122079298395
6.02724465121355
7.013674467567398
5.831633123962092
5.973004280307578
4.961958489600875
6.69151043212878
6.409476593828003
5.1536992353758

**Ridge Regression**

In [77]:
# Ridge (L2-penalised) regression with alpha=0.5, fitted on the training
# split and scored on the held-out split with mean squared error.
ridge_reg = Ridge(alpha=0.5).fit(X_train, y_train)
ridge_reg_predictions = ridge_reg.predict(X_test)
ridge_reg_mse = mean_squared_error(y_true=y_test, y_pred=ridge_reg_predictions)

# Report: one test-set prediction per line, then the MSE.
print("Ridge Regression Predictions:")
for predicted_quality in ridge_reg_predictions:
    print(predicted_quality)

print("Ridge Regression MSE:", ridge_reg_mse)

Ridge Regression Predictions:
6.594656870975772
5.027872958342234
6.250325311204888
5.692035559544843
5.394830107963413
6.195696006577906
6.044514549627664
6.052898968410629
5.140333034807476
6.324183417539109
5.608330962391186
5.392989842939041
7.033192217619156
4.797824094873029
6.160948818073621
5.573417121516016
4.977454211091015
5.597092381846096
6.013689680054199
5.4541582159477215
6.062967662393444
6.12582685408664
6.151504305167251
6.7618463348912226
5.333330614052942
5.602225273857143
5.334690139752222
5.728365956328543
5.15306968455545
6.565974672184922
5.093282265689004
6.196194383990621
4.900543898101848
6.22766381407402
5.003259170425048
6.837250476497861
5.111516516675698
5.3467040146870435
6.271786264036967
5.668961785841732
5.973503993631379
5.319712778687045
6.107936074049723
6.0327363803538905
5.4808539960151474
6.33546398610067
5.703376105713087
6.886821259555434
6.042482544317529
6.100777214266726
4.976368736437065
6.44329869343731
6.260878455070028
5.01864510947298

**Lasso Regression**

In [78]:
# Lasso (L1-penalised) regression with alpha=0.5, fitted on the training
# split and scored on the held-out split with mean squared error.
# NOTE(review): the model is fitted on the raw, unstandardised columns, so
# the penalty acts on each coefficient at its feature's native scale -
# confirm this is intended.
lasso_reg = Lasso(alpha=0.5).fit(X_train, y_train)
lasso_reg_predictions = lasso_reg.predict(X_test)
lasso_reg_mse = mean_squared_error(y_true=y_test, y_pred=lasso_reg_predictions)

# Report: one test-set prediction per line, then the MSE.
print("Lasso Regression Predictions:")
for predicted_quality in lasso_reg_predictions:
    print(predicted_quality)

print("Lasso Regression MSE:", lasso_reg_mse)

Lasso Regression Predictions:
5.755526802961866
5.789314581691795
5.824890335540731
5.863788631947469
5.785969824017398
5.891265499949313
5.71726353265527
5.922931129172717
5.777132930246263
5.937950722932221
5.792960191078817
5.747728768827398
5.889789484101228
5.844154228313284
5.7648705120934265
5.770788696843285
5.779647805731015
5.640010091676334
5.868264123524136
5.717505832777185
5.85964731475832
5.8383753618614405
5.849345512697398
5.810516875439667
5.676485386863923
5.819030702381583
5.7006982638986745
5.8126168098295965
5.867790630838603
5.972729917266405
5.756910944544348
5.79841599070144
5.885255440933851
5.766739253919738
5.881114123744703
5.905062485540944
5.740991809446192
5.832214876989667
5.868102590109525
5.8487579306296675
5.773395446593639
5.861457504993923
5.833910977843072
5.810113041903143
5.917174477837468
5.882233750088675
5.858678114270661
5.881275657159312
5.90336638468754
5.7600719536875395
5.871608881198532
5.86340701352754
5.865587714624774
5.8383057027124

**Decision Tree Regression**

In [79]:
# Fit a single regression tree on the training split and score it on the
# held-out split with mean squared error.
# random_state is pinned because tree construction is randomised (feature
# order is shuffled when searching for splits, which decides ties between
# equally good splits). Without a seed, the predictions and MSE change from
# run to run, which makes the model comparison further down unreproducible.
dt_reg = DecisionTreeRegressor(random_state=42)
dt_reg.fit(X_train, y_train)
dt_reg_predictions = dt_reg.predict(X_test)
dt_reg_mse = mean_squared_error(y_test, dt_reg_predictions)

# Report: one test-set prediction per line, then the MSE.
print("Decision Tree Regression Predictions:")
for prediction in dt_reg_predictions:
    print(prediction)

print("Decision Tree Regression MSE:", dt_reg_mse)

Decision Tree Regression Predictions:
6.0
5.0
7.0
5.0
5.0
6.0
6.0
6.0
5.0
7.0
6.0
5.0
7.0
5.0
6.0
6.0
5.0
5.0
6.0
6.0
5.0
6.0
7.0
7.0
7.0
6.0
5.0
6.0
6.0
7.0
5.0
7.0
4.0
6.0
4.0
7.0
5.0
6.0
6.0
5.0
7.0
5.0
6.0
5.0
5.0
6.0
6.0
7.0
6.0
7.0
5.0
6.0
7.0
5.0
6.0
5.0
4.0
5.0
6.0
5.0
5.0
6.0
6.0
5.0
7.0
6.0
5.0
8.0
6.0
6.0
4.0
6.0
5.0
6.0
4.0
6.0
7.0
6.0
6.0
9.0
7.0
7.0
4.0
7.0
5.0
5.0
5.0
3.0
6.0
6.0
5.0
6.0
6.0
5.0
5.0
7.0
6.0
5.0
6.0
5.0
5.0
5.0
5.0
5.0
5.0
6.0
6.0
7.0
6.0
6.0
8.0
5.0
5.0
7.0
5.0
6.0
6.0
5.0
6.0
5.0
6.0
7.0
5.0
5.0
7.0
6.0
5.0
5.0
6.0
3.0
5.0
6.0
6.0
6.0
6.0
8.0
6.0
5.0
5.0
6.0
6.0
6.0
5.0
6.0
6.0
6.0
5.0
5.0
7.0
6.0
5.0
5.0
5.0
7.0
5.0
7.0
6.0
6.0
6.0
5.0
6.0
5.0
5.0
6.0
5.0
6.0
8.0
6.0
5.0
5.0
7.0
4.0
6.0
7.0
6.0
6.0
5.0
5.0
5.0
6.0
6.0
5.0
5.0
7.0
7.0
5.0
5.0
4.0
7.0
5.0
5.0
5.0
4.0
6.0
5.0
5.0
6.0
5.0
5.0
5.0
6.0
6.0
5.0
6.0
7.0
6.0
4.0
5.0
7.0
5.0
6.0
6.0
7.0
6.0
6.0
7.0
5.0
6.0
5.0
5.0
6.0
6.0
6.0
6.0
6.0
6.0
5.0
5.0
5.0
6.0
6.0
6.0
7.0
7.0
6.0
6.0
5.0
5.0
5.0
6.0
7.

**Random Forest Regression**

In [80]:
# Fit a random forest on the training split and score it on the held-out
# split with mean squared error.
# Forest construction is randomised, so without a fixed random_state every
# run gives different predictions and a different MSE. Pinning the seed keeps
# this result, and the "best model" decision that depends on it, reproducible.
rf_reg = RandomForestRegressor(random_state=42)
rf_reg.fit(X_train, y_train)
rf_reg_predictions = rf_reg.predict(X_test)
rf_reg_mse = mean_squared_error(y_test, rf_reg_predictions)

# Report: one test-set prediction per line, then the MSE.
print("Random Forest Regression Predictions:")
for prediction in rf_reg_predictions:
    print(prediction)

print("Random Forest Regression MSE:", rf_reg_mse)

Random Forest Regression Predictions:
6.44
5.22
6.96
5.36
5.1
6.09
5.8
6.07
5.1
6.31
5.77
5.23
6.96
5.16
5.99
5.68
5.05
5.36
6.33
5.83
5.19
6.09
6.84
7.03
6.8
5.88
5.07
5.91
5.88
6.5
5.23
6.61
4.69
6.19
5.0
6.96
5.33
5.79
6.48
5.52
5.92
5.52
6.0
6.3
5.41
6.5
6.11
6.81
5.93
6.71
4.93
6.54
6.57
5.27
5.83
5.27
6.13
5.17
5.44
5.34
5.24
5.47
5.46
5.48
6.84
6.59
5.11
7.31
5.95
6.16
4.93
5.74
4.96
6.15
6.07
6.22
7.24
6.54
6.15
7.16
6.44
6.25
5.26
6.06
5.1
5.09
5.2
5.35
5.28
6.0
5.06
6.16
6.16
5.47
4.97
6.04
5.89
5.64
4.88
5.53
5.28
5.12
5.06
4.58
5.07
6.2
6.32
5.58
6.08
6.01
7.06
5.02
5.3
5.53
5.05
6.24
6.22
5.53
5.99
5.05
5.91
6.54
4.94
5.12
6.11
5.62
5.26
6.01
6.24
4.65
5.19
6.2
5.93
5.95
5.52
7.35
6.27
5.52
5.38
6.32
5.5
6.07
5.21
6.04
6.06
5.81
6.18
5.34
6.8
5.41
4.8
5.13
5.15
6.76
5.53
5.93
5.79
5.92
6.45
5.04
6.35
5.36
5.09
6.28
5.48
6.58
7.27
5.77
5.01
5.38
6.58
4.76
5.67
5.83
6.0
5.43
5.07
5.12
5.06
6.77
6.25
5.04
4.98
6.77
6.99
5.65
5.72
4.49
6.63
5.15
5.38
5.01
5.04
5.29
6.3
5.99
5.

**Gradient Boosting Regression**

In [81]:
# Fit gradient-boosted regression trees on the training split and score them
# on the held-out split with mean squared error.
# random_state seeds the randomness used while building the individual
# trees, so the fitted model and its MSE are repeatable across runs.
gb_reg = GradientBoostingRegressor(random_state=42)
gb_reg.fit(X_train, y_train)
gb_reg_predictions = gb_reg.predict(X_test)
gb_reg_mse = mean_squared_error(y_test, gb_reg_predictions)

# Report: one test-set prediction per line, then the MSE.
print("Gradient Boosting Regression Predictions:")
for prediction in gb_reg_predictions:
    print(prediction)

print("Gradient Boosting Regression MSE:", gb_reg_mse)

Gradient Boosting Regression Predictions:
6.391106587635991
5.104391567621602
6.701389424009392
5.5160369858528275
5.597362398208081
6.322775086098275
5.650308135154024
6.239231757312434
5.259755840539898
6.349210168522346
5.465821485945398
5.2455100106570285
6.964126653483073
5.151149142026281
6.190914124746063
5.542858042736057
5.090229027274667
5.421638171102995
6.441711402554693
5.708967191125504
5.8479844058992585
6.394337829252167
6.335679533595921
6.962941011723846
5.6781622455187115
5.498678204534567
5.165521904619086
5.78853009430475
5.426787069473469
6.801060978666093
5.310615785182289
6.543637431390864
5.04178539898532
6.135403954992241
5.15430168279674
6.9086448429857725
5.238553937210967
5.4053751112573805
6.230987323912883
5.433453061899711
5.967298092402898
5.3123616942073975
6.209177324548453
6.094039941192186
5.3912356019541345
6.483950232679429
6.132528684766133
6.7795029645373495
5.9401491831171995
6.233419190305632
5.031034895573095
6.3664059053927
6.493292181788283

**Support Vector Regression**

In [82]:
# Standardise the features before SVR. The scaler's statistics come from the
# training split only and are then applied to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Support vector regression with default settings on the scaled features,
# scored by test-set MSE.
svr_reg = SVR().fit(X_train_scaled, y_train)
svr_reg_predictions = svr_reg.predict(X_test_scaled)
svr_reg_mse = mean_squared_error(y_true=y_test, y_pred=svr_reg_predictions)

# Report: one test-set prediction per line, then the MSE.
print("Support Vector Regression Predictions:")
for predicted_quality in svr_reg_predictions:
    print(predicted_quality)

print("Support Vector Regression MSE:", svr_reg_mse)

Support Vector Regression Predictions:
6.24361732969501
5.023637691729004
6.949741301222114
5.425127776382699
5.24073929397189
6.283031151003822
5.505040786989854
6.156770065530697
4.852668592311581
6.294487372439458
5.504623903903536
5.4031374257823925
6.705967826966984
4.98276984715542
6.042577927602332
5.397454795206567
5.04349771125281
5.908853571276975
6.297674205648991
5.654602873824036
5.977741267716932
6.429405484695645
6.352281520084514
6.989882868602388
5.896319465630045
5.317453466065215
5.393461366954589
5.882035709009579
5.439876544572187
6.798485649737122
4.966544261336473
6.193939110659077
4.904602354579787
6.115956324564724
4.895056876879249
7.030020216438816
4.9072312771400535
5.571472178335825
6.20824746036463
5.832671500851404
5.787461728736026
5.301566291689745
6.100801866661623
5.883954955360881
5.568209489634027
6.545705017610451
6.331447964676512
6.778651848114335
5.815765841371529
5.978799579026461
4.9000029482488126
6.872727801599758
6.487757069168425
5.1484743

**Gradient Descent**

In [83]:
# Linear model trained by stochastic gradient descent. The pipeline
# standardises the features first, and the scaler is fitted inside the
# pipeline on the training split only.
# SGD processes the training samples in a randomised order, so random_state
# is pinned to make the fitted coefficients and the MSE repeatable.
gd_reg = make_pipeline(StandardScaler(), SGDRegressor(random_state=42))
gd_reg.fit(X_train, y_train)
gd_reg_predictions = gd_reg.predict(X_test)
gd_reg_mse = mean_squared_error(y_test, gd_reg_predictions)

# Report: one test-set prediction per line, then the MSE.
print("Gradient Descent Predictions:")
for prediction in gd_reg_predictions:
    print(prediction)

print("Gradient Descent MSE:", gd_reg_mse)

Gradient Descent Predictions:
6.577170695714823
5.045465711300978
6.255638867467357
5.667098678585538
5.362983397845601
6.242220515759218
6.023452932260613
6.073368284947986
5.1456603747086564
6.306065716765669
5.555944771619916
5.442004979824011
7.099628005451056
4.925624840567003
6.136302725045833
5.582750057006072
4.982390594394188
5.541546628839422
6.021255253576215
5.3801266183120795
6.028798730751284
6.141472110981621
6.200857247365347
6.775877840484393
5.26337706417867
5.550856104155661
5.313991626860445
5.733693426700446
5.197721278092241
6.6169555766711206
5.096977754708731
6.223010535126322
4.960333169979364
6.261162514185494
5.096773701719372
6.860010549087492
5.102416886170489
5.390503373356927
6.261611317014962
5.700182123216701
5.929610513556817
5.365793662271875
6.086703970041204
6.035183235421288
5.512216789439761
6.333222940477836
5.709378730424802
6.868423705729454
6.017107710591728
6.056876979656765
4.999494066350859
6.436420376450332
6.288934325647034
5.063838978978

**Performance** (A lower MSE indicates better performance)

In [84]:
# Recap of every model's test-set MSE, one line per model, in the order the
# models were fitted above.
mse_report = (
    ("Linear Regression MSE:", linear_reg_mse),
    ("Polynomial Regression MSE:", poly_reg_mse),
    ("Ridge Regression MSE:", ridge_reg_mse),
    ("Lasso Regression MSE:", lasso_reg_mse),
    ("Decision Tree Regression MSE:", dt_reg_mse),
    ("Random Forest Regression MSE:", rf_reg_mse),
    ("Gradient Boosting Regression MSE:", gb_reg_mse),
    ("Support Vector Regression MSE:", svr_reg_mse),
    ("Gradient Descent Regression MSE:", gd_reg_mse),
)
for report_label, report_value in mse_report:
    print(report_label, report_value)

Linear Regression MSE: 0.5466964419580577
Polynomial Regression MSE: 0.5599678017873138
Ridge Regression MSE: 0.5470103581078078
Lasso Regression MSE: 0.7372478174064122
Decision Tree Regression MSE: 0.7238461538461538
Random Forest Regression MSE: 0.3693716153846154
Gradient Boosting Regression MSE: 0.4616660895198461
Support Vector Regression MSE: 0.4598981606935245
Gradient Descent Regression MSE: 0.548066480861596


In [85]:
# Select the model with the smallest test-set MSE.
# Candidates are listed in the order they were fitted. The comparison is a
# strict "<", so when two models tie the earlier one is kept.
mse_by_model = (
    ("Linear Regression", linear_reg_mse),
    ("Polynomial Regression", poly_reg_mse),
    ("Ridge Regression", ridge_reg_mse),
    ("Lasso Regression", lasso_reg_mse),
    ("Decision Tree Regression", dt_reg_mse),
    ("Random Forest Regression", rf_reg_mse),
    ("Gradient Boosting Regression", gb_reg_mse),
    ("Support Vector Regression", svr_reg_mse),
    ("Gradient Descent Regression", gd_reg_mse),
)

# Start from +infinity so that any finite MSE replaces the initial value.
best_mse = float('inf')
best_model = None

for candidate_name, candidate_mse in mse_by_model:
    if candidate_mse < best_mse:
        best_mse = candidate_mse
        best_model = candidate_name

print("The best model is:", best_model)


The best model is: Random Forest Regression
