In [1]:
import numpy as np
import pandas as pd
from sklearn import metrics
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn.ensemble import BaggingRegressor
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from xgboost import XGBRFRegressor

In [18]:
# Load the red-wine quality table; the file is semicolon-delimited.
red_wine = pd.read_csv(
    "dataset/winequality-red.csv",
    sep=";",
)

In [19]:
# Preview the first five rows of the red-wine table.
red_wine.head(n=5)

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5


In [20]:
# The eleven predictor columns; "quality" is the regression target.
feature_columns = [
    "fixed acidity",
    "volatile acidity",
    "citric acid",
    "residual sugar",
    "chlorides",
    "free sulfur dioxide",
    "total sulfur dioxide",
    "density",
    "pH",
    "sulphates",
    "alcohol",
]
X = red_wine[feature_columns]
# Double brackets keep the target as a one-column DataFrame rather than a Series.
y = red_wine[["quality"]]

In [21]:
from sklearn.preprocessing import StandardScaler

# Use one scaler per array. Refitting a single StandardScaler on y discards the
# statistics it learned from X, so the feature scaling could no longer be
# re-applied to new samples or inverted.
feature_scaler = StandardScaler()
target_scaler = StandardScaler()
X = feature_scaler.fit_transform(X)
y = target_scaler.fit_transform(y)

# Backward-compatible alias: as before, `scalar` ends up fitted on the target.
scalar = target_scaler

# NOTE(review): both scalers are fitted on the full data set before the
# train/test split, so test-set statistics leak into the preprocessing.
# Consider splitting first and fitting the scalers on the training rows only.

In [22]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [23]:
# Fresh, independent accumulators for this run. evaluate() appends to the four
# metric lists; Model is not written to by any cell shown in this notebook.
Model, RMSE, MAE, MSE, R_Square = ([] for _ in range(5))

In [25]:
# Seed for the estimators whose fitting involves randomness (trees, ensembles,
# boosters). Without it the scores change on every run even though the
# train/test split itself is seeded with 42.
RANDOM_STATE = 42

# (label, estimator) pairs keep each display name next to the model it describes,
# so the two lists below cannot drift out of order.
candidates = [
    ("Linear Regression", LinearRegression()),
    ("Ridge Regression", Ridge()),
    ("Lasso Regression", Lasso()),
    ("Decision Tree Regressor", DecisionTreeRegressor(random_state=RANDOM_STATE)),
    ("Random Forest Regressor", RandomForestRegressor(random_state=RANDOM_STATE)),
    ("Gradient Boosting Regressor", GradientBoostingRegressor(random_state=RANDOM_STATE)),
    ("Adaboost Regressor", AdaBoostRegressor(random_state=RANDOM_STATE)),
    ("BaggingRegressor", BaggingRegressor(random_state=RANDOM_STATE)),
    ("ExtraTreesRegressor", ExtraTreesRegressor(random_state=RANDOM_STATE)),
    ("XGBRegressor", XGBRegressor(random_state=RANDOM_STATE)),
    ("XGBRFRegressor", XGBRFRegressor(random_state=RANDOM_STATE)),
]
names = [label for label, _ in candidates]
models = [estimator for _, estimator in candidates]

In [26]:
def evaluate(true, predicted):
    """Score one set of predictions and record the results.

    Appends one value each to the notebook-level lists ``MAE``, ``MSE``,
    ``RMSE`` and ``R_Square`` (in that order); returns nothing.
    """
    MAE.append(metrics.mean_absolute_error(true, predicted))
    squared_error = metrics.mean_squared_error(true, predicted)
    MSE.append(squared_error)
    # RMSE is the square root of the MSE computed above.
    RMSE.append(np.sqrt(squared_error))
    R_Square.append(metrics.r2_score(true, predicted))

In [27]:
def print_evaluate(true, predicted):
    """Print MAE, MSE, RMSE and R-squared for one set of predictions, one per line."""
    mae = metrics.mean_absolute_error(true, predicted)
    mse = metrics.mean_squared_error(true, predicted)
    report = (
        ("MAE:", mae),
        ("MSE:", mse),
        ("RMSE:", np.sqrt(mse)),
        ("R2 Square", metrics.r2_score(true, predicted)),
    )
    # All metrics are computed before anything is printed.
    for label, value in report:
        print(label, value)

In [28]:
def fit_and_predict(name, model):
    """Fit ``model`` on the training split and record its test-set metrics.

    ``name`` is accepted but not used inside the function. The split arrays
    (``X_train``, ``y_train``, ``X_test``, ``y_test``) are read from notebook
    globals, and the scores are stored through ``evaluate``.
    """
    # ravel() flattens the target array before it is handed to fit().
    model.fit(X_train, y_train.ravel())
    predictions = model.predict(X_test)
    evaluate(y_test, predictions)

In [29]:
# Train every candidate on the current split and record its test metrics.
for name, model in zip(names, models):
    fit_and_predict(name=name, model=model)

In [30]:
# Collect the per-model scores into one table, one row per model.
evaluation_columns = {
    "Model": names,
    "MAE": MAE,
    "MSE": MSE,
    "RMSE": RMSE,
    "R Squared": R_Square,
}
evaluation_dataframe = pd.DataFrame(evaluation_columns)

In [31]:
# Order the models from lowest to highest R-squared.
evaluation_dataframe = evaluation_dataframe.sort_values(
    by="R Squared",
    ascending=True,
)

In [32]:
# Display the red-wine comparison table (sorted by R-squared in the previous cell).
evaluation_dataframe

Unnamed: 0,Model,MAE,MSE,RMSE,R Squared
3,Decision Tree Regressor,0.611594,1.035656,1.017672,-0.032891
2,Lasso Regression,0.848841,1.008285,1.004134,-0.005592
1,Ridge Regression,0.623743,0.598435,0.773586,0.403163
0,Linear Regression,0.623709,0.598418,0.773575,0.40318
6,Adaboost Regressor,0.628907,0.580483,0.761895,0.421067
5,Gradient Boosting Regressor,0.601593,0.559511,0.748005,0.441983
10,XGBRFRegressor,0.603533,0.550036,0.741644,0.451433
9,XGBRegressor,0.512384,0.52269,0.722973,0.478706
7,BaggingRegressor,0.532241,0.499464,0.706728,0.501869
4,Random Forest Regressor,0.533016,0.489687,0.699776,0.511621


In [33]:
# Load the white-wine quality table; the file is semicolon-delimited.
white_wine = pd.read_csv(
    "dataset/winequality-white.csv",
    sep=";",
)

In [34]:
# Preview the first five rows of the white-wine table.
white_wine.head(n=5)

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.0,0.27,0.36,20.7,0.045,45.0,170.0,1.001,3.0,0.45,8.8,6
1,6.3,0.3,0.34,1.6,0.049,14.0,132.0,0.994,3.3,0.49,9.5,6
2,8.1,0.28,0.4,6.9,0.05,30.0,97.0,0.9951,3.26,0.44,10.1,6
3,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,9.9,6
4,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,9.9,6


In [35]:
# The eleven predictor columns; "quality" is the regression target.
# From here on X and y refer to the white-wine data.
feature_columns = [
    "fixed acidity",
    "volatile acidity",
    "citric acid",
    "residual sugar",
    "chlorides",
    "free sulfur dioxide",
    "total sulfur dioxide",
    "density",
    "pH",
    "sulphates",
    "alcohol",
]
X = white_wine[feature_columns]
# Double brackets keep the target as a one-column DataFrame rather than a Series.
y = white_wine[["quality"]]

In [36]:
from sklearn.preprocessing import StandardScaler

# Use one scaler per array. Refitting a single StandardScaler on y discards the
# statistics it learned from X, so the feature scaling could no longer be
# re-applied to new samples or inverted.
feature_scaler = StandardScaler()
target_scaler = StandardScaler()
X = feature_scaler.fit_transform(X)
y = target_scaler.fit_transform(y)

# Backward-compatible alias: as before, `scalar` ends up fitted on the target.
scalar = target_scaler

# NOTE(review): both scalers are fitted on the full data set before the
# train/test split, so test-set statistics leak into the preprocessing.
# Consider splitting first and fitting the scalers on the training rows only.

In [37]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [38]:
# Reset the accumulators so the white-wine scores do not mix with earlier ones.
# evaluate() appends to the four metric lists; Model is not written to by any
# cell shown in this notebook.
Model, RMSE, MAE, MSE, R_Square = ([] for _ in range(5))

In [39]:
# Seed for the estimators whose fitting involves randomness (trees, ensembles,
# boosters). Without it the scores change on every run even though the
# train/test split itself is seeded with 42.
RANDOM_STATE = 42

# Fresh, unfitted estimators for the white-wine run. (label, estimator) pairs
# keep each display name next to the model it describes.
candidates = [
    ("Linear Regression", LinearRegression()),
    ("Ridge Regression", Ridge()),
    ("Lasso Regression", Lasso()),
    ("Decision Tree Regressor", DecisionTreeRegressor(random_state=RANDOM_STATE)),
    ("Random Forest Regressor", RandomForestRegressor(random_state=RANDOM_STATE)),
    ("Gradient Boosting Regressor", GradientBoostingRegressor(random_state=RANDOM_STATE)),
    ("Adaboost Regressor", AdaBoostRegressor(random_state=RANDOM_STATE)),
    ("BaggingRegressor", BaggingRegressor(random_state=RANDOM_STATE)),
    ("ExtraTreesRegressor", ExtraTreesRegressor(random_state=RANDOM_STATE)),
    ("XGBRegressor", XGBRegressor(random_state=RANDOM_STATE)),
    ("XGBRFRegressor", XGBRFRegressor(random_state=RANDOM_STATE)),
]
names = [label for label, _ in candidates]
models = [estimator for _, estimator in candidates]

In [40]:
def evaluate(true, predicted):
    """Score one set of predictions and record the results.

    Appends one value each to the notebook-level lists ``MAE``, ``MSE``,
    ``RMSE`` and ``R_Square`` (in that order); returns nothing.

    NOTE(review): this repeats an earlier definition of the same name in this
    notebook; a single definition would suffice.
    """
    MAE.append(metrics.mean_absolute_error(true, predicted))
    squared_error = metrics.mean_squared_error(true, predicted)
    MSE.append(squared_error)
    # RMSE is the square root of the MSE computed above.
    RMSE.append(np.sqrt(squared_error))
    R_Square.append(metrics.r2_score(true, predicted))

In [41]:
def print_evaluate(true, predicted):
    """Print MAE, MSE, RMSE and R-squared for one set of predictions, one per line.

    NOTE(review): this repeats an earlier definition of the same name in this
    notebook; a single definition would suffice.
    """
    mae = metrics.mean_absolute_error(true, predicted)
    mse = metrics.mean_squared_error(true, predicted)
    report = (
        ("MAE:", mae),
        ("MSE:", mse),
        ("RMSE:", np.sqrt(mse)),
        ("R2 Square", metrics.r2_score(true, predicted)),
    )
    # All metrics are computed before anything is printed.
    for label, value in report:
        print(label, value)

In [42]:
def fit_and_predict(name, model):
    """Fit ``model`` on the training split and record its test-set metrics.

    ``name`` is accepted but not used inside the function. The split arrays
    (``X_train``, ``y_train``, ``X_test``, ``y_test``) are read from notebook
    globals, and the scores are stored through ``evaluate``.

    NOTE(review): this repeats an earlier definition of the same name in this
    notebook; a single definition would suffice.
    """
    # ravel() flattens the target array before it is handed to fit().
    model.fit(X_train, y_train.ravel())
    predictions = model.predict(X_test)
    evaluate(y_test, predictions)

In [43]:
# Train every candidate on the current split and record its test metrics.
for name, model in zip(names, models):
    fit_and_predict(name=name, model=model)

In [44]:
# Collect the per-model scores into one table, one row per model.
evaluation_columns = {
    "Model": names,
    "MAE": MAE,
    "MSE": MSE,
    "RMSE": RMSE,
    "R Squared": R_Square,
}
evaluation_dataframe = pd.DataFrame(evaluation_columns)

In [45]:
# Order the models from lowest to highest R-squared.
evaluation_dataframe = evaluation_dataframe.sort_values(
    by="R Squared",
    ascending=True,
)

In [46]:
# Display the white-wine comparison table (sorted by R-squared in the previous cell).
evaluation_dataframe

Unnamed: 0,Model,MAE,MSE,RMSE,R Squared
2,Lasso Regression,0.762216,0.988967,0.994468,-0.001382
3,Decision Tree Regressor,0.546185,0.871815,0.93371,0.117241
1,Ridge Regression,0.662095,0.725718,0.851891,0.265172
0,Linear Regression,0.662038,0.725616,0.851831,0.265275
6,Adaboost Regressor,0.66961,0.702998,0.83845,0.288177
10,XGBRFRegressor,0.616557,0.612491,0.782618,0.37982
5,Gradient Boosting Regressor,0.605481,0.589388,0.767716,0.403213
9,XGBRegressor,0.502619,0.493866,0.702756,0.499935
7,BaggingRegressor,0.500439,0.490025,0.700018,0.503823
4,Random Forest Regressor,0.475861,0.449725,0.670615,0.54463
