# Use polynomial regression to predict wine quality (winequality-red dataset)

In [1]:
import numpy as np
from sklearn.model_selection import train_test_split
import pandas as pd
from sklearn import linear_model
from sklearn import preprocessing
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import mean_squared_error, r2_score

# Load the red-wine table: 'quality' is the regression target, every other
# column is an input feature.
df = pd.read_csv('./dataset/winequality-red.csv')

X = df.drop(columns='quality')
y = df['quality']

# Expand the inputs into polynomial terms up to degree 2.
poly = PolynomialFeatures(degree=2)
X = poly.fit_transform(X)

# Hold out 30% of the rows for evaluation; random_state is the seed value
# that keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=1
)

# Fit the scaler on the training rows only, then apply that same fitted
# scaler to the held-out rows.
scaler = preprocessing.StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Ordinary least-squares fit on the expanded, standardised features.
model = linear_model.LinearRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Evaluate on the held-out rows.
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# The mean squared error
print("Mean squared error: {}".format(mse))
# R2 (coefficient of determination): 1 is perfect prediction
print('R2 score: {}'.format(r2))

Mean squared error: 0.43087855884497245
R2 score: 0.29206509289756444


# Use XGBoost to predict wine quality (winequality-red dataset)

In [2]:
import numpy as np
from sklearn.model_selection import train_test_split
import pandas as pd
from sklearn import preprocessing
from sklearn.metrics import mean_squared_error, r2_score
import xgboost as xgb

# Load the red-wine table: 'quality' is the regression target, every other
# column is an input feature.
df = pd.read_csv('./dataset/winequality-red.csv')

X = df.drop(columns='quality')
y = df['quality']

# Hold out 30% of the rows for evaluation; random_state is the seed value
# that keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=1
)

# Fit the scaler on the training rows only, then apply that same fitted
# scaler to the held-out rows.
scaler = preprocessing.StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Hyper-parameters for the gradient-boosted regressor, gathered in one place
# so they are easy to tune.
xgb_params = {
    'n_estimators': 100,
    'reg_lambda': 1,
    'gamma': 0,
    'max_depth': 3,
}
model = xgb.XGBRegressor(**xgb_params)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Evaluate on the held-out rows.
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# The mean squared error
print("Mean squared error: {}".format(mse))
# R2 (coefficient of determination): 1 is perfect prediction
print('R2 score: {}'.format(r2))

Mean squared error: 0.40328127746423376
R2 score: 0.3374075181111206


# Use an AdaBoost regressor to predict wine quality (winequality-red dataset)

In [4]:
import numpy as np
from sklearn.model_selection import train_test_split
import pandas as pd
from sklearn import preprocessing
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.ensemble import AdaBoostRegressor

# Single seed shared by the train/test split and the ensemble, so a fresh
# "Restart & Run All" reproduces the same metrics.
RANDOM_STATE = 1

# Load the red-wine table: 'quality' is the regression target, every other
# column is an input feature.
df = pd.read_csv('./dataset/winequality-red.csv')

y = df['quality']
X = df.drop('quality', axis=1)

# Hold out 30% of the rows for evaluation (same split as before: seed 1).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=RANDOM_STATE
)

# Fit the scaler on the training rows only, then apply that same fitted
# scaler to the held-out rows.
scaler = preprocessing.StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

ensemble_size = 2000
# Build the AdaBoost regressor (n_estimators=2000). AdaBoostRegressor draws
# random bootstrap samples at every boosting round, so without random_state
# the reported scores change from run to run. Seeding it makes them
# repeatable; expect small differences from numbers recorded by earlier
# unseeded runs.
model = AdaBoostRegressor(n_estimators=ensemble_size, random_state=RANDOM_STATE)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

# The mean squared error
print("Mean squared error: {}".format(mean_squared_error(y_test, y_pred)))
# R2 (coefficient of determination): 1 is perfect prediction
print('R2 score: {}'.format(r2_score(y_test, y_pred)))

Mean squared error: 0.42416213028022304
R2 score: 0.3031002073966286
