In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file in it.
for root, _, files in os.walk('/kaggle/input'):
    for name in files:
        full_path = os.path.join(root, name)
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Import Dataset

In [None]:
# Load the wine-quality CSV from the attached Kaggle dataset into a DataFrame.
df = pd.read_csv(filepath_or_buffer='/kaggle/input/wine-quality-dataset/WineQT.csv')

In [None]:
# Preview the first five rows of the loaded table.
df.head(n=5)

**Find null and duplicated values**

In [None]:
# Remove the 'Id' column (presumably just a row identifier - confirm against the dataset).
# errors='ignore' makes the cell safe to re-run: once the column is gone a second
# execution is a no-op instead of raising KeyError.
df = df.drop(columns=['Id'], errors='ignore')

In [None]:
# Number of missing values in each column.
df.isnull().sum()

In [None]:
# Number of rows that repeat an earlier row exactly.
df.duplicated(keep='first').sum()

In [None]:
# Keep only the first occurrence of each duplicated row.
df = df.drop_duplicates(keep='first')

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for every column.
df.describe(percentiles=[0.25, 0.5, 0.75])

**EDA and outlier detection**

In [None]:
import seaborn as sns
from scipy import stats
import matplotlib.pyplot as plt
import warnings
# Suppress every warning for the rest of the session (this also hides deprecation notices).
warnings.filterwarnings(action='ignore')

In [None]:
# One grid row per column: distribution on the left, probability plot
# (scipy's default reference distribution) on the right.
# The row count follows the frame instead of a hard-coded 12, so the cell keeps
# working if columns are added or removed upstream; the height scales with it
# and is still 70 for the original 12 columns.
n_rows = len(df.columns)
fig, ax = plt.subplots(n_rows, 2, figsize=(30, 70 * n_rows / 12), squeeze=False)
for index, i in enumerate(df.columns):
    # histplot with a KDE overlay replaces the deprecated sns.distplot.
    sns.histplot(df[i], kde=True, stat='density', ax=ax[index, 0], color='green')
    stats.probplot(df[i], plot=ax[index, 1])

In [None]:
# Box plots of the first four columns, before outlier removal.
plt.figure(figsize=(12, 5))
first_four = df.iloc[:, 0:4]
sns.boxplot(data=first_four)
plt.show()

In [None]:
# Box plots of columns five to eight, before outlier removal.
plt.figure(figsize=(12, 5))
middle_four = df.iloc[:, 4:8]
sns.boxplot(data=middle_four)
plt.show()

In [None]:
# Box plots of the remaining columns (ninth onwards), before outlier removal.
plt.figure(figsize=(12, 5))
remaining_cols = df.iloc[:, 8:]
sns.boxplot(data=remaining_cols)
plt.show()

In [None]:
def outlier_detection(df, column, threshold=3):
    """Drop the rows of ``df`` whose ``column`` value is a z-score outlier.

    A row is treated as an outlier when the absolute z-score of its value in
    ``column`` is greater than or equal to ``threshold``. The number of rows
    removed is printed, and it always matches the rows actually dropped.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to filter; it is not modified.
    column : str
        Name of the column whose z-scores are evaluated.
    threshold : float, default 3
        Absolute z-score at or above which a row is removed.

    Returns
    -------
    pandas.DataFrame
        ``df`` without the outlier rows.
    """
    z_scores = np.abs(np.asarray(stats.zscore(df[column])))
    # NaN z-scores (e.g. a constant column, whose standard deviation is 0)
    # compare False here, so those rows are kept instead of emptying the frame.
    is_outlier = z_scores >= threshold
    print(column, 'has', int(is_outlier.sum()), 'outliers')
    return df[~is_outlier]

In [None]:
# Filter the frame one column at a time; each column's z-scores are computed
# on the rows that survived the filters of the columns before it.
for col_name in list(df.columns):
    df = outlier_detection(df, col_name)

In [None]:
# Box plots of the first four columns, after outlier removal.
plt.figure(figsize=(12, 5))
first_four_clean = df.iloc[:, 0:4]
sns.boxplot(data=first_four_clean)
plt.show()

In [None]:
# Box plots of columns five to eight, after outlier removal.
plt.figure(figsize=(12, 5))
middle_four_clean = df.iloc[:, 4:8]
sns.boxplot(data=middle_four_clean)
plt.show()

In [None]:
# Box plots of the remaining columns (ninth onwards), after outlier removal.
plt.figure(figsize=(12, 5))
remaining_clean = df.iloc[:, 8:]
sns.boxplot(data=remaining_clean)
plt.show()

**Correlation matrix**

In [None]:
# Annotated heatmap of the pairwise correlations between all columns.
corr_matrix = df.corr()
plt.figure(figsize=(15, 15))
sns.heatmap(corr_matrix, annot=True, cmap='rainbow', linewidths=0.5)
plt.show()

In [None]:
from sklearn.model_selection import train_test_split 
from sklearn.pipeline import make_pipeline 
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.linear_model import ElasticNet, Lasso,LinearRegression,RidgeCV
from catboost import CatBoostRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from xgboost import XGBRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import StackingRegressor
from sklearn.svm import SVR

In [None]:
# Feature matrix: every column except the target.
X = df.drop(columns=['quality'])

In [None]:
# Target vector: the 'quality' column.
y = df.loc[:, 'quality']

In [None]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=11,
)

In [None]:
# Dimensions (rows, columns) of the training feature matrix.
X_train.shape

In [None]:
# Dimensions (rows, columns) of the test feature matrix.
X_test.shape

**Train model with standard scaler**

In [None]:
# Candidate regressors, each behind a StandardScaler.
# The random forest, gradient boosting and decision tree models are seeded with
# the same value as the train/test split so that repeated runs give the same
# scores; the other estimators keep their library defaults.
pipelines = {
    'en': make_pipeline(StandardScaler(), ElasticNet()),
    'lasso': make_pipeline(StandardScaler(), Lasso()),
    'Rcv': make_pipeline(StandardScaler(), RidgeCV()),
    'CatB': make_pipeline(StandardScaler(), CatBoostRegressor(eval_metric='RMSE', verbose=1000)),
    'lr': make_pipeline(StandardScaler(), LinearRegression()),
    'rf': make_pipeline(StandardScaler(), RandomForestRegressor(random_state=11)),
    'gb': make_pipeline(StandardScaler(), GradientBoostingRegressor(random_state=11)),
    'dtc': make_pipeline(StandardScaler(), DecisionTreeRegressor(random_state=11)),
    'xg': make_pipeline(StandardScaler(), XGBRegressor()),
    'svr': make_pipeline(StandardScaler(), SVR()),
}

In [None]:
# Fit every StandardScaler pipeline on the training split, keyed by its short name.
fit_models = {}
for name, pipe in pipelines.items():
    fit_models[name] = pipe.fit(X_train, y_train)

In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error
# Score every fitted pipeline on the test split.
# Note: despite its name, `maes` collects the RMSE of each model.
maes = []
al = []
for algo, model in fit_models.items():
    yhat = model.predict(X_test)
    mae = mean_absolute_error(y_test, yhat)
    rmse = mean_squared_error(y_test, yhat) ** 0.5
    al.append(algo)
    maes.append(rmse)
    print(algo, 'MEAN ABSOLUTE ERROR', mae)
    print(algo, 'ROOT MEAN SQUARED ERROR', rmse)

In [None]:
# Bar chart of the test RMSE for each StandardScaler pipeline.
plt.figure(figsize=(5, 5))
ax = sns.barplot(x=al, y=maes)
ax.set_xlabel('ML Algorithms...')
ax.set_ylabel('Root Mean Squared Errors...')
plt.show()

**Stacked regressor with Standard Scaler pipeline**

In [None]:
# Base learners for the stack; the linear regression instance also serves as
# the final estimator.
sv, lr, rcv = SVR(), LinearRegression(), RidgeCV()

stregr = StackingRegressor(
    estimators=[('svr', sv), ('lr', lr), ('rcv', rcv)],
    final_estimator=lr,
)

# Standardise the features, then train the stack on the training split.
pipeline = make_pipeline(StandardScaler(), stregr)
pipeline.fit(X_train, y_train)

# Predict on the held-out split and report the RMSE.
y_pred = pipeline.predict(X_test)
stack_rmse = mean_squared_error(y_test, y_pred) ** 0.5
print("Root Mean Squared Error: %.4f" % stack_rmse)

In [None]:
# Record the stacked model's RMSE next to the individual models' scores.
stacked_rmse = mean_squared_error(y_test, y_pred) ** 0.5
al.append('stacked model')
maes.append(stacked_rmse)

In [None]:
# Print every recorded score. Iterating over the lists themselves (instead of
# range(11)) avoids an IndexError when fewer models were recorded and avoids
# silently skipping entries when more were.
for name, rmse in zip(al, maes):
    print("The RMSE of", name, 'is', rmse)

In [None]:
# Bar chart of the test RMSE for the StandardScaler pipelines plus the stacked model.
plt.figure(figsize=(12, 5))
ax = sns.barplot(x=al, y=maes)
ax.set(xlabel='ML Algorithms...', ylabel='Root Mean Squared Errors...')
plt.show()

**Train model with min max scaler**

In [None]:
# Candidate regressors, each behind a MinMaxScaler.
# The random forest, gradient boosting and decision tree models are seeded with
# the same value as the train/test split so that repeated runs give the same
# scores; the other estimators keep their library defaults.
pipelines1 = {
    'en': make_pipeline(MinMaxScaler(), ElasticNet()),
    'lasso': make_pipeline(MinMaxScaler(), Lasso()),
    'Rcv': make_pipeline(MinMaxScaler(), RidgeCV()),
    'CatB': make_pipeline(MinMaxScaler(), CatBoostRegressor(eval_metric='RMSE', verbose=1000)),
    'lr': make_pipeline(MinMaxScaler(), LinearRegression()),
    'rf': make_pipeline(MinMaxScaler(), RandomForestRegressor(random_state=11)),
    'gb': make_pipeline(MinMaxScaler(), GradientBoostingRegressor(random_state=11)),
    'dtc': make_pipeline(MinMaxScaler(), DecisionTreeRegressor(random_state=11)),
    'xg': make_pipeline(MinMaxScaler(), XGBRegressor()),
    'svr': make_pipeline(MinMaxScaler(), SVR()),
}

In [None]:
# Fit every MinMaxScaler pipeline on the training split, keyed by its short name.
fit_model = {}
for name, pipe in pipelines1.items():
    fit_model[name] = pipe.fit(X_train, y_train)

In [None]:
# Score every fitted MinMaxScaler pipeline on the test split.
# Note: despite its name, `maes1` collects the RMSE of each model.
maes1 = []
al1 = []
for algo, model in fit_model.items():
    yhat = model.predict(X_test)
    mae = mean_absolute_error(y_test, yhat)
    rmse = mean_squared_error(y_test, yhat) ** 0.5
    al1.append(algo)
    maes1.append(rmse)
    print(algo, 'MEAN ABSOLUTE ERROR', mae)
    print(algo, 'ROOT MEAN SQUARED ERROR', rmse)

**Stacked regressor with Min Max Scaler pipeline**

In [None]:
rcv = RidgeCV()
sv = SVR()
lr = LinearRegression()

# Stack of SVR and RidgeCV with a linear regression as the final estimator.
stregr1 = StackingRegressor(estimators=[('svr', sv), ('rcv', rcv)],
                            final_estimator=lr)

# Fix: the pipeline must wrap the stack defined in this cell (stregr1).
# The original passed `stregr`, the stack from the StandardScaler section,
# so stregr1 was never trained or evaluated.
pipeline = make_pipeline(
    MinMaxScaler(),
    stregr1
)
pipeline.fit(X_train, y_train)

# Generate predictions on the test set
y_pred = pipeline.predict(X_test)

# Evaluate the model
print("Root Mean Squared Error: %.4f" % mean_squared_error(y_test, y_pred)**0.5)

In [None]:
# Record the min-max stacked model's RMSE next to the individual models' scores.
stacked_rmse = mean_squared_error(y_test, y_pred) ** 0.5
al1.append('stacked model')
maes1.append(stacked_rmse)

In [None]:
# Print every recorded score. Iterating over the lists themselves (instead of
# range(11)) avoids an IndexError when fewer models were recorded and avoids
# silently skipping entries when more were.
for name, rmse in zip(al1, maes1):
    print("The RMSE of", name, 'is', rmse)

In [None]:
# Bar chart of the test RMSE for the MinMaxScaler pipelines plus the stacked model.
plt.figure(figsize=(12, 5))
ax = sns.barplot(x=al1, y=maes1)
ax.set(xlabel='ML Algorithms...', ylabel='Root Mean Squared Errors...')
plt.show()

In [None]:
lr = LinearRegression()

# Stack the two earlier stacked regressors under a linear final estimator.
# The result gets its own name: the original rebound `stregr` to a model that
# contained the previous `stregr`, so every re-run of the cell nested one level
# deeper. The unused rebinding of `rcv` and `sv` to the stacks was dropped.
double_stack = StackingRegressor(
    estimators=[('Stack 1', stregr), ('Stack 2', stregr1)],
    final_estimator=lr,
)

pipeline = make_pipeline(
    StandardScaler(),
    double_stack
)
pipeline.fit(X_train, y_train)

# Generate predictions on the test set
y_pred = pipeline.predict(X_test)

# Evaluate the model
print("Root Mean Squared Error: %.4f" % mean_squared_error(y_test, y_pred)**0.5)

**Stacked regressor with previous stacked regressors**

In [None]:
# Record the double-stacked model's RMSE at the end of the min-max score lists.
double_rmse = mean_squared_error(y_test, y_pred) ** 0.5
al1.append('Double stacked model')
maes1.append(double_rmse)

In [None]:
# Compare only the stacked entries, i.e. everything recorded after the base models.
# The offset comes from the number of base pipelines instead of a magic 10, so
# it stays correct if models are added to or removed from pipelines1.
n_base = len(pipelines1)
plt.figure(figsize=(8, 5))
plt.xlabel('ML Algorithms...')
plt.ylabel('Root Mean Squared Errors...')
ax = sns.barplot(x=al1[n_base:], y=maes1[n_base:])
plt.show()

If you find the provided solution helpful, please consider upvoting it. Your feedback is appreciated!