### Importing the Libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import date
%matplotlib inline

# Show floats with 3 decimals. The fully-qualified key is required: on recent
# pandas the bare pattern 'precision' also matches 'styler.format.precision'
# and raises OptionError ("Pattern matched multiple keys").
pd.set_option('display.precision', 3)

### Importing the dataset

In [None]:
# Column names applied to the CSV; header=0 below makes pandas replace the
# file's own header row with these names.
labels = (
    'fixed_acidity',
    'volatile_acidity',
    'citric_acid',
    'residual_sugar',
    'chlorides',
    'free_sulfur_dioxide',
    'total_sulfur_dioxide',
    'density',
    'pH',
    'sulphates',
    'alcohol',
    'quality',
)
# The file is semicolon-separated.
df_red = pd.read_csv(
    'Datasets/winequality-red.csv',
    sep=';',
    header=0,
    names=labels,
)

### Display the dataset

In [None]:
# Preview the first two rows of the loaded frame.
df_red.head(2)

In [None]:
def drop_constant_column(dataframe):
    """Return ``dataframe`` without the columns whose values never change.

    A column is kept when at least one of its values differs from the value
    in the first row.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame to filter; it is not modified.

    Returns
    -------
    pandas.DataFrame
        The frame restricted to its non-constant columns. A frame with zero
        rows is returned unchanged, because there is no first row to compare
        against (previously this raised ``IndexError``).
    """
    if len(dataframe) == 0:
        return dataframe
    first_row = dataframe.iloc[0]
    # Row-wise comparison against the first row, then "any difference" per column.
    return dataframe.loc[:, (dataframe != first_row).any()]

def cleanDf(df):
    """Prune uninformative columns and duplicate rows, printing a short report.

    Steps, in order: drop columns that are entirely NaN, drop constant
    columns (via ``drop_constant_column``), then drop duplicate rows.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to clean; the caller's object is not modified.

    Returns
    -------
    pandas.DataFrame
        The cleaned frame.
    """
    columns_len = df.shape[1]
    # Column pruning: all-NaN columns first, then columns with a single value.
    df = drop_constant_column(df.dropna(axis='columns', how='all'))
    # Row count is taken after column pruning and before de-duplication.
    len_before_drop = df.shape[0]
    df = df.drop_duplicates()
    print(f"Loaded {len_before_drop} records, removed {len_before_drop - len(df)} duplicates, left {len(df)}, removed {columns_len-len(df.columns)} of {columns_len} columns")

    return df

#### Dropping duplicate values and normalizing the data

In [None]:
# Clean the raw frame, then rescale every remaining column with MinMaxScaler
# so that each column's minimum becomes 0 and its maximum becomes 1.
from sklearn import preprocessing
from sklearn.preprocessing import MinMaxScaler
df_red = cleanDf(df_red)
scaler = MinMaxScaler()
scaler.fit(df_red)
d = scaler.transform(df_red)
# Label the scaled array with the cleaned frame's own columns: cleanDf may
# drop columns, in which case the full `labels` tuple would no longer match
# the width of `d` and DataFrame construction would fail.
scaled_df = pd.DataFrame(d, columns=df_red.columns)
scaled_df.head()

## Exploratory Data Analysis

In [None]:
def get_features(df, label, threshold):
    """Select the columns whose correlation with ``label`` is strong enough.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing the candidate columns and the ``label`` column.
    label : str
        Name of the target column.
    threshold : float
        A column is selected when the absolute value of its correlation
        with ``label`` is strictly greater than this value.

    Returns
    -------
    tuple
        ``(df_filtered, features, correlations)``: the frame restricted to
        the selected columns plus ``label``, the list of selected column
        names, and the Series of correlations of every other column with
        ``label``.
    """
    corr_with_label = df.corr()[label].drop(label)
    # NaN correlations compare False and are therefore never selected.
    selected = [
        column
        for column, value in corr_with_label.items()
        if abs(value) > threshold
    ]
    return df[selected + [label]], selected, corr_with_label

In [None]:
# Target column, and the features whose absolute correlation with it exceeds 0.1.
# The third return value (the full correlation Series) is discarded here.
label = 'quality'
scaled_df_features, features, _ = get_features(scaled_df, label, 0.1)

In [None]:
# Annotated correlation heatmap of the selected features and the target,
# drawn on an explicitly created axes.
fig, ax = plt.subplots(figsize=(18, 6))
sns.heatmap(scaled_df_features.corr(), annot=True, ax=ax)
ax.set_title('Correlation Matrix of Red Wine for Quality');

# Compare Predictions for Red Wine

### Splitting into Train and Test sets

In [None]:
from sklearn.model_selection import train_test_split

# Feature matrix: the columns chosen by get_features; target: the `label` column.
X, y = scaled_df[features], scaled_df[label]
# Hold out 33% of the rows for testing; random_state is fixed so the split
# is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=1,
)

In [None]:
# Preview the first two rows of the feature matrix.
X.head(2)

In [None]:
# Preview the first two target values (already min-max scaled along with the features).
y.head(2)

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn import metrics

# Baseline model: ordinary linear regression fitted on the training split.
# The fit call is the cell's last expression, so the fitted estimator is displayed.
lr = LinearRegression()
lr.fit(X_train, y_train)

In [None]:
# Predict on both splits and print the first 15 predictions next to the
# first 15 actual targets for a quick visual comparison.
lr_train_pred = lr.predict(X_train)
print(f'Train Prediction: {lr_train_pred[:15].astype(float)}\n   Actual values: {y_train[:15].values}')
lr_test_pred = lr.predict(X_test)
print(f'Test  Prediction: {lr_test_pred[:15].astype(float)}\n   Actual values: {y_test[:15].values}')

In [None]:
# RMSE is the square root of the mean squared error. Arguments are given in
# (actual, predicted) order; the squared error is symmetric, so the value is
# the same either way.
lr_mse_train = metrics.mean_squared_error(y_train, lr_train_pred)
lr_rmse_train = lr_mse_train ** 0.5
print(f'LinearRegression train RMSE {lr_rmse_train:.2f}')

lr_mse_test = metrics.mean_squared_error(y_test, lr_test_pred)
lr_rmse_test = lr_mse_test ** 0.5
print(f'LinearRegression test  RMSE {lr_rmse_test:.2f}')

#### Using AutoML (auto-sklearn)

In [None]:
import autosklearn
from sklearn.metrics import mean_absolute_error
from autosklearn.regression import AutoSklearnRegressor
from autosklearn.metrics import mean_absolute_error as auto_mean_absolute_error
print(f'autosklearn: {autosklearn.__version__}')

In [None]:
def prediction_out(name, estimator):
    """Print and return the train/test RMSE of a fitted estimator.

    Predictions are made on the notebook-level ``X_train`` / ``X_test`` and
    scored against ``y_train`` / ``y_test``.

    Parameters
    ----------
    name : str
        Label used as the prefix of the two printed lines.
    estimator : object
        Fitted model exposing ``predict``.

    Returns
    -------
    tuple
        ``(rmse_train, rmse_test, train_pred, test_pred)``.
    """
    def _rmse(actual, predicted):
        # Square root of the mean squared error; MSE is symmetric in its arguments.
        return metrics.mean_squared_error(actual, predicted) ** 0.5

    train_pred, test_pred = estimator.predict(X_train), estimator.predict(X_test)
    rmse_train, rmse_test = _rmse(y_train, train_pred), _rmse(y_test, test_pred)
    for line in (
        f'{name} train RMSE {rmse_train:.3f}',
        f'{name} test  RMSE {rmse_test:.3f}',
    ):
        print(line)
    return rmse_train, rmse_test, train_pred, test_pred

In [None]:
# Configure the auto-sklearn regression search, optimising mean absolute error.
# The time budgets are 2*60 overall and 30 per candidate run (seconds, per the
# auto-sklearn API); n_jobs=-1 asks for all available cores.
model_mae = AutoSklearnRegressor(
    time_left_for_this_task=2 * 60,
    per_run_time_limit=30,
    metric=auto_mean_absolute_error,
    n_jobs=-1,
)

In [None]:
# Run the auto-sklearn search on the training split; the %time line magic
# reports how long the fit took.
%time model_mae.fit(X_train, y_train)

In [None]:
# Print auto-sklearn's own summary of the search.
print(model_mae.sprint_statistics())
# Predict with the fitted search result on both splits, then report the mean
# absolute error on the test split (the metric the search optimised).
# NOTE(review): ar_train_pred is not referenced again in the visible part of the notebook.
ar_train_pred = model_mae.predict(X_train)
ar_test_pred = model_mae.predict(X_test)
mae = mean_absolute_error(y_test, ar_test_pred)
print(f"MAE: {mae:.2f}")

In [None]:
# Report train/test RMSE for the AutoML model; the trailing ';' keeps the
# returned tuple out of the cell output.
prediction_out('AutoSklearn', model_mae);

**KERAS**

In [None]:
from keras.models import Sequential
from keras.layers import Dense
import tensorflow as tf
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
import logging
tf.get_logger().setLevel(logging.ERROR)

In [None]:
# define base model wrapper
def baseline_model_arg(activation_type, input_dim=None):
    """Return a zero-argument builder for a small dense regression network.

    The returned function has no parameters, so it can be handed to
    ``KerasRegressor`` as ``build_fn`` while still being configured here.

    Parameters
    ----------
    activation_type : str
        Activation used by the two hidden layers.
    input_dim : int, optional
        Number of input features. When omitted, it is taken from the
        notebook-level feature matrix ``X`` at build time, so the network
        always matches the currently selected feature set instead of a
        hard-coded width of 8.

    Returns
    -------
    callable
        ``baseline_model()``, which builds and compiles the network.
    """
    def baseline_model():
        # Resolved lazily (at build time) so a later change to the feature
        # selection is picked up without redefining this wrapper.
        n_inputs = X.shape[1] if input_dim is None else input_dim
        # Layers: 12 units -> 8 units -> 1 linear output.
        model = Sequential()
        model.add(Dense(12, input_dim=n_inputs, kernel_initializer='normal', activation=activation_type))
        model.add(Dense(8, activation=activation_type))
        model.add(Dense(1, kernel_initializer='normal'))
        # Compile model
        model.compile(loss='mean_squared_error', optimizer='adam')
        return model
    return baseline_model

In [None]:
# Define the Keras training routine: a timed cross-validation run followed by
# a fit whose callbacks serialize model checkpoints and TensorBoard logs.
def KerasRMSE(epoch, batches, activator, verbosity=0):
    """Build a KerasRegressor for the given settings, time a 10-fold CV run, then fit it.

    Parameters
    ----------
    epoch : passed to ``KerasRegressor`` as ``epochs``.
    batches : passed to ``KerasRegressor`` as ``batch_size``.
    activator : forwarded to ``baseline_model_arg`` to build the network.
    verbosity : passed to ``KerasRegressor`` as ``verbose`` (default 0).

    Returns
    -------
    The ``KerasRegressor`` after ``fit`` on the notebook-level
    ``X_train`` / ``y_train``.
    """
    kfold_s = KFold(n_splits=10)
    estimator_ks = KerasRegressor(build_fn=baseline_model_arg(activator), epochs=epoch, batch_size=batches, verbose=verbosity)
    # Cross-validate on the full X / y (not only the training split) under the %time line magic.
    # NOTE(review): results_s is not read anywhere else in this function, so the
    # CV run appears to serve only the timing report — confirm it is still wanted.
    %time results_s = cross_val_score(estimator_ks, X, y, cv=kfold_s)
    # Callbacks for the final fit. The checkpoint path is a plain (non-f) string,
    # so its {epoch}/{val_loss} placeholders are left for Keras to fill in —
    # presumably once per epoch; confirm against the Keras version in use.
    # The TensorBoard log directory is named after today's date.
    callbacks_list = [
    tf.keras.callbacks.EarlyStopping(patience=2),
    tf.keras.callbacks.ModelCheckpoint(filepath='./models/model.{epoch:02d}-{val_loss:.2f}.h5'),
    tf.keras.callbacks.TensorBoard(log_dir=f'./logs/{date.today().strftime("%b-%d-%Y")}'),]
    # validation_split=0.33 is forwarded to the underlying Keras fit together with the callbacks.
    estimator_ks.fit(X_train, y_train, validation_split=0.33, callbacks=callbacks_list)
                    
    return estimator_ks

In [None]:
activations_full = ['relu', 'sigmoid', 'softmax', 'softplus']


def KerasAuto(activators, epochs, verbosity=0):
    """Try every (activation, epoch count) pair and return the best estimator.

    The first pair, ``(activators[0], epochs[0])``, is trained up front as
    the initial best. Each remaining pair is trained with ``KerasRMSE`` and
    scored with ``prediction_out``. A candidate replaces the current best
    when its test RMSE is lower. For a given activation, the remaining epoch
    counts are skipped as soon as a candidate has both a higher test RMSE
    and a larger train/test RMSE gap than the current best.

    Parameters
    ----------
    activators : sequence of str
        Activation names to try.
    epochs : sequence of int
        Epoch counts to try for each activation.
    verbosity : int, optional
        Forwarded to ``KerasRMSE`` (default 0).

    Returns
    -------
    The estimator with the lowest test RMSE seen during the search.
    """
    batches = 5
    best_estimator = KerasRMSE(epochs[0], batches, activators[0], verbosity)
    best_train, best_test, _, _ = prediction_out(f'keras {epochs[0]} {batches}', best_estimator)
    for i, activator in enumerate(activators):
        for j, epoch in enumerate(epochs):
            # The very first combination was already evaluated above.
            if (i, j) == (0, 0):
                continue
            candidate = KerasRMSE(epoch, batches, activator, verbosity)
            cand_train, cand_test, _, _ = prediction_out(f'keras {epoch} {batches}', candidate)
            worse_on_test = cand_test > best_test
            wider_gap = abs(best_test - best_train) < abs(cand_test - cand_train)
            if worse_on_test and wider_gap:
                # Give up on this activation; continue with the next one.
                break
            if cand_test < best_test:
                best_train, best_test = cand_train, cand_test
                best_estimator = candidate
            print(f'{activator, epoch}')
    return best_estimator

In [None]:
# Epoch counts to try for every activation in activations_full; KerasAuto
# returns the estimator with the lowest test RMSE it saw.
epochs = [5, 10, 20]
estimator_ks_1 = KerasAuto(activations_full, epochs)

In [None]:
# Print train/test RMSE for all three models, and keep the AutoML and Keras
# predictions for the plots (the RMSE values themselves are discarded here).
prediction_out('linear', lr)
_, _, am_train_pred, am_test_pred = prediction_out('AutoSklearn', model_mae)
_, _, keras_train_pred, keras_test_pred = prediction_out('keras', estimator_ks_1)

In [None]:
import matplotlib.gridspec as gridspec

# One row of three scatter plots (actual value vs. prediction on the test
# split), one panel per model, left to right.
gs = gridspec.GridSpec(1, 3)
fig = plt.figure(figsize=(24, 8), dpi=80)

panels = (
    ('Linear', lr_test_pred),
    ('Keras', keras_test_pred),
    ('AutoML', am_test_pred),
)
axes = []
for column, (panel_title, predictions) in enumerate(panels):
    panel = fig.add_subplot(gs[0, column])
    panel.scatter(y_test, predictions)
    panel.update({'title': panel_title, 'xlabel': 'value', 'ylabel': 'prediction'})
    axes.append(panel)

# Keep the per-panel names available for any later cell.
ax1, ax2, ax3 = axes
plt.show()

**CONCLUSION:** The best results