# Dataset import and preparation
- https://www.kaggle.com/shelvigarg/wine-quality-dataset
- Refer to https://github.com/fenago/deeplearning/blob/main/tensorflow/003_TensorFlow_Classification.ipynb for detailed preparation instructions

In [None]:
import os
import numpy as np
import pandas as pd
import warnings
# Lower TensorFlow's native log verbosity (set before TensorFlow is imported
# further down) and hide Python warnings to keep the notebook output clean.
os.environ.update({'TF_CPP_MIN_LOG_LEVEL': '2'})
warnings.simplefilter('ignore')

# Load the wine-quality CSV and preview five randomly chosen rows.
df = pd.read_csv(filepath_or_buffer='data/winequalityN.csv')
df.sample(n=5)

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler


# Prepare the data
# Drop rows with missing values. The explicit .copy() guarantees an independent
# frame, so the column assignments below never write into a view of the raw data.
df = df.dropna().copy()

# Binary encodings, computed with vectorized comparisons rather than
# Python-level loops over the Series:
#   is_white_wine -> 1 for rows whose type is 'white', 0 otherwise
#   is_good_wine  -> 1 when quality >= 6, 0 otherwise (the prediction target)
df['is_white_wine'] = (df['type'] == 'white').astype(int)
df['is_good_wine'] = (df['quality'] >= 6).astype(int)
df = df.drop(columns=['type', 'quality'])

# Train/test split
# 80/20 split with a fixed random_state so the partition is reproducible.
X = df.drop(columns='is_good_wine')
y = df['is_good_wine']
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.2, random_state=42,
)

# Scaling
# The scaler is fitted on the training split only; the test split is transformed
# with the training statistics so no test information leaks into preprocessing.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
# Peek at the first three rows of the scaled training features.
X_train_scaled[:3]

<br>

# Training a model which finds the optimal learning rate

In [None]:
import tensorflow as tf
# Fix TensorFlow's global random seed so repeated runs are more reproducible.
tf.random.set_seed(42)

- These are the minimum and maximum values the learning rate will take during training:

In [None]:
# Range of learning rates produced by the schedule 1e-3 * 10 ** (epoch / 30).
# Keras hands the scheduler zero-based epoch indices, so a 100-epoch run covers
# epochs 0..99: the sweep starts at exactly 1e-3 and ends just below 2.0.
lr_min = 1e-3 * 10 ** (0 / 30)
lr_max = 1e-3 * 10 ** (99 / 30)
lr_min, lr_max

- You can pass this schedule to the model through a `LearningRateScheduler` callback when fitting:

In [None]:
def lr_sweep(epoch):
    """Return the learning rate for `epoch`: 1e-3 scaled by 10x every 30 epochs."""
    return 1e-3 * 10 ** (epoch / 30)


# Network: three ReLU hidden layers (128 -> 256 -> 256) followed by a single
# sigmoid unit for the binary good/bad wine target.
initial_model = tf.keras.Sequential()
for units in (128, 256, 256):
    initial_model.add(tf.keras.layers.Dense(units, activation='relu'))
initial_model.add(tf.keras.layers.Dense(1, activation='sigmoid'))

# Adam is left at its default learning rate; the scheduler callback below
# overrides the rate at the start of every epoch.
accuracy_metric = tf.keras.metrics.BinaryAccuracy(name='accuracy')
initial_model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.binary_crossentropy,
    metrics=[accuracy_metric],
)

# Train for 100 epochs while sweeping the learning rate upwards.
lr_callback = tf.keras.callbacks.LearningRateScheduler(lr_sweep)
initial_history = initial_model.fit(
    x=X_train_scaled,
    y=y_train,
    epochs=100,
    callbacks=[lr_callback],
)

- The accuracy was terrible at the end - makes sense as our model had a huge learning rate
- Let's plot loss vs. accuracy vs. learning rate:

In [None]:
import matplotlib.pyplot as plt
from matplotlib import rcParams

# Notebook-wide plot defaults: wide figures without top/right axis borders.
rcParams.update({
    'figure.figsize': (18, 8),
    'axes.spines.top': False,
    'axes.spines.right': False,
})

In [None]:
# Curves recorded during the learning-rate sweep.
sweep_logs = initial_history.history

# The scheduler callback logs the rate under 'lr' in tf.keras 2.x and under
# 'learning_rate' in Keras 3, so accept whichever key is present.
lr_key = 'lr' if 'lr' in sweep_logs else 'learning_rate'

# One 1-based x position per recorded epoch, derived from the run itself rather
# than hard-coding the epoch count used in fit().
epochs_axis = np.arange(1, len(sweep_logs['loss']) + 1)

plt.plot(epochs_axis, sweep_logs['loss'], label='Loss', lw=3)
plt.plot(epochs_axis, sweep_logs['accuracy'], label='Accuracy', lw=3)
plt.plot(epochs_axis, sweep_logs[lr_key], label='Learning rate', color='#000', lw=3, linestyle='--')
plt.title('Evaluation metrics', size=20)
plt.xlabel('Epoch', size=14)
plt.legend()
# The trailing semicolon suppresses the return-value echo in the notebook.
plt.savefig('eval_vs_lr.jpg', dpi=300, bbox_inches='tight');

- Accuracy dipped significantly around epoch 50, then flattened, and dipped once again towards the end
- The exact opposite happened to loss
- Let's now plot the learning rate against loss:

In [None]:
# Rebuild the learning rate used at each of the 100 epochs (indices 0..99)
# from the same formula the scheduler applied.
epoch_indices = np.arange(100)
learning_rates = 1e-3 * np.power(10, epoch_indices / 30)
sweep_loss = initial_history.history['loss']

# Log-scaled x axis: the swept rates span several orders of magnitude.
plt.semilogx(learning_rates, sweep_loss, color='#000', lw=3)
plt.title('Learning rate vs. loss', size=20)
plt.xlabel('Learning rate', size=14)
plt.ylabel('Loss', size=14)
plt.savefig('lr_vs_loss.jpg', dpi=300, bbox_inches='tight');

<br>

# Training a model with the optimal learning rate
- You're looking for a learning rate value that achieved minimum loss
- Looks like 0.007 works the best for this dataset
- Let's retrain the model:

In [None]:
# Same architecture as the sweep model: 128 -> 256 -> 256 ReLU units, then a
# single sigmoid output.
hidden_layers = [
    tf.keras.layers.Dense(width, activation='relu')
    for width in (128, 256, 256)
]
output_layer = tf.keras.layers.Dense(1, activation='sigmoid')
model_optimized = tf.keras.Sequential(hidden_layers + [output_layer])

# Adam now runs with the fixed learning rate picked from the sweep (0.007).
tuned_optimizer = tf.keras.optimizers.Adam(learning_rate=0.007)
model_optimized.compile(
    optimizer=tuned_optimizer,
    loss=tf.keras.losses.binary_crossentropy,
    metrics=[tf.keras.metrics.BinaryAccuracy(name='accuracy')],
)

# Train for 100 epochs; no callbacks this time, so the rate stays constant.
history_optimized = model_optimized.fit(x=X_train_scaled, y=y_train, epochs=100)

- Suspiciously high training accuracy - possible overfitting
- Let's plot loss vs. accuracy:

In [None]:
# Training loss and accuracy of the tuned model, one point per epoch (1..100).
epoch_numbers = np.arange(1, 101)
for metric_key, legend_label in (('loss', 'Loss'), ('accuracy', 'Accuracy')):
    plt.plot(epoch_numbers, history_optimized.history[metric_key], label=legend_label, lw=3)

plt.title('Accuracy vs. Loss per epoch', size=20)
plt.xlabel('Epoch', size=14)
plt.legend()
plt.savefig('accuracy_per_epoch.jpg', dpi=300, bbox_inches='tight');

<br>

# Model evaluation on the test set
- Let's now make predictions, convert them to classes and print accuracy and confusion matrix:

In [None]:
# Run the tuned model on the scaled test features; the final layer is a sigmoid,
# so these are raw outputs that still need thresholding into classes.
predictions = model_optimized.predict(X_test_scaled)
predictions

In [None]:
# Threshold the flattened sigmoid outputs at 0.5 (strictly greater -> class 1)
# and convert back to a plain Python list of ints.
flat_probabilities = np.ravel(predictions)
prediction_classes = (flat_probabilities > 0.5).astype(int).tolist()
print(prediction_classes[:20])

In [None]:
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score

# Compute both test-set metrics first, then report them.
test_accuracy = accuracy_score(y_test, prediction_classes)
test_confusion = confusion_matrix(y_test, prediction_classes)

print(f'Accuracy on the test set: {test_accuracy:.2f}')
print()
print('Confusion matrix:')
print(test_confusion)

- The accuracy on the test set increased by 3% compared to the default learning rate (0.001) used in the previous notebook