# Mini Project 6

Exploring Neural Networks and Learning Google Collab

## Wine Quality Regression

In [0]:
# imports and set-up for session

import tensorflow as tf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.metrics import mean_squared_error

## Kaggle Setup and Load Files from API

In [2]:
# Upload API key from local drive
from google.colab import files
files.upload()

# Kaggle API install
!pip install -q kaggle
!mkdir -p ~/.kaggle
!mv kaggle.json ~/.kaggle/
!chmod 600 /root/.kaggle/kaggle.json

# Download files
!mkdir wine
%cd wine
!kaggle competitions download -c wine-quality-linear-regression-2
!ls

Saving kaggle.json to kaggle.json
/content/wine
Downloading winequality-white-sample.csv to /content/wine
  0% 0.00/6.74k [00:00<?, ?B/s]
100% 6.74k/6.74k [00:00<00:00, 4.65MB/s]
Downloading winequality-white-testing.csv to /content/wine
  0% 0.00/55.7k [00:00<?, ?B/s]
100% 55.7k/55.7k [00:00<00:00, 39.9MB/s]
Downloading winequality-white-training.csv to /content/wine
  0% 0.00/227k [00:00<?, ?B/s]
100% 227k/227k [00:00<00:00, 72.3MB/s]
winequality-white-sample.csv   winequality-white-training.csv
winequality-white-testing.csv


## Load Training Set

In [0]:
training = pd.read_csv('winequality-white-training.csv', index_col=0).dropna()
train_X = training.drop('quality',axis=1).values
train_y = training['quality'].values

In [0]:
# normalize
norm_fac = np.max(train_X, axis=0).reshape(1, -1)
train_X /= norm_fac

## Define Tensorflow Network

In [0]:
def make_model_regression(learning_rate=0.0001):
  model = tf.keras.Sequential([
    tf.keras.layers.GaussianNoise(0.1),
    tf.keras.layers.Dense(640, activation=tf.nn.relu),
    tf.keras.layers.Dense(640, activation=tf.nn.relu),
    tf.keras.layers.GaussianDropout(0.1),
    tf.keras.layers.Dense(160, activation=tf.nn.relu),
    tf.keras.layers.Dense(160, activation=tf.nn.relu),
    tf.keras.layers.GaussianDropout(0.1),
    tf.keras.layers.Dense(40, activation=tf.nn.relu),
    tf.keras.layers.Dense(40, activation=tf.nn.relu),
#     tf.keras.layers.GaussianDropout(0.1),
    tf.keras.layers.Dense(1, activation=tf.nn.relu)
  ])
  model.compile(
    optimizer=tf.keras.optimizers.Nadam(lr=learning_rate),
    loss='mean_squared_error'
#     metrics=['accuracy']
  )
  return model

## Validate Model

In [0]:
def regression_cross_val_score(
  estimator,
  X,
  y,
  cv=10,
  scoring=None,
  fit_params={},
  convert=lambda x:x
):
  # annealer
  lr_annealing = tf.keras.callbacks.ReduceLROnPlateau(
      monitor='loss', patience=3, factor=0.5, min_lr=0.000001)
  
  score = 0
  for train, test in StratifiedShuffleSplit(cv).split(X,y):
    model = estimator()
    tf.random.set_random_seed(5)
    model.fit(
        X[train], 
        y[train], 
        validation_data=(X[test], y[test]),
        callbacks=[lr_annealing],
        **fit_params
    )
    s_part = mean_squared_error(y[test], convert(model.predict(X[test])))
    print(np.sqrt(s_part))
    score += s_part/cv
  return np.sqrt(score)  # root mean square error

In [26]:
for lr in [1e-3]:
  result = regression_cross_val_score(
      lambda : make_model_regression(lr),
      train_X,
      train_y,
      cv=4,
      convert=lambda x : np.minimum(np.maximum(0, x), 10),  # restrict to valid range
      fit_params={
          'epochs':40,
          'batch_size':64,
          'verbose':0
      }
  )
  print('lr', lr, ':', result)

0.7220789287426128
0.6990262797866341
0.7495910952064003
5.934903278325344
lr 0.001 : 3.0329440916482486


## Final Regression

Train, predict, save

In [7]:
test_X= pd.read_csv('winequality-white-testing.csv', index_col=0).values
test_pred = pd.read_csv('winequality-white-sample.csv', index_col=0)
print(test_X.shape, test_pred.shape)

(1000, 11) (1000, 1)


In [27]:
search_bounds = range(0, train_y.shape[0])

# annealer
lr_annealing = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='loss', patience=3, factor=0.5, min_lr=0.000001)

model = make_model_regression(1e-3)
tf.random.set_random_seed(5)
model.fit(
    train_X[search_bounds], 
    train_y[search_bounds], 
    epochs=40, 
    callbacks=[lr_annealing]
)

Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


<tensorflow.python.keras.callbacks.History at 0x7f6ae0009630>

In [0]:
pred = np.minimum(np.maximum(0, model.predict(test_X/norm_fac)), 10)

In [32]:
test_pred['quality'] = pred
test_pred.to_csv('wine-white-submission.csv', index=True)

!ls
files.download('wine-white-submission.csv')

winequality-white-sample.csv   winequality-white-training.csv
winequality-white-testing.csv  wine-white-submission.csv
