### V.1: Exploring the green reds

#### a) Writing a function that will plot a scatterplot matrix of the red wine data

In [None]:
%matplotlib inline

import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec

def plot_scatter_matrix(wine_data, good_threshold, bad_threshold, save_plot=False):
    '''
    Draw a square grid of pairwise scatter plots, one row/column per column
    of wine_data (the 'quality' column included).

    wine_data: DataFrame that contains a 'quality' column
    good_threshold: rows with quality strictly above it are drawn as "good"
    bad_threshold: rows with quality strictly below it are drawn as "bad"
    save_plot: when truthy, the figure is written to 'scatter_matrix.png'

    Diagonal cells only show the column name; every other cell plots the
    row's column on the x axis against the grid column's column on the y axis.
    '''
    quality = wine_data['quality']
    good_wines = wine_data[quality > good_threshold]
    bad_wines = wine_data[quality < bad_threshold]

    names = wine_data.columns
    n_names = len(names)

    figure = plt.figure(figsize=(20,20))
    grid = gridspec.GridSpec(n_names, n_names)
    grid.update(wspace=0, hspace=0)  # cells touch each other, no gaps

    for row, row_name in enumerate(names):
        for col, col_name in enumerate(names):
            cell = figure.add_subplot(grid[row, col], xticks=[], yticks=[])
            if row == col:
                # diagonal: label the feature instead of plotting it against itself
                cell.text(0.5, 0.5, row_name, horizontalalignment='center',
                          verticalalignment='center')
            else:
                cell.scatter(good_wines[row_name], good_wines[col_name], c='#D41159')
                cell.scatter(bad_wines[row_name], bad_wines[col_name], c='#1A85FF')

    if save_plot:
        figure.savefig('scatter_matrix.png')
    plt.show()
    
# Load the red wine data (semicolon-separated) and plot its scatter matrix.
df = pd.read_csv('./winequality-red.csv', sep=';')
# Wines scoring above `good` are "good", wines scoring below `bad` are "bad".
good, bad = 7, 4
# Pass the named thresholds instead of repeating the literals.
plot_scatter_matrix(df, good, bad, False)

#### b) Useful factors for my perceptron to distinguish high-quality from low-quality wines

According to the plot, the most useful features to train my perceptron are pH and alcohol. 

### V.2: Learning to perceptron

#### a) & b) Perceptron implementation

In [None]:
import pandas as pd

def prep_train_data(df:'pd.DataFrame', good_threshold:'int',
                    bad_threshold:'int', features:'list', randomise:'bool'=True) -> 'pd.DataFrame':
    '''
    Build a training frame from the "good" and "bad" rows of df.

    df: source data; must contain a 'quality' column
    good_threshold: rows with quality > good_threshold are kept and labelled 1
    bad_threshold: rows with quality < bad_threshold are kept and labelled -1
    features: columns to keep; must include 'quality'
    randomise: shuffle the rows (original index labels are preserved)

    Returns a new DataFrame (df itself is not modified) whose 'quality'
    column holds the labels 1 / -1. Good rows come first unless shuffled.
    Raises KeyError when 'quality' is not part of features.
    '''
    if 'quality' not in features:
        # fail early with an explicit message instead of deep inside pandas
        raise KeyError("'quality' must be listed in features")
    quality = df['quality']
    good_df = df[quality > good_threshold][features]
    bad_df = df[quality < bad_threshold][features]
    train_df = pd.concat([good_df, bad_df], axis = 0)
    if randomise:
        train_df = train_df.sample(frac=1)
    train_df['quality'] = train_df['quality'].apply(lambda x: 1 if x > good_threshold else -1)
    return train_df

In [None]:
import random

class Perceptron(object):
    '''
    Single-layer perceptron with a step activation, trained sample by sample.

    Notation used below:
        X: feature matrix
        xi: ith feature vector (row vector)
        y: class labels vector (column vector), entries are 1 or -1

    self.weights[0] is the bias term, self.weights[1:] are the per-feature
    weights. The weights only exist after init_weights()/train() has run.
    '''

    def __init__(self, bias:'float'=1.0, seed:'int'=42):
        # bias: initial value of weights[0]
        # seed: passed to random.seed() before the initial weights are drawn
        self.bias = bias
        self.seed = seed

    def net_input(self, xi:'numpy.ndarray') -> 'float':
        '''Weighted sum of the features of one sample plus the bias.'''
        return sum(x * w for x, w in zip(xi, self.weights[1:])) + self.weights[0]

    def activation(self, xi:'numpy.ndarray') -> 'float':
        '''Step function: 1.0 when the net input is >= 0, otherwise -1.0.'''
        return 1.0 if self.net_input(xi) >= 0.0 else -1.0

    def predict(self, X:'numpy.ndarray') -> 'list':
        '''Return the list of predicted labels (1.0 / -1.0), one per row of X.'''
        return [self.activation(xi) for xi in X]

    def init_weights(self, X:'numpy.ndarray') -> None:
        '''
        Reset the weights: small values of alternating sign for the features,
        self.bias for weights[0]. Re-seeds the global `random` generator so
        repeated trainings start from the same weights.
        '''
        if len(X) == 0:
            raise ValueError('X must contain at least one sample')
        random.seed(self.seed)
        self.weights = [random.randrange(100)/10000 * (-1)**(i%2) for i in range(len(X[0]) + 1)]
        self.weights[0] = self.bias

    def update_weights(self, xi, error) -> None:
        '''Apply the perceptron learning rule for one sample (in place).'''
        self.weights[0] += self.eta * error
        for k in range(len(self.weights) - 1):
            self.weights[k + 1] += self.eta * error * xi[k]

    def train(self, X:'numpy.ndarray', y:'numpy.ndarray', eta:'float', n_epochs:'int') -> 'list':
        '''
        Train on (X, y) with learning rate eta.

        n_epochs > 0: run exactly that many epochs.
        n_epochs == 0: run until an epoch produces no classification error
                       (never terminates if the data is not linearly separable).
        n_epochs < 0: ValueError.

        Returns a list with one tuple per epoch:
            (epoch index, number of misclassified samples in that epoch,
             copy of the weights at the end of the epoch, bias at the end of the epoch)
        '''
        if n_epochs < 0:
            raise ValueError('n_epochs must be >= 0 (0 means: train until there are no errors)')
        self.init_weights(X)
        self.eta = eta
        until_fit = (0 == n_epochs)
        performance = []
        i_epoch = 0
        # A plain while loop replaces range(sys.maxsize): `sys` was never imported.
        while until_fit or i_epoch < n_epochs:
            n_errors = 0
            for xi, target in zip(X, y):
                error = target - self.activation(xi)
                if error != 0:
                    # error is +/-2 for a mistake; count each mistake once
                    n_errors += 1
                    self.update_weights(xi, error)
            # Store a copy: update_weights mutates self.weights in place, so
            # storing the list itself would make every entry show the final weights.
            performance.append((i_epoch, n_errors, list(self.weights), self.weights[0]))
            if until_fit and n_errors == 0:
                break
            i_epoch += 1
        return performance

#### c) Plot the perceptron's performance

In [None]:
import matplotlib.pyplot as plt

def plot_performance(performance:'tuple', wine_data:'pd.DataFrame', 
                     good_threshold:'int', bad_threshold:'int',
                    epoch:'int'=-1, save_plot:'bool'=False) -> None:
    '''
    Plot the training history and the decision boundary of a two-feature model.

    performance: sequence of (epoch index, error/cost, weights, bias) tuples as
                 returned by the train() methods; weights is [bias, w1, w2, ...]
                 and only bias, w1 and w2 are used here
    wine_data: frame whose first two columns are the plotted features and which
               contains a 'quality' column with the raw scores
    good_threshold / bad_threshold: quality > good_threshold is drawn as good,
               quality < bad_threshold as bad
    epoch: which history entry to draw the boundary for; negative values count
           from the end, out-of-range values are clamped
    save_plot: when truthy, the figure is written to 'performance.png'

    Raises ValueError when performance is empty.
    '''
    if len(performance) == 0:
        raise ValueError('performance is empty: train a model before plotting')
    n_records = len(performance)
    epoch = max(-n_records, min(epoch, n_records - 1))
    perf_at = performance[epoch]
    epochs = [t[0] for t in performance]
    errors = [t[1] for t in performance]

    fig = plt.figure(figsize=(20,5))
    ax = fig.add_subplot(1,2,1, title='Errors as a function of epoch',
                        xlabel='epoch', ylabel='classification error')
    ax.plot(epochs, errors)

    # The title reports the epoch index stored in the selected entry, so it
    # is also right when a negative epoch was requested.
    ax = fig.add_subplot(1,2,2, title='Decision boundary on epoch {}'.format(perf_at[0]),
                         xlabel=wine_data.columns[0], ylabel=wine_data.columns[1])
    good_df = wine_data[wine_data['quality'] > good_threshold]
    bad_df = wine_data[wine_data['quality'] < bad_threshold]
    ax.scatter(good_df.iloc[:,0], good_df.iloc[:,1], c='#D41159', label='good (>%d score)'%good_threshold)
    ax.scatter(bad_df.iloc[:,0], bad_df.iloc[:,1], c='#1A85FF', label='bad (<%d score)'%bad_threshold)

    b, w1, w2 = perf_at[2][0], perf_at[2][1], perf_at[2][2]
    offset = 0.1
    x_min, x_max = wine_data.iloc[:,0].min() - offset, wine_data.iloc[:,0].max() + offset
    y_min, y_max = wine_data.iloc[:,1].min() - offset, wine_data.iloc[:,1].max() + offset

    if w2 != 0:
        # Boundary: w1*x + w2*y + b = 0  =>  y = -(w1*x + b) / w2.
        # Written this way it needs no division by b or w1.
        xs = [x_min, x_max]
        ys = [-(w1 * x + b) / w2 for x in xs]
        ax.plot(xs, ys, label='decision boundary', linestyle='--')
        # The models predict "good" where w1*x + w2*y + b >= 0, which is the
        # region above the line when w2 > 0 and below it when w2 < 0.
        if w2 > 0:
            below_color, above_color = '#1A85FF', '#D41159'
        else:
            below_color, above_color = '#D41159', '#1A85FF'
        ax.fill_between(xs, ys, y_min, alpha=0.1, color=below_color)
        ax.fill_between(xs, ys, y_max, alpha=0.1, color=above_color)
    elif w1 != 0:
        # w2 == 0: the boundary is the vertical line x = -b / w1
        ax.axvline(-b / w1, label='decision boundary', linestyle='--')
    # w1 == w2 == 0: there is no boundary to draw

    ax.axis([x_min, x_max, y_min, y_max])
    ax.legend()
    if save_plot:
        fig.savefig('performance.png')
    
# Train the perceptron on the extreme wines (quality > 7 vs. quality < 4)
# using alcohol and pH, then plot its learning curve and decision boundary.
df = pd.read_csv('./winequality-red.csv', sep = ';')
train_df = prep_train_data(df, 7, 4, ['alcohol', 'pH', 'quality'], randomise=True)
# 'quality' is the last column: everything before it is a feature
X, y = train_df.iloc[:,:-1].values, train_df.iloc[:,-1].values
perceptron = Perceptron()
performance = perceptron.train(X, y, eta=0.5, n_epochs=0)
# put the raw scores back (aligned on the index) so the plot can apply the thresholds
train_df['quality'] = df['quality']
plot_performance(performance, train_df, good_threshold=7, bad_threshold=4)

#### d) Feature scaling 

In [None]:
df = pd.read_csv('./winequality-red.csv', sep = ';')
train_df = prep_train_data(df, 7, 4, ['alcohol', 'pH', 'quality'])
perceptron = Perceptron()

#   raw data
X = train_df.iloc[:,:-1].values
y = train_df.iloc[:,-1].values
performance = perceptron.train(X, y, 0.5, 0)
# from here on train_df['quality'] holds the raw scores again (index-aligned)
train_df['quality'] = df['quality']
plot_performance(performance, train_df, 7, 4)

#    data normalisation: centre on the mean, divide by the range
norm_df = (train_df - train_df.mean()) / (train_df.max() - train_df.min())
# the scaled quality column is meaningless: overwrite it with the 1 / -1 labels
norm_df['quality'] = df['quality'].apply(lambda x: 1 if x > 7 else -1)
X = norm_df.iloc[:,:-1].values
y = norm_df.iloc[:,-1].values
performance = perceptron.train(X, y, 0.1, 0)
norm_df['quality'] = df['quality']
plot_performance(performance, norm_df, 7, 4, -1)

#   data standardisation: z-score, i.e. subtract the MEAN (not the minimum)
#   and divide by the standard deviation
std_df = (train_df - train_df.mean()) / train_df.std()
std_df['quality'] = df['quality'].apply(lambda x: 1 if x > 7 else -1)
X = std_df.iloc[:,:-1].values
y = std_df.iloc[:,-1].values
performance = perceptron.train(X, y, 0.1, 0)
std_df['quality'] = df['quality']
plot_performance(performance, std_df, 7, 4, -1)

### V.3: My fair ADALINE

#### a) Wines with scores of 7 or more (good) versus 4 or less (bad)

In [None]:
# Wider classes: good is quality > 6, bad is quality < 5.
# Reuses `df` and `perceptron` created by the previous cells.
train_df = prep_train_data(df, 6, 5, ['alcohol', 'pH', 'quality'])
X, y = train_df.iloc[:,:-1].values, train_df.iloc[:,-1].values
# fixed number of epochs instead of training until there are no errors
performance = perceptron.train(X, y, eta=0.1, n_epochs=12000)
# restore the raw scores so the plot can apply the thresholds
train_df['quality'] = df['quality']
plot_performance(performance, train_df, 6, 5, epoch=-1)

#### b) & c) Gradient descent and ADALINE implementation

In [None]:
import math

class Adaline(object):
    '''
    ADAptive LInear NEuron trained by gradient descent on the sum of squared
    errors between the labels and the (linear) net input.

    self.weights[0] is the bias term, self.weights[1:] are the per-feature
    weights. The weights only exist after init_weights()/train() has run.
    '''

    def __init__(self, bias:'float'=1.0, seed:'int'=42):
        # bias: initial value of weights[0]
        # seed: passed to random.seed() before the initial weights are drawn
        self.bias = bias
        self.seed = seed

    def net_input(self, X:'numpy.ndarray') -> 'list':
        '''Return, for every row of X, the weighted feature sum plus the bias.'''
        bias, coefs = self.weights[0], self.weights[1:]
        return [sum(x * w for x, w in zip(xi, coefs)) + bias for xi in X]

    def activation(self, X:'numpy.ndarray') -> 'list':
        '''Linear (identity) activation: the net input itself.'''
        return self.net_input(X)

    def activation2(self, X:'numpy.ndarray') -> 'list':
        '''Sigmoid of the net input, computed without overflowing for large |z|.'''
        outputs = []
        for z in self.net_input(X):
            if z >= 0:
                outputs.append(1 / (1 + math.exp(-z)))
            else:
                # exp(-z) would overflow for very negative z; exp(z) cannot
                ez = math.exp(z)
                outputs.append(ez / (1 + ez))
        return outputs

    def predict(self, X:'numpy.ndarray') -> 'list':
        '''Labels (1 / -1) obtained by thresholding the linear activation at 0.'''
        return [1 if xi >= 0.0 else -1 for xi in self.activation(X)]

    def predict2(self, X:'numpy.ndarray') -> 'list':
        '''Labels (1 / -1) obtained by thresholding the sigmoid activation at 0.5.'''
        return [1 if xi >= .5 else -1 for xi in self.activation2(X)]

    def init_weights(self, X:'numpy.ndarray') -> None:
        '''
        Reset the weights: small values of alternating sign for the features,
        self.bias for weights[0]. Re-seeds the global `random` generator so
        repeated trainings start from the same weights.
        '''
        if len(X) == 0:
            raise ValueError('X must contain at least one sample')
        random.seed(self.seed)
        self.weights = [random.randint(1,100) / 10000 * (-1) ** (i % 2)
                        for i in range(len(X[0]) + 1)]
        self.weights[0] = self.bias

    def update_weights(self, X:'numpy.ndarray', errors:'list') -> None:
        '''Gradient step (in place) for the samples in X with their errors.'''
        self.weights[0] += self.eta * sum(errors)
        for wi in range(len(self.weights) - 1):
            for xi, error in zip(X, errors):
                self.weights[wi + 1] += self.eta * error * xi[wi]

    def _errors(self, X, y) -> 'list':
        # label minus linear output, one value per sample
        return [target - output for target, output in zip(y, self.activation(X))]

    def train(self, X:'np.ndarray', y:'np.ndarray', eta:'float',
              n_epochs:'int', batch:'bool'=True) -> 'list':
        '''
        Train on (X, y) with learning rate eta. X and y may be numpy arrays
        or plain (nested) lists.

        n_epochs > 0: run exactly that many epochs.
        n_epochs == 0: run until every training sample is classified correctly
                       (never terminates if that state is not reached).
        n_epochs < 0: ValueError.
        batch: True updates once per epoch from all samples; False updates
               after every single sample.

        Returns a list with one tuple per epoch:
            (epoch index, cost, copy of the weights at the end of the epoch,
             bias at the end of the epoch)
        The cost is half the sum of squared errors; in batch mode it is
        measured before the epoch's update, otherwise after the epoch.
        '''
        if n_epochs < 0:
            raise ValueError('n_epochs must be >= 0 (0 means: train until there are no errors)')
        self.eta = eta
        self.init_weights(X)
        until_fit = (0 == n_epochs)
        performance = []
        i_epoch = 0
        # A plain while loop replaces range(sys.maxsize): `sys` was never imported.
        while until_fit or i_epoch < n_epochs:
            if batch:
                errors = self._errors(X, y)
                self.update_weights(X, errors)
            else:
                for xi, target in zip(X, y):
                    error = target - self.net_input([xi])[0]
                    self.update_weights([xi], [error])
                errors = self._errors(X, y)
            # e * e instead of e ** 2: a plain float overflows to inf instead of raising
            cost = sum(e * e for e in errors) / 2.0
            # Store a copy: update_weights mutates self.weights in place, so
            # storing the list itself would make every entry show the final weights.
            performance.append((i_epoch, cost, list(self.weights), self.weights[0]))
            # Compare label by label: summing (y - prediction) lets a +2 and a -2
            # error cancel and would stop training while samples are still wrong.
            if until_fit and all(t == p for t, p in zip(y, self.predict(X))):
                break
            i_epoch += 1
        return performance

#### d) Find a good learning rate for ADALINE and plot the number of classification errors vs epochs

In [None]:
# Compare three learning-rate settings for ADALINE on the same shuffled data.
df = pd.read_csv('./winequality-red.csv', sep = ';')
train_df = prep_train_data(df, 7, 4, ['alcohol', 'pH', 'quality'], randomise=True)
adaline = Adaline(seed=55)
X = train_df.iloc[:,:-1].copy().values
y = train_df.iloc[:,-1].copy().values
# X and y are already extracted, so the raw scores can go back in for plotting
train_df['quality'] = df['quality']
# (eta, n_epochs) pairs; n_epochs == 0 means "train until every sample is right"
for eta, n_epochs in ((0.01, 10), (0.0005, 0), (0.0001, 0)):
    performance = adaline.train(X, y, eta, n_epochs)
    plot_performance(performance, train_df, 7, 4)

### V.4 Advanced wine sampling and resampling

#### a) Partitioning data into a training and validation set using the holdout method

In [None]:
def holdout(df:'pd.DataFrame', k:'float', shuffle:'bool'=False, seed:'int'=None) -> 'tuple':
    '''
    Split df into a training part and a validation part (holdout method).

    df: data to split
    k: fraction of the rows that goes to the training part, 0 < k <= 1;
       any other value is replaced by 1 (everything is training data)
    shuffle: shuffle the rows before splitting. Off by default, so the split
             follows the row order of df; turn it on when df is ordered by class.
    seed: random_state used for the shuffle (None: not reproducible)

    Returns (train_df, validation_df).
    '''
    fraction = k if 0 < k <= 1 else 1
    if shuffle:
        df = df.sample(frac=1, random_state=seed)
    split_at = int(len(df) * fraction)
    return (df.iloc[:split_at], df.iloc[split_at:])

#### b) Generate a k-fold cross validation dataset

In [None]:
def k_fold(df:'pd.DataFrame', k:'int'=10, shuffle:'bool' = True) -> 'list':
    '''
    Generate a k-fold cross validation dataset.

    k - the number of groups the given data is split into (2 <= k <= len(df))

    1. shuffle the data randomly (when shuffle is true; this also resets the index)
    2. split the data into k validation folds of equal size len(df) // k
    3. every fold is used once as the validation set, all remaining rows
       form the matching training set

    Rows left over when len(df) is not a multiple of k are not thrown away:
    they are added to every training set. They are never part of a
    validation fold, so all validation folds keep the same size.

    Returns a list of k (train_df, valid_df) tuples.
    Raises ValueError when k is smaller than 2 or larger than len(df).
    '''
    n_rows = len(df)
    if k < 2 or k > n_rows:
        raise ValueError('k must be between 2 and the number of rows (%d), got %r' % (n_rows, k))
    if (shuffle):
        df = df.sample(frac=1).reset_index(drop=True)
    n_per_fold = n_rows // k
    folds = [df.iloc[i * n_per_fold:(i + 1) * n_per_fold] for i in range(k)]
    leftover = df.iloc[k * n_per_fold:]
    kfolds = []
    for i in range(k):
        train_parts = folds[:i] + folds[i + 1:]
        if len(leftover) > 0:
            train_parts.append(leftover)
        kfolds.append((pd.concat(train_parts), folds[i]))
    return kfolds

#### c) Evaluating Adaline via k-fold cross validation

In [None]:
def k_fold_validation(model, df:'pd.DataFrame', k:'int'=10,
                      n_epochs:'int'=0, eta:'float'=0.001) -> None:
    '''
    Evaluate model by k-fold cross validation and print its accuracy.

    model: object with train(X, y, eta, n_epochs) and predict(X)
    df: data whose last column holds the labels and whose other columns
        are the features
    k: number of folds
    n_epochs, eta: passed on to model.train for every fold

    The model is retrained on each training split and scored on the matching
    validation split. The printed accuracy is the number of correct
    predictions divided by the number of validated samples over all folds.
    '''
    n_correct = 0
    n_validated = 0
    for train_df, valid_df in k_fold(df, k):
        model.train(train_df.iloc[:,:-1].values, train_df.iloc[:,-1].values, eta, n_epochs)
        y_valid = valid_df.iloc[:,-1].values
        predictions = model.predict(valid_df.iloc[:,:-1].values)
        n_correct += sum(int(target == pred) for target, pred in zip(y_valid, predictions))
        # count the samples actually validated instead of assuming that
        # every fold has the size of the last one
        n_validated += len(y_valid)
    accuracy = n_correct / n_validated if n_validated else float('nan')
    print('{}'.format(list(df.columns)), end=' ')
    print('k: %d epochs: %d eta: %f accuracy: %f' %
          (k, n_epochs, eta, accuracy))

# Cross-validate ADALINE (10 folds) for several learning-rate / epoch settings.
df = pd.read_csv('./winequality-red.csv', sep=';')
df = prep_train_data(df, 7, 4, ['alcohol','pH','quality'], randomise=False)
ada = Adaline()
# (eta, n_epochs) pairs; n_epochs == 0 means "train until every sample is right"
for eta, n_epochs in ((0.0001, 0), (0.0001, 500), (0.0001, 1000),
                      (0.0005, 0), (0.0005, 500)):
    k_fold_validation(ada, df, 10, n_epochs, eta=eta)

### V.5 Adventures in the Nth dimension

#### a) Train Adaline and Perceptron with different numbers and types of chemicals 

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Cross-validate ADALINE and the perceptron on different feature subsets.
df = pd.read_csv('./winequality-red.csv', sep=';')
perceptron = Perceptron()
adaline = Adaline()

# (columns, learning rate for ADALINE, number of epochs for both models);
# the perceptron always uses eta=0.01
experiments = [
    (['fixed acidity', 'volatile acidity', 'citric acid', 'quality'], 0.0005, 1000),
    (['residual sugar', 'density', 'sulphates', 'quality'], 0.0005, 1000),
    (['alcohol', 'pH', 'sulphates', 'quality'], 0.0005, 1000),
    (list(df.columns), 0.00001, 500),
]
for columns, adaline_eta, n_epochs in experiments:
    train_df = prep_train_data(df, 7, 4, columns)
    k_fold_validation(adaline, train_df, eta=adaline_eta, n_epochs=n_epochs)
    k_fold_validation(perceptron, train_df, eta=0.01, n_epochs=n_epochs)

#### b) What does the decision boundary for N factors look like? 

The decision boundary for N-feature data is an (N-1)-dimensional hyperplane, so for three-feature data it is a plane.

### V.6 Marvin's rebuttal

#### a) Classify the Pan-Galactic Gargle Blaster dataset

In [None]:
df = pd.read_csv('./Pan Galactic Gargle Blaster.csv', sep=';')
good = 5
bad = 6
plot_scatter_matrix(df, good, bad)
train_df = prep_train_data(df, good, bad, list(df.columns))
norm_df = (train_df - train_df.mean()) / (train_df.max() - train_df.min())

# Re-express the two (normalised) features in polar coordinates (phi, r).
x1 = norm_df.iloc[:,0]
x2 = norm_df.iloc[:,1]
norm_df = norm_df.assign(r=pd.Series(x1*x1 + x2*x2).pow(1./2))
# assign() aligns a Series on its index. train_df is shuffled, so the angles
# must carry norm_df's own index; a default 0..n-1 index would attach each
# angle to the wrong row. math.atan2 takes (y, x), hence (x2, x1).
phi = [math.atan2(second, first) for first, second in zip(x1, x2)]
norm_df = norm_df.assign(phi=pd.Series(phi, index=norm_df.index))
norm_df = norm_df[['phi', 'r', 'quality']]

# raw scores for plotting, 1 / -1 labels for training, raw scores again for plotting
norm_df['quality'] = df['quality']
plot_scatter_matrix(norm_df, good, bad)
norm_df['quality'] = norm_df['quality'].apply(lambda x: 1 if x > good else -1)
X = norm_df.iloc[:,:-1].copy().values
y = norm_df.iloc[:,-1].copy().values
norm_df['quality'] = df['quality']

ada = Adaline()
performance = ada.train(X, y, 0.0001, 0)
plot_performance(performance, norm_df, good, bad)
perceptron = Perceptron()
performance = perceptron.train(X, y, 0.001, 0)
plot_performance(performance, norm_df, good, bad)