In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import random
import copy

# Raise pandas' 'display.max_rows' option to 2000 for this notebook.
pd.set_option('display.max_rows', 2000)

In [None]:
# Load the red-wine quality dataset; the CSV uses ';' as its field separator.
red_wine_df = pd.read_csv("resources/winequality-red.csv", sep=';')

In [None]:
# Report the number of rows, then preview the first rows of the dataset.
print("length =", len(red_wine_df))
red_wine_df.head()

### V.1 Exploring the green reds

#### a) Plot scatter matrix function

In [None]:
def plot_scatter_matrix(data, good_threshold, bad_threshold, rows=12, cols=12, save_plot=False, name=None):
    """Draw a scatter-plot matrix of every column of `data` against every other.

    Rows whose 'quality' is strictly greater than `good_threshold` are drawn
    in green and rows whose 'quality' is strictly lower than `bad_threshold`
    in magenta. Diagonal cells show the column name instead of a plot.

    Parameters
    ----------
    data : DataFrame containing a 'quality' column.
    good_threshold, bad_threshold : quality cut-offs (both exclusive).
    rows, cols : size of the subplot grid; each must be at least the number
        of columns in `data`.
    save_plot : when true, the figure is written to disk before being shown.
    name : file name (without extension) used when saving; 'Plt.png' is used
        when it is None.

    Raises
    ------
    ValueError
        If the grid is too small to hold one cell per pair of columns.
    """
    columns = data.columns.values
    if len(columns) > rows or len(columns) > cols:
        raise ValueError(
            "a {}x{} grid cannot hold {} columns".format(rows, cols, len(columns)))

    # squeeze=False keeps axmat two-dimensional even for a 1xN or Nx1 grid.
    fig, axmat = plt.subplots(rows, cols, figsize=(20, 20), squeeze=False)
    for ax in axmat.flat:
        ax.set_xticks([])
        ax.set_yticks([])
    fig.subplots_adjust(wspace=0, hspace=0)

    # The two quality subsets do not depend on the cell, so select them once.
    good = data[data['quality'] > good_threshold]
    bad = data[data['quality'] < bad_threshold]

    for i, row_name in enumerate(columns):
        for j, col_name in enumerate(columns):
            ax = axmat[i][j]
            if i == j:
                ax.text(0.5, 0.5, col_name.replace(' ', '\n'), fontsize=16, ha='center', va='center')
            else:
                ax.scatter(good[col_name], good[row_name], s=3, c='g')
                ax.scatter(bad[col_name], bad[row_name], s=3, c='m')

    if save_plot:
        fig.savefig('Plt.png' if name is None else "{}.png".format(name))
    plt.show()

In [None]:
# Scatter matrix of the red wines: quality above 6 counts as good, below 5 as bad.
plot_scatter_matrix(red_wine_df, good_threshold=6, bad_threshold=5)

#### b) Which factors do you think will be most useful for distinguishing high vs low quality wines

At first glance, pH and alcohol show the clearest separation with respect to quality. Using only these two features we can draw a decision boundary that divides the data perfectly when comparing high-quality wines (8 or higher) with low-quality wines (3 or lower).

### V.2 Learning to perceptron

#### a)&b) Perceptron implementation and training

In [None]:
def select_features_labels(data, good_thres=7, bad_thres=4, feature_list=["pH", "alcohol"], labels=(1, 0)):
    """Extract feature rows and class labels for the clearly good / bad wines.

    Only rows whose 'quality' is strictly above `good_thres` or strictly
    below `bad_thres` are kept. `data` itself is never modified.

    Parameters
    ----------
    data : DataFrame holding the columns in `feature_list` plus 'quality'.
    good_thres, bad_thres : quality cut-offs (both exclusive).
    feature_list : names of the feature columns to return.
    labels : pair (good_label, bad_label) used to encode the two classes.

    Returns
    -------
    (features, labels) : two NumPy arrays; `features` has one row per
        selected wine, `labels` the matching class label.

    Notes
    -----
    Relabelling is done in two steps: every value that is not below
    `bad_thres` becomes the good label, then every value different from the
    good label becomes the bad label. As a consequence a row whose raw
    quality already equals the good label is classified as good, which is
    what allows already-binarised data (as passed by
    adaline_k_fold_validation) to go through unchanged.
    """
    good_label, bad_label = labels[0], labels[1]
    columns = list(feature_list)

    keep = (data['quality'] > good_thres) | (data['quality'] < bad_thres)
    selected = data.loc[keep, columns + ["quality"]]

    # Series.where returns a new object, so no chained in-place assignment
    # on a slice is needed (that pattern is a no-op under Copy-on-Write).
    quality = selected['quality']
    relabelled = quality.where(quality < bad_thres, good_label)
    # Compare against the good label itself rather than a hard-coded 1 so
    # that label pairs other than (1, x) are encoded correctly.
    relabelled = relabelled.where(relabelled == good_label, bad_label)

    return (selected[columns].values, relabelled.values)

In [None]:
class Perceptron:
    """Single-layer perceptron with a Heaviside (0/1) activation.

    Attributes
    ----------
    weights : 1-D float array with one weight per input feature.
    bias : float array of length 1 (plot_perceptron_performance reads it
        as b[0]).
    perf : list of (epoch, errors, weights, bias) tuples, set by train().
    """
    def __init__(self, input_size):
        """Draw `input_size` weights and one bias uniformly from [-1, 1].

        Raises ValueError when `input_size` is smaller than 1.
        """
        if input_size < 1:
            raise ValueError("input_size must be at least 1")
        draws = [random.uniform(-1, 1) for _ in range(input_size)]
        # copy=True yields independent, writable arrays; update_weights
        # modifies them in place with +=.
        self.weights = pd.Series(draws, dtype=float).to_numpy(copy=True)
        self.bias = pd.Series([random.uniform(-1, 1)], dtype=float).to_numpy(copy=True)
    def heaviside_step_fn(self, nb):
        """Return 1 when `nb` is non-negative, otherwise 0."""
        return 1 if nb >= 0 else 0
    def forward_pass(self, X):
        """Return the 0/1 prediction for one feature vector `X`."""
        return self.heaviside_step_fn(sum(self.weights * X) + self.bias)
    def evaluate(self, features, labels):
        """Return the number of samples whose prediction differs from its label."""
        return sum(1 for X, y in zip(features, labels) if self.forward_pass(X) != y)
    def update_weights(self, output, X, y, l_rate):
        """Apply the perceptron learning rule for one sample.

        Nothing changes when the prediction `output` already equals `y`.
        """
        error = y - output
        if error != 0:
            self.bias += l_rate * error
            self.weights += error * l_rate * X
    def train(self, data, l_rate, epochs, thres=(7, 4), feature_list=["pH", "alcohol"]):
        """Train on the good / bad wines of `data` and return the history.

        Parameters
        ----------
        data : DataFrame passed to select_features_labels.
        l_rate : learning rate.
        epochs : maximum number of passes over the data; 0 means
            "up to 20000 epochs".
        thres : (good_threshold, bad_threshold) pair.
        feature_list : feature columns to train on.

        Returns
        -------
        List of (epoch, errors, weights, bias) tuples. Entry 0 describes the
        untrained model; entry k the model after k epochs. Training stops
        early as soon as an epoch ends with zero classification errors.
        """
        features, labels = select_features_labels(data, thres[0], thres[1], feature_list=feature_list)
        if epochs == 0:
            epochs = 20000
        perf = [(0, self.evaluate(features, labels), copy.copy(self.weights), copy.copy(self.bias))]
        for epoch in range(1, epochs + 1):
            for X, y in zip(features, labels):
                self.update_weights(self.forward_pass(X), X, y, l_rate)
            errors = self.evaluate(features, labels)
            # Store copies: weights and bias keep being updated in place.
            perf.append((epoch, errors, copy.copy(self.weights), copy.copy(self.bias)))
            if errors == 0:
                break
        self.perf = perf
        return perf

#### c) Plot perceptron performance

In [None]:
def plot_perceptron_performance(wine_data, performance, good_threshold,
                                bad_threshold, feature_list=["pH", "alcohol"], epoch=-1):
    """Plot the training error curve and the learned decision boundary.

    Parameters
    ----------
    wine_data : DataFrame with the two feature columns and 'quality'.
    performance : list of (epoch, errors, weights, bias) tuples as returned
        by the train() methods; bias is indexed as bias[0].
    good_threshold, bad_threshold : quality cut-offs (both exclusive) used
        to pick the wines drawn as good (green) and bad (magenta).
    feature_list : the two feature names; the first is drawn on the x axis,
        the second on the y axis.
    epoch : last epoch to include; a negative value means "all of them".

    The learned weights and bias of the last included epoch are printed.
    The boundary is computed by dividing by the second weight, so a weight
    of exactly zero on the y-axis feature is not supported.
    """
    if epoch >= 0:
        performance = performance[: epoch + 1]
    # Report the epoch actually reached: training may have stopped before
    # the requested one, in which case the slice above is shorter.
    epoch = performance[-1][0]

    fig, (ax_errors, ax_boundary) = plt.subplots(1, 2, figsize=(15, 5))

    # Left panel: classification errors per epoch.
    ax_errors.plot([entry[0] for entry in performance],
                   [entry[1] for entry in performance], color="navy")
    ax_errors.set_xlabel('epoch')
    ax_errors.set_ylabel('classification errors')
    ax_errors.set_title('Error as a function of epoch')

    # Right panel: data points and decision boundary.
    x_name, y_name = feature_list[0], feature_list[1]
    x_values, y_values = wine_data[x_name], wine_data[y_name]
    # Margins are a fraction of the data range, so they stay positive for
    # data containing negative values (e.g. mean-centred features).
    x_margin = (x_values.max() - x_values.min()) / 20
    y_margin = (y_values.max() - y_values.min()) / 5
    xmin, xmax = x_values.min() - x_margin, x_values.max() + x_margin
    ymin, ymax = y_values.min() - y_margin, y_values.max() + y_margin
    ax_boundary.set_xlim([xmin, xmax])
    ax_boundary.set_ylim([ymin, ymax])

    is_good = wine_data['quality'] > good_threshold
    is_bad = wine_data['quality'] < bad_threshold
    good_label = 'good wines (> {} score)'.format(good_threshold)
    bad_label = 'bad wines (< {} score)'.format(bad_threshold)
    ax_boundary.scatter(x_values[is_good], y_values[is_good], s=15, c='g', label=good_label)
    ax_boundary.scatter(x_values[is_bad], y_values[is_bad], s=15, c='m', label=bad_label)

    w = performance[-1][2]
    b = performance[-1][3]
    print("learned weights", w)
    print("learned bias", b)

    # Boundary: w[0] * x + w[1] * y + b = 0, solved for y at both x limits.
    x_plot = [xmin, xmax]
    y = [(-1 / w[1]) * (w[0] * x + b[0]) for x in x_plot]
    ax_boundary.plot(x_plot, y, label="decision boundary", linestyle='dashed', color="navy")

    # The model predicts "good" where w[0] * x + w[1] * y + b >= 0; that is
    # the region above the line only when w[1] is positive.
    if w[1] > 0:
        above_color, below_color = "g", "m"
    else:
        above_color, below_color = "m", "g"
    ax_boundary.fill_between(x_plot, ymin, y, alpha=0.2, color=below_color)
    ax_boundary.fill_between(x_plot, y, ymax, alpha=0.2, color=above_color)

    ax_boundary.set_xlabel(x_name)
    ax_boundary.set_ylabel(y_name)
    ax_boundary.set_title('Decision boundary on epoch {}'.format(epoch))
    ax_boundary.legend(loc=(1.01, 0.82))
    plt.show()

In [None]:
# Train a two-input perceptron on the raw data (epochs=0 selects the
# trainer's built-in epoch limit) and plot the result for thresholds 7 / 4.
my_perceptron = Perceptron(input_size=2)
performance = my_perceptron.train(red_wine_df, l_rate=0.9, epochs=0)
plot_perceptron_performance(red_wine_df, performance, good_threshold=7, bad_threshold=4)

#### d) Feature scaling for faster training

In [None]:
def normalize_data(wine_data):
    """Return a copy of `wine_data` with every feature column rescaled.

    Each column except 'quality' has its mean subtracted and is then divided
    by its range (max - min). The 'quality' column is carried over unchanged
    and placed last. The input frame is not modified.
    """
    features = wine_data.drop(columns="quality")
    spread = features.max() - features.min()
    scaled = (features - features.mean()) / spread
    scaled["quality"] = wine_data["quality"]
    return scaled

In [None]:
# Normalise once and reuse the result for both training and plotting.
norm_wine_df = normalize_data(red_wine_df)
my_perceptron = Perceptron(2)
norm_perf = my_perceptron.train(norm_wine_df, l_rate=0.9, epochs=0)
plot_perceptron_performance(norm_wine_df, norm_perf, 7, 4)

### V.3 My fair ADALINE

#### a) Perceptrons don't work when the data is not linearly separable

In [None]:
# Same perceptron setup, but with thresholds 6 / 4 for the good / bad classes.
my_perceptron = Perceptron(input_size=2)
performance = my_perceptron.train(red_wine_df, l_rate=0.9, epochs=0, thres=(6, 4))
plot_perceptron_performance(red_wine_df, performance, good_threshold=6, bad_threshold=4)

#### b)&c) Implement an ADALINE with gradient descent and a training function

In [None]:
class Adaline:
    """ADAptive LInear NEuron trained by gradient descent on the linear output.

    Predictions are obtained by thresholding the linear output to +1 / -1.

    Attributes
    ----------
    input_size : number of input features.
    weights : 1-D float array with one weight per input feature.
    bias : float array of length 1 (plot_perceptron_performance reads it
        as b[0]).
    perf : list of (epoch, errors, weights, bias) tuples, set by train().
    """
    def __init__(self, input_size):
        """Draw `input_size` weights and one bias uniformly from [-1, 1].

        Raises ValueError when `input_size` is smaller than 1.
        """
        if input_size < 1:
            raise ValueError("input_size must be at least 1")
        self.input_size = input_size
        draws = [random.uniform(-1, 1) for _ in range(input_size)]
        # copy=True yields independent, writable arrays; update_weights
        # modifies them in place with +=.
        self.weights = pd.Series(draws, dtype=float).to_numpy(copy=True)
        self.bias = pd.Series([random.uniform(-1, 1)], dtype=float).to_numpy(copy=True)
    def heaviside_step_fn(self, nb):
        """Return 1 when `nb` is non-negative, otherwise -1."""
        return 1 if nb >= 0 else -1
    def forward_pass(self, X):
        """Return the linear (un-thresholded) output for one feature vector."""
        return sum(self.weights * X) + self.bias
    def evaluate(self, features, labels):
        """Return the number of samples whose thresholded output differs from its label."""
        return sum(1 for X, y in zip(features, labels)
                   if self.heaviside_step_fn(self.forward_pass(X)) != y)
    def update_weights(self, w_gradient, b_gradient, l_rate):
        """Move bias and weights along the given gradients, scaled by `l_rate`."""
        self.bias += b_gradient * l_rate
        self.weights += w_gradient * l_rate
    def next_batch(self, features, labels, batch_size):
        """Yield consecutive (features, labels) slices of at most `batch_size` samples."""
        for start in range(0, len(labels), batch_size):
            stop = start + batch_size
            yield features[start:stop], labels[start:stop]
    def batch_processing(self, features, labels, l_rate, batch_size):
        """Run one pass over the data, updating the model once per batch.

        The update of a batch is the sum (not the mean) of the per-sample
        terms (y - output) * X, all computed with the weights the batch
        started with.
        """
        for batch_X, batch_y in self.next_batch(features, labels, batch_size):
            w_gradient = 0.
            b_gradient = 0.
            for X, y in zip(batch_X, batch_y):
                gradient = y - self.forward_pass(X)
                w_gradient = w_gradient + gradient * X
                b_gradient = b_gradient + gradient
            self.update_weights(w_gradient, b_gradient, l_rate)
    def train(self, data, l_rate, epochs, thres=(7, 4), training="online",
                feature_list=["pH", "alcohol"], nb=38, labels=(1, -1)):
        """Train on the good / bad wines of `data` and return the history.

        Parameters
        ----------
        data : DataFrame passed to select_features_labels.
        l_rate : learning rate.
        epochs : maximum number of passes over the data; 0 means
            "up to 200 epochs".
        thres : (good_threshold, bad_threshold) pair.
        training : "online" updates after every sample; any other value
            updates after every batch of 32 samples.
        feature_list : feature columns to train on.
        nb : training stops as soon as an epoch ends with at most this many
            classification errors.
        labels : (good_label, bad_label) encoding used for the targets.

        Returns
        -------
        List of (epoch, errors, weights, bias) tuples. Entry 0 describes the
        untrained model; entry k the model after k epochs, so the last entry
        always matches the current weights and bias.
        """
        batch_size = 1 if training == "online" else 32
        features, labels = select_features_labels(data, thres[0], thres[1],
                                                  labels=labels, feature_list=feature_list)
        if epochs == 0:
            epochs = 200
        perf = [(0, self.evaluate(features, labels), copy.copy(self.weights), copy.copy(self.bias))]
        for epoch in range(1, epochs + 1):
            # Train first, then measure, so the recorded errors and weights
            # describe the model after this epoch.
            self.batch_processing(features, labels, l_rate, batch_size)
            errors = self.evaluate(features, labels)
            perf.append((epoch, errors, copy.copy(self.weights), copy.copy(self.bias)))
            if errors <= nb:
                break
        self.perf = perf
        return perf

In [None]:
# ADALINE with online (per-sample) updates on the raw data, thresholds 6 / 5.
my_adaline = Adaline(input_size=2)
performance = my_adaline.train(red_wine_df, l_rate=0.00099, epochs=0, thres=(6, 5), training="online")
plot_perceptron_performance(red_wine_df, performance, good_threshold=6, bad_threshold=5)

In [None]:
# ADALINE with batch updates on the raw data, thresholds 6 / 5.
my_adaline = Adaline(input_size=2)
performance = my_adaline.train(red_wine_df, l_rate=0.0003, epochs=0, thres=(6, 5), training="batch")
plot_perceptron_performance(red_wine_df, performance, good_threshold=6, bad_threshold=5)

In [None]:
# Normalise once and reuse the result for both training and plotting.
norm_wine_df = normalize_data(red_wine_df)
my_adaline = Adaline(2)
norm_perf = my_adaline.train(norm_wine_df, l_rate=0.025, epochs=0, thres=(6, 5), training="batch")
plot_perceptron_performance(norm_wine_df, norm_perf, 6, 5)

### V.4 Advanced wine sampling and resampling

#### a) Holdout method to partition data in training and validation

In [None]:
def partition_data(wine_data, validation_split, random_state=None):
    """Shuffle `wine_data` and split it into training and validation sets.

    Parameters
    ----------
    wine_data : DataFrame to split; it is not modified.
    validation_split : fraction of the rows (between 0 and 1) that goes to
        the validation set; the count is rounded with round().
    random_state : optional seed forwarded to DataFrame.sample so the
        shuffle can be reproduced. None (the default) gives a different
        shuffle on every call.

    Returns
    -------
    (training, validation) : two DataFrames taken from the shuffled,
        re-indexed data; together they contain every row exactly once.

    Raises
    ------
    ValueError
        If `validation_split` lies outside [0, 1].
    """
    if not 0 <= validation_split <= 1:
        raise ValueError("validation_split must be between 0 and 1")
    shuffled = wine_data.sample(frac=1, random_state=random_state).reset_index(drop=True)
    validation_size = round(len(shuffled) * validation_split)
    training_size = len(shuffled) - validation_size
    return (shuffled.iloc[:training_size], shuffled.iloc[training_size:])

#### b) k-fold cross-validation dataset

In [None]:
def k_fold_validation(wine_data, k, shuffle=True):
    """Split `wine_data` into k (training, validation) pairs.

    The rows are cut into k consecutive folds whose sizes differ by at most
    one (the first `len(wine_data) % k` folds get the extra row). Pair i
    uses fold i as validation set and the concatenation of all the other
    folds as training set.

    Parameters
    ----------
    wine_data : DataFrame to split; it is not modified.
    k : number of folds, at least 1.
    shuffle : when true the rows are shuffled (and re-indexed) first.

    Returns
    -------
    List of k (training, validation) DataFrame tuples. With k == 1 the
    training frame is empty but keeps the columns of `wine_data`.

    Raises
    ------
    ValueError
        If `k` is smaller than 1.
    """
    if k < 1:
        raise ValueError("k must be at least 1")
    if shuffle:
        wine_data = wine_data.sample(frac=1).reset_index(drop=True)

    base_size, remainder = divmod(len(wine_data), k)
    folds = []
    start = 0
    for fold_index in range(k):
        stop = start + base_size + (1 if fold_index < remainder else 0)
        folds.append(wine_data.iloc[start:stop])
        start = stop

    k_tuples = []
    for held_out in range(k):
        others = [fold for index, fold in enumerate(folds) if index != held_out]
        # One concat per pair; pd.concat rejects an empty list, which only
        # happens when k == 1.
        training = pd.concat(others) if others else wine_data.iloc[0:0]
        k_tuples.append((training, folds[held_out]))
    return k_tuples

#### c) Effects on changing learning rate and epochs: k-fold cross-validation

In [None]:
def adaline_k_fold_validation(wine_data, l_rate, k, shuffle=True,
                              epochs=10, thres=(6, 5)):
    """Train an ADALINE with k-fold cross-validation and print the errors.

    Only wines whose quality is strictly above thres[0] or strictly below
    thres[1] are used, with 'pH' and 'alcohol' as features. For epoch 0 (the
    untrained model) and after every epoch, the training and validation
    errors averaged over the k folds are printed together with the current
    weights and bias.

    Parameters
    ----------
    wine_data : DataFrame with 'pH', 'alcohol' and 'quality' columns.
    l_rate : learning rate handed to Adaline.train.
    k : number of folds.
    shuffle : forwarded to k_fold_validation.
    epochs : number of epochs; 0 is rejected with a printed message.
    thres : (good_threshold, bad_threshold) pair.

    Returns
    -------
    None

    NOTE(review): one single Adaline instance is trained on every fold in
    turn, so each validation fold has also been used for training through
    the other folds. Confirm whether one model per fold was intended.
    """
    if epochs == 0:
        print("epochs can't be 0")
        return None

    # Targets must use the same +1 / -1 encoding that Adaline.evaluate
    # produces; scoring against 0-labelled targets would count every bad
    # wine as an error.
    adaline_labels = (1, -1)

    keep = (wine_data['quality'] > thres[0]) | (wine_data['quality'] < thres[1])
    selected = wine_data.loc[keep, ['pH', 'alcohol', 'quality']]
    k_folds = k_fold_validation(selected, k, shuffle=shuffle)

    # The arrays of a fold never change, so extract them once up front.
    fold_arrays = []
    for training, validation in k_folds:
        fold_arrays.append((
            select_features_labels(training, thres[0], thres[1], labels=adaline_labels),
            select_features_labels(validation, thres[0], thres[1], labels=adaline_labels),
        ))
    my_adaline = Adaline(2)

    def report(epoch, errors, val_errors):
        # Percentages are relative to the size of the last fold, as the
        # folds differ in size by at most one row.
        n_train = len(fold_arrays[-1][0][1])
        n_val = len(fold_arrays[-1][1][1])
        print("epoch {}: average training     errors: {:<4}/{:<4} => {:<2}%".format(
            epoch, errors, n_train, round(errors / n_train * 100, 2)))
        print("         average validation   errors: {:<4}/{:<4} => {:<2}%".format(
            val_errors, n_val, round(val_errors / n_val * 100, 2)))
        print("weights learned", my_adaline.weights)
        print("bias learned", my_adaline.bias)
        print()

    # Epoch 0: score the untrained model on every fold.
    errors = 0
    val_errors = 0
    for (features, labels), (val_features, val_labels) in fold_arrays:
        errors += my_adaline.evaluate(features, labels)
        val_errors += my_adaline.evaluate(val_features, val_labels)
    report(0, errors / len(k_folds), val_errors / len(k_folds))

    for epoch in range(epochs):
        errors = 0
        val_errors = 0
        for fold, arrays in zip(k_folds, fold_arrays):
            (features, labels), (val_features, val_labels) = arrays
            # One training pass on this fold, scored immediately afterwards.
            my_adaline.train(fold[0], l_rate=l_rate, epochs=1, thres=thres)
            errors += my_adaline.evaluate(features, labels)
            val_errors += my_adaline.evaluate(val_features, val_labels)
        report(epoch + 1, errors / len(k_folds), val_errors / len(k_folds))

##### Learning rate too high: training diverges instead of converging to a minimum

In [None]:
# 10-fold cross-validation for 10 epochs with a learning rate of 0.1.
adaline_k_fold_validation(red_wine_df, l_rate=0.1, k=10, epochs=10)

##### Learning rate too small with too few epochs: performance does not improve

In [None]:
# 10-fold cross-validation for 3 epochs with a learning rate of 1e-7.
adaline_k_fold_validation(red_wine_df, l_rate=0.0000001, k=10, epochs=3)

In [None]:
# 10-fold cross-validation for 8 epochs with a learning rate of 0.003.
adaline_k_fold_validation(red_wine_df, l_rate=0.003, k=10, epochs=8)

##### If the learning rate is set well there is no need for many epochs

In [None]:
# 10-fold cross-validation for 3 epochs with a learning rate of 0.001.
adaline_k_fold_validation(red_wine_df, l_rate=0.001, k=10, epochs=3)

### V.5 Adventures in the Nth dimension

#### a) Trying more and different chemical factors

In [None]:
# Batch-mode ADALINE on three features; training runs for at most 2000
# epochs and stops early once at most 25 wines are misclassified (nb=25).
feature_list = ["pH", "alcohol", "sulphates"]
my_adaline = Adaline(3)
perf = my_adaline.train(red_wine_df, l_rate=0.0003, epochs=2000,
                        thres=(6, 5), training="batch", feature_list=feature_list, nb=25)

In [None]:
# Print every (epoch, errors, weights, bias) entry recorded by the last train() call.
for p in my_adaline.perf:
    print(p)

In [None]:
# Batch-mode ADALINE on four features; training runs for at most 2000
# epochs and stops early once at most 25 wines are misclassified (nb=25).
feature_list = ["pH", "alcohol", "sulphates", "chlorides"]
my_adaline = Adaline(4)
perf = my_adaline.train(red_wine_df, l_rate=0.0003, epochs=2000,
                        thres=(6, 5), training="batch", feature_list=feature_list, nb=25)

In [None]:
# Print every (epoch, errors, weights, bias) entry recorded by the last train() call.
for p in my_adaline.perf:
    print(p)

In [None]:
# Batch-mode ADALINE on a different feature pair; training stops early once
# at most 40 wines are misclassified (nb=40), then the result is plotted
# with chlorides on the x axis and sulphates on the y axis.
feature_list = ["chlorides", "sulphates"]
my_adaline = Adaline(2)
perf = my_adaline.train(red_wine_df, l_rate=0.03, epochs=2000,
                        thres=(6, 5), training="batch", feature_list=feature_list, nb=40)
plot_perceptron_performance(red_wine_df, perf, 6, 5, feature_list=feature_list, epoch=2000)

#### b) Multiple dimensions decision boundary

In the case of 3 dimensions the decision boundary will be a plane that separates the data in a 3d space. In 4 or higher dimensions it will be a hyperplane 1 dimension lower than the space.

### V.6 Marvin's rebuttal

#### a) Pan-Galactic Gargle Blaster dataset

In [None]:
# Load the Pan Galactic Gargle Blaster dataset; the CSV uses ';' as its field separator.
galactic_df = pd.read_csv("resources/Pan Galactic Gargle Blaster.csv", sep=";")

In [None]:
# Preview the first rows of the dataset.
galactic_df.head()

In [None]:
# Summary statistics for every numeric column.
galactic_df.describe()

In [None]:
# Scatter matrix of the raw galactic data on a 3x3 grid; quality 5 is the cut-off on both sides.
plot_scatter_matrix(galactic_df, good_threshold=5, bad_threshold=5, rows=3, cols=3)

In [None]:
def transform_data(galactic_data):
    """Return a transformed copy of `galactic_data`; the input is left untouched.

    Only the 'wonderflonium' column is rewritten, in four successive steps
    (each step sees the result of the previous one):

    1. values strictly between 3.756 and 4.689 are halved;
    2. rows whose 'fallian marsh gas' is strictly between 3.770 and 4.689
       have their 'wonderflonium' halved;
    3. values above 1.2 are increased by 10;
    4. values below 1.2 are increased by 5.

    Working on a copy makes the function safe to call repeatedly on the
    same frame.
    """
    wonder = "wonderflonium"
    gas = "fallian marsh gas"
    transformed = galactic_data.copy()

    in_wonder_band = (transformed[wonder] > 3.756) & (transformed[wonder] < 4.689)
    transformed.loc[in_wonder_band, wonder] /= 2

    # NOTE(review): the band is tested on 'fallian marsh gas' but the value
    # halved is 'wonderflonium' - kept as in the original; confirm intended.
    in_gas_band = (transformed[gas] > 3.770) & (transformed[gas] < 4.689)
    transformed.loc[in_gas_band, wonder] /= 2

    # The second shift is evaluated after the first, so values that were
    # just raised by 10 are no longer below 1.2 and are not shifted twice.
    transformed.loc[transformed[wonder] > 1.2, wonder] += 10
    transformed.loc[transformed[wonder] < 1.2, wonder] += 5
    return transformed

In [None]:
# Reload the raw dataset and apply transform_data to it.
galactic_df = pd.read_csv("resources/Pan Galactic Gargle Blaster.csv", sep=";")
galactic_data = transform_data(galactic_df)

In [None]:
# Scatter matrix of the transformed galactic data on a 3x3 grid; quality 5 is the cut-off on both sides.
plot_scatter_matrix(galactic_data, good_threshold=5, bad_threshold=5, rows=3, cols=3)

In [None]:
# Reload and transform the dataset, then train a batch-mode ADALINE on the
# two features for at most 100 epochs; nb=0 stops training early only when
# no sample is misclassified. Finally plot the error curve and boundary.
galactic_df = pd.read_csv("resources/Pan Galactic Gargle Blaster.csv", sep=";")
galactic_data = transform_data(galactic_df)
feature_list = ["wonderflonium", "fallian marsh gas"]
my_adaline = Adaline(2)
perf = my_adaline.train(galactic_data, l_rate=0.0005, epochs=100,
                        thres=(5, 5), training="batch", feature_list=feature_list, nb=0)
plot_perceptron_performance(galactic_data, perf, 5, 5, feature_list=feature_list, epoch=100)