In [None]:
from Code.KNearestNeighbors import Knn
from Code.PolynomialRegression import PolynomialRegression
import pandas as pd
import numpy as np
import math
import csv
import sys
import time

In [None]:
def readInData(filename, x_col=1, y_col=11):
    """Load two numeric columns from a CSV file.

    Every row is parsed value-by-value with ``float`` and stored as a
    ``float32`` array. A row containing a value that cannot be parsed (for
    example a header row) is reported on stdout and skipped. Blank rows are
    skipped silently.

    Parameters
    ----------
    filename : str
        Path of the CSV file to read.
    x_col : int, optional
        Index of the column returned first (default 1).
    y_col : int, optional
        Index of the column returned second (default 11).

    Returns
    -------
    tuple of numpy.ndarray
        ``(data[:, x_col], data[:, y_col])`` taken from the parsed rows.

    Notes
    -----
    If the file does not exist, "File Not Found" is printed and the process
    is terminated through ``sys.exit(1)``.
    """
    rows = []
    try:
        # The csv module asks for newline='' so the reader itself handles
        # line endings (including newlines embedded in quoted fields).
        with open(filename, mode='r', newline='') as file:
            for line in csv.reader(file):
                if not line:
                    # csv.reader yields [] for a blank line; keeping it would
                    # make the stacked array ragged and break column slicing.
                    continue
                try:
                    parsed = [float(value) for value in line]
                except ValueError:
                    # Best effort: non-numeric rows (e.g. the header) are
                    # reported and dropped rather than aborting the load.
                    print('Could not convert to float')
                    continue
                rows.append(np.array(parsed, dtype=np.float32))
    except FileNotFoundError:
        print("File Not Found")
        sys.exit(1)

    data = np.array(rows)
    return data[:, x_col], data[:, y_col]

In [None]:
# Load the two columns that readInData extracts from the red-wine CSV.
red_wine_X, red_wine_y = readInData('Data/winequality-red.csv')

# The leading 80% of the samples (rounded up) become the training set and the
# remainder is held out for testing. The split is positional: no shuffling.
length_train_set = math.ceil(0.8 * len(red_wine_X))

train_X, test_X = red_wine_X[:length_train_set], red_wine_X[length_train_set:]
train_y, test_y = red_wine_y[:length_train_set], red_wine_y[length_train_set:]

# Fold count handed to the fit() call of each model.
folds = 6


In [None]:
# Maximum degree passed to PolynomialRegression.fit().
d_max = 12

# perf_counter() is a monotonic, high-resolution clock, so the measured
# interval cannot be skewed by system clock adjustments the way time.time()
# can.
start = time.perf_counter()

# Copies are passed so the model never works on the notebook's own arrays.
reg = PolynomialRegression()
reg.fit(train_X.copy(), train_y.copy(), d_max, folds)
reg.determineWFinal(train_X.copy(), train_y.copy())
predictions = reg.predict_y(test_X.copy(), test_y.copy())
reg.Loss(predictions, test_y)

end = time.perf_counter()

# Elapsed seconds for fit + final weights + prediction + loss.
runtime_1 = end - start

In [None]:
# Tabulate reg.avg_rmses with rows labelled 0..d_max.
reg_rmses = pd.DataFrame(reg.avg_rmses, columns=["RMSE"], index=range(d_max + 1))

reg_rmses

In [None]:
# Single-row summary of the values stored on the fitted regression object.
reg_final_values = pd.DataFrame({
    "d*": [reg.d_final],
    "w*": [reg.w_final],
    "Loss": [reg.loss]
})

# Parenthesised chain instead of backslash continuations; the Styler is the
# cell's last expression so it is what gets rendered.
(
    reg_final_values.style
    .set_caption("Polynomial Regression Final Values")
    .format(precision=7, decimal=".")
    .relabel_index([""], axis=0)  # blank out the lone row label
)

In [None]:
# Inclusive bounds of the neighbor counts passed to Knn.fit().
# NOTE(review): these values are large relative to a typical training split;
# confirm Knn handles k values close to (or above) the size of a CV fold.
neighbors_min = 1267
neighbors_max = 1280

# perf_counter() is a monotonic, high-resolution clock, so the measured
# interval cannot be skewed by system clock adjustments the way time.time()
# can.
start = time.perf_counter()

knn = Knn()

# Copies are passed so the model never works on the notebook's own arrays.
knn.fit(train_X.copy(), train_y.copy(), folds, neighbors_min, neighbors_max)
knn.Loss(train_X.copy(), train_y.copy(), test_X.copy(), test_y.copy())

end = time.perf_counter()

# Elapsed seconds for the KNN fit plus loss evaluation.
runtime_2 = end - start

In [None]:
# Tabulate knn.avg_rmses with rows labelled neighbors_min..neighbors_max.
knn_rmses = pd.DataFrame(
    knn.avg_rmses,
    index=range(neighbors_min, neighbors_max+1),
    columns=["RMSE"])

# The Styler must be the cell's final expression to be rendered. Previously
# it was built and discarded, and the bare DataFrame was shown without the
# caption or the 10-digit precision.
(
    knn_rmses.style
    .set_caption("K Nearest Neighbors RMSE's")
    .format(precision=10, decimal=".")
)

In [None]:
# k_final is shifted by neighbors_min before display — presumably it is a
# zero-based position within the searched range; confirm against Knn.
best_k = knn.k_final + neighbors_min

knn_finals_values = pd.DataFrame({"k*": [best_k], "Loss": [knn.loss]})

(
    knn_finals_values.style
    .format(precision=7, decimal=".")
    .relabel_index([""], axis=0)  # blank out the lone row label
)

In [None]:
# Side-by-side elapsed times recorded by the two model cells.
runtimes = pd.DataFrame(
    {"Polynomial Regression": [runtime_1], "K Nearest Neighbors": [runtime_2]}
)

(
    runtimes.style
    .set_caption("Runtime (seconds)")
    .format(precision=7, decimal=".")
    .relabel_index([""], axis=0)  # blank out the lone row label
)