In [11]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import copy

In [12]:
def layer_sizes(X, Y, n_h):
    """Return the (input, hidden, output) layer widths of the network.

    Args:
        X: array whose first axis length is the number of input features.
        Y: array whose first axis length is the number of outputs.
        n_h: number of hidden units, passed through unchanged.

    Returns:
        Tuple ``(n_x, n_h, n_y)``.
    """
    input_width, output_width = X.shape[0], Y.shape[0]
    return input_width, n_h, output_width

In [13]:
def initialize_parameters(n_x, n_h, n_y):
    """Create the initial weights and biases of the two-layer network.

    Weights are drawn from a standard normal distribution via the global
    NumPy random state; biases start at zero.

    Args:
        n_x: number of input features.
        n_h: number of hidden units.
        n_y: number of outputs.

    Returns:
        Dict with "W1" (n_h, n_x), "b1" (n_h, 1), "W2" (n_y, n_h) and
        "b2" (n_y, 1).
    """
    # Dict entries are evaluated top to bottom, so W1 is drawn before W2;
    # that ordering determines which random numbers each matrix receives.
    return {
        "W1": np.random.randn(n_h, n_x),
        "b1": np.zeros((n_h, 1)),
        "W2": np.random.randn(n_y, n_h),
        "b2": np.zeros((n_y, 1)),
    }

In [14]:
def forward_propagation(X, parameters):
    """Run a forward pass: affine layer, ReLU, affine layer, identity output.

    Args:
        X: input array with one column per example.
        parameters: dict holding "W1", "b1", "W2" and "b2".

    Returns:
        Tuple ``(A2, cache)`` where ``A2`` is the network output and
        ``cache`` maps "Z1", "A1", "Z2", "A2" to the intermediate arrays.
    """
    hidden_w, hidden_b, out_w, out_b = (
        parameters[key] for key in ("W1", "b1", "W2", "b2")
    )

    hidden_pre = np.dot(hidden_w, X) + hidden_b      # (n_h, m)
    hidden_act = np.maximum(0, hidden_pre)           # ReLU
    out_pre = np.dot(out_w, hidden_act) + out_b      # (n_y, m)
    out_act = out_pre                                # linear output for regression

    return out_act, dict(Z1=hidden_pre, A1=hidden_act, Z2=out_pre, A2=out_act)

In [15]:
def compute_cost(A2, Y):
    """Compute the squared-error cost averaged over the examples.

    The cost is ``(1/m) * sum((A2 - Y) ** 2)``, where ``m = Y.shape[1]`` is
    the number of examples (columns). Summing over every entry keeps the
    result a scalar for any number of output rows; a matrix product of the
    residual with its transpose would yield an (n_y, n_y) matrix as soon as
    there is more than one output row.

    Args:
        A2: predictions, same shape as ``Y``.
        Y: targets with one column per example.

    Returns:
        Scalar cost.
    """
    m = Y.shape[1]

    residual = A2 - Y
    cost = (1/m) * np.sum(residual ** 2)

    return cost

In [16]:
def backward_propagation(parameters, cache, X, Y):
    """Compute gradients of the cost ``(1/m) * sum((A2 - Y) ** 2)``.

    The network is affine -> ReLU -> affine with an identity output, so the
    output-layer error signal is simply ``(2/m) * (A2 - Y)``.

    Args:
        parameters: dict; only "W2" is read here.
        cache: dict with "A1", "Z1" and "A2" from the forward pass.
        X: inputs with one column per example (``m = X.shape[1]``).
        Y: targets, same shape as ``cache["A2"]``.

    Returns:
        Dict with "dW1", "db1", "dW2", "db2".
    """
    m = X.shape[1]

    W2 = parameters["W2"]

    A1 = cache["A1"]
    Z1 = cache["Z1"]
    A2 = cache["A2"]

    # The residual is shared by every gradient; compute it once.
    residual = A2 - Y
    scale = 2/m

    dW2 = scale*np.dot(residual, A1.T)
    db2 = scale*np.sum(residual, axis=1, keepdims=True)
    # ReLU derivative: 1 where Z1 >= 0 (the value at exactly 0 is a convention).
    dZ1 = scale*np.dot(W2.T, residual)*np.where(Z1 >= 0, 1, 0)
    dW1 = np.dot(dZ1, X.T)
    db1 = np.sum(dZ1, axis=1, keepdims=True)

    grads = {"dW1": dW1,
             "db1": db1,
             "dW2": dW2,
             "db2": db2}

    return grads

In [17]:
def update_parameters(parameters, grads, learning_rate):
    """Apply one gradient-descent step and return the new parameters.

    Each parameter is deep-copied before being updated, so the arrays in
    the ``parameters`` argument are left untouched.

    Args:
        parameters: dict with "W1", "b1", "W2", "b2".
        grads: dict with the matching "dW1", "db1", "dW2", "db2".
        learning_rate: step size multiplying each gradient.

    Returns:
        New dict with the updated "W1", "b1", "W2", "b2".
    """
    stepped = {}
    for name in ("W1", "b1", "W2", "b2"):
        value = copy.deepcopy(parameters[name])
        value -= learning_rate*grads["d" + name]
        stepped[name] = value

    return stepped

In [18]:
def optimise(X, Y, n_h, num_iterations, learning_rate=0.01, print_every=750, seed=3):
    """Train the one-hidden-layer network with full-batch gradient descent.

    Args:
        X: inputs with one column per example.
        Y: targets with one column per example.
        n_h: number of hidden units.
        num_iterations: number of gradient-descent steps.
        learning_rate: step size for each update (default 0.01, the value
            that used to be hard-coded).
        print_every: print the cost every this many iterations, measured
            before that iteration's update; ``0`` or ``None`` disables
            printing (default 750).
        seed: value passed to ``np.random.seed`` before initialisation so
            runs are repeatable; ``None`` leaves the global random state
            untouched (default 3).

    Returns:
        Dict of trained parameters "W1", "b1", "W2", "b2".
    """
    if seed is not None:
        np.random.seed(seed)

    n_x, _, n_y = layer_sizes(X, Y, n_h)

    parameters = initialize_parameters(n_x, n_h, n_y)

    for i in range(num_iterations):
        A2, cache = forward_propagation(X, parameters)

        # The cost is only needed for reporting, so skip it otherwise.
        if print_every and i % print_every == 0:
            cost = compute_cost(A2, Y)
            print(f"the error after {i} iterations = {cost}")

        grads = backward_propagation(parameters, cache, X, Y)
        parameters = update_parameters(parameters, grads, learning_rate)

    return parameters


In [19]:
def predict(parameters, X):
    """Return the network output for ``X`` under the given ``parameters``.

    Runs ``forward_propagation`` and discards the cache of intermediates.
    """
    output, _ = forward_propagation(X, parameters)
    return output

In [20]:
def split_and_scale(df, scaler, target="quality", test_size=0.2, random_state=42):
    """Split a wine DataFrame into train/test sets and standardise the features.

    The scaler is re-fitted on the training features only and then applied
    to the test features. Arrays are returned transposed: features have one
    column per example and targets are a single row.

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test)``.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        df.drop(columns=[target]).values, df[target].values,
        test_size=test_size,
        random_state=random_state
    )

    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    return X_train.T, X_test.T, y_train.reshape(-1,1).T, y_test.reshape(-1,1).T


def evaluate_mse(parameters, X, y):
    """Return ``(predictions, mean squared error)`` of the network on ``X``/``y``."""
    predictions = predict(parameters, X)
    return predictions, np.mean((predictions-y)**2)


scaler = StandardScaler()

# Red wine
df1 = pd.read_csv("winequality-red.csv", sep=";")

X_train1, X_test1, y_train1, y_test1 = split_and_scale(df1, scaler)

parameters = optimise(X_train1, y_train1, 32, 6000)
predictions, MSE = evaluate_mse(parameters, X_train1, y_train1)
print(f"Error for red wine train {MSE}")
predictions, MSE = evaluate_mse(parameters, X_test1, y_test1)
print(f"Error for red wine test {MSE}")


# White wine (the same scaler object is re-fitted on the white training set)
df2 = pd.read_csv("winequality-white.csv", sep=";")

X_train2, X_test2, y_train2, y_test2 = split_and_scale(df2, scaler)

parameters = optimise(X_train2, y_train2, 32, 6000)
predictions, MSE = evaluate_mse(parameters, X_train2, y_train2)
print(f"Error for white wine train {MSE}")
predictions, MSE = evaluate_mse(parameters, X_test2, y_test2)
print(f"Error for white wine test {MSE}")

the error after 0 iterations = 211.63052197734123
the error after 750 iterations = 0.5521634144214619
the error after 1500 iterations = 0.4520530325477524
the error after 2250 iterations = 0.41322055743940617
the error after 3000 iterations = 0.39165545943143004
the error after 3750 iterations = 0.3822945033267468
the error after 4500 iterations = 0.3758989491284076
the error after 5250 iterations = 0.3705737209042003
Error for red wine train 0.365636638367511
Error for red wine test 0.3871497455873727
the error after 0 iterations = 149.21024618513127
the error after 750 iterations = 0.624382659677012
the error after 1500 iterations = 0.5376495549262699
the error after 2250 iterations = 0.5158668625304956
the error after 3000 iterations = 0.5083542780398159
the error after 3750 iterations = 0.5038778510871976
the error after 4500 iterations = 0.5008251900610724
the error after 5250 iterations = 0.4984726049174764
Error for white wine train 0.49638452539951666
Error for white wine test 