In [5]:
import numpy as np
from sklearn.model_selection import train_test_split
import csv
import sys

In [None]:
def load_data(directory):
    data = np.array()
    return data

In [32]:
def load_data_shopping(directory):
    with open(directory) as f:
        reader = csv.DictReader(f)

        evidence = []
        labels = []

        months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'June', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

        for row in reader:
            row_evidence = [
                int(row["Administrative"]),
                float(row["Administrative_Duration"]),
                int(row["Informational"]),
                float(row["Informational_Duration"]),
                int(row["ProductRelated"]),
                float(row["ProductRelated_Duration"]),
                float(row["BounceRates"]),
                float(row["ExitRates"]),
                float(row["PageValues"]),
                float(row["SpecialDay"]),
                months.index(row["Month"]),
                int(row["OperatingSystems"]),
                int(row["Browser"]),
                int(row["Region"]),
                int(row["TrafficType"]),
                1 if row["VisitorType"] == "Returning_Visitor" else 0,
                1 if row["Weekend"] == "TRUE" else 0
            ]

            label = 1 if row["Revenue"] == "TRUE" else 0

            evidence.append(row_evidence)
            labels.append(label)

        return (evidence, labels)

In [1]:
def initialize_params(layers: int, n_neurons: list):
    if len(n_neurons) != layers:
        return "Invalid number of neurons per layer."
    weights = []
    bias = []
    for i in range(layers - 1):
        W = np.random.randn(n_neurons[i + 1], n_neurons[i])
        b = np.random.randn(n_neurons[i + 1], 1)
        weights.append(W)
        bias.append(b)
    return (weights, bias)

In [52]:
SUPPORTED_ACTIVATION_FUNCTIONS = ("ReLU", "Sigmoid")
SUPPORTED_LOSS_FUNCTIONS = ("MSE", "Cross-entropy")

def forward(X, weights, bias, functions):
    if len(functions) != len(weights):
        return "Invalid number of functions."
    A = []
    Z = []
    a = X
    for i in range(len(weights)):
        z = a.dot(weights[i].T) + bias[i].T
        Z.append(z)
        f = functions[i]
        if f not in SUPPORTED_ACTIVATION_FUNCTIONS:
            return "Function type '" + f + "' not supported."
        if f == "ReLU":
            a = ReLU(z)
        elif f == "Sigmoid":
            a = Sigmoid(z)
        elif f == "Softmax":
            a = Softmax(z)
        else:
            return "Specify activation function."
        A.append(a)
    return A, Z

def backward(A, W, Z, y, functions, loss):
    n_layers = len(A)
    gradients_w = []
    gradients_b = []
    for i in range(n_layers):
        print(n_layers)
        if functions[n_layers - i - 1] not in SUPPORTED_ACTIVATION_FUNCTIONS:
            return "Function type '" + functions[n_layers - i - 1] + "' not supported."
        if functions[n_layers - i - 1] == "ReLU":
            dadz = ReLU_derivative(Z[n_layers - i - 1])
        elif functions[n_layers - i - 1] == "Sigmoid":
            dadz = Sigmoid_derivative(Z[n_layers - i - 1])
        else:
            return "Invalid activation function type."
        if loss not in SUPPORTED_LOSS_FUNCTIONS:
            return "Loss function type '" + loss + "' not supported."
        if i == 0:
            if loss == "MSE":
                djda = MSE_derivative(A[n_layers - i - 1], y)
            elif loss == "Cross-entropy":
                djda = Cross_entropy_derivative(A[n_layers - i - 1], y)
            dz = djda * dadz
        else:
            dz = dz.dot(W[n_layers - i - 1].T) * dadz
        dw = A[n_layers - i - 2].T.dot(dz)
        db = np.mean(dz, axis=0, keepdims=True).T
        gradients_w.insert(0,dw)
        gradients_b.insert(0,db)
    return gradients_w, gradients_b

def Sigmoid(z):
    return 1 / (1 + np.exp(-z))

def Sigmoid_derivative(z):
    s = Sigmoid(z)
    return s * (1 - s)

def ReLU(z):
    return np.maximum(0, z)

def ReLU_derivative(z):
    return (z > 0).astype(float)

def Softmax(z):
    ez = np.exp(z - np.max(z))
    return ez / np.sum(ez)

def Softmax_derivative(z):
    S = Softmax(z)
    return S * (1 - S)


def MSE_derivative(a, y):
    return (a - y) / len(y)

def Cross_entropy_derivative(a, y):
    return a - y

In [28]:
def update_params(W, b, grads_w, grads_b, alpha):
    for i in range(len(W)):
        W[i] = W[i] - alpha*grads_w[i].T
        b[i] = b[i] - alpha*grads_b[i].T
    return W, b

In [59]:
#load data

def backward(A, W, Z, y, functions, loss):
    n_layers = len(W)
    gradients_w = []
    gradients_b = []
    
    # Start from output layer and move backwards
    for i in reversed(range(n_layers)):
        if i == n_layers - 1:  # Output layer
            if loss == "MSE":
                djda = MSE_derivative(A[-1], y)
            elif loss == "Cross-entropy":
                djda = Cross_entropy_derivative(A[-1], y)
            else:
                return "Invalid loss function"
            
            if functions[-1] == "ReLU":
                dadz = ReLU_derivative(Z[-1])
            elif functions[-1] == "Sigmoid":
                dadz = Sigmoid_derivative(Z[-1])
            elif functions[-1] == "Softmax":
                dadz = Softmax_derivative(Z[-1])
            else:
                return "Invalid activation function"
            
            dz = djda * dadz
        else:  # Hidden layers
            if functions[i] == "ReLU":
                dadz = ReLU_derivative(Z[i])
            elif functions[i] == "Sigmoid":
                dadz = Sigmoid_derivative(Z[i])
            elif functions[i] == "Softmax":
                dadz = Softmax_derivative(Z[i])
            else:
                return "Invalid activation function"
            
            dz = dz.dot(W[i+1]) * dadz
        
        # Calculate gradients
        if i == 0:
            a_prev = X
        else:
            a_prev = A[i-1]
            
        dw = a_prev.T.dot(dz) / len(y)
        db = np.mean(dz, axis=0, keepdims=True)
        
        gradients_w.insert(0, dw)
        gradients_b.insert(0, db)
    
    return gradients_w, gradients_b

""" evidence, labels = load_data_shopping("shopping.csv")
X_train, X_test, y_train, y_test = train_test_split(
    evidence, labels, test_size=0.3
)

y = np.eye(2)[y_train]
X = np.array(X_train) """

X = np.array([
    [0.5, 1.2, 0.8],
    [1.5, 0.3, 1.0]
])

y = np.eye(2)[np.array([0, 1])]

#define network
alpha = 0.1
epochs = 50
layers = 3
n_neurons = [len(X[0]), 3, 2]
functions = ["ReLU", "Softmax"]
loss = "MSE"

weights, bias = initialize_params(layers, n_neurons)

#A, Z = forward(X, weights, bias, functions)

# gradients_w, gradients_b = backward(A, weights, Z, y, functions, loss)

# W, b = update_params(weights, bias, gradients_w, gradients_b, alpha)


while epochs > 0:
    print(epochs)
    A, Z = forward(X, weights, bias, functions)
    gradients_w, gradients_b = backward(A, weights, Z, y, functions, loss)
    weights, bias = update_params(weights, bias, gradients_w, gradients_b, alpha)
    epochs -= 1

A, Z = forward(X, weights, bias, functions)
print(A[1])

50


ValueError: too many values to unpack (expected 2)