In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import os

def rmse(y_true, y_pred):
    squared_errors = np.power(y_true - y_pred, 2)
    mean_squared_error = np.mean(squared_errors)
    return np.sqrt(mean_squared_error)
    
def r_squared(y_true, y_pred):
    sum_of_squared_residuals = np.sum(np.power(y_true - y_pred, 2))
    mean_y = np.mean(y_true)
    total_sum_of_squares = np.sum(np.power(y_true - mean_y, 2))
    if total_sum_of_squares == 0:
        return 1.0
    return 1 - (sum_of_squared_residuals / total_sum_of_squares)

def linear(x, derivative=False):
    if derivative:
        return 1.0
    return x

def ReLU(x, derivative=False):
    if derivative:
        return (x > 0).astype(float)

    return np.maximum(0, x)

def logistical(x, derivative=False):
    x = np.clip(x, -500, 500)
    s = 1.0/(1.0 + np.exp(-x))
    if derivative:
        return s*(1-s)
    return s

In [5]:
class NeuronLayer:

    # Create a NeuronLayer with:
    #     n_in input neurons
    #     n_out output neurons
    #     n_in * n_out connections     
    #   Obs: if l_type == "input" this is just a big identity matrix :D
    def __init__(
        self,
        n_in,                  # Previous Layer Size / Input size
        n_out,                 # Current Layer Size
        l_n,
    ):
        self.l_n = l_n
        self.l_size = n_out
        self.error = np.ndarray((n_out, 1))
        if l_n == 0:
            self.weights = None
            self.biases = None
            self.l_size = n_in
            self.Z = np.ndarray((n_in, 1))
            self.A = np.ndarray((n_in, 1))
            return
            
        self.Z = np.ndarray((n_out, 1))
        self.A = np.ndarray((n_out, 1))
        self.weights = np.random.normal(loc=0.0, scale=np.sqrt(2/n_in), size=(n_out, n_in))
        self.biases = np.zeros((n_out, 1))

    
    
    def get_weights(self):
        return self.weights

    def get_biases(self):
        return self.biases

    def set_weights(self, w):
        self.weights = w

    def set_biases(self, b):
        self.biases = b

    def update_weights(self, learning_rate, gradW):
        self.weights -= learning_rate*gradW

    def update_biases(self, learning_rate, gradB):
        self.biases -= learning_rate*gradB

    
    def propagate_foward(self, input, activation):
        self.Z = np.dot(self.weights, input) + self.biases
        self.A = activation(self.Z)
        return self.A

    
    def propagate_backward(self, next_layer, prev_layer, m, activation):
        self.error = np.dot(next_layer.get_weights().transpose(), next_layer.error) * activation(self.Z, derivative=True)
        dZ = self.error * 1.0
        self.gradW = np.dot(dZ, prev_layer.A.T) / m
        self.gradB = np.sum(dZ, axis=1, keepdims=True) / m

        return (self.gradW, self.gradB)
        
        

class NeuralNetwork:

    # Creates an empty model. Params:
    #     layer_sizes: array with the size of each layer [input, hidden_layer_1, hidden_layer_2, ..., output]
    def __init__(
        self,
        layer_sizes,
        activation = ReLU,
        final_activation = linear,
        learning_rate = 0.0001,
        model_name = "default",
    ):
        self.model_name = model_name
        self.activation = activation
        self.final_activation = final_activation
        #self.cost_function = cost_function
        self.learning_rate = learning_rate
        self.n_of_layers = len(layer_sizes)
        self.layers = [0] * self.n_of_layers
        self.layers[0] = NeuronLayer(layer_sizes[0], layer_sizes[0], 0)
        
        for i in range(1, self.n_of_layers):
            self.layers[i] = NeuronLayer(layer_sizes[i-1], layer_sizes[i], i)


    def set_lr(self, learning_rate):
        self.learning_rate = learning_rate

        
    def get_layer_params(self, layer):
        return (self.layers[layer].get_weights(), self.layers[layer].get_biases())

        
    def print_layer_params(self, layer):
        if layer == 0:
            print(f"Layer 0 has {self.layers[0].l_size} inputs!")
            return
            
        w, b = self.get_layer_params(layer)
        print(f"Weights: ({w.shape[0]} rows & {w.shape[1]} columns) \n", w)
        print(f"Biases: ({b.shape[0]}) \n", b)

    
    def save_parameters(self):
        dir_path = f"model_params_{self.model_name}"
        os.makedirs(dir_path, exist_ok=True)
        for layer in self.layers:
            if layer.l_n == 0:
                continue

            pd.DataFrame(layer.get_weights()).to_csv(f"{dir_path}/{self.model_name}_w_{layer.l_n}", index=False, header=False)
            pd.DataFrame(layer.get_biases()).to_csv(f"{dir_path}/{self.model_name}b_{layer.l_n}", index=False, header=False)


    def load_parameters(self, dir_path):
        for layer in self.layers:
            if layer.l_n == 0:
                continue
    
            w = pd.read_csv(f"{dir_path}/{dir_path[13:]}_w_{layer.l_n}",
                            header=None).values
            b = pd.read_csv(f"{dir_path}/{dir_path[13:]}_b_{layer.l_n}",
                            header=None).values
    
            layer.set_weights(w)
            layer.set_biases(b)

    
    def foward(self, input):

        if input.shape[0] != self.layers[0].l_size:
            print("input wrong!")
            return None
            
        A_i = []
        for layer in self.layers:
            if layer.l_n == 0:
                A_i = input
                layer.A = A_i
                continue

            if layer.l_n == self.n_of_layers - 1:
                A_i = layer.propagate_foward(A_i, self.final_activation)
                return A_i
            
            A_i = layer.propagate_foward(A_i, self.activation)

    def backpropagation(self, y, m):
        y = np.asarray(y)
        prediction = self.layers[-1].A   
        
        for layer in reversed(self.layers):
            if layer.l_n == 0:
                continue
                
            prev_layer = self.layers[layer.l_n-1]
            
            if layer.l_n == self.n_of_layers-1:
                layer.error  = prediction - y
                layer.gradW = np.dot(layer.error, prev_layer.A.transpose()) / m
                layer.gradB = np.sum(layer.error, axis=1, keepdims=True) / m
                continue
                
            next_layer = self.layers[layer.l_n+1]
            gradW, gradB = layer.propagate_backward(next_layer, prev_layer, m, self.activation)

        for layer in self.layers:
            if layer.l_n == 0:
                continue
            
            layer.update_weights(self.learning_rate, layer.gradW)
            layer.update_biases(self.learning_rate, layer.gradB)


    def train(self, train_df, train_sol, epochs = 100):
        for epoch in range(epochs):
          for i in range(train_df.shape[0]):
              self.foward(train_df.iloc[i].values.reshape(-1, 1))
              self.backpropagation(train_sol.iloc[i].values.reshape(-1, 1), 1)
            

    def test(self, test_df):
        r = []
        for i in range(test_df.shape[0]):
            self.foward(test_df.iloc[i].values.reshape(-1, 1))
            r.append(self.layers[-1].A.item())

        return r

In [3]:
# Importação do conjunto de dados
url = 'https://raw.githubusercontent.com/Krumpu/Homework_ICA/main/Data-Melbourne_F.csv'
df = pd.read_csv(url)
df = df.drop(columns=['BOD','month', 'day','year'])

df_y = df[['COD']].copy()
df_x = df.drop(columns=['COD'])

# Divisão dos dados (80% para treino e 20% para teste)
X_train, X_test, y_train, y_test = train_test_split(df_x, df_y, test_size=0.2, random_state=42)

def df_z_normalize(df):
    mean = df.mean(axis=0)
    std_dev = df.std(axis=0)

    std_dev[std_dev == 0] = 1.0 
    
    df_normalized = (df - mean) / std_dev
    
    return df_normalized, mean, std_dev

df_train_X, trxMean, trxstd = df_z_normalize(X_train)
df_train_Y, tryMean, trystd = df_z_normalize(y_train)
df_test_X = (X_test - trxMean) / trxstd
df_test_Y = (y_test - tryMean) / trystd


nn = NeuralNetwork([df_train_X.shape[1], 32, 64, 20, 1], final_activation=linear, learning_rate=0.0001, model_name="mk4")

# This may take a while.... like 90 seconds
nn.train(df_train_X, df_train_Y, epochs=200)
nn.set_lr(0.00001)
nn.train(df_train_X, df_train_Y, epochs=200)
r = nn.test(df_test_X)

pred = np.array(r).reshape(-1,1)
pred_unscaled = pred * trystd.values + tryMean.values

v_rmse , v_r_squared = rmse(y_test.values, pred_unscaled), r_squared(y_test.values, pred_unscaled)

print(v_rmse, v_r_squared)

89.43529830454 0.5670763736921033


In [4]:
nn.save_parameters()

In [9]:
mm = NeuralNetwork([df_train_X.shape[1], 32, 64, 20, 1], final_activation=linear, learning_rate=0.0001, model_name="mk5")
mm.load_parameters("model_params_mk4")
r = mm.test(df_test_X)

pred = np.array(r).reshape(-1,1)
pred_unscaled = pred * trystd.values + tryMean.values

v_rmse , v_r_squared = rmse(y_test.values, pred_unscaled), r_squared(y_test.values, pred_unscaled)

print(v_rmse, v_r_squared)

89.43529830453998 0.5670763736921034
