In [1]:
import numpy as np
import pandas as pd

In [2]:
np.random.seed(20030403)

In [3]:
def linear(x):
    """ Liniowa funkcja aktywacji """
    return  x

In [4]:
def sigmoidal(x):
    """ sigmoidalna funkcja aktywacji """
    return 1/(1 + np.exp(-x))

In [5]:
print(linear(3))
print(sigmoidal(10))

3
0.9999546021312976


Działają jak należy :)

In [6]:
class MLPNoBackprop:
    def __init__(self, layers, weights, 
                 hidden_activation_function='sigmoid', 
                 output_activation_function='linear'):
        self.layers = layers
        self.weights = weights
        
        self.biases = [np.zeros((n,)) for n in layers[1:]]

        activation_functions = {"linear": linear, "sigmoid": sigmoidal}
        
        if hidden_activation_function in activation_functions:
            self.hidden_activation_function = activation_functions[hidden_activation_function]
        else:
            raise ValueError("Zla funkcja aktywacji, wybierz z:", activation_functions)
        
        if output_activation_function in activation_functions:
            self.output_activation_function = activation_functions[output_activation_function]
        else:
            raise ValueError("Zla funkcja aktywacji, wybierz z:", activation_functions)

    def forward(self, X):
        activation = X
        activations = [X]
        for i in range(len(self.weights) - 1):
            z = activation @ self.weights[i] + self.biases[i]
            activation = self.hidden_activation_function(z)
            activations.append(activation)
        z = activation @ self.weights[-1] + self.biases[-1]
        output = self.output_activation_function(z)
        activations.append(output)
        return activations

    def predict(self, X):
        return self.forward(X)[-1]
    
    def set_weights_and_biases(self, layer_idx, W, b):
        
        self.weights[layer_idx] = W
        self.biases[layer_idx] = b
    
    def mse(self, y_true, y_pred):
        return np.mean((y_true - y_pred) ** 2)


In [8]:
def drop_first_column(df):
    df.drop(df.columns[[0]], axis=1, inplace=True)

In [None]:

train_data_path = "regression/square-simple-training.csv"
test_data_path = "regression/square-simple-test.csv"
train_data = pd.read_csv(train_data_path)
test_data = pd.read_csv(test_data_path)
X_train, y_train = train_data.iloc[:, :-1].values, train_data.iloc[:, -1].values
X_test, y_test = test_data.iloc[:, :-1].values, test_data.iloc[:, -1].values



In [19]:
def random_weights(layers):
    return [np.random.randn(layers[i], layers[i+1]) for i in range(len(layers)-1)]

def random_biases(layers):
    return [np.random.randn(n) for n in layers[1:]]

In [59]:
input_dim = X_train.shape[1]
variants = [ # rozpratrze 3 mozliwosci - tak jak w leonie
    [input_dim, 5, 1],       
    [input_dim, 10, 1],
    [input_dim, 5, 5, 1]
]

In [61]:
num_iterations = 10000


best_weights_list = []
best_biases_list = []
mse_res = []

for i in range(len(variants)):
    layers = variants[i]
    mlp = MLPNoBackprop(layers, random_weights(layers))
    best_mse = float('inf')
    best_weights = None
    best_biases = None

    for j in range(num_iterations):
        weights = random_weights(layers)
        biases = random_biases(layers)
        
        for m in range(len(weights)):
            mlp.set_weights_and_biases(m, weights[m], biases[m])
        
        y_pred = mlp.predict(X_train)
        mse = mlp.mse(y_train, y_pred)
        
        if mse < best_mse:
            best_mse = mse
            best_weights = weights
            best_biases = biases

    for j in range(len(best_weights)):
        mlp.set_weights_and_biases(j, best_weights[j], best_biases[j])

    y_test_pred = mlp.predict(X_test)
    test_mse = mlp.mse(y_test, y_test_pred)

    print("Najlepszy MSE na zbiorze treningowym:", best_mse)
    print("MSE na zbiorze testowym:", test_mse)
    best_weights_list.append(best_weights)
    best_biases_list.append(best_biases)
    mse_res.append(test_mse)

print(mse_res)


Najlepszy MSE na zbiorze treningowym: 9767.819373204526
MSE na zbiorze testowym: 8257.811770030554
Najlepszy MSE na zbiorze treningowym: 9635.432495952991
MSE na zbiorze testowym: 8106.888014948694
Najlepszy MSE na zbiorze treningowym: 9681.277024217849
MSE na zbiorze testowym: 8151.6532844740195
[np.float64(8257.811770030554), np.float64(8106.888014948694), np.float64(8151.6532844740195)]


Słabe wyniki, spróbuję ręcznie wyszkolić lepiej

In [14]:
train_data_path = "regression/square-simple-training.csv"
test_data_path = "regression/square-simple-test.csv"
train_data = pd.read_csv(train_data_path)
test_data = pd.read_csv(test_data_path)
drop_first_column(train_data)
drop_first_column(test_data)
X_train, y_train = train_data.iloc[:, :-1].values, train_data.iloc[:, -1].values
X_test, y_test = test_data.iloc[:, :-1].values, test_data.iloc[:, -1].values


In [15]:
X_train.shape

(100, 1)

In [16]:
X_train

array([[-0.17154266],
       [ 0.02520055],
       [-1.36899138],
       [ 1.9073897 ],
       [ 0.01112937],
       [ 1.85151809],
       [ 1.60714217],
       [ 0.73992569],
       [ 1.89838314],
       [ 0.66593373],
       [-0.33300476],
       [-0.28638113],
       [-0.10530106],
       [ 1.24642471],
       [-1.36372228],
       [ 1.12078385],
       [ 0.87046891],
       [-0.90057484],
       [-0.58619213],
       [ 0.30044527],
       [ 0.86462546],
       [ 1.93986019],
       [ 1.15840494],
       [ 0.48270948],
       [ 1.47391401],
       [-0.83684123],
       [-0.55049685],
       [ 1.3985547 ],
       [ 0.92621687],
       [-0.65809341],
       [-1.34953921],
       [-1.00832317],
       [-0.74265105],
       [ 0.17789497],
       [-0.8090638 ],
       [ 1.01774543],
       [-1.47240341],
       [-0.18578512],
       [ 0.30042698],
       [-1.49450306],
       [ 0.53561401],
       [-0.94733177],
       [-0.24340093],
       [ 0.75971157],
       [ 1.21538177],
       [ 0

In [17]:
X_train.shape[1]

1

In [52]:
layers = [1, 5, 1]
mlp = MLPNoBackprop(layers, random_weights(layers))

In [45]:
weights_list = [5, 5, 5, 5, 5]  
bias_list = [10, 5, 0, -5, -10]
weights_vec = np.array(weights_list).reshape(-1, 1)  # shape (5,1)
bias_vec = np.array(bias_list).reshape(-1,1)





input_dim = X_train.shape[1]

In [46]:
mlp.weights
input_dim

1

In [47]:
for weight in mlp.weights:
    print(weight.shape)

(1, 5)
(5, 1)


In [54]:
mlp.set_weights_and_biases(1, weights_vec, bias_vec)


In [49]:
for weight in mlp.weights:
    print(weight.shape)

(1, 5)
(5, 1)


In [50]:
mlp.weights

[array([[-1.37466853, -1.56884228,  1.50276989,  0.81709288,  1.70306961]]),
 array([[5],
        [5],
        [5],
        [5],
        [5]])]

In [55]:
y_test_pred = mlp.predict(X_test)
test_mse = mlp.mse(y_test, y_test_pred)

ValueError: operands could not be broadcast together with shapes (100,1) (5,1) 

In [None]:
train_data_path = "regression/steps-small-training.csv"
test_data_path = "regression/steps-small-test.csv"

In [None]:
def random_weights(layers):
    """
    layers is a list, e.g. [1, 5, 1].
    Returns a list of weight matrices. 
    For [1,5,1], you'll get two matrices:
      shape (1,5) for the first,
      shape (5,1) for the second.
    """
    np.random.seed(0)  # for reproducibility
    weight_list = []
    for i in range(len(layers) - 1):
        in_size  = layers[i]
        out_size = layers[i+1]
        # For instance, shape=(in_size, out_size)
        W = np.random.randn(in_size, out_size)
        weight_list.append(W)
    return weight_list


train_data_path = "regression/square-simple-training.csv"
test_data_path  = "regression/square-simple-test.csv"

train_data = pd.read_csv(train_data_path)
test_data  = pd.read_csv(test_data_path)

X_train, y_train = train_data.iloc[:, :-1].values, train_data.iloc[:, -1].values
X_test,  y_test  = test_data.iloc[:, :-1].values, test_data.iloc[:, -1].values

input_dim = X_train.shape[1]  # e.g. 1 if there's only one feature
layers = [input_dim, 5, 1]

initial_weights = random_weights(layers)

mlp = MLPNoBackprop(
    layers=layers, 
    weights=initial_weights, 
    hidden_activation_function='sigmoid',
    output_activation_function='linear'
)


weights_vec = np.array([[5, 5, 5, 5, 5]])   # shape (1,5)
bias_vec    = np.array([10, 5, 0, -5, -10]) # shape (5,)

mlp.set_weights_and_biases(layer_idx=0, W=weights_vec, b=bias_vec)

y_test_pred = mlp.predict(X_test)
test_mse    = mlp.mse(y_test, y_test_pred)
print("Test MSE:", test_mse)

In [None]:
def random_init(layers):
    """
    Creates a list of random weight matrices for the MLP
    consistent with the layer sizes in 'layers'.
    E.g. if layers = [1, 5, 1], that is:
       W0 shape = (1,5)
       W1 shape = (5,1)
    """
    weights = []
    for i in range(len(layers)-1):
        in_size  = layers[i]
        out_size = layers[i+1]
        # random from e.g. U(-1,1)
        W = np.random.uniform(-1, 1, size=(in_size, out_size))
        weights.append(W)
    return weights

In [None]:
BEST_MSE = float('inf')
BEST_INFO = None

N_TRIALS = 50000  # the bigger, the better chance to find a small MSE
TARGET_MSE = 9.0

for arch in variants:
    print("=== Architecture:", arch, "===")
    
    # We'll store the best solution we find for this architecture
    best_mse_for_arch = float('inf')
    best_weights_for_arch = None
    best_biases_for_arch  = None

    # Make a model shell for this architecture (weights pass in next)
    # By default, we can pass an empty "weights" list, then set them:
    dummy_weights = [np.zeros((arch[i], arch[i+1])) for i in range(len(arch)-1)]
    model = MLPNoBackprop(layers=arch,
                          weights=dummy_weights,
                          hidden_activation_function='sigmoid',
                          output_activation_function='linear')

    for trial in range(N_TRIALS):
        # 1) Make random weights
        rand_weights = random_init(arch)
        # 2) Assign them to the model
        for layer_idx in range(len(rand_weights)):
            W = rand_weights[layer_idx]
            # biases of shape (layers[i+1],)
            b = np.random.uniform(-1, 1, size=(arch[layer_idx+1],))
            model.set_weights_and_biases(layer_idx, W, b)
        
        # 3) Compute MSE on test data
        y_pred_test = model.predict(X_test)
        mse_test = model.mse(y_test, y_pred_test)
        
        # 4) Check if better
        if mse_test < best_mse_for_arch:
            best_mse_for_arch = mse_test
            # Save all weights/biases
            best_weights_for_arch = [w.copy() for w in model.weights]
            best_biases_for_arch  = [b.copy() for b in model.biases]
        
        # 5) Early stop if below threshold
        if best_mse_for_arch < TARGET_MSE:
            break

    print(f"Best MSE for arch={arch} is {best_mse_for_arch:.4f}")
    if best_mse_for_arch < BEST_MSE:
        BEST_MSE = best_mse_for_arch
        BEST_INFO = (arch, best_weights_for_arch, best_biases_for_arch)




if BEST_INFO is not None:
    best_arch, best_weights, best_biases = BEST_INFO
    print("\n==== OVERALL BEST ====")
    print("Architecture:", best_arch)
    print("MSE on test:", BEST_MSE)
else:
    print("No improvements found (try increasing N_TRIALS).")