In [1]:
import numpy as np
import matplotlib.pyplot as plt
from module import generate_data, calculate_error, non_linear_target_function, linear_regression, pocket_pla, transform_data

## Non Linear Regression
Do 1000 executions of:
1. Choose a target function.
2. Generate 1000 points data from the distribution $(X \in [1, -1] \times [1, -1])$ and classify it with the target function.
3. Randomly select 10% of the points and invert their labels
4. Run the Linear Regression algorithm and the Linear Regression algorithm with transformation
5. Evaluate on a set of 1000 test points.

#### Run Linear Regression without any transformation, using the vector of attributes(1, x2, x2) to find the weight w. What is the approximate classification value of the average within-sample error Ein?

In [3]:

# Experimento
def experiment(num_runs, num_points):
    ein_list = []
    for _ in range(num_runs):
        nlg_tg = non_linear_target_function()
        X, y = generate_data(num_points, nlg_tg, noise_ratio=0.1)
        w = linear_regression(X, y) 
        ein = calculate_error(X, y, w)
        ein_list.append(ein)

    return np.mean(ein_list), np.std(ein_list)

# Parâmetros
num_runs = 1000
num_points = 1000

# Executar o experimento
mean_ein, std_ein = experiment(num_runs, num_points)
print(f"Ein mean: {mean_ein}")
print(f"Ein std: {std_ein}")


Ein mean: 0.50624
Ein std: 0.04278037400491025


#### Now, transform the N = 1000 training data following the non-linear attribute vector $(1, x_1, x_2, x_1x_2, x_1^2, x_2^2)$. Find the vector we that corresponds to the solution of the Linear Regression. Which of the following hypotheses is closest to the one you found? Evaluate the average result obtained after 1000 runs.

In [5]:


# Experimento
def experiment(num_runs, num_points):
    weights_list = []

    for _ in range(num_runs):
        nlg_tg = non_linear_target_function()
        X, y = generate_data(num_points, nlg_tg, noise_ratio=0.1)
        X_transformed = transform_data(X)
        w = linear_regression(X_transformed, y)
        weights_list.append(w)

    weights_mean = np.mean(weights_list, axis=0)
    return weights_mean

# Parâmetros
num_runs = 1000
num_points = 1000

# Executar o experimento
weights_mean = experiment(num_runs, num_points)
print(f"Pesos médios após 1000 execuções: {weights_mean}")

# Hipóteses fornecidas
hypotheses = {
    "a": np.array([-1, -0.05, 0.08, 0.13, 1.5, 1.5]),
    "b": np.array([-1, -0.05, 0.08, 0.13, 1.5, 15]),
    "c": np.array([-1, -0.05, 0.08, 0.13, 15, 1.5]),
    "d": np.array([-1, -1.5, 0.08, 0.13, 0.05, 0.05]),
    "e": np.array([-1, -0.05, 0.08, 1.5, 0.15, 0.15]),
}

# Comparar pesos médios encontrados com as hipóteses fornecidas
for key, hypothesis in hypotheses.items():
    distance = np.linalg.norm(weights_mean - hypothesis)
    print(f"Distância da hipótese {key}: {distance}")

# Encontre a hipótese mais próxima
closest_hypothesis = min(hypotheses, key=lambda k: np.linalg.norm(weights_mean - hypotheses[k]))
print(f"Hipótese mais próxima: {closest_hypothesis}")

Pesos médios após 1000 execuções: [-4.96454501e-01 -4.96454501e-01 -8.84360381e-05 -6.98526674e-04
  2.06003453e-03  1.55825422e+00  1.55536331e+00]


ValueError: operands could not be broadcast together with shapes (7,) (6,) 

#### Qual o valor mais próximo do erro de classificação fora-de-amostra Eout de sua hipótese na questão anterior? (Estime-o gerando um novo conjunto de 1000 pontos e usando 1000 execuções diferentes, como antes).

In [12]:
def experiment(num_runs, num_points_train, num_points_test):
    eout_list = []

    for _ in range(num_runs):
        # Dados de treinamento
        X_train, y_train = generate_data(num_points_train, noise_ratio=0.1)
        X_train_transformed = transform_data(X_train)
        w = linear_regression(X_train_transformed, y_train)
        
        # Dados de teste
        X_test, y_test = generate_data(num_points_test, noise_ratio=0.0)
        X_test_transformed = transform_data(X_test)
        eout = calculate_error(X_test_transformed, y_test, w)
        
        eout_list.append(eout)

    return np.mean(eout_list), np.std(eout_list)

In [14]:
import numpy as np


# Experimento
def experiment(num_runs, num_points_train, num_points_test):
    eout_list = []

    for _ in range(num_runs):
        # Dados de treinamento
        X_train, y_train = generate_data(num_points_train)
        X_train_transformed = transform_data(X_train)
        w = linear_regression(X_train_transformed, y_train)
        
        # Dados de teste
        X_test, y_test = generate_data(num_points_test, noise_ratio=0.0)
        X_test_transformed = transform_data(X_test)
        eout = calculate_eout(X_test_transformed, y_test, w)
        
        eout_list.append(eout)

    return np.mean(eout_list), np.std(eout_list)

# Parâmetros
num_runs = 1000
num_points_train = 1000
num_points_test = 1000

# Executar o experimento
mean_eout, std_eout = experiment(num_runs, num_points_train, num_points_test)
print(f"Média de E_out: {mean_eout}")
print(f"Desvio padrão de E_out: {std_eout}")

Média de E_out: 0.032398
Desvio padrão de E_out: 0.011196320645640693
