In [19]:
import numpy as np
import os
import pandas as pd
from DataSim2 import DataSim
from Lin_regression import LinRegression

In [86]:
# Build the simulated dataset that the regression and coefficient-norm cells
# read from (`ds.latent_x`, `ds.hd_x`, `ds.non_linear_data_noisy`, `ds.y`,
# `ds.beta`).
# NOTE(review): the meaning of each keyword is defined in DataSim2.DataSim and
# is not visible in this notebook — confirm there before changing a value.
# The downstream column splits at 10 (latent) and 50 (high-dim) presumably
# follow from latent_dim / high_dim times non_linear_ratio; verify.
ds = DataSim(
    n_samples=10000,
    latent_dim=20,
    high_dim=100,
    std_A=5,
    random_seed=2111,
    non_linear_ratio=0.5,
    cross_ratio=0,
    sparsity=0.5,
    s2nr=1,
)

In [87]:
# Column indices at which each feature matrix is cut into a leading "nl" part
# and a trailing "lin" part.
# NOTE(review): presumably "nl" = non-linear and "lin" = linear, and the values
# mirror the DataSim configuration (20 latent / 100 high-dim columns, half
# non-linear) — confirm against DataSim before changing the simulation setup.
latent_split = 10
hd_split = 50

# Candidate design matrices, keyed by the label that becomes the row name in
# the printed score table. Insertion order determines row order.
# NOTE(review): 'hd__x' has a double underscore unlike its sibling keys
# ('nl_hd_x', 'lin_hd_x'); it is left unchanged here because it is the label
# that appears in the recorded output.
x_dict = {
    'latent_x': ds.latent_x,
    'nl_latent_x': ds.latent_x[:, :latent_split],
    'lin_latent_x': ds.latent_x[:, latent_split:],
    'hd__x': ds.hd_x,
    'nl_hd_x': ds.hd_x[:, :hd_split],
    'lin_hd_x': ds.hd_x[:, hd_split:],
    'transformed_hd_x': ds.non_linear_data_noisy,
    'nl_transformed_hd_x': ds.non_linear_data_noisy[:, :hd_split],
    'lin_transformed_hd_x': ds.non_linear_data_noisy[:, hd_split:],
}

# Shared regression target for every feature set.
y = ds.y

# NOTE(review): the meaning of the third positional argument (True) is defined
# in Lin_regression.LinRegression and is not visible here.
lr = LinRegression(x_dict, y, True)
scores = lr.get_scores()

# Transpose so that each feature set is a row and each metric a column.
score_table = pd.DataFrame(scores).transpose()
print(score_table)

                             mse       rmse        mae        r2  adjusted_r2
latent_x                0.978047   0.988963   0.787408  0.998153     0.998149
nl_latent_x           311.158337  17.639681  14.091954  0.412335     0.411688
lin_latent_x          223.475478  14.949096  11.947853  0.577936     0.577471
hd__x                   0.978047   0.988963   0.787408  0.998153     0.998134
nl_hd_x               311.158337  17.639681  14.091954  0.412335     0.409323
lin_hd_x              223.475478  14.949096  11.947853  0.577936     0.575772
transformed_hd_x      529.436843  23.009495  18.314035  0.000087    -0.010117
nl_transformed_hd_x   529.436843  23.009495  18.314035  0.000087    -0.005040
lin_transformed_hd_x  281.795663  16.786770  13.344451  0.467791     0.465062


In [79]:
# Compare the size of the coefficient vector on either side of index 10.
# np.linalg.norm is called with its defaults (Euclidean norm if ds.beta is
# 1-D — TODO confirm the shape of ds.beta).

# First 10 coefficients, reported as the non-linear part.
norm_nl = np.linalg.norm(ds.beta[:10])
print(f'Norm of non-linear part: {norm_nl}')

# Remaining coefficients, reported as the linear part.
norm_lin = np.linalg.norm(ds.beta[10:])
print(f'Norm of linear part: {norm_lin}')

Norm of non-linear part: 14.992706267886545
Norm of linear part: 17.58935845030641


In [89]:
# Sanity check: fit ordinary least squares on one stored simulation set and
# compute R^2 by hand.
# The first row of the CSV is skipped as a header.
data = np.genfromtxt('data4/39_sim_10000_1000_50_1_0_0.5_0.1/set_1/data.csv', delimiter=',', skip_header=1)

# genfromtxt turns fields it cannot parse (or that are missing) into NaN
# instead of failing; catch that here rather than reporting a NaN R^2.
if np.isnan(data).any():
    raise ValueError('data.csv contains missing or non-numeric values (parsed as NaN)')

# Every column except the last is a feature; the last column is the response.
x = data[:, :-1]
# NOTE: this rebinds the notebook-level `y` that the scoring cell set from
# `ds.y`; re-run that cell before reusing `y` for the simulated data.
y = data[:, -1]

# Least-squares fit. lstsq minimises ||x @ beta - y||_2 directly instead of
# solving the normal equations (x.T @ x) beta = x.T @ y, which squares the
# condition number and fails outright when x is rank-deficient. For a
# full-rank x both give the same coefficients up to rounding.
# NOTE(review): no intercept column is added, so this is a fit through the
# origin while R^2 below is measured against the mean of y — confirm the
# stored data is centred, otherwise R^2 is not the usual OLS R^2.
beta_hat = np.linalg.lstsq(x, y, rcond=None)[0]
y_hat = x @ beta_hat

# R^2 = 1 - SS_res / SS_tot
r2 = 1 - np.sum((y - y_hat) ** 2) / np.sum((y - np.mean(y)) ** 2)

print(f"R^2: {r2}")

R^2: 0.5902745382456951
