In [14]:
import torch
import torch.nn as nn
import numpy as np
from torch.optim import optimizer
from sklearn import datasets
import matplotlib.pyplot as plt
from tqdm import tqdm
from torch.nn.functional import mse_loss
from torchmetrics.functional import r2_score
import pandas as pd
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
house_dataset = fetch_california_housing()

df = pd.DataFrame(
    house_dataset.data,
    columns=house_dataset.feature_names
)
df.loc[:,"Price"] = house_dataset.target

from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
df.loc[:,:] = scaler.fit_transform(df)

col_feature = house_dataset.feature_names
col_target = ["Price"]

y = torch.Tensor(df[col_target].to_numpy())
x = torch.Tensor(df[col_feature].to_numpy())

feature_train, feature_test, train_target, test_target = train_test_split(x, y, test_size=0.2)

n_samples, n_features = x.shape

# 1) model
class LinearRegression(nn.Module):
    
    def __init__(self, input_dim, output_dim):
        super(LinearRegression, self).__init__()
        
        # define layers
        self.linear =  nn.Sequential(
                nn.Linear(input_dim, 300),
                nn.Tanh(),
                nn.Linear(300, 128),
                nn.Tanh(),
                nn.Linear(128, output_dim),
            )
        
    def forward(self, x):
    
        return self.linear(x)

model = LinearRegression(n_features, 1)

# 2) loss and optimizer
learning_rate = 0.001
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)








In [15]:
feature_train

tensor([[-2.3791e-01, -9.2485e-01,  1.8690e+00,  ..., -8.8901e-02,
          1.5816e+00, -1.5874e+00],
        [ 6.3467e-01, -3.6864e-01,  1.0537e-02,  ..., -7.2468e-03,
         -8.4829e-01,  8.1344e-01],
        [ 3.9755e+00, -6.0702e-01,  6.9107e-01,  ..., -2.2362e-02,
         -7.3592e-01,  5.1396e-01],
        ...,
        [ 8.7843e-01, -1.1632e+00,  3.3450e-01,  ..., -2.7074e-02,
         -1.0168e+00,  1.1129e+00],
        [-4.3119e-01, -6.0702e-01, -4.0641e-02,  ..., -1.1610e-03,
          7.8568e-01, -4.3439e-01],
        [-6.7606e-01,  1.1411e+00, -3.2529e-01,  ...,  2.0181e-02,
          1.0947e+00, -1.3877e+00]])

In [19]:
# 3) training loop
epochs = 2000
for epoch in range(epochs):
    model.train()
    # forward pass and loss
    y_predicted = model(feature_train)
    loss = criterion(y_predicted, train_target)
    r2 = r2_score(y_predicted, train_target)
    # backward pass
    loss.backward()
    # update
    optimizer.step()
    
    # init optimizer
    optimizer.zero_grad()
    print(f'epoch: {epoch+1}, loss = {loss.item(): .4f}, R2 = {r2}')
    
    model.eval()
    y_predicted = model(feature_train)
    loss = criterion(y_predicted, train_target)
    r2 = r2_score(y_predicted, train_target)
    
    print(f'test: loss = {loss.item(): .4f}, R2 = {r2}')

epoch: 1, loss =  0.0756, R2 = 0.9243568181991577
test: loss =  0.0757, R2 = 0.9243043661117554
epoch: 2, loss =  0.0757, R2 = 0.9243043661117554
test: loss =  0.0757, R2 = 0.9242651462554932
epoch: 3, loss =  0.0757, R2 = 0.9242651462554932
test: loss =  0.0757, R2 = 0.9242469668388367
epoch: 4, loss =  0.0757, R2 = 0.9242469668388367
test: loss =  0.0757, R2 = 0.9242547154426575
epoch: 5, loss =  0.0757, R2 = 0.9242547154426575
test: loss =  0.0757, R2 = 0.924291729927063
epoch: 6, loss =  0.0757, R2 = 0.924291729927063
test: loss =  0.0756, R2 = 0.9243543744087219
epoch: 7, loss =  0.0756, R2 = 0.9243543744087219
test: loss =  0.0755, R2 = 0.9244295358657837
epoch: 8, loss =  0.0755, R2 = 0.9244295358657837
test: loss =  0.0755, R2 = 0.9244977831840515
epoch: 9, loss =  0.0755, R2 = 0.9244977831840515
test: loss =  0.0754, R2 = 0.9245430827140808
epoch: 10, loss =  0.0754, R2 = 0.9245430827140808
test: loss =  0.0754, R2 = 0.9245599508285522
epoch: 11, loss =  0.0754, R2 = 0.9245599

In [13]:
model.train()
out_loss, num, tot_loss = 0, 0, []
y_out, y_tar = torch.Tensor([]),torch.Tensor([])
data_loader = tqdm(data_loader)
for step, (x, y) in enumerate(data_loader):

    x, y = x.cuda(), y.cuda()
    losses = model(x, y)
    tot_loss.append(losses)
        
    pred = model.inference(x)
    y_out = torch.cat((y_out, pred.cpu()), 0)
    y_tar = torch.cat((y_tar, y.cpu()), 0)
    #cor += (pred.argmax(-1) == y).sum().item()
    out_loss += mse_loss(pred, y, reduction='sum')
    num += x.size(0)
    
    data_loader.set_description(f'Train {epoch} | out_loss {torch.sqrt(out_loss/num)}')

train_out = mse_loss(y_out, y_tar).item()
train_r2 = r2_score(y_out, y_tar).item()
train_loss = numpy.sum(tot_loss, axis=0)

print(f'Train Epoch{epoch} out_loss {train_out}, R2 {train_r2}')

NameError: name 'tqdm' is not defined

In [16]:
col_target = data.columns[:6]
col_feature1 = data.columns[6:33].to_list() # 27 cols
col_feature2 = data.columns[33:43].to_list() # 10 cols
col_feature3 = data.columns[43:103].to_list() # 60 cols
col_feature4 = data.columns[103:].to_list() # 28 cols
y = data[col_target]
x = data[col_feature1 + col_feature2 + col_feature3 + col_feature4]
x = x.fillna(0)
x.shape

(635, 125)

In [17]:
clean_train, clean_test, train_label, test_label = train_test_split(x, y, test_size=0.2)
train_label.to_numpy()[0]

array([ 92, 100,  78,  78,  84,  68])

In [18]:
print((y == 0).sum())
print((y.isna()).sum())

sensor_point5_i_value     0
sensor_point6_i_value     0
sensor_point7_i_value     0
sensor_point8_i_value     0
sensor_point9_i_value     0
sensor_point10_i_value    0
dtype: int64
sensor_point5_i_value     0
sensor_point6_i_value     0
sensor_point7_i_value     0
sensor_point8_i_value     0
sensor_point9_i_value     0
sensor_point10_i_value    0
dtype: int64


In [19]:

for col in [col_feature1,col_feature2,col_feature3,col_feature4]:
    print((x[col]==0).sum(axis=0).value_counts().sort_index(),"\n")
    


0      22
238     1
286     3
621     1
dtype: int64 

0    10
dtype: int64 

0      20
16      2
18      3
21      5
131     5
162    10
164     3
167     1
171     1
573    10
dtype: int64 

0      15
27      5
65      5
111     1
117     1
579     1
dtype: int64 



In [20]:
for col in [col_feature1,col_feature2,col_feature3,col_feature4]:
    print((x[col]==0).sum(axis=1).value_counts().sort_index(),"\n")

1    321
2     28
3     14
4     62
5    210
dtype: int64 

0    635
dtype: int64 

0      62
10    345
11      4
12      3
15     33
20     69
25      3
30    109
38      2
40      5
dtype: int64 

0     52
1    380
2     86
3     25
6      4
7     88
dtype: int64 



In [48]:
# LinearAL 

data = "paint"
model =  "linearal"
for layer in range(1,11):
#for layer in [3]:
    log = f"result/{data}_{model}_l{layer}.log"
    !python3 dis_train_al.py --dataset {data} --model {model} --epoch 500 --num-layer {layer} --task regression  > {log}

  x = torch.tensor([x for x,y in batch], dtype=torch.float32)
Train 0 | out_loss 79.63187408447266: 100%|███████| 8/8 [00:00<00:00, 20.38it/s]
Train 1 | out_loss 79.25166320800781: 100%|██████| 8/8 [00:00<00:00, 695.60it/s]
Train 2 | out_loss 78.55249786376953: 100%|██████| 8/8 [00:00<00:00, 633.23it/s]
Train 3 | out_loss 78.12552642822266: 100%|██████| 8/8 [00:00<00:00, 704.66it/s]
Train 4 | out_loss 77.69197082519531: 100%|██████| 8/8 [00:00<00:00, 641.07it/s]
Train 5 | out_loss 77.02802276611328: 100%|██████| 8/8 [00:00<00:00, 704.56it/s]
Train 6 | out_loss 76.23699951171875: 100%|██████| 8/8 [00:00<00:00, 699.82it/s]
Train 7 | out_loss 75.69731140136719: 100%|██████| 8/8 [00:00<00:00, 703.52it/s]
Train 8 | out_loss 75.17314147949219: 100%|██████| 8/8 [00:00<00:00, 696.66it/s]
Train 9 | out_loss 74.6423110961914: 100%|███████| 8/8 [00:00<00:00, 709.26it/s]
Train 10 | out_loss 73.97625732421875: 100%|█████| 8/8 [00:00<00:00, 704.51it/s]
Train 11 | out_loss 73.28217315673828: 100%|███

In [28]:
from mit_d3m import load_dataset
import pandas as pd
from sklearn.model_selection import train_test_split
args.task = "regression"
args.feature_dim = 40
df = load_dataset('LL0_296_ailerons')

col_feature = df.X.columns[1:]
#col_target= df.y.columns[:]

y = df.y
x = df.X[col_feature]
x = x.fillna(0)
feature_train, feature_test, train_target, test_target = train_test_split(x, y, test_size=0.2)

NameError: name 'args' is not defined

In [37]:
train_target.to_numpy().shape

(8800,)

In [30]:
# LinearAL ailerons

data = "ailerons"

model =  "linearal"
#for layer in range(1,11):
for layer in [3]:
    log = f"result/{data}_{model}_l{layer}.log"
    !python3 dis_train_al.py --dataset {data} --model {model} --epoch 300 --num-layer {layer} --lr 0.00001 --task regression # > {log}



Start Training
  x = torch.tensor([x for x,y in batch], dtype=torch.float32)
Train 0 | out_loss 0.09604600071907043: 100%|█| 138/138 [00:00<00:00, 174.00it/s
Train Epoch0 out_loss -54904.53125
Test Epoch0 layer0 out_loss 0.3226132094860077
Test Epoch0 layer1 out_loss 0.261636346578598
Test Epoch0 layer2 out_loss 0.0825749933719635
Train 1 | out_loss 0.0734870508313179: 100%|█| 138/138 [00:00<00:00, 332.80it/s]
Train Epoch1 out_loss -32141.44140625
Test Epoch1 layer0 out_loss 0.259891539812088
Test Epoch1 layer1 out_loss 0.191255584359169
Test Epoch1 layer2 out_loss 0.06840277463197708
Train 2 | out_loss 0.06319232285022736: 100%|█| 138/138 [00:00<00:00, 334.77it/s
Train Epoch2 out_loss -23766.646484375
Test Epoch2 layer0 out_loss 0.34146255254745483
Test Epoch2 layer1 out_loss 0.13012146949768066
Test Epoch2 layer2 out_loss 0.05562148615717888
Train 3 | out_loss 0.05656816065311432: 100%|█| 138/138 [00:00<00:00, 292.12it/s
Train Epoch3 out_loss -19044.921875
Test Epoch3 layer0 out_loss

Collecting torchmetrics
  Downloading torchmetrics-0.9.3-py3-none-any.whl (419 kB)
[K     |████████████████████████████████| 419 kB 1.7 MB/s eta 0:00:01
Installing collected packages: torchmetrics
Successfully installed torchmetrics-0.9.3


In [44]:
import statistics
print(statistics.mean(y))
print(statistics.stdev(y))

-0.0008725115898554677
0.0004098966061174112
