In [17]:
import sys,os
import nn_models
from nn_models import *

import pandas as pd
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from parse import *

import tensorboard


from sklearn.linear_model import RidgeCV

from sklearn.linear_model import LinearRegression
import torch
from torch import nn
import pytorch_lightning as pl
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import RandomForestRegressor
import os


from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.decomposition import PCA


from datetime import datetime
import numpy as np
from sklearn.model_selection import train_test_split

from sklearn.metrics import r2_score
from sklearn.pipeline import Pipeline
from sklearn.ensemble import VotingRegressor

from sklearn.datasets import make_regression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
/home/nicolo_b/Desktop/PhD/IFX/Notebooks/deepLearning/TRANSFER_LEARNING_JOURNAL_PAPER


In [18]:
SEED = 3
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)
torch.backends.cudnn.deterministic = True
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

```markdown
`make_regression()` function from the `sklearn.datasets` module is used to create sample data

This code will generate a dataset with 1,000,000 samples and 27 features. The `effective_rank` parameter controls the linear dependency between the features, and the `n_informative` parameter determines the number of informative features. The `noise` parameter adds random noise to the data. The resulting dataset will be stored in the `X` variable.

Skip the cell if data are already generate
```


In [4]:
X, _  = make_regression(n_samples=1000000, n_features=27, effective_rank = 14, n_informative=27, noise=0.1, random_state=SEED)

# add non_linearity to mimic polynomial features interactions
ground_truth = Pipeline([
    ('scaler', StandardScaler()),
    ('poly', PolynomialFeatures(2)),
    ('pca', PCA(1,random_state=SEED)),
])

y = ground_truth.fit_transform(X).reshape(-1)
y = MinMaxScaler().fit_transform(y.reshape(-1,1)).reshape(-1)


np.save(f'X_{SEED}.npy', X)
np.save(f'y_{SEED}.npy', y)

```markdown
To load the numpy data, use the `np.load()` function:
```

In [26]:
X = np.load(f'X_{SEED}.npy')
y = np.load(f'y_{SEED}.npy')

```markdown
Train and test split
Add shift and noise to A2 to mimic data from different product
```

In [19]:

X_A1, X_A2, y_A1, y_A2 = train_test_split(X, y, test_size=0.005, random_state=SEED)

# add to A2 some noise, to mimic a shift in the distributions
y_A2 = y_A2 + np.random.normal(5,0.01,len(y_A2))


X_A1_train, X_A1_test, y_A1_train, y_A1_test = train_test_split(X_A1, y_A1, test_size=0.01, random_state=SEED)
X_A2_train, X_A2_test, y_A2_train, y_A2_test = train_test_split(X_A2, y_A2, test_size=0.95, random_state=SEED)



In [6]:
checkpoint_path = f'./checkpoints'
checkpoint_name_fe = 'SoftOrdering_PL_FE.ckpt'
checkpoint_name_fe_ae = 'SoftOrdering_AE_FE.ckpt'

```markdown
Define the parameters of the NNs
```

In [7]:
parameter_dict = {'input_dim':27,
        'sign_size':32,
        'cha_input':32,
        'cha_hidden':32,
        'K':2,
        'dropout_input':0.1,
        'dropout_hidden':0.1,
        'dropout_output':0.1}

parameter_dict_ae = {'input_dim':27,
        'sign_size':32,
        'cha_input':32,
        'cha_hidden':64,
        'K':4,
        'dropout_input':0.2,
        'dropout_hidden':0.2,
        'dropout_output':0.2}

```markdown
Train the NNs on data from product A1, with abdundance of data.
Skip if .ckpt was already generated
```

In [None]:
transformer = SoftOrderingTrasnformer(SoftOrdering1DCNN,parameter_dict,pre_trained_ckpt=None,
                                               device='cuda',allow_training=True,batch_size=256,
                                              val_ratio=0.15,random_state=42,callbacks = 
                 [EarlyStopping(monitor='valid_loss',min_delta=.00001,patience=30,verbose=True,mode='min')],
                 epochs=100)

reg = Pipeline([
        ('scaler',StandardScaler()),
        ('transformer',transformer),
        ('regressor',RidgeCV(alphas=np.logspace(-6,6,2000)))
        ])

reg.fit(X_A1_train,y_A1_train)
print()
transformer.freeze()
with torch.no_grad():
        print(f'Transformer score on A1: {reg.score(X_A1_test,y_A1_test)}')


```markdown
Save the .ckpt
```

In [23]:
i = SEED
transformer.save_ckpt(f'checkpoints/myckpt_{i}.ckpt')

```markdown
Test the NNs on product A2
```

In [20]:
checkpoint_name_fe = f'myckpt_{SEED}.ckpt'
#checkpoint_name_fe = 'SoftOrdering_PL_FE.ckpt'

transformer = SoftOrderingTrasnformer(SoftOrdering1DCNN,parameter_dict,pre_trained_ckpt=os.path.join(checkpoint_path,checkpoint_name_fe),
                                               device='cuda',allow_training=False)

transformer.transformer.load_state_dict(torch.load(os.path.join(checkpoint_path,checkpoint_name_fe)))

transformer.freeze()

reg = Pipeline([
        ('scaler',StandardScaler()),
        ('transformer',transformer),
        ('regressor',RidgeCV(alphas=np.logspace(-6,6,2000)))
        ])

reg.fit(X_A2_train,y_A2_train)
print()
print(f'Transformer score on A2: {reg.score(X_A2_test,y_A2_test)}')



Transformer score on A2: 0.9707414935039516


```markdown
Benckmarks plus columns transformer to select specific or random columns (features adaptation)
```

In [21]:
BEST_COLUMNS = np.arange(27)
COLUMNS = np.arange(27)
checkpoint_name_fe = f'myckpt_{SEED}.ckpt'
transformer = SoftOrderingTrasnformer(SoftOrdering1DCNN,parameter_dict,pre_trained_ckpt=os.path.join(checkpoint_path,checkpoint_name_fe),
                                               device='cuda',allow_training=False)
regressors_dict = {
    'Baseline': Pipeline([
        ('scaler',StandardScaler()),
        ('PCA',PCA(14)),
        ('transformer',PolynomialFeatures(2)),
        ('regressor',RidgeCV(alphas=np.logspace(-6,6,2000)))
        ]),
    'Dummy Voting CNN': VotingRegressor(
        [(f'Dummy CNN {i}',Pipeline([
            ('scaler',StandardScaler()),
            ('selector',DummySelector(columns=COLUMNS,features_len=27)),
            ('transformer',SoftOrderingTrasnformer(SoftOrdering1DCNN,parameter_dict,pre_trained_ckpt=os.path.join(checkpoint_path,checkpoint_name_fe),
                                               device='cuda',allow_training=False)),                
            ('regressor',RidgeCV(alphas=np.logspace(-6,6,2000)))])) for i in range(10)],n_jobs=1),
    
    'SoftOrderingCNN' : Pipeline([
    ('scaler',StandardScaler()),
    ('transformer',SoftOrderingTrasnformer(SoftOrdering1DCNN,parameter_dict,pre_trained_ckpt=os.path.join(checkpoint_path,checkpoint_name_fe),
                                               device='cuda',allow_training=False)),
    ('regressor',RidgeCV(alphas=np.logspace(-6,6,2000)))
    ]),
    
    'SoftOrderingCNN_AE' : Pipeline([
    ('scaler',StandardScaler()),
    ('transformer',SoftOrderingTrasnformer(SoftOrdering1DCNN_AutoEncoder,parameter_dict_ae,pre_trained_ckpt=os.path.join(checkpoint_path,checkpoint_name_fe_ae),
                                            device='cuda',allow_training=False)),
    ('regressor',RidgeCV(alphas=np.logspace(-6,6,2000)))
    ]),         
    
    'KNN-5': Pipeline([
        ('scaler',StandardScaler()),
        ('regressor',KNeighborsRegressor(n_neighbors=5,n_jobs=-1))
        ]),
    'RandomForest':
        Pipeline([
        ('scaler',StandardScaler()),
        ('regressor',RandomForestRegressor(n_estimators=100,n_jobs=-1))
        ]),
    'Linear Ridge': Pipeline([
        ('scaler',StandardScaler()),
        ('regressor',RidgeCV(alphas=np.logspace(-6,6,2000)))
        ]),

    'Linear Reg': Pipeline([
        ('scaler',StandardScaler()),
        ('regressor',LinearRegression())
        ])

    }

In [22]:
from collections import defaultdict
mean_scores = defaultdict(list)
for name,regr in regressors_dict.items():
    for i in range(5):
        X_train, X_test, y_train, y_test = train_test_split(X_A2,y_A2,test_size=0.2,random_state=i)
        regr.fit(X_train,y_train)
        y_prd = regr.predict(X_test)
        mean_scores[name].append(r2_score(y_test,y_prd))
        print(f'{name} {mean_scores[name][-1]}')
    print(f'{name} mean {np.mean(mean_scores[name])}')

Baseline 0.9685075582008883
Baseline 0.9718647309529165
Baseline 0.9665193066074514
Baseline 0.9714035896436429
Baseline 0.9692541417175766
Baseline mean 0.9695098654244951
Dummy Voting CNN 0.6374953573666304
Dummy Voting CNN 0.7326906211827724
Dummy Voting CNN 0.7047901202508885
Dummy Voting CNN 0.6729858702793075
Dummy Voting CNN 0.681729400373116
Dummy Voting CNN mean 0.685938273890543
SoftOrderingCNN 0.9796672415695569
SoftOrderingCNN 0.9801398325300962
SoftOrderingCNN 0.9767516075970913
SoftOrderingCNN 0.9800413281351411
SoftOrderingCNN 0.9795576181812651
SoftOrderingCNN mean 0.9792315256026303
SoftOrderingCNN_AE 0.36164159354709013
SoftOrderingCNN_AE 0.3673916959180702
SoftOrderingCNN_AE 0.3608142493327975
SoftOrderingCNN_AE 0.31972331648860663
SoftOrderingCNN_AE 0.3145578139308025
SoftOrderingCNN_AE mean 0.34482573384347337
KNN-5 0.3520814490146841
KNN-5 0.3138142857951569
KNN-5 0.29511320778828787
KNN-5 0.2931626680743069
KNN-5 0.33854878616750417
KNN-5 mean 0.318544079367988
R

In [27]:
for name,regr in regressors_dict.items():
    print(f'{name} R2: mean {np.mean(mean_scores[name]):.4f} +- {np.std(mean_scores[name]):.4f} std')

Baseline R2: mean 0.9695 +- 0.0020 std
Dummy Voting CNN R2: mean 0.6859 +- 0.0319 std
SoftOrderingCNN R2: mean 0.9792 +- 0.0013 std
SoftOrderingCNN_AE R2: mean 0.3448 +- 0.0228 std
KNN-5 R2: mean 0.3185 +- 0.0234 std
RandomForest R2: mean 0.6543 +- 0.0222 std
Linear Ridge R2: mean -0.0008 +- 0.0008 std
Linear Reg R2: mean -0.0125 +- 0.0056 std


```markdown
CNNs based models offer a minor improvement over polynomial groundtruth baseline.
The performance gain with respect to shallow-learning algorithm is not neglibile
```