In [1]:
import copy
import torch
import random
import pandas as pd

from itertools import product
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

random_state = 42
preproc = True

In [2]:
def fix_random(seed):
    torch.manual_seed(seed)
    random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

fix_random(random_state)

## Device

In [3]:
# PyTorch Device
device = "cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu"
print("Device: {}".format(device))

Device: cpu


## Data Loading


In [4]:
df = pd.read_csv("train.csv")
num_rows, num_cols = df.shape
print("Rows: ", num_rows)
print("Columns: ", num_cols)

Rows:  252175
Columns:  91


In [5]:
print("Null rows:", df.shape[0] - df.dropna().shape[0])
print("Duplicated rows:", df.duplicated().sum())
df.drop_duplicates(inplace=True)

Null rows: 0
Duplicated rows: 52


### Train Test Split

In [6]:
# Separate indices in train/val/set
# "stratify=y" makes sure to keep the classes proportions on the dataset (useful on imbalanced classes)
train, test = train_test_split(df, stratify=df["Year"], test_size=0.3, random_state=random_state)
val, test = train_test_split(test, stratify=test["Year"], test_size=(1 / 3), random_state=random_state)

X_train = train.drop(columns=["Year"])
y_train = train["Year"]

X_val = val.drop(columns=["Year"])
y_val = val["Year"]

X_test = test.drop(columns=["Year"])
y_test = test["Year"]

### Preprocessing 

In [7]:
from sklearn import preprocessing
from sklearn.covariance import OAS
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.pipeline import Pipeline

pipeline = Pipeline(
    steps=[
        ("min-max", preprocessing.MinMaxScaler()),
        ("lmax", preprocessing.Normalizer(norm="max")),
        ("lda", LinearDiscriminantAnalysis(solver="eigen", shrinkage=None, covariance_estimator=OAS())),
    ]
)

if preproc == True:
    # Fit the pipeline to the data
    pipeline.fit(X_train, y_train)

    # Transform the data using the pipeline
    X_train = pipeline.transform(X_train)
    X_test = pipeline.transform(X_test)
    X_val = pipeline.transform(X_val)

    X_train = pd.DataFrame(X_train)
    X_test = pd.DataFrame(X_test)
    X_val = pd.DataFrame(X_val)

    train = pd.DataFrame(X_train)
    test = pd.DataFrame(X_test)
    val = pd.DataFrame(X_val)

    y_train.reset_index(drop=True, inplace=True)
    y_test.reset_index(drop=True, inplace=True)
    y_val.reset_index(drop=True, inplace=True)

    train["Year"] = y_train
    test["Year"] = y_test
    val['Year'] = y_val


## Config


In [10]:
target = ["Year"]
continous_cols = list(train.columns)[:-1]

In [11]:
from pytorch_tabular import TabularModel
from pytorch_tabular.models import TabNetModelConfig
from pytorch_tabular.config import DataConfig, OptimizerConfig, TrainerConfig, ExperimentConfig
from pytorch_tabular.models.common.heads import LinearHeadConfig

In [12]:
data_config = DataConfig(target=target, continuous_cols=continous_cols, num_workers=0)

optimizer_config = OptimizerConfig(lr_scheduler="ReduceLROnPlateau", lr_scheduler_params={"patience": 10, "threshold": 1, "threshold_mode": "abs"},)

head_config = LinearHeadConfig(
    layers="",  # No additional layer in head, just a mapping layer to output_dim
    initialization="kaiming",
).__dict__  # Convert to dict to pass to the model config

if preproc == False:
    experiment_config = ExperimentConfig(
        project_name="TabNet",
        run_name="TabNet-raw",
        log_target="tensorboard",
    )
else:
    experiment_config = ExperimentConfig(
        project_name="TabNet",
        run_name="TabNet-preproc",
        log_target="tensorboard",
    )

## Train

In [13]:
learning_rates = [0.005]
batch_sizes = [256, 512]
virtual_batch_sizes = [128]
n_epochs = [200]
# Dimension of the prediction  layer
n_d = [32, 64]
# Dimension of the attention  layer
n_a = [16, 32]
# Number of successive steps in the network
n_steps = [3]
# Number of independent GLU layer in each GLU block
n_independent = [2, 3]
# Number of shared GLU layer in each GLU block
n_shared = [2, 3]
# Coefficient for feature reusage in the masks. A value close to 1 will make mask selection least correlated between layers. Values range from 1.0 to 2.0.
gamma = [1.5]


params = list(
    product(
        learning_rates,
        batch_sizes,
        virtual_batch_sizes,
        n_epochs,
        n_d,
        n_a,
        n_steps,
        n_independent,
        n_shared,
        gamma,
    )
)


comb = (
    len(learning_rates)
    * len(batch_sizes)
    * len(virtual_batch_sizes)
    * len(n_epochs)
    * len(n_d)
    * len(n_a)
    * len(n_steps)
    * len(n_independent)
    * len(n_shared)
    * len(gamma)
)


print("Number of combinations: ", comb)

Number of combinations:  32


### TabNet

In [14]:
best_mse = float("inf")
best_model = None
best_params = None
iter = 0
results = pd.DataFrame(
    columns=["loss", "r2", 'learning_rate', 'batch_size', 'virtual_batch_size', 'n_epochs', 'n_d', 'n_a', 'n_steps', 'n_independent', 'n_shared', 'gamma']
)


for learning_rate, batch_size, virtual_batch_size, n_epochs, n_d, n_a, n_steps, n_independent, n_shared, gamma in params:
    iter += 1
    print(f"\nIteration: {iter} of {comb}")
    trainer_config = TrainerConfig(
        batch_size=batch_size,
        max_epochs=n_epochs,
        early_stopping_patience=10,
    )

    model_config = TabNetModelConfig(
        task="regression",
        head="LinearHead",  # Linear Head
        head_config=head_config,  # Linear Head Config
        learning_rate=learning_rate,
        virtual_batch_size=virtual_batch_size,
        n_d=n_d,
        n_a=n_a,
        n_steps=n_steps,
        n_independent=n_independent,
        n_shared=n_shared,
        gamma=gamma
    )

    tabular_model = TabularModel(
        data_config=data_config,
        model_config=model_config,
        optimizer_config=optimizer_config,
        trainer_config=trainer_config,
        experiment_config=experiment_config,
        verbose=False
    )

    tabular_model.fit(train=train, validation=val)
    tabular_model.evaluate(test)

    y_pred = tabular_model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    print("MSE: ", mse)
    print("R2: ", r2)

    if mse < best_mse:
        best_mse = mse
        best_model = copy.deepcopy(tabular_model)
        best_params = (learning_rate, batch_size, virtual_batch_size, n_epochs, n_d, n_a, n_steps, n_independent, n_shared, gamma)
        print("Best model updated")

    config = [mse, r2, learning_rate, batch_size, virtual_batch_size, n_epochs, n_d, n_a, n_steps, n_independent, n_shared, gamma]
    results.loc[len(results)] = config


Iteration: 1 of 32


Seed set to 42
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
C:\Users\Win10\AppData\Roaming\Python\Python310\site-packages\pytorch_lightning\callbacks\model_checkpoint.py:639: Checkpoint directory C:\Users\Win10\Desktop\tabnet\saved_models exists and is not empty.


Output()

Output()

C:\Users\Win10\AppData\Roaming\Python\Python310\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=3` in the `DataLoader` to improve performance.


MSE:  79.45419650233988
R2:  0.27816985300849095
Best model updated

Iteration: 2 of 32


Seed set to 42
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
C:\Users\Win10\AppData\Roaming\Python\Python310\site-packages\pytorch_lightning\callbacks\model_checkpoint.py:639: Checkpoint directory C:\Users\Win10\Desktop\tabnet\saved_models exists and is not empty.


Output()

Output()

C:\Users\Win10\AppData\Roaming\Python\Python310\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=3` in the `DataLoader` to improve performance.


Seed set to 42


MSE:  71.63860990710069
R2:  0.3491733527506379
Best model updated

Iteration: 3 of 32


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
C:\Users\Win10\AppData\Roaming\Python\Python310\site-packages\pytorch_lightning\callbacks\model_checkpoint.py:639: Checkpoint directory C:\Users\Win10\Desktop\tabnet\saved_models exists and is not empty.


Output()

Output()

C:\Users\Win10\AppData\Roaming\Python\Python310\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=3` in the `DataLoader` to improve performance.


Seed set to 42


MSE:  77.4729651929494
R2:  0.2961690594725497

Iteration: 4 of 32


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
C:\Users\Win10\AppData\Roaming\Python\Python310\site-packages\pytorch_lightning\callbacks\model_checkpoint.py:639: Checkpoint directory C:\Users\Win10\Desktop\tabnet\saved_models exists and is not empty.


Output()

Output()

C:\Users\Win10\AppData\Roaming\Python\Python310\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=3` in the `DataLoader` to improve performance.


Seed set to 42


MSE:  85.82181337461297
R2:  0.2203209535766889

Iteration: 5 of 32


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
C:\Users\Win10\AppData\Roaming\Python\Python310\site-packages\pytorch_lightning\callbacks\model_checkpoint.py:639: Checkpoint directory C:\Users\Win10\Desktop\tabnet\saved_models exists and is not empty.


Output()

Output()

C:\Users\Win10\AppData\Roaming\Python\Python310\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=3` in the `DataLoader` to improve performance.


Seed set to 42


MSE:  80.44395395042542
R2:  0.2691780464622314

Iteration: 6 of 32


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
C:\Users\Win10\AppData\Roaming\Python\Python310\site-packages\pytorch_lightning\callbacks\model_checkpoint.py:639: Checkpoint directory C:\Users\Win10\Desktop\tabnet\saved_models exists and is not empty.


Output()

Output()

C:\Users\Win10\AppData\Roaming\Python\Python310\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=3` in the `DataLoader` to improve performance.


Seed set to 42


MSE:  78.35034142000617
R2:  0.2881982204379545

Iteration: 7 of 32


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
C:\Users\Win10\AppData\Roaming\Python\Python310\site-packages\pytorch_lightning\callbacks\model_checkpoint.py:639: Checkpoint directory C:\Users\Win10\Desktop\tabnet\saved_models exists and is not empty.


Output()

Output()

C:\Users\Win10\AppData\Roaming\Python\Python310\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=3` in the `DataLoader` to improve performance.


Seed set to 42


MSE:  84.30996237180683
R2:  0.2340559062868668

Iteration: 8 of 32


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
C:\Users\Win10\AppData\Roaming\Python\Python310\site-packages\pytorch_lightning\callbacks\model_checkpoint.py:639: Checkpoint directory C:\Users\Win10\Desktop\tabnet\saved_models exists and is not empty.


Output()

Output()

C:\Users\Win10\AppData\Roaming\Python\Python310\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=3` in the `DataLoader` to improve performance.


Seed set to 42


MSE:  80.30978316647663
R2:  0.27039696907378763

Iteration: 9 of 32


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
C:\Users\Win10\AppData\Roaming\Python\Python310\site-packages\pytorch_lightning\callbacks\model_checkpoint.py:639: Checkpoint directory C:\Users\Win10\Desktop\tabnet\saved_models exists and is not empty.


Output()

Output()

C:\Users\Win10\AppData\Roaming\Python\Python310\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=3` in the `DataLoader` to improve performance.


Seed set to 42


MSE:  70.75628701794761
R2:  0.35718913151133025
Best model updated

Iteration: 10 of 32


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
C:\Users\Win10\AppData\Roaming\Python\Python310\site-packages\pytorch_lightning\callbacks\model_checkpoint.py:639: Checkpoint directory C:\Users\Win10\Desktop\tabnet\saved_models exists and is not empty.


Output()

Output()

Seed set to 42


MSE:  79.48861224406575
R2:  0.2778571908584423

Iteration: 11 of 32


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
C:\Users\Win10\AppData\Roaming\Python\Python310\site-packages\pytorch_lightning\callbacks\model_checkpoint.py:639: Checkpoint directory C:\Users\Win10\Desktop\tabnet\saved_models exists and is not empty.


Output()

Output()

C:\Users\Win10\AppData\Roaming\Python\Python310\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=3` in the `DataLoader` to improve performance.


Seed set to 42


MSE:  79.31557970151452
R2:  0.2794291670550727

Iteration: 12 of 32


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
C:\Users\Win10\AppData\Roaming\Python\Python310\site-packages\pytorch_lightning\callbacks\model_checkpoint.py:639: Checkpoint directory C:\Users\Win10\Desktop\tabnet\saved_models exists and is not empty.


Output()

Output()

C:\Users\Win10\AppData\Roaming\Python\Python310\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=3` in the `DataLoader` to improve performance.


Seed set to 42


MSE:  78.2319793629831
R2:  0.2892735229994331

Iteration: 13 of 32


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
C:\Users\Win10\AppData\Roaming\Python\Python310\site-packages\pytorch_lightning\callbacks\model_checkpoint.py:639: Checkpoint directory C:\Users\Win10\Desktop\tabnet\saved_models exists and is not empty.


Output()

Output()

C:\Users\Win10\AppData\Roaming\Python\Python310\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=3` in the `DataLoader` to improve performance.


Seed set to 42


MSE:  79.82769596752871
R2:  0.2747766631490911

Iteration: 14 of 32


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
C:\Users\Win10\AppData\Roaming\Python\Python310\site-packages\pytorch_lightning\callbacks\model_checkpoint.py:639: Checkpoint directory C:\Users\Win10\Desktop\tabnet\saved_models exists and is not empty.


Output()

Output()

C:\Users\Win10\AppData\Roaming\Python\Python310\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=3` in the `DataLoader` to improve performance.


Seed set to 42


MSE:  85.41507160487424
R2:  0.22401614507522039

Iteration: 15 of 32


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
C:\Users\Win10\AppData\Roaming\Python\Python310\site-packages\pytorch_lightning\callbacks\model_checkpoint.py:639: Checkpoint directory C:\Users\Win10\Desktop\tabnet\saved_models exists and is not empty.


Output()

Output()

C:\Users\Win10\AppData\Roaming\Python\Python310\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=3` in the `DataLoader` to improve performance.


Seed set to 42


MSE:  83.0598783929539
R2:  0.24541274257657186

Iteration: 16 of 32


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
C:\Users\Win10\AppData\Roaming\Python\Python310\site-packages\pytorch_lightning\callbacks\model_checkpoint.py:639: Checkpoint directory C:\Users\Win10\Desktop\tabnet\saved_models exists and is not empty.


Output()

Output()

Seed set to 42


MSE:  78.562105668962
R2:  0.28627437216208873

Iteration: 17 of 32


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
C:\Users\Win10\AppData\Roaming\Python\Python310\site-packages\pytorch_lightning\callbacks\model_checkpoint.py:639: Checkpoint directory C:\Users\Win10\Desktop\tabnet\saved_models exists and is not empty.


Output()

Output()

Seed set to 42


MSE:  82.27163430214654
R2:  0.2525738287492313

Iteration: 18 of 32


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
C:\Users\Win10\AppData\Roaming\Python\Python310\site-packages\pytorch_lightning\callbacks\model_checkpoint.py:639: Checkpoint directory C:\Users\Win10\Desktop\tabnet\saved_models exists and is not empty.


Output()

Output()

C:\Users\Win10\AppData\Roaming\Python\Python310\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=3` in the `DataLoader` to improve performance.


Seed set to 42


MSE:  84.33713255986723
R2:  0.23380906896794873

Iteration: 19 of 32


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
C:\Users\Win10\AppData\Roaming\Python\Python310\site-packages\pytorch_lightning\callbacks\model_checkpoint.py:639: Checkpoint directory C:\Users\Win10\Desktop\tabnet\saved_models exists and is not empty.


Output()

Output()

C:\Users\Win10\AppData\Roaming\Python\Python310\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=3` in the `DataLoader` to improve performance.


Seed set to 42


MSE:  81.10716097272939
R2:  0.2631529043870564

Iteration: 20 of 32


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
C:\Users\Win10\AppData\Roaming\Python\Python310\site-packages\pytorch_lightning\callbacks\model_checkpoint.py:639: Checkpoint directory C:\Users\Win10\Desktop\tabnet\saved_models exists and is not empty.


Output()

Output()

C:\Users\Win10\AppData\Roaming\Python\Python310\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=3` in the `DataLoader` to improve performance.


Seed set to 42


MSE:  80.28204994489033
R2:  0.2706489214727221

Iteration: 21 of 32


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
C:\Users\Win10\AppData\Roaming\Python\Python310\site-packages\pytorch_lightning\callbacks\model_checkpoint.py:639: Checkpoint directory C:\Users\Win10\Desktop\tabnet\saved_models exists and is not empty.


Output()

Output()

C:\Users\Win10\AppData\Roaming\Python\Python310\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=3` in the `DataLoader` to improve performance.


Seed set to 42


MSE:  85.21819588517694
R2:  0.2258047331667754

Iteration: 22 of 32


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
C:\Users\Win10\AppData\Roaming\Python\Python310\site-packages\pytorch_lightning\callbacks\model_checkpoint.py:639: Checkpoint directory C:\Users\Win10\Desktop\tabnet\saved_models exists and is not empty.


Output()

Output()

C:\Users\Win10\AppData\Roaming\Python\Python310\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=3` in the `DataLoader` to improve performance.


Seed set to 42


MSE:  80.08848408948367
R2:  0.2724074399149371

Iteration: 23 of 32


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
C:\Users\Win10\AppData\Roaming\Python\Python310\site-packages\pytorch_lightning\callbacks\model_checkpoint.py:639: Checkpoint directory C:\Users\Win10\Desktop\tabnet\saved_models exists and is not empty.


Output()

Output()

Seed set to 42


MSE:  85.0555995796998
R2:  0.22728189762440587

Iteration: 24 of 32


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
C:\Users\Win10\AppData\Roaming\Python\Python310\site-packages\pytorch_lightning\callbacks\model_checkpoint.py:639: Checkpoint directory C:\Users\Win10\Desktop\tabnet\saved_models exists and is not empty.


Output()

Output()

Seed set to 42


MSE:  81.0514034976424
R2:  0.2636594531687402

Iteration: 25 of 32


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
C:\Users\Win10\AppData\Roaming\Python\Python310\site-packages\pytorch_lightning\callbacks\model_checkpoint.py:639: Checkpoint directory C:\Users\Win10\Desktop\tabnet\saved_models exists and is not empty.


Output()

Output()

C:\Users\Win10\AppData\Roaming\Python\Python310\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=3` in the `DataLoader` to improve performance.


Seed set to 42


MSE:  88.39475138100093
R2:  0.1969461753886499

Iteration: 26 of 32


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
C:\Users\Win10\AppData\Roaming\Python\Python310\site-packages\pytorch_lightning\callbacks\model_checkpoint.py:639: Checkpoint directory C:\Users\Win10\Desktop\tabnet\saved_models exists and is not empty.


Output()

Output()

C:\Users\Win10\AppData\Roaming\Python\Python310\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=3` in the `DataLoader` to improve performance.


Seed set to 42


MSE:  84.83199526829969
R2:  0.22931331119437537

Iteration: 27 of 32


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
C:\Users\Win10\AppData\Roaming\Python\Python310\site-packages\pytorch_lightning\callbacks\model_checkpoint.py:639: Checkpoint directory C:\Users\Win10\Desktop\tabnet\saved_models exists and is not empty.


Output()

Output()

C:\Users\Win10\AppData\Roaming\Python\Python310\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=3` in the `DataLoader` to improve performance.


Seed set to 42


MSE:  74.84982127203816
R2:  0.3199999512990813

Iteration: 28 of 32


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
C:\Users\Win10\AppData\Roaming\Python\Python310\site-packages\pytorch_lightning\callbacks\model_checkpoint.py:639: Checkpoint directory C:\Users\Win10\Desktop\tabnet\saved_models exists and is not empty.


Output()

Output()

C:\Users\Win10\AppData\Roaming\Python\Python310\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=3` in the `DataLoader` to improve performance.


Seed set to 42


MSE:  85.85043833211311
R2:  0.2200608998829945

Iteration: 29 of 32


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
C:\Users\Win10\AppData\Roaming\Python\Python310\site-packages\pytorch_lightning\callbacks\model_checkpoint.py:639: Checkpoint directory C:\Users\Win10\Desktop\tabnet\saved_models exists and is not empty.


Output()

Output()

C:\Users\Win10\AppData\Roaming\Python\Python310\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=3` in the `DataLoader` to improve performance.


Seed set to 42


MSE:  79.88516187903838
R2:  0.27425459346393866

Iteration: 30 of 32


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
C:\Users\Win10\AppData\Roaming\Python\Python310\site-packages\pytorch_lightning\callbacks\model_checkpoint.py:639: Checkpoint directory C:\Users\Win10\Desktop\tabnet\saved_models exists and is not empty.


Output()

Output()

C:\Users\Win10\AppData\Roaming\Python\Python310\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=3` in the `DataLoader` to improve performance.


Seed set to 42


MSE:  78.34970638802918
R2:  0.28820398961372473

Iteration: 31 of 32


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
C:\Users\Win10\AppData\Roaming\Python\Python310\site-packages\pytorch_lightning\callbacks\model_checkpoint.py:639: Checkpoint directory C:\Users\Win10\Desktop\tabnet\saved_models exists and is not empty.


Output()

Output()

C:\Users\Win10\AppData\Roaming\Python\Python310\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=3` in the `DataLoader` to improve performance.


Seed set to 42


MSE:  79.05779701077184
R2:  0.28177108637138193

Iteration: 32 of 32


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
C:\Users\Win10\AppData\Roaming\Python\Python310\site-packages\pytorch_lightning\callbacks\model_checkpoint.py:639: Checkpoint directory C:\Users\Win10\Desktop\tabnet\saved_models exists and is not empty.


Output()

Output()

C:\Users\Win10\AppData\Roaming\Python\Python310\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=3` in the `DataLoader` to improve performance.


MSE:  81.11822930435045
R2:  0.263052350158423


In [15]:
results.sort_values(by="r2", ascending=False).head()

Unnamed: 0,loss,r2,learning_rate,batch_size,virtual_batch_size,n_epochs,n_d,n_a,n_steps,n_independent,n_shared,gamma
8,70.756287,0.357189,0.005,256.0,128.0,200.0,64.0,16.0,3.0,2.0,2.0,1.5
1,71.63861,0.349173,0.005,256.0,128.0,200.0,32.0,16.0,3.0,2.0,3.0,1.5
26,74.849821,0.32,0.005,512.0,128.0,200.0,64.0,16.0,3.0,3.0,2.0,1.5
2,77.472965,0.296169,0.005,256.0,128.0,200.0,32.0,16.0,3.0,3.0,2.0,1.5
11,78.231979,0.289274,0.005,256.0,128.0,200.0,64.0,16.0,3.0,3.0,3.0,1.5


In [16]:
if preproc == True:
    results.sort_values(by="r2", ascending=False).to_csv("tabnet-preproc.csv")
else:
    results.sort_values(by="r2", ascending=False).to_csv("tabnet-raw.csv")

In [22]:
import pickle

file = open("3_" + "TB" + "_preproc_" + ".save", "wb")
pickle.dump(pipeline, file)

file = open("3_" + "TB" + "_model_" + ".save", "wb")
pickle.dump(best_model, file)
file.close()