In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, r2_score
from pytorch_tabular import TabularModel
from pytorch_tabular.models import TabTransformerConfig
from pytorch_tabular.config import (
    DataConfig,
    OptimizerConfig,
    TrainerConfig,
    ExperimentConfig,
)
import torch

# Load the training table from disk.
FILENAME = "../train.csv"
df = pd.read_csv(FILENAME)

# Feature matrix = every column except the regression target "Year".
X = df.drop("Year", axis=1)
y = df["Year"]

# Split the full frame into train / validation / test sets.
# 20% of the rows are held out for test; 20% of the remainder becomes the
# validation set, i.e. roughly 64% / 16% / 20% of the original rows.
# `train_test_split` is already imported at the top of the cell, so it is not
# re-imported here.
train, test = train_test_split(df, random_state=42, test_size=0.2)
train, val = train_test_split(train, random_state=42, test_size=0.2)

# Data schema: "Year" is the regression target and every other column is
# declared as a continuous feature (no categorical columns are declared).
# NOTE(review): TabTransformer is typically aimed at categorical features --
# confirm that an all-continuous setup is what is intended here.
data_config = DataConfig(target=["Year"], continuous_cols=X.columns.tolist())

# Training settings: run on CPU, automatic learning-rate search disabled,
# at most 100 epochs, early-stopping patience of 10, batches of 256 rows.
trainer_config = TrainerConfig(
    accelerator="cpu",
    max_epochs=100,
    early_stopping_patience=10,
    batch_size=256,
    auto_lr_find=False,
)

# TabTransformer configured for regression, reporting four metrics.
model_config = TabTransformerConfig(
    task="regression",
    metrics=[
        "mean_squared_error",
        "mean_absolute_error",
        "mean_absolute_percentage_error",
        "r2_score",
    ],
)

# Optimizer configuration is created with no arguments.
# NOTE(review): no learning rate is passed anywhere in this cell, so whatever
# the library defaults to is used -- set it explicitly if a specific value is wanted.
optimizer_config = OptimizerConfig()

# Assemble the TabTransformer-based tabular model from the four configs.
tabular_model = TabularModel(
    trainer_config=trainer_config,
    optimizer_config=optimizer_config,
    model_config=model_config,
    data_config=data_config,
)

# Fit the model on the training split, passing the validation split alongside.
tabular_model.fit(train=train, validation=val)

# Persist the fitted model object with torch.save. The file is opened in a
# `with` block so the handle is always closed once the write completes
# (previously the handle returned by open() was never closed).
# NOTE(review): pytorch_tabular also offers TabularModel.save_model(); consider
# it if a library-managed checkpoint format is preferred over pickling.
with open("tabTransf.save", "wb") as model_file:
    torch.save(tabular_model, model_file)

# Evaluate on the held-out test split and collect the model's predictions for it.
result = tabular_model.evaluate(test)
pred_df = tabular_model.predict(test)




Seed set to 42


GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
/Users/alessandrotocco/Library/Python/3.9/lib/python/site-packages/pytorch_lightning/trainer/setup.py:187: GPU available but not used. You can set it by doing `Trainer(accelerator='gpu')`.


/Users/alessandrotocco/Library/Python/3.9/lib/python/site-packages/pytorch_lightning/callbacks/model_checkpoint.py:639: Checkpoint directory /Users/alessandrotocco/Desktop/Universita/Data Analysis/DataAnalyticsProject/Training_Module/saved_models exists and is not empty.


Output()

Output()

/Users/alessandrotocco/Library/Python/3.9/lib/python/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.


: 