In [16]:
import torch
import arff
import pandas as pd

In [2]:
import tabicl

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from tabicl import train, TabICL, TabICLClassifier

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
from tabicl.model.tabicl import TabICL

model = TabICL()

In [19]:
def cnt_params(model):
    return sum(param.numel() for param in model.parameters())


print(cnt_params(model))

# 27 051 666

27051666


In [9]:
for name, module in model.named_modules():
    params = sum(p.numel() for p in module.parameters() if p.requires_grad)
    if params > 0:
        print(f"Module: {name} | Params: {params}")

Module:  | Params: 27051658
Module: col_embedder | Params: 877824
Module: col_embedder.in_linear | Params: 256
Module: col_embedder.tf_col | Params: 844032
Module: col_embedder.tf_col.blocks | Params: 844032
Module: col_embedder.tf_col.blocks.0 | Params: 281344
Module: col_embedder.tf_col.blocks.0.multihead_attn1 | Params: 132480
Module: col_embedder.tf_col.blocks.0.multihead_attn1.linear1 | Params: 33024
Module: col_embedder.tf_col.blocks.0.multihead_attn1.linear2 | Params: 32896
Module: col_embedder.tf_col.blocks.0.multihead_attn1.norm1 | Params: 256
Module: col_embedder.tf_col.blocks.0.multihead_attn1.norm2 | Params: 256
Module: col_embedder.tf_col.blocks.0.multihead_attn1.attn | Params: 66048
Module: col_embedder.tf_col.blocks.0.multihead_attn1.attn.out_proj | Params: 16512
Module: col_embedder.tf_col.blocks.0.multihead_attn2 | Params: 132480
Module: col_embedder.tf_col.blocks.0.multihead_attn2.linear1 | Params: 33024
Module: col_embedder.tf_col.blocks.0.multihead_attn2.linear2 | P

In [20]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
checkpoint = torch.load(
    "/home/D32485/exercice/tabicl-classifier-v1.1-0506.ckpt", map_location="cpu"
)
model.load_state_dict(checkpoint["state_dict"])
model.to(device)

  checkpoint = torch.load(


TabICL(
  (col_embedder): ColEmbedding(
    (in_linear): SkippableLinear(in_features=1, out_features=128, bias=True)
    (tf_col): SetTransformer(
      (blocks): ModuleList(
        (0-2): 3 x InducedSelfAttentionBlock(
          (multihead_attn1): MultiheadAttentionBlock(
            (linear1): Linear(in_features=128, out_features=256, bias=True)
            (dropout): Dropout(p=0.0, inplace=False)
            (linear2): Linear(in_features=256, out_features=128, bias=True)
            (norm1): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
            (norm2): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
            (dropout1): Dropout(p=0.0, inplace=False)
            (dropout2): Dropout(p=0.0, inplace=False)
            (attn): MultiheadAttention(
              (out_proj): NonDynamicallyQuantizableLinear(in_features=128, out_features=128, bias=True)
            )
          )
          (multihead_attn2): MultiheadAttentionBlock(
            (linear1): Linear(in_featu

In [None]:
checkpoint = torch.load(
    "/home/D32485/exercice/tabicl-classifier-v1.1-0506.ckpt", map_location="cpu"
)
print(checkpoint.keys())

dict_keys(['config', 'state_dict'])


  checkpoint = torch.load("/home/D32485/exercice/tabicl-classifier-v1.1-0506.ckpt", map_location="cpu")


In [21]:
import torch.nn.init as init
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau

In [None]:
def train(model, micro_X, y, train_len, micro_d, device, learning_rate=5e-5, epochs=40):
    # Move model to the specified device
    model.to(device)

    # Define the loss function (CrossEntropy includes softmax)
    criterion = torch.nn.CrossEntropyLoss()

    # Define the optimizer (only updating the last layer)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    # Learning rate scheduler (Reduce when loss stops improving)
    scheduler = ReduceLROnPlateau(
        optimizer, mode="min", factor=0.5, patience=5, verbose=True
    )

    # Lists to store loss and accuracy per epoch
    train_losses = []
    train_accuracies = []

    # Training loop
    for epoch in range(epochs):
        model.train()  # Set the model to training mode

        optimizer.zero_grad()  # Reset accumulated gradients

        # Forward pass (only the last layer)
        # outputs = model(data, labels)
        # print("Labels dim : ", labels.shape)
        # print("Outputs dim : ", outputs.shape)
        # loss = criterion(outputs, labels)  # Compute the loss
        y_train = y[:, :train_len]
        y_test = y[:, train_len:]
        pred = model(micro_X, y_train, micro_d)  # (B, test_size, max_classes)
        pred = pred.flatten(end_dim=-2)
        true = y_test.long().flatten()
        print("Pred shape : ", pred.shape)
        print("True shape: ", true.shape)
        loss = criterion(pred, true)

        loss.backward()  # Compute gradients
        optimizer.step()  # Update weights
        scheduler.step(loss)  # Step the scheduler based on validation loss

        # Compute accuracy
        _, predicted = pred.max(1)
        # Compare predicted with labels to check how many predictions are correct
        accuracy = (predicted == y_test).sum().item() / y_test.size(0)

        # Store metrics
        train_losses.append(loss.item())
        train_accuracies.append(accuracy)

        # Print epoch results
        print(
            f"Epoch {epoch + 1}/{epochs}, Loss: {loss.item():.4f}, lr: {optimizer.param_groups[0]['lr']:.6f}, Accuracy: {accuracy:.6f}"
        )

    return train_losses, train_accuracies

In [2]:
with open("data.arff") as f:
    dataset = arff.load(f)

print(dataset["attributes"])  # list of attribute names and types
print(dataset["data"])  # list of rows

[('V2', ['bohemian', 'Brief', 'Casual', 'cute', 'fashion', 'Flare', 'Novelty', 'OL', 'party', 'sexy', 'Sexy', 'vintage', 'work']), ('V3', ['Average', 'high', 'High', 'low', 'Low', 'Medium', 'very-high']), ('V4', 'NUMERIC'), ('V5', ['free', 'L', 'M', 's', 'S', 'small', 'XL']), ('V6', ['Automn', 'Autumn', 'spring', 'Spring', 'summer', 'Summer', 'winter', 'Winter']), ('V7', ['backless', 'boat-neck', 'bowneck', 'halter', 'mandarin-collor', 'o-neck', 'open', 'peterpan-collor', 'ruffled', 'Scoop', 'slash-neck', 'sqare-collor', 'sweetheart', 'Sweetheart', 'turndowncollor', 'v-neck']), ('V8', ['butterfly', 'capsleeves', 'cap-sleeves', 'full', 'half', 'halfsleeve', 'Petal', 'short', 'sleeevless', 'sleeveless', 'sleevless', 'sleveless', 'threequarter', 'threequater', 'thressqatar', 'turndowncollor', 'urndowncollor']), ('V9', ['dropped', 'empire', 'natural', 'princess']), ('V10', ['acrylic', 'cashmere', 'chiffonfabric', 'cotton', 'knitting', 'lace', 'linen', 'lycra', 'microfiber', 'milksilk', 'mi

In [3]:
with open("data.arff") as f:
    dataset = arff.load(f)

df = pd.DataFrame(dataset["data"], columns=[attr[0] for attr in dataset["attributes"]])

print(df.head())

        V2       V3   V4 V5      V6      V7         V8       V9  \
0     Sexy      Low  4.6  M  Summer  o-neck  sleevless   empire   
1   Casual      Low  0.0  L  Summer  o-neck      Petal  natural   
2  vintage     High  0.0  L  Automn  o-neck       full  natural   
3    Brief  Average  4.6  L  Spring  o-neck       full  natural   
4     cute      Low  4.5  M  Summer  o-neck  butterfly  natural   

             V10      V11         V12     V13 Class  
0           None  chiffon     ruffles  animal     2  
1     microfiber     None     ruffles  animal     1  
2       polyster     None        None   print     1  
3           silk  chiffon  embroidary   print     2  
4  chiffonfabric  chiffon         bow     dot     1  


In [4]:
print(f"Number of rows: {df.shape[0]}")
print(f"Number of columns: {df.shape[1]}")

Number of rows: 500
Number of columns: 13


In [5]:
from sklearn.model_selection import train_test_split

# Separate features and target
X = df.drop(columns=["Class"])
y = df["Class"]

# First split: Train (80%) and Temp (20% for val + test)
X_train, X_temp, y_train, y_temp = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Second split: Val (10%) and Test (10%)
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, random_state=42, stratify=y_temp
)

# Check the sizes
print("Train:", X_train.shape, y_train.shape)
print("Val:  ", X_val.shape, y_val.shape)
print("Test: ", X_test.shape, y_test.shape)

print(y.unique())

Train: (400, 12) (400,)
Val:   (50, 12) (50,)
Test:  (50, 12) (50,)
['2' '1']


In [14]:
import torch
from torch.utils.data import TensorDataset, DataLoader
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

# 1. Identify categorical and numerical columns
cat_cols = X_train.select_dtypes(include=["object", "category"]).columns.tolist()
num_cols = X_train.select_dtypes(include=["number"]).columns.tolist()

# 2. Create a preprocessing pipeline for train/val/test

# OneHotEncode categorical features, passthrough numerical features
preprocessor = ColumnTransformer(
    transformers=[
        ("cat", OneHotEncoder(handle_unknown="ignore", sparse_output=False), cat_cols),
        ("num", "passthrough", num_cols),
    ]
)

# Fit on train, transform train/val/test
X_train_processed = preprocessor.fit_transform(X_train)
X_val_processed = preprocessor.transform(X_val)
X_test_processed = preprocessor.transform(X_test)

# Encode labels

le = LabelEncoder()
y_train_encoded = le.fit_transform(y_train)
y_val_encoded = le.transform(y_val)
y_test_encoded = le.transform(y_test)

# 3. Convert to torch tensors
X_train_tensor = torch.tensor(X_train_processed, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train_encoded, dtype=torch.long)

# Shape becomes (B=1, T = train_rows + val_rows, H = features)
X_train_tensor = X_train_tensor.unsqueeze(0)  # Add batch dim

# 6. y_train only for the first part (train set)
y_train_tensor = torch.tensor(y_train_tensor, dtype=torch.long).unsqueeze(0)

X_val_tensor = torch.tensor(X_val_processed, dtype=torch.float32)
y_val_tensor = torch.tensor(y_val_encoded, dtype=torch.long)

X_test_tensor = torch.tensor(X_test_processed, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test_encoded, dtype=torch.long)

# Create TensorDatasets and DataLoaders
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)


  y_train_tensor = torch.tensor(y_train_tensor, dtype=torch.long).unsqueeze(0)


In [6]:
import torch
from torch.utils.data import TensorDataset, DataLoader
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from sklearn.compose import ColumnTransformer
import numpy as np

# 1. Identify categorical and numerical columns
cat_cols = X_train.select_dtypes(include=["object", "category"]).columns.tolist()
num_cols = X_train.select_dtypes(include=["number"]).columns.tolist()

# 2. Preprocessor: OneHotEncode categorical, passthrough numerical
preprocessor = ColumnTransformer(
    transformers=[
        ("cat", OneHotEncoder(handle_unknown="ignore", sparse_output=False), cat_cols),
        ("num", "passthrough", num_cols),
    ]
)

# 3. Fit on train, transform train/val/test
X_train_processed = preprocessor.fit_transform(X_train)
X_val_processed = preprocessor.transform(X_val)
X_test_processed = preprocessor.transform(X_test)

# 4. Encode labels (only once across train/val/test)
le = LabelEncoder()
y_train_encoded = le.fit_transform(y_train)
y_val_encoded = le.transform(y_val)
y_test_encoded = le.transform(y_test)

y_val_tensor = torch.tensor(y_val_encoded, dtype=torch.long)

# 5. Concatenate X_train and X_val (for in-context format)
X_trainval_processed = torch.tensor(
    np.vstack([X_train_processed, X_val_processed]), dtype=torch.float32
)

# Shape becomes (B=1, T = train_rows + val_rows, H = features)
X_trainval_tensor = X_trainval_processed.unsqueeze(0)  # Add batch dim

# 6. y_train only for the first part (train set)
y_train_tensor = torch.tensor(y_train_encoded, dtype=torch.long).unsqueeze(0)

# 7. (Optional) Prepare test set separately (B=1, T_test, H)
X_test_tensor = torch.tensor(X_test_processed, dtype=torch.float32).unsqueeze(0)
y_test_tensor = torch.tensor(y_test_encoded, dtype=torch.long).unsqueeze(0)

# 8. Datasets & loaders (note: for your model you likely feed B=1 full table at once)
train_dataset = TensorDataset(X_trainval_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=1, shuffle=False)

test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)


In [22]:
import sklearn
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.utils.multiclass import check_classification_targets
from sklearn.utils.validation import check_is_fitted
from sklearn.preprocessing import LabelEncoder

from huggingface_hub import hf_hub_download
from huggingface_hub.utils import LocalEntryNotFoundError

from tabicl.sklearn.preprocessing import TransformToNumerical, EnsembleGenerator
from tabicl import InferenceConfig

In [23]:
from typing import Optional, List, Dict

In [24]:
y_encoder_ = LabelEncoder()
y = y_encoder_.fit_transform(y)
classes_ = y_encoder_.classes_
n_classes_ = len(y_encoder_.classes_)


#  Transform input features
X_encoder_ = TransformToNumerical(verbose=False)
X = X_encoder_.fit_transform(X)

n_estimators: int = (32,)
norm_methods: Optional[str | List[str]] = (None,)
feat_shuffle_method: str = ("latin",)
class_shift: bool = (True,)
outlier_threshold: float = (4.0,)
softmax_temperature: float = (0.9,)
average_logits: bool = (True,)
use_hierarchical: bool = True
random_state: int | None = (42,)

seed = random_state if isinstance(random_state, (int, type(None))) else None

# Fit ensemble generator to create multiple dataset views
ensemble_generator_ = EnsembleGenerator(
    n_estimators=32,
    norm_methods=["none", "power"],
    feat_shuffle_method="latin",
    class_shift=class_shift,
    outlier_threshold=outlier_threshold,
    random_state=seed,
)
ensemble_generator_.fit(X, y)


In [25]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=seed, stratify=y
)

In [44]:
import torch
from torch.utils.data import TensorDataset, DataLoader

X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)

X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)

In [None]:
X_trainval_tensor.shape

torch.Size([1, 450, 158])

In [43]:
y_val_tensor.shape[0]

50

In [11]:
from tabicl.train.run import Trainer
from tabicl.train.train_config import build_parser

In [12]:
import sys

sys.argv = [""]

parser = build_parser()
# config = parser.parse_args([])  # empty list means: use only defaults
config = parser.parse_args(
    [
        "--checkpoint_path",
        "/home/D32485/exercice/tabicl-classifier-v1.1-0506.ckpt",
        "--only_load_model",
        "True",
    ]
)

trainer = Trainer(config)

No DDP training
Model has 27051658 parameters.
PriorDataset(
  prior_type: mix_scm
  batch_size: 512
  batch_size_per_gp: 4
  features: 5 - 100
  max classes: 10
  seq_len: None - 1024
  sequence length varies across groups: False
  train_size: 0.1 - 0.9
  device: cpu
)
Automatic Mixed Precision is enabled.
Loading checkpoint from /home/D32485/exercice/tabicl-classifier-v1.1-0506.ckpt
Only loading model weights


In [31]:
X_train_tensor.shape

torch.Size([400, 12])

In [27]:
batch_size, T, H = X_trainval_tensor.shape


NameError: name 'X_trainval_tensor' is not defined

In [45]:
X_train_tensor = X_train_tensor.unsqueeze(0)
y_train_tensor = y_train_tensor.unsqueeze(0)

In [22]:
X_train_tensor.shape

torch.Size([1, 400, 12])

In [35]:
y_train_tensor.shape

torch.Size([1, 400])

In [46]:
batch_size, T, H = X_train_tensor.shape
d = torch.full((batch_size,), H, dtype=torch.long)
seq_len = T
train_size = 300

print("Batch size: ", batch_size)
print("seq_len : ", seq_len)
print("train_size : ", train_size)

seq_len_tensor = torch.full((batch_size,), seq_len, dtype=torch.long)  # or float
train_size_tensor = torch.full((batch_size,), train_size, dtype=torch.long)

batch = X_train_tensor, y_train_tensor, d, seq_len_tensor, train_size_tensor
results = trainer.run_batch(batch)

Batch size:  1
seq_len :  400
train_size :  300
Micro_y shape :  torch.Size([1, 400])
seq_len:  400
Pred shape :  torch.Size([100, 10])
True shape:  torch.Size([100])


In [47]:
def train(model, micro_X, y, train_len, micro_d, device, learning_rate=5e-5, epochs=40):
    # Move model to the specified device
    model.to(device)

    # Define the loss function (CrossEntropy includes softmax)
    criterion = torch.nn.CrossEntropyLoss()

    # Define the optimizer (only updating the last layer)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    # Learning rate scheduler (Reduce when loss stops improving)
    scheduler = ReduceLROnPlateau(
        optimizer, mode="min", factor=0.5, patience=5, verbose=True
    )

    # Lists to store loss and accuracy per epoch
    train_losses = []
    train_accuracies = []

    # Training loop
    for epoch in range(epochs):
        model.train()  # Set the model to training mode

        optimizer.zero_grad()  # Reset accumulated gradients

        # Forward pass (only the last layer)
        # outputs = model(data, labels)
        # print("Labels dim : ", labels.shape)
        # print("Outputs dim : ", outputs.shape)
        # loss = criterion(outputs, labels)  # Compute the loss
        y_train = y[:, :train_len]
        y_test = y[:, train_len:]
        micro_X = micro_X.to(device)
        y_train = y_train.to(device)
        micro_d = micro_d.to(device)
        y_test = y_test.to(device)
        pred = model(micro_X, y_train, micro_d)  # (B, test_size, max_classes)
        pred = pred.flatten(end_dim=-2)
        true = y_test.long().flatten()
        print("Pred shape : ", pred.shape)
        print("True shape: ", true.shape)
        loss = criterion(pred, true)

        loss.backward()  # Compute gradients
        optimizer.step()  # Update weights
        scheduler.step(loss)  # Step the scheduler based on validation loss

        # Compute accuracy
        _, predicted = pred.max(1)
        # Compare predicted with labels to check how many predictions are correct
        accuracy = (predicted == y_test).sum().item() / y_test.size(0)

        # Store metrics
        train_losses.append(loss.item())
        train_accuracies.append(accuracy)

        # Print epoch results
        print(
            f"Epoch {epoch + 1}/{epochs}, Loss: {loss.item():.4f}, lr: {optimizer.param_groups[0]['lr']:.6f}, Accuracy: {accuracy:.6f}"
        )

    return train_losses, train_accuracies

In [48]:
train_losses, train_accuracies = train(
    model=model,
    micro_X=X_train_tensor,
    y=y_train_tensor,
    train_len=300,
    micro_d=d,
    device=device,
    learning_rate=5e-5,
    epochs=30,
)



Pred shape :  torch.Size([100, 10])
True shape:  torch.Size([100])
Epoch 1/30, Loss: 0.0049, lr: 0.000050, Accuracy: 100.000000
Pred shape :  torch.Size([100, 10])
True shape:  torch.Size([100])
Epoch 2/30, Loss: 0.6282, lr: 0.000050, Accuracy: 89.000000
Pred shape :  torch.Size([100, 10])
True shape:  torch.Size([100])
Epoch 3/30, Loss: 0.3926, lr: 0.000050, Accuracy: 86.000000
Pred shape :  torch.Size([100, 10])
True shape:  torch.Size([100])
Epoch 4/30, Loss: 0.1414, lr: 0.000050, Accuracy: 94.000000
Pred shape :  torch.Size([100, 10])
True shape:  torch.Size([100])
Epoch 5/30, Loss: 0.1352, lr: 0.000050, Accuracy: 96.000000
Pred shape :  torch.Size([100, 10])
True shape:  torch.Size([100])
Epoch 6/30, Loss: 0.1566, lr: 0.000050, Accuracy: 96.000000
Pred shape :  torch.Size([100, 10])
True shape:  torch.Size([100])
Epoch 7/30, Loss: 0.1064, lr: 0.000025, Accuracy: 97.000000
Pred shape :  torch.Size([100, 10])
True shape:  torch.Size([100])
Epoch 8/30, Loss: 0.0822, lr: 0.000025, Acc

In [60]:
def inference(model, micro_X, y, train_len, micro_d, device):
    model.to(device)
    model.eval()  # set evaluation mode

    # Split the labels like in training
    y_train = y[:, :train_len].to(device)
    y_test = y[:, train_len:].to(device)

    micro_X = micro_X.to(device)
    micro_d = micro_d.to(device)

    with torch.no_grad():
        # Forward pass
        pred_logits = model(
            micro_X, y_train, micro_d
        )  # shape: (B, test_size, max_classes)

        # Flatten to (B * test_size, max_classes)
        pred_logits = pred_logits.flatten(end_dim=-2)

        # Get predicted class indices
        predicted_classes = pred_logits.argmax(dim=1)

    return predicted_classes, y_test.flatten()

In [61]:
X_test_tensor.shape

torch.Size([1, 100, 12])

In [62]:
pred_classes, true_labels = inference(
    model, X_test_tensor, y_test_tensor, 50, d, device
)

# Compute accuracy
accuracy = (pred_classes == true_labels).float().mean().item()
print(f"Inference accuracy: {accuracy:.4f}")

Inference accuracy: 0.5600


In [49]:
model.eval()

TabICL(
  (col_embedder): ColEmbedding(
    (in_linear): SkippableLinear(in_features=1, out_features=128, bias=True)
    (tf_col): SetTransformer(
      (blocks): ModuleList(
        (0-2): 3 x InducedSelfAttentionBlock(
          (multihead_attn1): MultiheadAttentionBlock(
            (linear1): Linear(in_features=128, out_features=256, bias=True)
            (dropout): Dropout(p=0.0, inplace=False)
            (linear2): Linear(in_features=256, out_features=128, bias=True)
            (norm1): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
            (norm2): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
            (dropout1): Dropout(p=0.0, inplace=False)
            (dropout2): Dropout(p=0.0, inplace=False)
            (attn): MultiheadAttention(
              (out_proj): NonDynamicallyQuantizableLinear(in_features=128, out_features=128, bias=True)
            )
          )
          (multihead_attn2): MultiheadAttentionBlock(
            (linear1): Linear(in_featu

In [50]:
X_test_tensor.shape
X_test_tensor = X_test_tensor.unsqueeze(0)
y_test_tensor = y_test_tensor.unsqueeze(0)

In [43]:
X_val_tensor = X_val_tensor.unsqueeze(0)
y_val_tensor = y_val_tensor.unsqueeze(0)
X_val_tensor.shape

torch.Size([1, 1, 1, 400, 12])

In [52]:
from tabicl.sklearn.classifier import TabICLClassifier

In [56]:
from tabicl import TabICLClassifier

In [57]:
clf = TabICLClassifier()

In [58]:
clf.model_ = model

In [59]:
clf.predict(X_test_tensor)

AttributeError: 'TabICLClassifier' object has no attribute 'X_encoder_'

In [51]:
pred = model(X_test_tensor)

TypeError: TabICL.forward() missing 1 required positional argument: 'y_train'

In [None]:
model()

In [24]:
print(results)

{'ce': 0.010134246200323105, 'accuracy': 0.009999999776482582}


In [15]:
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
batch_size = 64

T, H = X_train_tensor.shape
d = torch.full((len(X_train_tensor),), H, dtype=torch.long)
seq_len_tensor = torch.full((len(X_train_tensor),), T, dtype=torch.long)
train_size_tensor = torch.full(
    (len(X_train_tensor),), 300, dtype=torch.long
)  # fixed here

print(X_train_tensor.shape)
print(y_train_tensor.shape)

# Create dataset
train_dataset = torch.utils.data.TensorDataset(
    X_train_tensor, y_train_tensor, d, seq_len_tensor, train_size_tensor
)

# Create DataLoader
train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=batch_size, shuffle=True
)

# Assign to trainer and call train()
trainer.dataloader = train_loader
trainer.train()

torch.Size([400, 12])
torch.Size([400])


Step:   0%|          | 0/60000 [00:00<?, ?it/s]

Micro_y shape :  torch.Size([8])
seq_len:  400





IndexError: too many indices for tensor of dimension 1

In [22]:
X_trainval_tensor.shape

torch.Size([1, 450, 158])

In [14]:
X_trainval_tensor = X_trainval_tensor.to(device)

In [15]:
y_val_tensor = y_val_tensor.to(device)

In [30]:
y_val_tensor.shape

torch.Size([50])

In [19]:
X_train_tensor.shape

torch.Size([400, 158])

In [13]:
X_train_tensor = X_train_tensor.unsqueeze(0)

In [27]:
X_train_tensor.shape

torch.Size([1, 400, 158])

In [14]:
y_train_tensor.shape
y_train_tensor = y_train_tensor.unsqueeze(0)

In [29]:
y_train_tensor.shape

torch.Size([1, 400])

In [15]:
X_train_tensor = X_train_tensor.to(device)

In [32]:
y_train_tensor = y_train_tensor.to(device)

In [None]:
def train_last_layer(
    model, train_dataloader, device, learning_rate=5e-5, epochs=30, batch_size=16
):
    model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    criterion = torch.nn.CrossEntropyLoss()

    for epoch in range(epochs):
        model.train()
        total_loss, correct = 0, 0

        for data, label in train_dataloader:
            optimizer.zero_grad()
            outputs = model(data, label)  # model's forward

            loss = criterion(outputs.squeeze(0), label.squeeze(0))
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
            correct += (outputs.argmax(dim=-1) == batch_labels).sum().item()

        print(
            f"Epoch {epoch + 1} | Loss: {total_loss:.4f} | Acc: {correct / n_samples:.4f}"
        )


In [26]:
train_losses, train_accuracies = train_last_layer(
    model=model,
    data=X_trainval_tensor,
    labels=y_train_tensor,
    device=device,
    learning_rate=5e-5,
    epochs=30,
)

Data shape :  torch.Size([1, 450, 158])


RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


In [4]:
clf = TabICLClassifier()

In [None]:
clf = TabICLClassifier(
    n_estimators=32,  # number of ensemble members
    norm_methods=["none", "power"],  # normalization methods to try
    feat_shuffle_method="latin",  # feature permutation strategy
    class_shift=True,  # whether to apply cyclic shifts to class labels
    outlier_threshold=4.0,  # z-score threshold for outlier detection and clipping
    softmax_temperature=0.9,  # controls prediction confidence
    average_logits=True,  # whether ensemble averaging is done on logits or probabilities
    use_hierarchical=True,  # enable hierarchical classification for datasets with many classe
    batch_size=8,  # process this many ensemble members together (reduce RAM usage)
    use_amp=True,  # use automatic mixed precision for faster inference
    model_path=None,  # where the model checkpoint is stored
    allow_auto_download=True,  # whether automatic download to the specified path is allowed
    checkpoint_version="tabicl-classifier-v1.1-0506.ckpt",  # the version of pretrained checkpoint to use
    device=None,  # specify device for inference
    random_state=42,  # random seed for reproducibility
    n_jobs=None,  # number of threads to use for PyTorch
    verbose=False,  # print detailed information during inference
    inference_config=None,  # inference configuration for fine-grained control
)

In [13]:
clf.fit(X_train, y_train)  # this is cheap

INFO: You are downloading 'tabicl-classifier-v1.1-0506.ckpt', the latest best-performing version of TabICL.
To reproduce results from the original paper, please use 'tabicl-classifier-v1-0208.ckpt'.

Checkpoint 'tabicl-classifier-v1.1-0506.ckpt' not cached.
 Downloading from Hugging Face Hub (jingang/TabICL-clf).



LocalEntryNotFoundError: An error happened while trying to locate the file on the Hub and we cannot find the requested files in the local cache. Please check your connection and try again or make sure your Internet connection is on.

In [9]:
clf = TabICLClassifier()
clf.fit(X_train, y_train)  # this is cheap
clf.predict(X_test)  # in-context learning happens here

INFO: You are downloading 'tabicl-classifier-v1.1-0506.ckpt', the latest best-performing version of TabICL.
To reproduce results from the original paper, please use 'tabicl-classifier-v1-0208.ckpt'.

Checkpoint 'tabicl-classifier-v1.1-0506.ckpt' not cached.
 Downloading from Hugging Face Hub (jingang/TabICL-clf).



LocalEntryNotFoundError: An error happened while trying to locate the file on the Hub and we cannot find the requested files in the local cache. Please check your connection and try again or make sure your Internet connection is on.