In [1]:
import pandas as pd
import torch
from sklearn.metrics import accuracy_score

df_features = pd.read_json('/kaggle/input/indoml-phase2/train.features',lines=True)
df_labels = pd.read_json('/kaggle/input/indoml-phase2/train.labels',lines=True)

In [2]:
df = pd.merge(df_features,df_labels,on="indoml_id")

In [3]:
# df = df[:10000]

In [4]:
from sklearn.preprocessing import LabelEncoder
group_encoder = LabelEncoder()
supergroup_encoder = LabelEncoder()
module_encoder = LabelEncoder()
brand_encoder = LabelEncoder()

# Fit and transform each column
df['group'] = group_encoder.fit_transform(df['group'])
df['supergroup'] = supergroup_encoder.fit_transform(df['supergroup'])
df['module'] = module_encoder.fit_transform(df['module'])
df['brand'] = brand_encoder.fit_transform(df['brand'])

In [5]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from tqdm import tqdm
import torch
import torch.nn as nn
import torch.nn.functional as F
from transformers import DebertaModel, DebertaTokenizer
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import accuracy_score
from torch.distributions import Categorical
import numpy as np

In [6]:
# For using the new one
# tokenizer = DebertaTokenizer.from_pretrained("microsoft/deberta-base")

# For loading the saved one
model_dir = "/kaggle/input/new-transformer-experiment-12-embedding-tmp/New_model"
tokenizer = DebertaTokenizer.from_pretrained(model_dir)

In [14]:
MAX_LENGTH = 12
BATCH_SIZE = 64
LEARNING_RATE = 5e-5
NUM_EPOCHS = 34
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device=DEVICE
PATIENCE = 5  # Early stopping patience
PATIENCE_LR = 3  # Reduce LR on plateau patience

In [8]:
from sklearn.metrics import accuracy_score
import torch

In [9]:
class ProductDataset(Dataset):
    def __init__(self, texts, labels1, labels2, labels3, labels4):
        self.texts = texts
        self.labels1 = labels1
        self.labels2 = labels2
        self.labels3 = labels3
        self.labels4 = labels4
        self.encodings = tokenizer(texts, truncation=True, padding='max_length', max_length=MAX_LENGTH, return_tensors="pt")

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        item = {key: val[idx].to(DEVICE) for key, val in self.encodings.items()}
        item['labels1'] = torch.tensor(self.labels1[idx], device=DEVICE)
        item['labels2'] = torch.tensor(self.labels2[idx], device=DEVICE)
        item['labels3'] = torch.tensor(self.labels3[idx], device=DEVICE)
        item['labels4'] = torch.tensor(self.labels4[idx], device=DEVICE)
        return item
        
def compute_accuracy(preds, labels):
    # Convert each tensor in the list to numpy arrays
    preds_np = [p.cpu().numpy() for p in preds]
    labels_np = [l.cpu().numpy() for l in labels]
    # Individual accuracies for each of the 4 labels
    accuracies = [accuracy_score(labels_np[i], preds_np[i]) for i in range(4)]
    # Overall accuracy where all 4 labels match
    overall_accuracy = accuracy_score(
        np.all([labels_np[i] == preds_np[i] for i in range(4)], axis=0), 
        np.ones(len(labels_np[0]))
    )
    # Return the 5 accuracies (4 individual, 1 overall)
    return accuracies + [overall_accuracy]

In [10]:
# Split data
train_texts, val_texts, train_labels1, val_labels1, train_labels2, val_labels2, train_labels3, val_labels3, train_labels4, val_labels4 = train_test_split(
    df['description'], 
    df['supergroup'], 
    df['group'], 
    df['module'], 
    df['brand'], 
    test_size=0.2, 
    random_state=42
)

train_dataset = ProductDataset(
    train_texts.tolist(), 
    train_labels1.tolist(), 
    train_labels2.tolist(), 
    train_labels3.tolist(), 
    train_labels4.tolist()
)

val_dataset = ProductDataset(
    val_texts.tolist(), 
    val_labels1.tolist(), 
    val_labels2.tolist(), 
    val_labels3.tolist(), 
    val_labels4.tolist()
)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [11]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from transformers import DebertaModel, DebertaTokenizer
from torch.distributions import Categorical
import os

class AdvancedHierarchicalClassifier(nn.Module):
    def __init__(self, num_supergroups, num_groups, num_modules, num_brands, hidden_size=768, projection_dim=128):
        super().__init__()
        # For loading the new model
        self.deberta = DebertaModel.from_pretrained("microsoft/deberta-base")
        self.hidden_size = hidden_size
        # Classifiers for each hierarchy level
        self.supergroup_classifier = nn.Linear(hidden_size, num_supergroups)
        self.group_classifier = nn.Linear(hidden_size + num_supergroups, num_groups)
        self.module_classifier = nn.Linear(hidden_size + num_supergroups + num_groups, num_modules)
        self.brand_classifier = nn.Linear(hidden_size + num_supergroups + num_groups + num_modules, num_brands)
        # RL Policy networks for each level
        self.supergroup_policy = nn.Linear(hidden_size, num_supergroups)
        self.group_policy = nn.Linear(hidden_size + num_supergroups, num_groups)
        self.module_policy = nn.Linear(hidden_size + num_supergroups + num_groups, num_modules)
        self.brand_policy = nn.Linear(hidden_size + num_supergroups + num_groups + num_modules, num_brands)
        # Contrastive learning projection head
        self.projection = nn.Sequential(
            nn.Linear(hidden_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, projection_dim)
        )
        # Few-shot learning prototypes
        self.prototypes = nn.Parameter(torch.randn(num_supergroups + num_groups + num_modules + num_brands, hidden_size))
    def forward(self, input_ids, attention_mask):
        outputs = self.deberta(input_ids=input_ids, attention_mask=attention_mask)
        hidden_states = outputs.last_hidden_state[:, 0, :]  # Use [CLS] token representation
        # Supervised classification logits
        supergroup_logits = self.supergroup_classifier(hidden_states)
        group_input = torch.cat([hidden_states, torch.softmax(supergroup_logits, dim=1)], dim=1)
        group_logits = self.group_classifier(group_input)
        module_input = torch.cat([group_input, torch.softmax(group_logits, dim=1)], dim=1)
        module_logits = self.module_classifier(module_input)
        brand_input = torch.cat([module_input, torch.softmax(module_logits, dim=1)], dim=1)
        brand_logits = self.brand_classifier(brand_input)
        # RL policy logits
        supergroup_policy = self.supergroup_policy(hidden_states)
        group_policy = self.group_policy(group_input)
        module_policy = self.module_policy(module_input)
        brand_policy = self.brand_policy(brand_input)
        # Contrastive learning projection
        projection = self.projection(hidden_states)
        # Few-shot learning
        prototype_distances = torch.cdist(hidden_states, self.prototypes)
        few_shot_logits = -prototype_distances  # Negative distance as logits
        return (supergroup_logits, group_logits, module_logits, brand_logits), \
               (supergroup_policy, group_policy, module_policy, brand_policy), \
               projection, few_shot_logits
    def sample_actions(self, policies):
        return [Categorical(logits=policy).sample() for policy in policies]


In [12]:
# class JointAccuracyTrainer:
#     def __init__(self, model, supervised_lr=1e-5, rl_lr=1e-4, contrastive_temperature=0.07, loss_weights=None):
#         self.model = model
#         self.device = next(model.parameters()).device  # Get the device from the model's parameters
#         self.supervised_optimizer = torch.optim.AdamW(model.parameters(), lr=supervised_lr)
#         self.rl_optimizer = torch.optim.AdamW(model.parameters(), lr=rl_lr)
#         self.criterion = nn.CrossEntropyLoss()
#         self.contrastive_temperature = contrastive_temperature
#         if loss_weights is None:
#             self.loss_weights = [1.0, 1.0, 1.0, 1.0]
#         else:
#             self.loss_weights = loss_weights
#     def compute_joint_loss(self, all_outputs, true_labels):
#         # Unpack the model outputs
#         supervised_logits, policy_logits, projection, few_shot_logits = all_outputs
#         logits_supergroup, logits_group1, logits_group2, logits_group3 = supervised_logits
#         # Compute classification losses for all levels
#         loss_supergroup = self.criterion(logits_supergroup, true_labels['supergroup'])
#         loss_group1 = self.criterion(logits_group1, true_labels['group1'])
#         loss_group2 = self.criterion(logits_group2, true_labels['group2'])
#         loss_group3 = self.criterion(logits_group3, true_labels['group3'])
#         # Combine losses using weighted sum
#         total_loss = (
#             self.loss_weights[0] * loss_supergroup + 
#             self.loss_weights[1] * loss_group1 + 
#             self.loss_weights[2] * loss_group2 + 
#             self.loss_weights[3] * loss_group3
#         )
#         return total_loss
        
#     def supervised_step(self, batch, true_labels):
#         self.supervised_optimizer.zero_grad()
#         # Forward pass through the model
#         all_outputs = self.model(batch['input_ids'], batch['attention_mask'])
#         # Compute joint loss for all levels
#         total_loss = self.compute_joint_loss(all_outputs, true_labels)
#         # Backpropagate and update model weights
#         total_loss.backward()
#         torch.nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=5.0)
#         self.supervised_optimizer.step()
#         return total_loss.item()
        
#     def validation_step(self, batch, true_labels):
#         with torch.no_grad():
#             # Forward pass during validation
#             all_outputs = self.model(batch['input_ids'], batch['attention_mask'])
#             supervised_logits, _, _, _ = all_outputs
#             logits_supergroup, logits_group1, logits_group2, logits_group3 = supervised_logits
#             # Compute predictions
#             preds_supergroup = torch.argmax(logits_supergroup, dim=-1)
#             preds_group1 = torch.argmax(logits_group1, dim=-1)
#             preds_group2 = torch.argmax(logits_group2, dim=-1)
#             preds_group3 = torch.argmax(logits_group3, dim=-1)
#             # Compute accuracies
#             supergroup_acc = (preds_supergroup == true_labels['supergroup']).float().mean().item()
#             group1_acc = (preds_group1 == true_labels['group1']).float().mean().item()
#             group2_acc = (preds_group2 == true_labels['group2']).float().mean().item()
#             group3_acc = (preds_group3 == true_labels['group3']).float().mean().item()
#             # Joint accuracy
#             item_acc = ((preds_supergroup == true_labels['supergroup']) &
#                        (preds_group1 == true_labels['group1']) &
#                        (preds_group2 == true_labels['group2']) &
#                        (preds_group3 == true_labels['group3'])).float().mean().item()
#         return supergroup_acc, group1_acc, group2_acc, group3_acc, item_acc
        
# def train_and_evaluate(model, train_loader, val_loader, num_epochs=10):
#     trainer = JointAccuracyTrainer(model)
#     for epoch in range(num_epochs):
#         model.train()
#         total_sup_loss = 0.0
#         # Training loop
#         for batch in train_loader:
#             batch = {k: v.to(device) for k, v in batch.items()}
#             true_labels = {
#                 'supergroup': batch['labels1'],
#                 'group1': batch['labels2'],
#                 'group2': batch['labels3'],
#                 'group3': batch['labels4']
#             }
#             sup_loss = trainer.supervised_step(batch, true_labels)
#             total_sup_loss += sup_loss
#         # Validation loop
#         model.eval()
#         val_accuracies = [0, 0, 0, 0, 0]
#         for batch in val_loader:
#             batch = {k: v.to(device) for k, v in batch.items()}
#             true_labels = {
#                 'supergroup': batch['labels1'],
#                 'group1': batch['labels2'],
#                 'group2': batch['labels3'],
#                 'group3': batch['labels4']
#             }
#             accs = trainer.validation_step(batch, true_labels)
#             val_accuracies = [sum(x) for x in zip(val_accuracies, accs)]
#         # Average accuracies
#         val_accuracies = [x / len(val_loader) for x in val_accuracies]
#         print(f"Epoch {epoch + 1}/{num_epochs} - "
#               f"Train Loss: {total_sup_loss / len(train_loader):.4f}, "
#               f"Val Accuracies - Supergroup: {val_accuracies[0]:.4f}, Group1: {val_accuracies[1]:.4f}, "
#               f"Group2: {val_accuracies[2]:.4f}, Group3: {val_accuracies[3]:.4f}, Item Accuracy: {val_accuracies[4]:.4f}")

# # Usage
# model = AdvancedHierarchicalClassifier(num_supergroups=32, num_groups=228, num_modules=449, num_brands=5679).to(DEVICE)
# # For loading the model saved after pre-training the saved dict
# model_path = '/kaggle/input/new-transformer-experiment-12-embedding-tmp/New_model/model.pth'
# model.load_state_dict(torch.load(model_path))
# # Assuming you have train_loader and val_loader already defined
# train_and_evaluate(model, train_loader, val_loader, num_epochs=NUM_EPOCHS)

In [None]:
class JointAccuracyTrainer:
    def __init__(self, model, supervised_lr=5e-6, rl_lr=1e-4, contrastive_temperature=0.07, loss_weights=None):
        self.model = model
        self.device = next(model.parameters()).device
        
        # Split parameters into pretrained and task-specific groups
        pretrained_params = {'params': model.deberta.parameters(), 'lr': supervised_lr * 0.1}
        task_params = {'params': [p for n, p in model.named_parameters() if not n.startswith('deberta')], 
                      'lr': supervised_lr}
        
        self.supervised_optimizer = torch.optim.AdamW(
            [pretrained_params, task_params],
            weight_decay=0.1,
            betas=(0.9, 0.999)
        )
        
        self.scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(
            self.supervised_optimizer,
            T_0=5,
            T_mult=1,
            eta_min=supervised_lr * 0.1
        )
        
        self.rl_optimizer = torch.optim.AdamW(model.parameters(), lr=rl_lr)
        self.criterion = nn.CrossEntropyLoss(label_smoothing=0.15)
        self.contrastive_temperature = contrastive_temperature
        
        if loss_weights is None:
            self.loss_weights = [1.2, 1.0, 1.0, 1.0]
        else:
            self.loss_weights = loss_weights
            
        self.mixup_alpha = 0.2  # Reduced from 0.4 to make mixing less aggressive
        
    def mixup_data(self, input_ids, attention_mask, labels_dict):
        """
        Applies mixup to input data and labels
        """
        batch_size = input_ids.size(0)
        
        # Generate mixup parameter and permutation
        lam = np.random.beta(self.mixup_alpha, self.mixup_alpha)
        lam = max(lam, 1-lam)  # Ensure lambda is at least 0.5 to preserve majority class
        index = torch.randperm(batch_size).to(self.device)
        
        # Mix the embeddings rather than raw input_ids
        mixed_input_ids = input_ids  # Keep original input_ids
        mixed_attention_mask = attention_mask  # Keep original attention_mask
        
        # Mix the labels
        mixed_labels = {}
        for key, labels in labels_dict.items():
            mixed_labels[key] = labels  # Keep original labels for computing both losses
            mixed_labels[f"{key}_mixed"] = labels[index]  # Store permuted labels
            mixed_labels[f"{key}_lambda"] = lam
            
        return mixed_input_ids, mixed_attention_mask, mixed_labels
        
    def compute_loss(self, logits, labels, mixed_labels, lam):
        """
        Compute loss with mixup
        """
        return lam * self.criterion(logits, labels) + (1 - lam) * self.criterion(logits, mixed_labels)

    def compute_joint_loss(self, all_outputs, labels_dict):
        supervised_logits, policy_logits, projection, few_shot_logits = all_outputs
        logits_supergroup, logits_group1, logits_group2, logits_group3 = supervised_logits
        
        # Compute losses with proper label handling
        losses = []
        for i, logits in enumerate(supervised_logits):
            label_key = f'labels{i+1}'
            labels = labels_dict[label_key]
            mixed_labels = labels_dict[f"{label_key}_mixed"]
            lam = labels_dict[f"{label_key}_lambda"]
            
            loss = self.compute_loss(logits, labels, mixed_labels, lam)
            losses.append(loss * self.loss_weights[i])
        
        # Add L2 regularization
        l2_reg = 0
        for param in self.model.parameters():
            l2_reg += torch.norm(param, 2)
        reg_loss = 0.01 * l2_reg
        
        total_loss = sum(losses) + reg_loss
        return total_loss
        
    def supervised_step(self, batch):
        self.supervised_optimizer.zero_grad()
        
        # Prepare labels dictionary
        labels_dict = {
            'labels1': batch['labels1'],
            'labels2': batch['labels2'],
            'labels3': batch['labels3'],
            'labels4': batch['labels4']
        }
        
        # Apply mixup
        mixed_ids, mixed_mask, mixed_labels = self.mixup_data(
            batch['input_ids'],
            batch['attention_mask'],
            labels_dict
        )
        
        # Forward pass
        all_outputs = self.model(mixed_ids, mixed_mask)
        
        # Compute loss
        total_loss = self.compute_joint_loss(all_outputs, mixed_labels)
        
        # Backward pass
        total_loss.backward()
        torch.nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=0.5)
        self.supervised_optimizer.step()
        
        return total_loss.item()

    def validation_step(self, batch):
        """
        Perform a validation step without mixup
        """
        with torch.no_grad():
            outputs = self.model(batch['input_ids'], batch['attention_mask'])
            supervised_logits = outputs[0]
            
            accuracies = []
            correct_predictions = None
            
            # Compute accuracies for each level
            for i, logits in enumerate(supervised_logits):
                preds = torch.argmax(logits, dim=1)
                label_key = f'labels{i+1}'
                current_correct = (preds == batch[label_key])
                acc = current_correct.float().mean().item()
                accuracies.append(acc)
                
                if correct_predictions is None:
                    correct_predictions = current_correct
                else:
                    correct_predictions &= current_correct
            
            # Compute all-levels accuracy
            overall_acc = correct_predictions.float().mean().item()
            accuracies.append(overall_acc)
            
            return accuracies

def train_and_evaluate(model, train_loader, val_loader, num_epochs=30):
    trainer = JointAccuracyTrainer(model)
    best_accuracy = 0
    best_epoch = 0
    accumulation_steps = 4
    
    for epoch in range(num_epochs):
        model.train()
        total_sup_loss = 0.0
        
        for i, batch in enumerate(train_loader):
            batch = {k: v.to(trainer.device) for k, v in batch.items()}
            
            # Compute loss with gradient accumulation
            sup_loss = trainer.supervised_step(batch) / accumulation_steps
            total_sup_loss += sup_loss * accumulation_steps
            
            if (i + 1) % accumulation_steps == 0:
                trainer.supervised_optimizer.step()
                trainer.supervised_optimizer.zero_grad()
        
        # Validation phase
        model.eval()
        val_accuracies = [0, 0, 0, 0, 0]
        num_batches = 0
        
        for batch in val_loader:
            batch = {k: v.to(trainer.device) for k, v in batch.items()}
            accs = trainer.validation_step(batch)
            val_accuracies = [a + b for a, b in zip(val_accuracies, accs)]
            num_batches += 1
            
        val_accuracies = [acc / num_batches for acc in val_accuracies]
        current_accuracy = val_accuracies[4]  # Overall accuracy
        
        # Save best model
        if current_accuracy > best_accuracy:
            best_accuracy = current_accuracy
            best_epoch = epoch
            torch.save({
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': trainer.supervised_optimizer.state_dict(),
                'scheduler_state_dict': trainer.scheduler.state_dict(),
                'accuracy': current_accuracy,
                'best_accuracy': best_accuracy
            }, 'best_model_checkpoint.pth')
        
        # Update learning rate
        trainer.scheduler.step()
        
        print(f"Epoch {epoch + 1}/{num_epochs} - "
              f"Train Loss: {total_sup_loss / len(train_loader):.4f}, "
              f"Val Accuracies - Supergroup: {val_accuracies[0]:.4f}, "
              f"Group1: {val_accuracies[1]:.4f}, Group2: {val_accuracies[2]:.4f}, "
              f"Group3: {val_accuracies[3]:.4f}, Item Accuracy: {val_accuracies[4]:.4f}, "
              f"LR: {trainer.supervised_optimizer.param_groups[0]['lr']:.2e}")
        
    print(f"Best accuracy: {best_accuracy:.4f} achieved at epoch {best_epoch + 1}")
    return best_accuracy

# Usage
model = AdvancedHierarchicalClassifier(num_supergroups=32, num_groups=228, 
                                     num_modules=449, num_brands=5679).to(device)

# Load previous checkpoint
model_path = '/kaggle/input/new-transformer-experiment-12-embedding-tmp/New_model/model.pth'
model.load_state_dict(torch.load(model_path))

# Continue training
train_and_evaluate(model, train_loader, val_loader, num_epochs=NUM_EPOCHS)

  model.load_state_dict(torch.load(model_path))


Epoch 1/34 - Train Loss: 78.5958, Val Accuracies - Supergroup: 0.8827, Group1: 0.8561, Group2: 0.8443, Group3: 0.8622, Item Accuracy: 0.7632, LR: 5.00e-07
Epoch 2/34 - Train Loss: 75.5069, Val Accuracies - Supergroup: 0.8846, Group1: 0.8585, Group2: 0.8473, Group3: 0.8669, Item Accuracy: 0.7708, LR: 5.00e-07
Epoch 3/34 - Train Loss: 73.6866, Val Accuracies - Supergroup: 0.8856, Group1: 0.8597, Group2: 0.8487, Group3: 0.8686, Item Accuracy: 0.7740, LR: 5.00e-07
Epoch 4/34 - Train Loss: 72.3978, Val Accuracies - Supergroup: 0.8860, Group1: 0.8603, Group2: 0.8490, Group3: 0.8690, Item Accuracy: 0.7751, LR: 5.00e-07
Epoch 5/34 - Train Loss: 71.3649, Val Accuracies - Supergroup: 0.8864, Group1: 0.8607, Group2: 0.8494, Group3: 0.8698, Item Accuracy: 0.7769, LR: 5.00e-07
Epoch 6/34 - Train Loss: 70.0033, Val Accuracies - Supergroup: 0.8868, Group1: 0.8612, Group2: 0.8499, Group3: 0.8712, Item Accuracy: 0.7783, LR: 5.00e-07
Epoch 7/34 - Train Loss: 68.2473, Val Accuracies - Supergroup: 0.8870,

In [None]:
import os
# Define the directory to save the model and tokenizer
save_directory = "New_model"
# Create the directory if it doesn't exist

if not os.path.exists(save_directory):
    os.makedirs(save_directory)
# Save the model's state dictionary

model_save_path = os.path.join(save_directory, "model.pth")
torch.save(model.state_dict(), model_save_path)
print(f"Model saved to {model_save_path}")
# Save the tokenizer

tokenizer.save_pretrained(save_directory)
print(f"Tokenizer saved to {save_directory}")

In [None]:
df_test_feat = pd.read_json('/kaggle/input/indoml-phase2/final_test_data.features',lines=True)

In [None]:
df_test_feat.head()

In [None]:
def predict(model, tokenizer, text):
    inputs = tokenizer(
        text, 
        return_tensors="pt", 
        truncation=True, 
        padding='max_length', 
        max_length=MAX_LENGTH
    ).to(DEVICE)
    with torch.no_grad():
        logits, _, _, _ = model(inputs['input_ids'], inputs['attention_mask'])
    predictions = [torch.argmax(logit, dim=1).item() for logit in logits]
    return predictions

# Example prediction
sample_text = "Short product description here"
predictions = predict(model, tokenizer, sample_text)
print(f"Supergroup: {predictions[0]}, Group: {predictions[1]}, Module: {predictions[2]}, Brand: {predictions[3]}")

In [None]:
# test_tmp = df_test_feat[:5]

In [None]:
def make_test_pred_and_save(df_test_feat):
    supergroups_list = []
    groups_list = []
    modules_list = []
    brands_list = []
    indoml_id_list = range(0, len(df_test_feat))
    length_df = df_test_feat.shape[0]
    with torch.no_grad():
        for i in range(length_df):
            if i % 1000 == 0:
                print(f"Processing {i} of {length_df - 1}")
            inputs = tokenizer(
                df_test_feat.iloc[i].description, 
                return_tensors="pt", 
                truncation=True, 
                padding='max_length', 
                max_length=MAX_LENGTH
            ).to(DEVICE)
            logits, _, _, _ = model(inputs['input_ids'], inputs['attention_mask'])
            predictions = [torch.argmax(logit, dim=1).item() for logit in logits]
            # Append predictions to respective lists
            supergroups_list.append(predictions[0])
            groups_list.append(predictions[1])
            modules_list.append(predictions[2])
            brands_list.append(predictions[3])
        try:
            supergroups_names = supergroup_encoder.inverse_transform(supergroups_list)
        except ValueError as e:
            print(f"Error in supergroups: {e}")
            supergroups_names = ['Unknown' if x not in supergroup_encoder.classes_ else x for x in supergroups_list]
        try:
            groups_names = group_encoder.inverse_transform(groups_list)
        except ValueError as e:
            print(f"Error in groups: {e}")
            groups_names = ['Unknown' if x not in group_encoder.classes_ else x for x in groups_list]
        try:
            modules_names = module_encoder.inverse_transform(modules_list)
        except ValueError as e:
            print(f"Error in modules: {e}")
            modules_names = ['Unknown' if x not in module_encoder.classes_ else x for x in modules_list]
        try:
            brands_names = brand_encoder.inverse_transform(brands_list)
        except ValueError as e:
            print(f"Error in brands: {e}")
            brands_names = ['Unknown' if x not in brand_encoder.classes_ else x for x in brands_list]
        # Create a DataFrame with predictions
        predictions_df = pd.DataFrame({
            'indoml_id': indoml_id_list,
            'supergroup': supergroups_names,
            'group': groups_names,
            'module': modules_names,
            'brand': brands_names
        })
        predictions_df.to_json('/kaggle/working/predictions.predict', orient='records', lines=True)
        print("predictions.predict saved")
print(make_test_pred_and_save(df_test_feat))