In [1]:
!pip -q uninstall -y torch-xla


[0m

In [2]:
import os, glob, numpy as np, pandas as pd, torch
from torch.utils.data import Dataset, DataLoader
from transformers import XLMRobertaTokenizer, XLMRobertaForSequenceClassification

# 1) Runtime configuration
# NOTE(review): this flag is set after `transformers` has already been imported,
# and the saved output of this cell shows files being fetched from the Hub, so it
# evidently did not switch this run to offline mode. Confirm whether it should be
# exported before the import (and the base model attached as a local input).
os.environ["TRANSFORMERS_OFFLINE"] = "1"

# Prefer the GPU when one is visible, otherwise fall back to CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print("Device:", device)

# Checkpoint file whose weights are loaded into the model below.
ckpt = "/kaggle/input/best_model.pt/pytorch/default/1/best_model.pt"

# 2) Tokenizer + model, then load the checkpoint weights
MAX_LEN = 128   # maximum number of tokens kept per comment
BATCH = 64      # examples per inference batch
tokenizer = XLMRobertaTokenizer.from_pretrained("xlm-roberta-base")
# num_labels=1 gives a single-logit head; the sigmoid is applied at inference time.
model = XLMRobertaForSequenceClassification.from_pretrained("xlm-roberta-base", num_labels=1)

# NOTE(review): torch.load unpickles the file — only point `ckpt` at checkpoints
# from a trusted source (or consider weights_only=True if the file is a plain state dict).
state = torch.load(ckpt, map_location=device)
# Accept both a bare state dict and a wrapper dict that stores it under "state_dict".
if isinstance(state, dict) and "state_dict" in state:
    state = state["state_dict"]
# strict=True makes any missing or unexpected key an error, so the freshly
# initialised classifier head cannot silently survive the load.
model.load_state_dict(state, strict=True)
model.to(device)
model.eval()

# 3) Load test (uses 'content')
df_test = pd.read_csv("/kaggle/input/jigsaw-multilingual-toxic-comment-classification/test.csv")
# Fill missing comments BEFORE casting to str: astype(str) turns NaN into the
# literal text "nan", after which fillna("") has nothing left to replace and the
# model would score the word "nan" instead of an empty comment.
texts = df_test["content"].fillna("").astype(str).tolist()
# Tokenize the whole test set in one call: sequences longer than MAX_LEN are
# truncated and shorter ones are padded so every row has the same length.
enc = tokenizer(texts, truncation=True, padding=True, max_length=MAX_LEN)

class TestDS(Dataset):
    """Map-style dataset over already-tokenized inputs.

    `e` is a mapping from field name to a per-example sequence; it must contain
    an "input_ids" entry, which determines the dataset length. Indexing returns
    a dict with the same keys, each value converted to a tensor.
    """

    def __init__(self, e):
        self.e = e

    def __len__(self):
        # One example per entry of "input_ids".
        return len(self.e["input_ids"])

    def __getitem__(self, i):
        # Pick example `i` out of every field and wrap it as a tensor.
        item = {}
        for field, column in self.e.items():
            item[field] = torch.tensor(column[i])
        return item

# shuffle=False keeps batches in dataset order, so the collected scores line up
# row-for-row with df_test.
loader = DataLoader(
    TestDS(enc),
    batch_size=BATCH,
    shuffle=False,
    num_workers=2,
    pin_memory=True,
)

# 4) Inference with progress
n_batches = len(loader)
probs = []  # one numpy array of scores per batch
with torch.no_grad():
    for step, batch in enumerate(loader, start=1):
        ids = batch["input_ids"].to(device, non_blocking=True)
        mask = batch["attention_mask"].to(device, non_blocking=True)
        logits = model(input_ids=ids, attention_mask=mask).logits
        # Drop the trailing label axis, then squash the logit with a sigmoid.
        scores = torch.sigmoid(logits.squeeze(-1))
        probs.append(scores.cpu().numpy())
        # Report every 100 batches and once more on the final batch.
        if step % 100 == 0 or step == n_batches:
            print(f"{step}/{n_batches} ({100*step/n_batches:.1f}%)")

# Join the per-batch arrays into a single vector and pair it with the test ids.
probs = np.concatenate(probs)
sub = df_test[["id"]].assign(toxic=probs)

# align to sample order (just in case)
sample = pd.read_csv("/kaggle/input/jigsaw-multilingual-toxic-comment-classification/sample_submission.csv")
scores_by_id = sub.set_index("id")
# .loc with the sample ids reorders the rows and raises if an id is missing.
sub = scores_by_id.loc[sample["id"]].reset_index()

out = "/kaggle/working/submission.csv"
sub.to_csv(out, index=False)
print("Saved:", out, sub.shape)



2025-09-07 20:56:19.083101: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1757278579.462376      19 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1757278579.565107      19 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


Device: cuda


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.10M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/615 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.12G [00:00<?, ?B/s]

Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


100/998 (10.0%)
200/998 (20.0%)
300/998 (30.1%)
400/998 (40.1%)
500/998 (50.1%)
600/998 (60.1%)
700/998 (70.1%)
800/998 (80.2%)
900/998 (90.2%)
998/998 (100.0%)
Saved: /kaggle/working/submission.csv (63812, 2)
