# 1. Section Marker

In [2]:
import tempfile
import tarfile
import shutil
import os

In [3]:
BASE_DATA_DIR = "../data"

datasets = {
    "20_news_dataset.tar.gz": "20_news",
    "multi_domain_sentiment_dataset.tar.gz": "multi_domain_sentiment"
}

In [4]:
os.makedirs(BASE_DATA_DIR, exist_ok = True)

In [5]:
for gz_file, folder_name in datasets.items():
	gz_path = os.path.join(BASE_DATA_DIR, gz_file)
	target_dir = os.path.join(BASE_DATA_DIR, folder_name)

	if os.path.exists(target_dir) and len(os.listdir(target_dir)) > 0:
		print(f"Folder {folder_name} already extracted")
		continue

	if os.path.exists(gz_path):
		print(f"Extracting {gz_file} into '{folder_name}'...")
		os.makedirs(target_dir, exist_ok=True)

		with tempfile.TemporaryDirectory() as tmp_dir:
			with tarfile.open(gz_path, "r:gz") as tar:
				tar.extractall(path = tmp_dir)

			for item in os.listdir(tmp_dir):
				src_path = os.path.join(tmp_dir, item)
				if os.path.isdir(src_path):
					for sub_item in os.listdir(src_path):
						shutil.move(os.path.join(src_path, sub_item), target_dir)
				else:
					shutil.move(src_path, target_dir)
	else:
		print(f"File not found: {gz_path}")


Folder 20_news already extracted
Folder multi_domain_sentiment already extracted


# 2. Section Marker

In [6]:
import pandas as pd
import os

In [7]:
NEWS_DIR = os.path.join(BASE_DATA_DIR, "20_news")
MULTIDOMAIN_DIR = os.path.join(BASE_DATA_DIR, "multi_domain_sentiment")

In [8]:
def load_20_news(base_dir):
	data = []

	for category in os.listdir(base_dir):
		category_path = os.path.join(base_dir, category)
		if not os.path.isdir(category_path):
			continue
		
		for filename in os.listdir(category_path):
			file_path = os.path.join(category_path, filename)
			try:
				with open(file_path, "rb") as f:
					raw = f.read()
				try:
					text = raw.decode("utf-8").strip()
				except UnicodeDecodeError:
					text = raw.decode("latin-1").strip()

				data.append({
					"label": category, 
					"document": text
				})

			except Exception as e:
				print(f"Error reading {file_path}: {e}")
	
	return pd.DataFrame(data)

In [9]:
def load_multidomain(base_dir):
	data = []

	for domain in os.listdir(base_dir):
		domain_path = os.path.join(base_dir, domain)
		if not os.path.isdir(domain_path):
			continue
	
		for filename in os.listdir(domain_path):
			file_path = os.path.join(domain_path, filename)
			try:
				with open(file_path, "rb") as f:
					raw = f.read()
				try:
					content = raw.decode("utf-8").strip()
				except UnicodeDecodeError:
					content = raw.decode("latin-1").strip()

				for line in content.splitlines():
					line = line.strip()
					if not line:
						continue

					if "#label#:" in line:
						text_part, label_part = line.split("#label#:")
						label = label_part.strip()
					else:
						text_part = line
						label = None

					tokens = [tok.split(":")[0] for tok in text_part.split()]
					text = " ".join(tokens)

					data.append({
						"document": text,
						"label": label,
					})

			except Exception as e:
				print(f"Error reading {file_path}: {e}")

	return pd.DataFrame(data)


In [10]:
NEWS_DATAFRAME = load_20_news(NEWS_DIR)
MULTIDOMAIN_DATAFRAME = load_multidomain(MULTIDOMAIN_DIR)

In [11]:
NEWS_DATAFRAME

Unnamed: 0,label,document
0,sci.crypt,From: tcmay@netcom.com (Timothy C. May)\nSubje...
1,sci.crypt,"From: ""Jon \\lnes"" <jon@ifi.uio.no>\nSubject: ..."
2,sci.crypt,From: hooper@ccs.QueensU.CA (Andy Hooper)\nSub...
3,sci.crypt,From: warlord@MIT.EDU (Derek Atkins)\nSubject:...
4,sci.crypt,From: pmetzger@snark.shearson.com (Perry E. Me...
...,...,...
18823,comp.windows.x,From: dev@hollywood.acsc.com ()\nSubject: Circ...
18824,comp.windows.x,From: jra@wti.com (Jim Atkinson)\nSubject: How...
18825,comp.windows.x,From: dealy@narya.gsfc.nasa.gov (Brian Dealy -...
18826,comp.windows.x,From: vinod@sommerfeld.WPI.EDU (Vinod K Nair)\...


In [12]:
MULTIDOMAIN_DATAFRAME

Unnamed: 0,document,label
0,i movie_could movies_i in_only minutes_and bor...,negative
1,your by_disney many_drug can't_even classic_ru...,negative
2,old complicated fun_to moves breaking we_tried...,negative
3,enjoy_what find_that add_some and_when add sum...,negative
4,holes movie_however shooting_fish not_sure lat...,negative
...,...,...
27672,akin go barely mornings_because annoyed i_like...,positive
27673,vibrant anyone well a_rather overheated relate...,positive
27674,very_hefty you'd put_it for_chicken cut when_y...,positive
27675,am_very flatware_set needless_to &amp;quot;non...,positive


# 3. Section marker

In [13]:
import re

In [14]:
def clean_20_news(text):
    # Remove common email headers
    text = re.sub(r'^(From|Subject|Lines|Organization|Reply-To|NNTP-Posting-Host|Keywords|Summary):.*$', '', text, flags = re.MULTILINE)

    # Remove email addresses and URLs
    text = re.sub(r'\S+@\S+', '', text)
    text = re.sub(r'http\S+|www\S+', '', text)

    # Remove lines of signatures or separators
    text = re.sub(r'--+\s*$', '', text, flags=re.MULTILINE)
    text = re.sub(r'_+', '', text)

    # Remove quoted lines (beginning with > or :)
    text = re.sub(r'(^>.*$|^:.*$)', '', text, flags=re.MULTILINE)

    # Collapse multiple newlines and spaces
    text = re.sub(r'\n{2,}', '\n', text)
    text = re.sub(r'\s+', ' ', text)

    return text.strip()


In [15]:
def clean_multidomain(text):
    # Replace underscores with spaces
    text = text.replace("_", " ")

    # Remove special tokens like <num>
    text = re.sub(r"<num>", "", text)

    # Remove multiple spaces

    return text.strip()

In [16]:
NEWS_DATAFRAME["document_clean"] = NEWS_DATAFRAME["document"].apply(clean_20_news)
MULTIDOMAIN_DATAFRAME["document_clean"] = MULTIDOMAIN_DATAFRAME["document"].apply(clean_multidomain)

In [17]:
NEWS_DATAFRAME

Unnamed: 0,label,document,document_clean
0,sci.crypt,From: tcmay@netcom.com (Timothy C. May)\nSubje...,David Sternlight wrote: ...cascades elided to ...
1,sci.crypt,"From: ""Jon \\lnes"" <jon@ifi.uio.no>\nSubject: ...",acceptance of the wiretap chip) In article (Da...
2,sci.crypt,From: hooper@ccs.QueensU.CA (Andy Hooper)\nSub...,Isn't Clipper a trademark of Fairchild Semicon...
3,sci.crypt,From: warlord@MIT.EDU (Derek Atkins)\nSubject:...,-----BEGIN PGP SIGNED MESSAGE I find this a ve...
4,sci.crypt,From: pmetzger@snark.shearson.com (Perry E. Me...,(Stephen R. Tate) writes: Even if they somehow...
...,...,...,...
18823,comp.windows.x,From: dev@hollywood.acsc.com ()\nSubject: Circ...,Will there be any support for round or circula...
18824,comp.windows.x,From: jra@wti.com (Jim Atkinson)\nSubject: How...,I am trying to find out if my application is r...
18825,comp.windows.x,From: dealy@narya.gsfc.nasa.gov (Brian Dealy -...,The Only directory I know of that lists commer...
18826,comp.windows.x,From: vinod@sommerfeld.WPI.EDU (Vinod K Nair)\...,"Hello, I am writing a program which forks of a..."


In [18]:
MULTIDOMAIN_DATAFRAME

Unnamed: 0,document,label,document_clean
0,i movie_could movies_i in_only minutes_and bor...,negative,i movie could movies i in only minutes and bor...
1,your by_disney many_drug can't_even classic_ru...,negative,your by disney many drug can't even classic ru...
2,old complicated fun_to moves breaking we_tried...,negative,old complicated fun to moves breaking we tried...
3,enjoy_what find_that add_some and_when add sum...,negative,enjoy what find that add some and when add sum...
4,holes movie_however shooting_fish not_sure lat...,negative,holes movie however shooting fish not sure lat...
...,...,...,...
27672,akin go barely mornings_because annoyed i_like...,positive,akin go barely mornings because annoyed i like...
27673,vibrant anyone well a_rather overheated relate...,positive,vibrant anyone well a rather overheated relate...
27674,very_hefty you'd put_it for_chicken cut when_y...,positive,very hefty you'd put it for chicken cut when y...
27675,am_very flatware_set needless_to &amp;quot;non...,positive,am very flatware set needless to &amp;quot;non...


# 4. Section Marker

In [19]:
from sklearn.preprocessing import LabelEncoder

In [20]:
label_encoder_news = LabelEncoder()
label_encoder_news.fit(NEWS_DATAFRAME["label"])

In [21]:
NEWS_DATAFRAME["label_id"] = label_encoder_news.transform(NEWS_DATAFRAME["label"])

In [22]:
label_encoder_multidomain = LabelEncoder()
label_encoder_multidomain.fit(MULTIDOMAIN_DATAFRAME["label"])

In [23]:
MULTIDOMAIN_DATAFRAME["label_id"] = label_encoder_multidomain.transform(MULTIDOMAIN_DATAFRAME["label"])

# 5. Section Marker

In [24]:
from sklearn.model_selection import train_test_split

In [25]:
RANDOM_SEED = 42

In [26]:
def split_dataframe(dataframe):
    
	X = dataframe["document_clean"]
	y = dataframe["label_id"]

	# 60% for training, 40% for second split
	X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size = 0.4, random_state = RANDOM_SEED, stratify = y)

	# 10% for validation, 30% for test
	X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size = 0.75, random_state = RANDOM_SEED, stratify = y_temp)

	train_df = X_train.to_frame("text")
	train_df["label"] = y_train.values

	val_df = X_val.to_frame("text")
	val_df["label"] = y_val.values

	test_df = X_test.to_frame("text")
	test_df["label"] = y_test.values

	print(f"Train size: {len(train_df)}")
	print(f"Val size:   {len(val_df)}")
	print(f"Test size:  {len(test_df)}")

	return train_df, val_df, test_df

In [27]:
train_df_news, val_df_news, test_df_news = split_dataframe(NEWS_DATAFRAME)

Train size: 11296
Val size:   1883
Test size:  5649


In [28]:
train_df_multidomain, val_df_multidomain, test_df_multidomain = split_dataframe(MULTIDOMAIN_DATAFRAME)

Train size: 16606
Val size:   2767
Test size:  8304


# 6. Section Marker

In [29]:
from transformers import AutoTokenizer

In [30]:
bert_base_uncased_tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased", do_lower_case = True)

In [31]:
distilbert_base_uncased_tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased", do_lower_case = True)

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

# 7. Section Marker

In [32]:
from torch.utils.data import Dataset, DataLoader
import torch

In [33]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [34]:
class CustomDataset(Dataset):
    
	def __init__(self, data, tokenizer):
		self.embeddings = tokenizer(data["text"].values.tolist(), padding = 'max_length', truncation = True, max_length = 256, return_tensors = 'pt')
		self.labels = torch.tensor(data['label'].values).long()


	def __getitem__(self, idx):
		return {
			"input_ids": self.embeddings["input_ids"][idx],
			"token_type_ids": self.embeddings["token_type_ids"][idx],
			"attention_mask": self.embeddings["attention_mask"][idx],
			"labels": self.labels[idx]
    }

	def __len__(self):
		return len(self.labels)

In [35]:
train_news_dataset = CustomDataset(train_df_news, bert_base_uncased_tokenizer)
val_news_dataset = CustomDataset(val_df_news, bert_base_uncased_tokenizer)
test_news_dataset = CustomDataset(test_df_news, bert_base_uncased_tokenizer)

train_news_loader = DataLoader(train_news_dataset, batch_size = 32, shuffle = True)
val_news_loader = DataLoader(val_news_dataset, batch_size = 32, shuffle = True)
test_news_loader = DataLoader(test_news_dataset, batch_size = 32, shuffle = True)

In [36]:
train_multidomain_dataset = CustomDataset(train_df_multidomain, bert_base_uncased_tokenizer)
val_multidomain_dataset = CustomDataset(val_df_multidomain, bert_base_uncased_tokenizer)
test_multidomain_dataset = CustomDataset(test_df_multidomain, bert_base_uncased_tokenizer)

train_multidomain_loader = DataLoader(train_multidomain_dataset, batch_size = 32, shuffle = True)
val_multidomain_loader = DataLoader(val_multidomain_dataset, batch_size = 32, shuffle = True)
test_multidomain_loader = DataLoader(test_multidomain_dataset, batch_size = 32, shuffle = True)

# 9. Section Marker

In [37]:
from sklearn.metrics import classification_report, accuracy_score, f1_score, precision_score, recall_score
from transformers import AutoModelForSequenceClassification
from tqdm import tqdm

In [38]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [39]:
def get_accuracy(y, output, size):
  y_pred = output.argmax(dim = -1).reshape(-1)
  return (y.reshape(-1) == y_pred).sum().item() / size

In [40]:
def train(dataloader, model, loss_function, optimizer):
    
	loss = 0
	accuracy = 0
	batch_num = 0

	model.train()

	for batch in tqdm(dataloader):
		batch_num += 1
		batch = {key: v.to(device) for key, v in batch.items()}
		output = model(**batch)
		optimizer.zero_grad()
		batch_loss = loss_function(output.logits, batch["labels"])
		batch_loss.backward()
		optimizer.step()
		loss += batch_loss.item()

		accuracy += get_accuracy(batch["labels"], output.logits.detach(), len(dataloader.dataset))

	loss = loss / batch_num
	print(f"Loss: {loss}, Accuracy: {accuracy}")

	return loss, accuracy


In [41]:
def evaluate(dataloader, model, loss_function):

  loss = 0
  accuracy = 0
  batch_num = 0

  model.eval()
  
  with torch.no_grad():
    for batch in tqdm(dataloader):
      batch_num += 1
      batch = {key: v.to(device) for key, v in batch.items()}
      output = model(**batch)
      batch_loss = loss_function(output.logits, batch["labels"])
      loss += batch_loss.item()
      accuracy += get_accuracy(batch["labels"], output.logits.detach(), len(dataloader.dataset))

  loss = loss / batch_num

  print(f"Loss: {loss}, Accuracy: {accuracy}")

  return loss, accuracy

In [42]:
def evaluate_model(model, dataloader, device, print_name, checkpoint_path = None, num_labels = None, model_name = None):

	if checkpoint_path is not None:
		if model_name and num_labels:
			model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels = num_labels)

	state_dict = torch.load(checkpoint_path, map_location=device)
	if any(k.startswith("_orig_mod.") for k in state_dict.keys()):
		new_state_dict = {k.replace("_orig_mod.", ""): v for k, v in state_dict.items()}
		state_dict = new_state_dict

	model.load_state_dict(state_dict)
	model.to(device)
	model.eval()

	all_preds, all_labels = [], []
	with torch.no_grad():
		for batch in tqdm(dataloader, desc = "Evaluating model"):
			batch = {k: v.to(device) for k, v in batch.items()}
			outputs = model(**batch)
			preds = torch.argmax(outputs.logits, dim = -1)
			all_preds.extend(preds.cpu().numpy())
			all_labels.extend(batch["labels"].cpu().numpy())

	acc = accuracy_score(all_labels, all_preds)
	precision_micro = precision_score(all_labels, all_preds, average = 'micro')
	recall_micro = recall_score(all_labels, all_preds, average = 'micro')
	f1_micro = f1_score(all_labels, all_preds, average = 'micro')

	precision_macro = precision_score(all_labels, all_preds, average = 'macro')
	recall_macro = recall_score(all_labels, all_preds, average = 'macro')
	f1_macro = f1_score(all_labels, all_preds, average = 'macro')

	report = classification_report(all_labels, all_preds, digits = 4)

	print(f"\nFinal results {print_name} in test")
	print(f"Accuracy: {acc:.4f}")
	print(f"Precision (micro): {precision_micro:.4f}")
	print(f"Recall (micro): {recall_micro:.4f}")
	print(f"F1 (micro): {f1_micro:.4f}")
	print(f"Precision (macro): {precision_macro:.4f}")
	print(f"Recall (macro): {recall_macro:.4f}")
	print(f"F1 (macro): {f1_macro:.4f}")
	print("\nClassification Report:")
	print(report)

# 10. Section Marker

In [43]:
from torch.optim import Adam
import torch.nn as nn
import torch

In [44]:
model_20_news = AutoModelForSequenceClassification.from_pretrained("bert-base-uncased", num_labels = 20).to(device)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [45]:
model_20_news

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e

In [46]:
model_20_news = torch.compile(model_20_news.to(device), backend = "eager")
loss_function = nn.CrossEntropyLoss()
optimizer = Adam(model_20_news.parameters(), lr = 1e-5)
model_20_news = model_20_news.to(device)

In [47]:
num_epochs = 10
best_val_acc = 0.0

for epoch in range(num_epochs):
    print(f"\n  Epoch {epoch+1}/{num_epochs}")

    train_loss, train_acc = train(train_news_loader, model_20_news, loss_function, optimizer)
    val_loss, val_acc = evaluate(val_news_loader, model_20_news, loss_function)

    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model_20_news.state_dict(), "../models/best_bert_base_uncased_model_news.pt")
        print("Model saved (best acc in validation so far)")



  Epoch 1/10


100%|██████████| 353/353 [01:58<00:00,  2.97it/s]


Loss: 2.0770344514684704, Accuracy: 0.43794263456090565


 98%|█████████▊| 58/59 [00:08<00:00,  8.26it/s]W1106 16:47:41.162000 43781 torch/fx/experimental/symbolic_shapes.py:6833] [0/3] _maybe_guard_rel() was called on non-relation expression Eq(s18, s43) | Eq(s43, 1)
100%|██████████| 59/59 [00:11<00:00,  5.14it/s]


Loss: 1.3366818771523945, Accuracy: 0.6287838555496548
Model saved (best acc in validation so far)

  Epoch 2/10


100%|██████████| 353/353 [01:56<00:00,  3.04it/s]


Loss: 1.1014800971695788, Accuracy: 0.7030807365439103


100%|██████████| 59/59 [00:07<00:00,  8.34it/s]


Loss: 0.9763469201023296, Accuracy: 0.7254381306425915
Model saved (best acc in validation so far)

  Epoch 3/10


100%|██████████| 353/353 [01:54<00:00,  3.09it/s]


Loss: 0.7687784847051496, Accuracy: 0.7861189801699743


100%|██████████| 59/59 [00:07<00:00,  8.40it/s]


Loss: 0.8455427491058738, Accuracy: 0.7514604354753052
Model saved (best acc in validation so far)

  Epoch 4/10


100%|██████████| 353/353 [01:55<00:00,  3.04it/s]


Loss: 0.5588833523792855, Accuracy: 0.8436614730878221


100%|██████████| 59/59 [00:06<00:00,  8.45it/s]


Loss: 0.8013640356771017, Accuracy: 0.7663303239511418
Model saved (best acc in validation so far)

  Epoch 5/10


100%|██████████| 353/353 [01:55<00:00,  3.05it/s]


Loss: 0.40937861555358845, Accuracy: 0.8884560906515625


100%|██████████| 59/59 [00:07<00:00,  8.43it/s]


Loss: 0.7755623313329988, Accuracy: 0.7796070100902814
Model saved (best acc in validation so far)

  Epoch 6/10


100%|██████████| 353/353 [01:54<00:00,  3.08it/s]


Loss: 0.3010227991526593, Accuracy: 0.9180240793201182


100%|██████████| 59/59 [00:06<00:00,  8.45it/s]


Loss: 0.7677441320176852, Accuracy: 0.7865108868826343
Model saved (best acc in validation so far)

  Epoch 7/10


100%|██████████| 353/353 [01:56<00:00,  3.03it/s]


Loss: 0.2058641247704583, Accuracy: 0.9501593484419322


100%|██████████| 59/59 [00:06<00:00,  8.51it/s]


Loss: 0.7743554943698948, Accuracy: 0.7960701009028143
Model saved (best acc in validation so far)

  Epoch 8/10


100%|██████████| 353/353 [01:54<00:00,  3.08it/s]


Loss: 0.1418430205044091, Accuracy: 0.9676876770538309


100%|██████████| 59/59 [00:06<00:00,  8.52it/s]


Loss: 0.8177365963741884, Accuracy: 0.7950079660116837

  Epoch 9/10


100%|██████████| 353/353 [01:53<00:00,  3.10it/s]


Loss: 0.10023910673097747, Accuracy: 0.9769830028328681


100%|██████████| 59/59 [00:06<00:00,  8.49it/s]


Loss: 0.8397617668418561, Accuracy: 0.8003186404673391
Model saved (best acc in validation so far)

  Epoch 10/10


100%|██████████| 353/353 [01:54<00:00,  3.08it/s]


Loss: 0.07533565382064233, Accuracy: 0.9838881019830101


100%|██████████| 59/59 [00:06<00:00,  8.58it/s]


Loss: 0.8376897916955462, Accuracy: 0.8003186404673397
Model saved (best acc in validation so far)


In [48]:
evaluate_model(model = None, 
               dataloader = test_news_loader, 
               device = device, 
               checkpoint_path = "../models/best_bert_base_uncased_model_news.pt", 
               num_labels = 20, 
               model_name = "bert-base-uncased", 
               print_name = "BERT BASE UNCASED (20 NEWS)")

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Evaluating model: 100%|██████████| 177/177 [00:21<00:00,  8.29it/s]



Final results BERT BASE UNCASED (20 NEWS) in test
Accuracy: 0.7954
Precision (micro): 0.7954
Recall (micro): 0.7954
F1 (micro): 0.7954
Precision (macro): 0.7918
Recall (macro): 0.7890
F1 (macro): 0.7891

Classification Report:
              precision    recall  f1-score   support

           0     0.6463    0.7917    0.7116       240
           1     0.7082    0.7397    0.7236       292
           2     0.6564    0.7230    0.6881       296
           3     0.6102    0.6475    0.6283       295
           4     0.8140    0.7292    0.7692       288
           5     0.7925    0.8571    0.8235       294
           6     0.8491    0.8288    0.8388       292
           7     0.8526    0.8182    0.8351       297
           8     0.8920    0.8591    0.8752       298
           9     0.9189    0.9097    0.9143       299
          10     0.9298    0.9267    0.9282       300
          11     0.8493    0.8350    0.8421       297
          12     0.7882    0.6837    0.7322       294
          13   

# 11. Section Marker

In [49]:
from torch.optim import Adam
import torch.nn as nn
import torch

In [50]:
model_multidomain = AutoModelForSequenceClassification.from_pretrained("bert-base-uncased", num_labels = 2).to(device)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [51]:
model_multidomain

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e

In [52]:
model_multidomain = torch.compile(model_multidomain.to(device), backend = "eager")
loss_function = nn.CrossEntropyLoss()
optimizer = Adam(model_multidomain.parameters(), lr = 1e-5)
model_multidomain = model_multidomain.to(device)

In [53]:
num_epochs = 10
best_val_acc = 0.0

for epoch in range(num_epochs):
    print(f"\n  Epoch {epoch+1}/{num_epochs}")

    train_loss, train_acc = train(train_multidomain_loader, model_multidomain, loss_function, optimizer)
    val_loss, val_acc = evaluate(val_multidomain_loader, model_multidomain, loss_function)

    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model_multidomain.state_dict(), "../models/best_bert_base_uncased_model_multidomain.pt")
        print("Model saved (best acc in validation so far)")



  Epoch 1/10


  0%|          | 0/519 [00:00<?, ?it/s]W1106 17:06:27.353000 43781 torch/fx/experimental/symbolic_shapes.py:6833] [0/4] _maybe_guard_rel() was called on non-relation expression Eq(s18, s43) | Eq(s43, 1)
  0%|          | 1/519 [00:01<12:16,  1.42s/it]W1106 17:06:28.771000 43781 torch/fx/experimental/symbolic_shapes.py:6833] [0/5] _maybe_guard_rel() was called on non-relation expression Eq(s18, s43) | Eq(s43, 1)
100%|██████████| 519/519 [02:52<00:00,  3.00it/s]


Loss: 0.3806818248219573, Accuracy: 0.8281946284475475


  0%|          | 0/87 [00:00<?, ?it/s]W1106 17:09:20.072000 43781 torch/fx/experimental/symbolic_shapes.py:6833] [0/6] _maybe_guard_rel() was called on non-relation expression Eq(s18, s43) | Eq(s43, 1)
100%|██████████| 87/87 [00:11<00:00,  7.56it/s]


Loss: 0.3115231188378115, Accuracy: 0.8717022045536688
Model saved (best acc in validation so far)

  Epoch 2/10


100%|██████████| 519/519 [02:49<00:00,  3.06it/s]


Loss: 0.24631968495904366, Accuracy: 0.9026857762254588


100%|██████████| 87/87 [00:10<00:00,  8.44it/s]


Loss: 0.2920445463266866, Accuracy: 0.8854354897000369
Model saved (best acc in validation so far)

  Epoch 3/10


100%|██████████| 519/519 [02:48<00:00,  3.08it/s]


Loss: 0.1704607131085242, Accuracy: 0.9398410213175936


100%|██████████| 87/87 [00:10<00:00,  8.50it/s]


Loss: 0.3604697398413187, Accuracy: 0.8724250090350564

  Epoch 4/10


100%|██████████| 519/519 [02:49<00:00,  3.07it/s]


Loss: 0.10174999120313251, Accuracy: 0.9684451403107311


100%|██████████| 87/87 [00:10<00:00,  8.37it/s]


Loss: 0.3680543551842372, Accuracy: 0.8778460426454648

  Epoch 5/10


100%|██████████| 519/519 [02:50<00:00,  3.05it/s]


Loss: 0.06967859113093659, Accuracy: 0.978260869565219


100%|██████████| 87/87 [00:10<00:00,  8.55it/s]


Loss: 0.42000195938537177, Accuracy: 0.8796530538489344

  Epoch 6/10


100%|██████████| 519/519 [02:47<00:00,  3.10it/s]


Loss: 0.04901768251767235, Accuracy: 0.9860291460917772


100%|██████████| 87/87 [00:10<00:00,  8.56it/s]


Loss: 0.38876925971915666, Accuracy: 0.8847126852186487

  Epoch 7/10


100%|██████████| 519/519 [02:53<00:00,  2.98it/s]


Loss: 0.03806008489444486, Accuracy: 0.9886185716006294


100%|██████████| 87/87 [00:10<00:00,  8.30it/s]


Loss: 0.5105321459129624, Accuracy: 0.8800144560896282

  Epoch 8/10


100%|██████████| 519/519 [02:50<00:00,  3.04it/s]


Loss: 0.029440604996605488, Accuracy: 0.9910875587137218


100%|██████████| 87/87 [00:10<00:00,  8.64it/s]


Loss: 0.49972547107943516, Accuracy: 0.8890495121069757
Model saved (best acc in validation so far)

  Epoch 9/10


100%|██████████| 519/519 [02:47<00:00,  3.10it/s]


Loss: 0.02252117769930849, Accuracy: 0.9928339154522507


100%|██████████| 87/87 [00:10<00:00,  8.51it/s]


Loss: 0.5788238142113918, Accuracy: 0.8904951210697514
Model saved (best acc in validation so far)

  Epoch 10/10


100%|██████████| 519/519 [02:48<00:00,  3.07it/s]


Loss: 0.020185201771795995, Accuracy: 0.9940382994098566


100%|██████████| 87/87 [00:10<00:00,  8.50it/s]

Loss: 0.5653132675536747, Accuracy: 0.878207444886159





In [54]:
evaluate_model(model = None, 
               dataloader = test_multidomain_loader, 
               device = device, 
               checkpoint_path = "../models/best_bert_base_uncased_model_multidomain.pt", 
               num_labels = 2, 
               model_name = "bert-base-uncased", 
               print_name = "BERT BASE UNCASED (MULTIDOMAIN)")

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Evaluating model: 100%|██████████| 260/260 [00:31<00:00,  8.31it/s]



Final results BERT BASE UNCASED (MULTIDOMAIN) in test
Accuracy: 0.8899
Precision (micro): 0.8899
Recall (micro): 0.8899
F1 (micro): 0.8899
Precision (macro): 0.8901
Recall (macro): 0.8899
F1 (macro): 0.8899

Classification Report:
              precision    recall  f1-score   support

           0     0.8971    0.8802    0.8885      4139
           1     0.8831    0.8996    0.8913      4165

    accuracy                         0.8899      8304
   macro avg     0.8901    0.8899    0.8899      8304
weighted avg     0.8901    0.8899    0.8899      8304



# 12. Section Marker

In [63]:
class CustomDataset(Dataset):
    
	def __init__(self, data, tokenizer):
		self.embeddings = tokenizer(data["text"].values.tolist(), padding = 'max_length', truncation = True, max_length = 256, return_tensors = 'pt')
		self.labels = torch.tensor(data['label'].values).long()


	def __getitem__(self, idx):
		return {
			"input_ids": self.embeddings["input_ids"][idx],
			"attention_mask": self.embeddings["attention_mask"][idx],
			"labels": self.labels[idx]
    }

	def __len__(self):
		return len(self.labels)

In [64]:
train_news_dataset = CustomDataset(train_df_news, distilbert_base_uncased_tokenizer)
val_news_dataset = CustomDataset(val_df_news, distilbert_base_uncased_tokenizer)
test_news_dataset = CustomDataset(test_df_news, distilbert_base_uncased_tokenizer)

train_news_loader = DataLoader(train_news_dataset, batch_size = 32, shuffle = True)
val_news_loader = DataLoader(val_news_dataset, batch_size = 32, shuffle = True)
test_news_loader = DataLoader(test_news_dataset, batch_size = 32, shuffle = True)

In [65]:
train_multidomain_dataset = CustomDataset(train_df_multidomain, distilbert_base_uncased_tokenizer)
val_multidomain_dataset = CustomDataset(val_df_multidomain, distilbert_base_uncased_tokenizer)
test_multidomain_dataset = CustomDataset(test_df_multidomain, distilbert_base_uncased_tokenizer)

train_multidomain_loader = DataLoader(train_multidomain_dataset, batch_size = 32, shuffle = True)
val_multidomain_loader = DataLoader(val_multidomain_dataset, batch_size = 32, shuffle = True)
test_multidomain_loader = DataLoader(test_multidomain_dataset, batch_size = 32, shuffle = True)

# 13. Section Marker

In [66]:
from torch.optim import Adam
import torch.nn as nn
import torch

In [67]:
model_20_news = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased", num_labels = 20).to(device)

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [68]:
model_20_news

DistilBertForSequenceClassification(
  (distilbert): DistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (layer): ModuleList(
        (0-5): 6 x TransformerBlock(
          (attention): DistilBertSdpaAttention(
            (dropout): Dropout(p=0.1, inplace=False)
            (q_lin): Linear(in_features=768, out_features=768, bias=True)
            (k_lin): Linear(in_features=768, out_features=768, bias=True)
            (v_lin): Linear(in_features=768, out_features=768, bias=True)
            (out_lin): Linear(in_features=768, out_features=768, bias=True)
          )
          (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (ffn): FFN(
            (dropout): Dropout(p=0.1, inplace=False)


In [69]:
model_20_news = torch.compile(model_20_news.to(device), backend = "eager")
loss_function = nn.CrossEntropyLoss()
optimizer = Adam(model_20_news.parameters(), lr = 1e-5)
model_20_news = model_20_news.to(device)

In [None]:
num_epochs = 10
best_val_acc = 0.0

for epoch in range(num_epochs):
    print(f"\n  Epoch {epoch+1}/{num_epochs}")

    train_loss, train_acc = train(train_news_loader, model_20_news, loss_function, optimizer)
    val_loss, val_acc = evaluate(val_news_loader, model_20_news, loss_function)

    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model_20_news.state_dict(), "../models/best_distilbert_base_uncased_model_news.pt")
        print("Model saved (best acc in validation so far)")



  Epoch 1/10


100%|██████████| 353/353 [00:59<00:00,  5.91it/s]


Loss: 2.03898872869886, Accuracy: 0.4926522662889512


 97%|█████████▋| 57/59 [00:04<00:00, 16.32it/s]W1106 17:41:22.494000 43781 torch/fx/experimental/symbolic_shapes.py:6833] [1/3] _maybe_guard_rel() was called on non-relation expression Eq(s43, 1) | Eq(s72, s43)
100%|██████████| 59/59 [00:05<00:00, 11.46it/s]


Loss: 1.3172121846069724, Accuracy: 0.6675517790759425
Model saved (best acc in validation so far)

  Epoch 2/10


100%|██████████| 353/353 [00:59<00:00,  5.97it/s]


Loss: 1.083825946360742, Accuracy: 0.7091890934844203


100%|██████████| 59/59 [00:03<00:00, 16.29it/s]


Loss: 0.9875276452403957, Accuracy: 0.7185342538502391
Model saved (best acc in validation so far)

  Epoch 3/10


100%|██████████| 353/353 [00:58<00:00,  6.03it/s]


Loss: 0.7888290608570866, Accuracy: 0.7764695467422118


100%|██████████| 59/59 [00:03<00:00, 16.06it/s]


Loss: 0.855917084520146, Accuracy: 0.742963356346256
Model saved (best acc in validation so far)

  Epoch 4/10


100%|██████████| 353/353 [00:58<00:00,  6.05it/s]


Loss: 0.6039111776747041, Accuracy: 0.8272839943342811


100%|██████████| 59/59 [00:03<00:00, 16.06it/s]


Loss: 0.8294320455041982, Accuracy: 0.7551779075942647
Model saved (best acc in validation so far)

  Epoch 5/10


100%|██████████| 353/353 [00:58<00:00,  6.05it/s]


Loss: 0.4708487835551794, Accuracy: 0.8644652974504287


100%|██████████| 59/59 [00:03<00:00, 16.09it/s]


Loss: 0.7983829121468431, Accuracy: 0.7668613913967075
Model saved (best acc in validation so far)

  Epoch 6/10


100%|██████████| 353/353 [00:59<00:00,  5.96it/s]


Loss: 0.3622638784961052, Accuracy: 0.8965120396600611


100%|██████████| 59/59 [00:03<00:00, 16.22it/s]


Loss: 0.8009144209199033, Accuracy: 0.7700477960701011
Model saved (best acc in validation so far)

  Epoch 7/10


100%|██████████| 353/353 [00:58<00:00,  6.06it/s]


Loss: 0.2764991804450835, Accuracy: 0.9261685552407982


100%|██████████| 59/59 [00:03<00:00, 16.10it/s]


Loss: 0.7944561126878706, Accuracy: 0.7812002124269783
Model saved (best acc in validation so far)

  Epoch 8/10


100%|██████████| 353/353 [00:58<00:00,  6.05it/s]


Loss: 0.21046025770328539, Accuracy: 0.9468838526912239


100%|██████████| 59/59 [00:03<00:00, 16.10it/s]


Loss: 0.8297725324913606, Accuracy: 0.7796070100902814

  Epoch 9/10


100%|██████████| 353/353 [00:58<00:00,  6.05it/s]


Loss: 0.15334030888214834, Accuracy: 0.9627301699716776


100%|██████████| 59/59 [00:03<00:00, 16.11it/s]


Loss: 0.8536972585370985, Accuracy: 0.7838555496548061
Model saved (best acc in validation so far)

  Epoch 10/10


100%|██████████| 353/353 [00:58<00:00,  6.05it/s]


Loss: 0.11456713817549326, Accuracy: 0.9728222379603466


100%|██████████| 59/59 [00:03<00:00, 16.10it/s]

Loss: 0.8877009769617501, Accuracy: 0.7833244822092404





In [74]:
evaluate_model(model = None, 
               dataloader = test_news_loader, 
               device = device, 
               checkpoint_path = "../models/best_distilbert_base_uncased_model_news.pt", 
               num_labels = 20, 
               model_name = "distilbert-base-uncased", 
               print_name = "DISTILBERT BASE UNCASED (20 NEWS)")

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Evaluating model: 100%|██████████| 177/177 [00:10<00:00, 16.84it/s]


Final results DISTILBERT BASE UNCASED (20 NEWS) in test
Accuracy: 0.7826
Precision (micro): 0.7826
Recall (micro): 0.7826
F1 (micro): 0.7826
Precision (macro): 0.7775
Recall (macro): 0.7749
F1 (macro): 0.7748

Classification Report:
              precision    recall  f1-score   support

           0     0.6705    0.7208    0.6948       240
           1     0.6361    0.7363    0.6825       292
           2     0.7341    0.6622    0.6963       296
           3     0.6621    0.6508    0.6564       295
           4     0.8083    0.7465    0.7762       288
           5     0.8225    0.8197    0.8211       294
           6     0.8732    0.8253    0.8486       292
           7     0.8446    0.8418    0.8432       297
           8     0.9161    0.8423    0.8776       298
           9     0.9231    0.8829    0.9026       299
          10     0.9091    0.9333    0.9211       300
          11     0.8507    0.8249    0.8376       297
          12     0.7394    0.7143    0.7266       294
         




# 14. Section Marker

In [75]:
from torch.optim import Adam
import torch.nn as nn
import torch

In [76]:
model_multidomain = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased", num_labels = 2).to(device)

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [77]:
model_multidomain

DistilBertForSequenceClassification(
  (distilbert): DistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (layer): ModuleList(
        (0-5): 6 x TransformerBlock(
          (attention): DistilBertSdpaAttention(
            (dropout): Dropout(p=0.1, inplace=False)
            (q_lin): Linear(in_features=768, out_features=768, bias=True)
            (k_lin): Linear(in_features=768, out_features=768, bias=True)
            (v_lin): Linear(in_features=768, out_features=768, bias=True)
            (out_lin): Linear(in_features=768, out_features=768, bias=True)
          )
          (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (ffn): FFN(
            (dropout): Dropout(p=0.1, inplace=False)


In [78]:
model_multidomain = torch.compile(model_multidomain.to(device), backend = "eager")
loss_function = nn.CrossEntropyLoss()
optimizer = Adam(model_multidomain.parameters(), lr = 1e-5)
model_multidomain = model_multidomain.to(device)

In [79]:
num_epochs = 10
best_val_acc = 0.0

for epoch in range(num_epochs):
    print(f"\n  Epoch {epoch+1}/{num_epochs}")

    train_loss, train_acc = train(train_multidomain_loader, model_multidomain, loss_function, optimizer)
    val_loss, val_acc = evaluate(val_multidomain_loader, model_multidomain, loss_function)

    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model_multidomain.state_dict(), "../models/best_distilbert_base_uncased_model_multidomain.pt")
        print("Model saved (best acc in validation so far)")



  Epoch 1/10


  0%|          | 0/519 [00:00<?, ?it/s]W1106 17:54:47.289000 43781 torch/fx/experimental/symbolic_shapes.py:6833] [1/4] _maybe_guard_rel() was called on non-relation expression Eq(s43, 1) | Eq(s72, s43)
  0%|          | 1/519 [00:00<06:47,  1.27it/s]W1106 17:54:48.052000 43781 torch/fx/experimental/symbolic_shapes.py:6833] [1/5] _maybe_guard_rel() was called on non-relation expression Eq(s43, 1) | Eq(s72, s43)
100%|██████████| 519/519 [01:26<00:00,  6.02it/s]


Loss: 0.39189831275126835, Accuracy: 0.8199445983379481


  0%|          | 0/87 [00:00<?, ?it/s]W1106 17:56:13.467000 43781 torch/fx/experimental/symbolic_shapes.py:6833] [1/6] _maybe_guard_rel() was called on non-relation expression Eq(s43, 1) | Eq(s72, s43)
100%|██████████| 87/87 [00:05<00:00, 14.52it/s]


Loss: 0.32707299549004126, Accuracy: 0.8633899530177093
Model saved (best acc in validation so far)

  Epoch 2/10


100%|██████████| 519/519 [01:25<00:00,  6.06it/s]


Loss: 0.26890047246263665, Accuracy: 0.8908225942430412


100%|██████████| 87/87 [00:05<00:00, 16.12it/s]


Loss: 0.3290226775938752, Accuracy: 0.8698951933501993
Model saved (best acc in validation so far)

  Epoch 3/10


100%|██████████| 519/519 [01:25<00:00,  6.05it/s]


Loss: 0.2003024024081368, Accuracy: 0.9258099482114879


100%|██████████| 87/87 [00:05<00:00, 16.11it/s]


Loss: 0.32432578718182686, Accuracy: 0.8753162269606074
Model saved (best acc in validation so far)

  Epoch 4/10


100%|██████████| 519/519 [01:25<00:00,  6.05it/s]


Loss: 0.13921270024045757, Accuracy: 0.9530892448512579


100%|██████████| 87/87 [00:05<00:00, 16.14it/s]


Loss: 0.35553054460163774, Accuracy: 0.881821467293098
Model saved (best acc in validation so far)

  Epoch 5/10


100%|██████████| 519/519 [01:25<00:00,  6.05it/s]


Loss: 0.0935442727767936, Accuracy: 0.9706130314344203


100%|██████████| 87/87 [00:05<00:00, 16.12it/s]


Loss: 0.3818455669472272, Accuracy: 0.8803758583303223

  Epoch 6/10


100%|██████████| 519/519 [01:25<00:00,  6.06it/s]


Loss: 0.06702427440529297, Accuracy: 0.9799470071058676


100%|██████████| 87/87 [00:05<00:00, 16.15it/s]


Loss: 0.4964274335438493, Accuracy: 0.8731478135164442

  Epoch 7/10


100%|██████████| 519/519 [01:25<00:00,  6.05it/s]


Loss: 0.04884310190876325, Accuracy: 0.9860893652896577


100%|██████████| 87/87 [00:05<00:00, 16.13it/s]


Loss: 0.5156639002520462, Accuracy: 0.8727864112757507

  Epoch 8/10


100%|██████████| 519/519 [01:25<00:00,  6.05it/s]


Loss: 0.03716460415359536, Accuracy: 0.9908466819221994


100%|██████████| 87/87 [00:05<00:00, 16.07it/s]


Loss: 0.5127793723910019, Accuracy: 0.8782074448861588

  Epoch 9/10


100%|██████████| 519/519 [01:25<00:00,  6.05it/s]


Loss: 0.03442163021404203, Accuracy: 0.9906058051306791


100%|██████████| 87/87 [00:05<00:00, 16.13it/s]


Loss: 0.5236168567776337, Accuracy: 0.8800144560896286

  Epoch 10/10


100%|██████████| 519/519 [01:25<00:00,  6.07it/s]


Loss: 0.02894291133253776, Accuracy: 0.992593038660729


100%|██████████| 87/87 [00:05<00:00, 16.15it/s]

Loss: 0.5765876715103614, Accuracy: 0.8680881821467298





In [80]:
evaluate_model(model = None, 
               dataloader = test_multidomain_loader, 
               device = device, 
               checkpoint_path = "../models/best_distilbert_base_uncased_model_multidomain.pt", 
               num_labels = 2, 
               model_name = "distilbert-base-uncased", 
               print_name = "DISTILBERT BASE UNCASED (MULTIDOMAIN)")

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Evaluating model: 100%|██████████| 260/260 [00:15<00:00, 16.71it/s]


Final results DISTILBERT BASE UNCASED (MULTIDOMAIN) in test
Accuracy: 0.8825
Precision (micro): 0.8825
Recall (micro): 0.8825
F1 (micro): 0.8825
Precision (macro): 0.8825
Recall (macro): 0.8824
F1 (macro): 0.8825

Classification Report:
              precision    recall  f1-score   support

           0     0.8862    0.8768    0.8815      4139
           1     0.8788    0.8881    0.8834      4165

    accuracy                         0.8825      8304
   macro avg     0.8825    0.8824    0.8825      8304
weighted avg     0.8825    0.8825    0.8825      8304




