In [7]:
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
from datasets import Dataset
import torch
import numpy as np
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
import pandas as pd


train_df = pd.read_csv('train.tsv', sep='\t')
test_df = pd.read_csv('test.tsv', sep='\t')

print("Train columns:", train_df.columns.tolist())
print("\nSample data:")
print(train_df.head())
print("\nUnique labels:", train_df[train_df.columns[-1]].unique())

label_mapping = {'NOCUOUS': 0, 'INNOCUOUS': 1}  


text_column = train_df.columns[0]  
label_column = train_df.columns[-1]  

train_df[text_column] = train_df[text_column].astype(str)
test_df[text_column] = test_df[text_column].astype(str)
train_df[label_column] = train_df[label_column].map(label_mapping)
test_df[label_column] = test_df[label_column].map(label_mapping)


train_dataset = Dataset.from_pandas(train_df)
val_dataset = Dataset.from_pandas(test_df)


tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

def preprocess_function(examples):
    tokenized = tokenizer(
        examples[text_column],
        padding='max_length',
        truncation=True,
        max_length=128,
        return_tensors=None
    )
    
    tokenized['labels'] = examples[label_column]
    
    return tokenized

train_tokenized = train_dataset.map(preprocess_function, batched=True, remove_columns=train_dataset.column_names)
val_tokenized = val_dataset.map(preprocess_function, batched=True, remove_columns=val_dataset.column_names)


train_tokenized.set_format(type="torch")
val_tokenized.set_format(type="torch")


model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=2)

# Define metrics
def compute_metrics(pred):
    logits, labels = pred
    predictions = np.argmax(logits, axis=-1)
    precision, recall, f1, _ = precision_recall_fscore_support(labels, predictions, average="binary")
    acc = accuracy_score(labels, predictions)
    return {
        "accuracy": acc,
        "f1": f1,
        "precision": precision,
        "recall": recall
    }

training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=10,
    weight_decay=0.01,
    logging_dir='./logs',
    load_best_model_at_end=True,
    metric_for_best_model='f1',
    report_to="none",
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_tokenized,
    eval_dataset=val_tokenized,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)


trainer.train()

eval_results = trainer.evaluate()
print("\nEvaluation Results:")
print(eval_results)


model_save_path = "./ambiguity_detection_model"
model.save_pretrained(model_save_path)
tokenizer.save_pretrained(model_save_path)
print(f"\nModel saved to {model_save_path}")

import json
with open(f"{model_save_path}/label_mapping.json", 'w') as f:
    json.dump(label_mapping, f)

Train columns: ['Unnamed: 0', 'ID', 'Sentence', 'Detected as']

Sample data:
   Unnamed: 0            ID  \
0           1    library#01   
1           2  library#02-1   
2           3  library#02-2   
3           4    library#03   
4           5    library#04   

                                            Sentence Detected as  
0  All material that is stored in the repository ...     NOCUOUS  
1  The Library may want to accept important digit...     NOCUOUS  
2  The Library may want to accept important digit...   INNOCUOUS  
3  Once material has arrived, <referential>it</re...   INNOCUOUS  
4  Allows resources to be reviewed before a decis...   INNOCUOUS  

Unique labels: ['NOCUOUS' 'INNOCUOUS']




Map:   0%|          | 0/139 [00:00<?, ? examples/s]

Map:   0%|          | 0/73 [00:00<?, ? examples/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  0%|          | 0/90 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

{'eval_loss': 0.6931583285331726, 'eval_accuracy': 0.4657534246575342, 'eval_f1': 0.2909090909090909, 'eval_precision': 0.34782608695652173, 'eval_recall': 0.25, 'eval_runtime': 2.3765, 'eval_samples_per_second': 30.717, 'eval_steps_per_second': 2.104, 'epoch': 1.0}


  0%|          | 0/5 [00:00<?, ?it/s]

{'eval_loss': 0.7032955884933472, 'eval_accuracy': 0.4794520547945205, 'eval_f1': 0.5869565217391305, 'eval_precision': 0.45, 'eval_recall': 0.84375, 'eval_runtime': 0.8203, 'eval_samples_per_second': 88.99, 'eval_steps_per_second': 6.095, 'epoch': 2.0}


  0%|          | 0/5 [00:00<?, ?it/s]

{'eval_loss': 0.7131272554397583, 'eval_accuracy': 0.4657534246575342, 'eval_f1': 0.5617977528089887, 'eval_precision': 0.43859649122807015, 'eval_recall': 0.78125, 'eval_runtime': 0.8466, 'eval_samples_per_second': 86.228, 'eval_steps_per_second': 5.906, 'epoch': 3.0}


  0%|          | 0/5 [00:00<?, ?it/s]

{'eval_loss': 0.7214285731315613, 'eval_accuracy': 0.4931506849315068, 'eval_f1': 0.43076923076923074, 'eval_precision': 0.42424242424242425, 'eval_recall': 0.4375, 'eval_runtime': 0.7289, 'eval_samples_per_second': 100.156, 'eval_steps_per_second': 6.86, 'epoch': 4.0}


  0%|          | 0/5 [00:00<?, ?it/s]

{'eval_loss': 0.8145079612731934, 'eval_accuracy': 0.4520547945205479, 'eval_f1': 0.6, 'eval_precision': 0.4411764705882353, 'eval_recall': 0.9375, 'eval_runtime': 0.7224, 'eval_samples_per_second': 101.056, 'eval_steps_per_second': 6.922, 'epoch': 5.0}


  0%|          | 0/5 [00:00<?, ?it/s]

{'eval_loss': 0.7923984527587891, 'eval_accuracy': 0.4520547945205479, 'eval_f1': 0.5744680851063829, 'eval_precision': 0.43548387096774194, 'eval_recall': 0.84375, 'eval_runtime': 0.824, 'eval_samples_per_second': 88.592, 'eval_steps_per_second': 6.068, 'epoch': 6.0}


  0%|          | 0/5 [00:00<?, ?it/s]

{'eval_loss': 0.786288857460022, 'eval_accuracy': 0.4520547945205479, 'eval_f1': 0.5555555555555556, 'eval_precision': 0.43103448275862066, 'eval_recall': 0.78125, 'eval_runtime': 0.9181, 'eval_samples_per_second': 79.511, 'eval_steps_per_second': 5.446, 'epoch': 7.0}


  0%|          | 0/5 [00:00<?, ?it/s]

{'eval_loss': 0.8313037753105164, 'eval_accuracy': 0.4520547945205479, 'eval_f1': 0.5652173913043479, 'eval_precision': 0.43333333333333335, 'eval_recall': 0.8125, 'eval_runtime': 1.0404, 'eval_samples_per_second': 70.165, 'eval_steps_per_second': 4.806, 'epoch': 8.0}


  0%|          | 0/5 [00:00<?, ?it/s]

{'eval_loss': 0.8764577507972717, 'eval_accuracy': 0.4520547945205479, 'eval_f1': 0.5652173913043479, 'eval_precision': 0.43333333333333335, 'eval_recall': 0.8125, 'eval_runtime': 0.7711, 'eval_samples_per_second': 94.669, 'eval_steps_per_second': 6.484, 'epoch': 9.0}


  0%|          | 0/5 [00:00<?, ?it/s]

{'eval_loss': 0.8708861470222473, 'eval_accuracy': 0.4794520547945205, 'eval_f1': 0.5777777777777777, 'eval_precision': 0.4482758620689655, 'eval_recall': 0.8125, 'eval_runtime': 5.5577, 'eval_samples_per_second': 13.135, 'eval_steps_per_second': 0.9, 'epoch': 10.0}
{'train_runtime': 172.8266, 'train_samples_per_second': 8.043, 'train_steps_per_second': 0.521, 'train_loss': 0.653740734524197, 'epoch': 10.0}


  0%|          | 0/5 [00:00<?, ?it/s]


Evaluation Results:
{'eval_loss': 0.8145079612731934, 'eval_accuracy': 0.4520547945205479, 'eval_f1': 0.6, 'eval_precision': 0.4411764705882353, 'eval_recall': 0.9375, 'eval_runtime': 1.0806, 'eval_samples_per_second': 67.555, 'eval_steps_per_second': 4.627, 'epoch': 10.0}

Model saved to ./ambiguity_detection_model


: 

In [1]:
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
from datasets import Dataset
import torch
import numpy as np
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
import pandas as pd

# Load data
train_df = pd.read_csv('train.tsv', sep='\t')
test_df = pd.read_csv('test.tsv', sep='\t')

# Preprocessing
label_mapping = {'NOCUOUS': 0, 'INNOCUOUS': 1}
text_column = train_df.columns[0]
label_column = train_df.columns[-1]

train_df[text_column] = train_df[text_column].astype(str)
test_df[text_column] = test_df[text_column].astype(str)
train_df[label_column] = train_df[label_column].map(label_mapping)
test_df[label_column] = test_df[label_column].map(label_mapping)

# Convert to Dataset
train_dataset = Dataset.from_pandas(train_df)
val_dataset = Dataset.from_pandas(test_df)

# Tokenization
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

def preprocess_function(examples):
    tokenized = tokenizer(
        examples[text_column],
        padding='max_length',
        truncation=True,
        max_length=128,
        return_tensors=None
    )
    tokenized['labels'] = examples[label_column]
    return tokenized

train_tokenized = train_dataset.map(preprocess_function, batched=True, remove_columns=train_dataset.column_names)
val_tokenized = val_dataset.map(preprocess_function, batched=True, remove_columns=val_dataset.column_names)

train_tokenized.set_format(type="torch")
val_tokenized.set_format(type="torch")

# Model
model = BertForSequenceClassification.from_pretrained(
    "bert-base-uncased",
    num_labels=2,
    problem_type="single_label_classification"
)

# Metrics
def compute_metrics(pred):
    logits, labels = pred
    predictions = np.argmax(logits, axis=-1)
    precision, recall, f1, _ = precision_recall_fscore_support(labels, predictions, average="binary")
    acc = accuracy_score(labels, predictions)
    return {
        "accuracy": acc,
        "f1": f1,
        "precision": precision,
        "recall": recall
    }

# Training Arguments
training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=10,
    weight_decay=0.01,
    warmup_steps=500,
    logging_dir='./logs',
    load_best_model_at_end=True,
    metric_for_best_model='f1',
    report_to="tensorboard",
    fp16=True,
)

# Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_tokenized,
    eval_dataset=val_tokenized,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

# Train
trainer.train()

# Evaluate
eval_results = trainer.evaluate()
print("\nEvaluation Results:")
print(eval_results)

# Save Model
model_save_path = "./ambiguity_detection_model"
model.save_pretrained(model_save_path)
tokenizer.save_pretrained(model_save_path)
print(f"\nModel saved to {model_save_path}")

# Save Label Mapping
import json
with open(f"{model_save_path}/label_mapping.json", 'w') as f:
    json.dump(label_mapping, f)



Map:   0%|          | 0/139 [00:00<?, ? examples/s]

Map:   0%|          | 0/73 [00:00<?, ? examples/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


ValueError: FP16 Mixed precision training with AMP or APEX (`--fp16`) and FP16 half precision evaluation (`--fp16_full_eval`) can only be used on CUDA devices.

: 