In [None]:
from transformers import BertForSequenceClassification, AutoTokenizer
import torch
from torch.utils.data import DataLoader, Dataset

# Load model and tokenizer
# The tokenizer and the classifier are loaded from the same checkpoint
# identifier, so it is spelled out only once.
_CHECKPOINT = 'TimSchopf/nlp_taxonomy_classifier'
tokenizer = AutoTokenizer.from_pretrained(_CHECKPOINT)
model = BertForSequenceClassification.from_pretrained(_CHECKPOINT)

# Example Data
# Two sample records; each carries the 'title' and 'abstract' keys that
# preprocess_papers reads.
papers = [
    {
        'title': 'Attention Is All You Need',
        'abstract': 'The dominant sequence transduction models are based on complex recurrent or convolutional neural networks...',
    },
    {
        'title': 'SimCSE: Simple Contrastive Learning of Sentence Embeddings',
        'abstract': 'This paper presents SimCSE, a simple contrastive learning framework...',
    },
]

# Function to preprocess data
def preprocess_papers(papers):
    """Join each paper's title and abstract into a single string.

    The two parts are joined with the separator token of the module-level
    ``tokenizer``.

    Args:
        papers: Iterable of dict-like records. Every record must provide a
            'title' entry; an 'abstract' that is missing, None or otherwise
            falsy is replaced by the empty string.

    Returns:
        A list with one "title + separator + abstract" string per record, in
        input order.
    """
    texts = []
    for record in papers:
        # Title and separator are combined first, then the optional abstract.
        head = record['title'] + tokenizer.sep_token
        tail = record.get('abstract') or ''
        texts.append(head + tail)
    return texts

# Convert papers to dataset format
class PapersDataset(Dataset):
    """Map-style dataset that tokenizes one text per item, on access.

    Args:
        papers: Indexable, sized collection of inputs; each item is handed to
            the tokenizer unchanged.
        tokenizer: Callable invoked with the item plus ``truncation``,
            ``padding``, ``max_length`` and ``return_tensors='pt'``. It must
            return a mapping with 'input_ids' and 'attention_mask' tensors
            whose leading dimension has size 1.
        max_length: Length every item is truncated/padded to.
    """

    def __init__(self, papers, tokenizer, max_length=512):
        self.papers = papers
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        """Return the number of stored items."""
        return len(self.papers)

    def __getitem__(self, idx):
        """Tokenize item ``idx`` and return its 1-D tensors.

        Returns:
            dict with 'input_ids' and 'attention_mask', each with the leading
            size-1 dimension of the tokenizer output removed.
        """
        paper = self.papers[idx]
        encoding = self.tokenizer(
            paper,
            truncation=True,
            padding='max_length',
            max_length=self.max_length,
            return_tensors='pt'
        )
        # squeeze(0) drops only the leading dimension. A bare squeeze() would
        # also collapse the sequence dimension when max_length == 1, giving
        # 0-d tensors that no longer batch to (batch, seq_len).
        return {
            'input_ids': encoding['input_ids'].squeeze(0),
            'attention_mask': encoding['attention_mask'].squeeze(0)
        }

# Function to predict NLP concepts
def predict_nlp_concepts(model, tokenizer, papers, batch_size=8, device='cpu'):
    """Return per-label sigmoid scores for every paper.

    Args:
        model: Sequence-classification model; calling it must return an
            object with a ``logits`` attribute.
        tokenizer: Tokenizer used both to encode the texts and to supply the
            separator token placed between title and abstract.
        papers: Sequence of papers. Each item is either a ready-to-tokenize
            string, or a dict with a 'title' and an optional 'abstract'.
        batch_size: Number of papers per forward pass.
        device: Device the model and the batches are moved to.

    Returns:
        A list with one numpy array of sigmoid scores per paper, in input
        order.
    """
    # The tokenizer needs text, so dict records are turned into
    # "title + separator + abstract" first. Strings pass through unchanged.
    texts = [
        paper if isinstance(paper, str)
        else paper['title'] + tokenizer.sep_token + (paper.get('abstract') or '')
        for paper in papers
    ]

    # Prepare dataset and dataloader
    dataset = PapersDataset(texts, tokenizer)
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False)

    model.to(device)
    model.eval()

    predictions = []
    with torch.no_grad():
        for batch in dataloader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)

            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            logits = outputs.logits

            # Logits are raw scores, not probabilities. The sigmoid maps each
            # label's logit to an independent score in (0, 1).
            preds = torch.sigmoid(logits).cpu().numpy()
            predictions.extend(preds)

    return predictions

# Extended functionality or improvements
def extended_predict_nlp_concepts(model, tokenizer, papers, batch_size=8, device='cpu'):
    """Predict thresholded (boolean) label decisions for raw paper records.

    Args:
        model: Model whose call result exposes a ``logits`` attribute.
        tokenizer: Tokenizer handed to ``PapersDataset`` for encoding.
        papers: Paper records accepted by ``preprocess_papers`` (dicts with a
            'title' and an optional 'abstract').
        batch_size: Number of papers per forward pass.
        device: Device the model and the batches are moved to.

    Returns:
        A list with one boolean numpy array per paper; an entry is True where
        the sigmoid of the corresponding logit is greater than 0.5.
    """
    texts = preprocess_papers(papers)
    loader = DataLoader(
        PapersDataset(texts, tokenizer),
        batch_size=batch_size,
        shuffle=False,
    )

    model.to(device)
    model.eval()

    all_predictions = []
    with torch.no_grad():
        for batch in loader:
            outputs = model(
                input_ids=batch['input_ids'].to(device),
                attention_mask=batch['attention_mask'].to(device),
            )
            probabilities = torch.sigmoid(outputs.logits)
            # A label counts as predicted when its score is strictly above 0.5.
            decisions = (probabilities > 0.5).cpu().numpy()
            for row in decisions:
                all_predictions.append(row)

    return all_predictions

# Contribution Code
def evaluate_model(predictions, true_labels):
    """
    Evaluate predictions with accuracy, precision, recall, and F1 score.

    Every (paper, label) decision is treated as one sample: the per-paper rows
    are flattened into two parallel lists before the metrics are computed, and
    precision/recall/F1 use ``average='weighted'``.

    Args:
    - predictions (list of numpy arrays): Model predictions, one row per paper
    - true_labels (list of lists): True labels, one row per paper; row i must
      have the same length as predictions[i]

    Returns:
    - dict: Evaluation metrics under the keys 'accuracy', 'precision',
      'recall' and 'f1_score'

    Raises:
    - ValueError: If the number of rows, or the length of any row, differs
      between predictions and true_labels
    """
    predictions = list(predictions)
    true_labels = list(true_labels)
    if len(predictions) != len(true_labels):
        raise ValueError(
            f"predictions has {len(predictions)} rows but true_labels has "
            f"{len(true_labels)}"
        )

    # Flatten row by row. Checking each pair keeps labels aligned; comparing
    # only the flattened totals would let ragged rows shift silently.
    y_true = []
    y_pred = []
    for index, (pred_row, true_row) in enumerate(zip(predictions, true_labels)):
        pred_row = list(pred_row)
        true_row = list(true_row)
        if len(pred_row) != len(true_row):
            raise ValueError(
                f"row {index}: got {len(pred_row)} predictions for "
                f"{len(true_row)} true labels"
            )
        y_true.extend(true_row)
        y_pred.extend(pred_row)

    # Imported after validation so shape errors surface even when
    # scikit-learn is not installed.
    from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

    # zero_division=0 keeps the value scikit-learn would report for an
    # undefined metric (0.0) without emitting a warning.
    metrics = {
        'accuracy': accuracy_score(y_true, y_pred),
        'precision': precision_score(y_true, y_pred, average='weighted', zero_division=0),
        'recall': recall_score(y_true, y_pred, average='weighted', zero_division=0),
        'f1_score': f1_score(y_true, y_pred, average='weighted', zero_division=0)
    }

    return metrics

# Example usage
# Kept for inspection only: the concatenated texts that the model will see.
title_abs = preprocess_papers(papers)
# extended_predict_nlp_concepts preprocesses its input itself, so it must be
# given the raw paper dicts. Passing the already-joined strings would fail
# when it tries to read d['title'] from a str.
predictions = extended_predict_nlp_concepts(model, tokenizer, papers)
# Placeholder ground truth with the same shape as the predictions (one value
# per label and paper), so the metric code can run end to end.
true_labels = [[0] * len(row) for row in predictions]  # Replace with actual true labels
metrics = evaluate_model(predictions, true_labels)

print("Evaluation Metrics:")
print(f"Accuracy: {metrics['accuracy']:.2f}")
print(f"Precision: {metrics['precision']:.2f}")
print(f"Recall: {metrics['recall']:.2f}")
print(f"F1 Score: {metrics['f1_score']:.2f}")