In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from transformers import RobertaModel

  from .autonotebook import tqdm as notebook_tqdm


Feature extraction from input data using convolutional layers.

In [2]:
# Define the Convolutional Feature Extraction Model
class ConvFeatureExtractionModel(nn.Module):
    def __init__(self, input_channels, conv_layers):
        super(ConvFeatureExtractionModel, self).__init__()
        layers = []
        for (out_channels, kernel_size, stride) in conv_layers:
            layers.append(nn.Conv1d(input_channels, out_channels, kernel_size=kernel_size, stride=stride))
            layers.append(nn.ReLU())
            input_channels = out_channels
        self.conv_layers = nn.Sequential(*layers)
    
    def forward(self, x):
        return self.conv_layers(x)

In [7]:
class Cov_Model(nn.Module):
    def __init__(self, input_channels, conv_layers, num_tokens, transformer_config):
        super(Cov_Model, self).__init__()
        self.conv_feature_extractor = ConvFeatureExtractionModel(input_channels, conv_layers)
        
        # Transformer Encoder Configuration
        encoder_layers = nn.TransformerEncoderLayer(**transformer_config)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layers, num_layers=transformer_config['num_layers'])
        
        # Classifier
        self.classifier = nn.Linear(transformer_config['d_model'], num_tokens)
    
    def forward(self, x):
        # Transform input for CNN compatibility: (batch, channels, sequence)
        x = x.transpose(1, 2)
        conv_features = self.conv_feature_extractor(x)
        
        # Transpose to fit the transformer input requirements: (sequence, batch, model)
        conv_features = conv_features.permute(2, 0, 1)
        transformer_output = self.transformer_encoder(conv_features)
        
        # Classification
        logits = self.classifier(transformer_output)
        return logits

In [14]:
from transformers import RobertaModel
import torch.nn as nn

class Ro_Model(nn.Module):
    def __init__(self, num_classes, transformer_config, roberta_model_name='roberta-base'):
        super(Ro_Model, self).__init__()
        # Load pre-trained RoBERTa
        self.roberta = RobertaModel.from_pretrained(roberta_model_name)
        
        # Transformer Encoder Configuration
        # Creating encoder layer according to transformer_config, excluding 'num_layers'
        encoder_layer_config = {k: v for k, v in transformer_config.items() if k != 'num_layers'}
        encoder_layers = nn.TransformerEncoderLayer(**encoder_layer_config)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layers, num_layers=transformer_config['num_layers'])
        
        # Classifier
        self.classifier = nn.Linear(transformer_config['d_model'], num_classes)
    
    def forward(self, input_ids, attention_mask):
        # Get the last hidden states from RoBERTa
        roberta_output = self.roberta(input_ids=input_ids, attention_mask=attention_mask)
        
        cls_embeddings = roberta_output.last_hidden_state[:, 0, :]
        
        logits = self.classifier(cls_embeddings)
        return logits


In [11]:
import pandas as pd
from sklearn.model_selection import train_test_split
from transformers import RobertaTokenizer
from torch.utils.data import Dataset, DataLoader
import torch


df = pd.read_csv('data/New_data.csv')

# Initialize the tokenizer
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')

# Tokenize the text
inputs = tokenizer(df['text'].tolist(), padding=True, truncation=True, max_length=512, return_tensors="pt")

# Prepare labels
labels = torch.tensor(df['generated'].values)

# Split data into training and validation
train_inputs, val_inputs, train_labels, val_labels = train_test_split(inputs.input_ids, labels, test_size=0.1)
train_masks, val_masks, _, _ = train_test_split(inputs.attention_mask, labels, test_size=0.1)

# Create dataset
class TextDataset(Dataset):
    def __init__(self, input_ids, attention_masks, labels):
        self.input_ids = input_ids
        self.attention_masks = attention_masks
        self.labels = labels

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        return {
            'input_ids': self.input_ids[idx],
            'attention_mask': self.attention_masks[idx],
            'labels': self.labels[idx]
        }

# Create the DataLoader for our training and validation sets
train_data = TextDataset(train_inputs, train_masks, train_labels)
train_dataloader = DataLoader(train_data, batch_size=32, shuffle=True)

val_data = TextDataset(val_inputs, val_masks, val_labels)
val_dataloader = DataLoader(val_data, batch_size=32)


In [15]:
from transformers import AdamW
from torch.nn import CrossEntropyLoss

# Initialize model
num_classes = 2  
transformer_config = {
    'd_model': 768,  # Matches RoBERTa's hidden size
    'nhead': 12,
    'dim_feedforward': 2048,
    'dropout': 0.1,
    'num_layers': 6,
}

model = Ro_Model(num_classes, transformer_config)

# Define loss function
loss_fn = CrossEntropyLoss()

# Define optimizer
optimizer = AdamW(model.parameters(), lr=5e-5)


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [17]:
from tqdm.auto import tqdm

epochs = 4  # Number of training epochs

for epoch in range(epochs):
    model.train()
    total_loss = 0
    # Wrap train_dataloader with tqdm for a progress bar
    train_progress_bar = tqdm(train_dataloader, desc=f'Epoch {epoch+1}/{epochs} Training')
    for batch in train_progress_bar:
        optimizer.zero_grad()
        outputs = model(batch['input_ids'], batch['attention_mask'])
        labels = batch['labels'].to(outputs.device)  
        loss = loss_fn(outputs, labels)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        # Update the progress bar with the current loss
        train_progress_bar.set_postfix({'loss': loss.item()})
    
    print(f"Epoch {epoch+1}, Loss: {total_loss / len(train_dataloader)}")
    
    # Validation phase
    model.eval()
    total_eval_loss = 0
    # Wrap val_dataloader with tqdm for a progress bar
    val_progress_bar = tqdm(val_dataloader, desc=f'Epoch {epoch+1}/{epochs} Validation')
    with torch.no_grad():
        for batch in val_progress_bar:
            outputs = model(batch['input_ids'], batch['attention_mask'])
            labels = batch['labels'].to(outputs.device)  
            loss = loss_fn(outputs, labels)
            total_eval_loss += loss.item()
            # Update the progress bar with the current validation loss
            val_progress_bar.set_postfix({'val_loss': loss.item()})
    
    print(f"Validation Loss: {total_eval_loss / len(val_dataloader)}")


Epoch 1/4 Training: 100%|██████████| 78/78 [4:46:24<00:00, 220.31s/it, loss=0.00367]   


Epoch 1, Loss: 0.1039106838631993


Epoch 1/4 Validation: 100%|██████████| 9/9 [07:34<00:00, 50.48s/it, val_loss=0.000957]


Validation Loss: 0.003074840526096523


Epoch 2/4 Training:  13%|█▎        | 10/78 [38:09<4:19:31, 228.99s/it, loss=0.000983]


KeyboardInterrupt: 