# Generate Test Predictions for Submission

**Objective**: Generate predictions on the test set using the trained RoBERTa model to fix the submission error.

**Issue**: The previous submission failed with a 400 error, most likely because we submitted validation predictions instead of test predictions.

In [None]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from transformers import RobertaTokenizerFast, RobertaModel
from pathlib import Path
import json

# Report whether a CUDA device is visible and, if so, its name and memory
cuda_ready = torch.cuda.is_available()
print(f"CUDA available: {cuda_ready}")
if cuda_ready:
    gpu_total_gb = torch.cuda.get_device_properties(0).total_memory / 1024**3
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"GPU Memory: {gpu_total_gb:.2f} GB")

# Read the test split and show its shape, columns and first rows
test_df = pd.read_csv('/home/code/data/test.csv')
print(f"Test data shape: {test_df.shape}")
print(f"Test data columns: {test_df.columns.tolist()}")
test_df.head()

## Load Trained Model

In [None]:
# RoBERTa span-extraction network; attribute names must stay as they are so
# that the saved state_dict keys keep matching.
class TweetRoBERTaModel(nn.Module):
    """RoBERTa encoder followed by two per-token linear heads (start / end)."""

    def __init__(self, model_name='roberta-base'):
        """Build the encoder from ``model_name`` and attach both heads."""
        super().__init__()
        self.roberta = RobertaModel.from_pretrained(model_name)
        self.dropout = nn.Dropout(0.1)
        hidden_size = self.roberta.config.hidden_size
        # One scalar score per token for each span boundary
        self.start_classifier = nn.Linear(hidden_size, 1)
        self.end_classifier = nn.Linear(hidden_size, 1)

    def forward(self, input_ids, attention_mask):
        """Return ``(start_logits, end_logits)`` for the given batch.

        Each head emits one value per token; the trailing singleton
        dimension is squeezed away before returning.
        """
        encoded = self.roberta(input_ids=input_ids, attention_mask=attention_mask)
        token_states = self.dropout(encoded.last_hidden_state)

        start_logits = self.start_classifier(token_states).squeeze(-1)
        end_logits = self.end_classifier(token_states).squeeze(-1)
        return start_logits, end_logits

# Rebuild the network and restore the trained weights from disk
model_path = '/home/code/experiments/002_roberta_span/final_model.pt'
print(f"Loading model from: {model_path}")

model = TweetRoBERTaModel('roberta-base')
# Deserialize onto the CPU first; the model is moved to the GPU afterwards
trained_weights = torch.load(model_path, map_location='cpu')
model.load_state_dict(trained_weights)
model.eval()  # inference mode: disables dropout

if torch.cuda.is_available():
    model = model.cuda()

print("Model loaded successfully!")

## Prepare Test Dataset

In [None]:
# Sequence length every tweet is padded/truncated to, and the tokenizer
# that produces the encodings
MAX_LEN = 128
tokenizer = RobertaTokenizerFast.from_pretrained('roberta-base')

class TweetTestDataset(Dataset):
    """Inference dataset that tokenizes sentiment-prefixed tweets.

    Each item is built from the ``text`` and ``sentiment`` columns of one
    row of ``df``.
    """

    def __init__(self, df, tokenizer, max_len=128):
        """Keep references to the frame, the tokenizer and the max length."""
        self.df = df
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        """Number of rows in the wrapped frame."""
        return len(self.df)

    def __getitem__(self, idx):
        """Encode row ``idx`` and return tensors plus the raw strings."""
        record = self.df.iloc[idx]
        tweet, label = record['text'], record['sentiment']

        # The model input is the tweet preceded by a "<sentiment> " marker
        prefixed = f"<{label}> {tweet}"

        encoded = self.tokenizer(
            prefixed,
            max_length=self.max_len,
            padding='max_length',
            truncation=True,
            return_tensors='pt',
            return_offsets_mapping=True,
        )

        # return_tensors='pt' adds a leading batch axis of size 1; drop it
        sample = {
            field: encoded[field].squeeze(0)
            for field in ('input_ids', 'attention_mask', 'offset_mapping')
        }
        sample['text'] = tweet
        sample['text_with_sentiment'] = prefixed
        sample['sentiment'] = label
        return sample

# Wrap the test frame in a dataset and iterate it in file order, 16 rows per
# batch, so predictions line up with the rows of test_df
test_dataset = TweetTestDataset(df=test_df, tokenizer=tokenizer, max_len=MAX_LEN)
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=16,
    shuffle=False,
    num_workers=0,
)

print(f"Test dataset size: {len(test_dataset)}")
print(f"Test loader batches: {len(test_loader)}")

## Generate Predictions

In [None]:
def extract_span_from_predictions(start_probs, end_probs, offset_mapping, text, sentiment, threshold=0.5, prefix_len=None):
    """Turn per-token start/end scores into a substring of ``text``.

    The token offsets are expected to index the sentiment-prefixed string
    ``f"<{sentiment}> {text}"`` that was handed to the tokenizer, not the raw
    tweet. They are therefore shifted back by the prefix length before the
    raw ``text`` is sliced; without that shift every span would be displaced
    to the right by the length of the prefix.

    Args:
        start_probs: 1-D tensor with one start score per token.
        end_probs: 1-D tensor with one end score per token.
        offset_mapping: per-token ``(char_start, char_end)`` pairs, indexed
            as ``offset_mapping[token][0 or 1]``.
        text: raw tweet text, without the sentiment prefix.
        sentiment: sentiment label; ``'neutral'`` returns ``text`` unchanged.
        threshold: currently unused; kept so existing calls keep working.
        prefix_len: number of characters that preceded ``text`` in the
            tokenized string. ``None`` (default) derives it from
            ``f"<{sentiment}> "``; pass ``0`` if the offsets already refer
            to ``text`` itself.

    Returns:
        The selected substring of ``text``. Falls back to the whole ``text``
        for neutral tweets and when the decoded span is empty or whitespace.
    """
    # For neutral sentiment, return full text
    if sentiment == 'neutral':
        return text

    if prefix_len is None:
        prefix_len = len(f"<{sentiment}> ")

    # Find best start and end positions
    start_idx = torch.argmax(start_probs).item()
    end_idx = torch.argmax(end_probs).item()

    # Ensure start <= end
    if start_idx > end_idx:
        start_idx, end_idx = end_idx, start_idx

    # Map token indices to character positions in the raw text. Tokens that
    # lie inside the prefix (or carry a (0, 0) offset) would go negative
    # after the shift, so clamp them to the start of the text.
    start_char = max(offset_mapping[start_idx][0].item() - prefix_len, 0)
    end_char = max(offset_mapping[end_idx][1].item() - prefix_len, 0)

    selected_text = text[start_char:end_char]

    # An empty or whitespace-only span is not a usable answer; use the
    # full text instead.
    if not selected_text.strip():
        return text

    return selected_text

# Generate predictions: run the model over every test batch and decode one
# span per row, in the same order as the rows of the loader.
predictions = []

print("Generating predictions...")
use_cuda = torch.cuda.is_available()
total_batches = len(test_loader)

with torch.no_grad():
    for batch_idx, batch in enumerate(test_loader):
        if batch_idx % 50 == 0:
            print(f"Batch {batch_idx}/{total_batches}")

        input_ids = batch['input_ids']
        attention_mask = batch['attention_mask']
        if use_cuda:
            input_ids = input_ids.cuda()
            attention_mask = attention_mask.cuda()

        # Get model predictions
        start_logits, end_logits = model(input_ids, attention_mask)

        # Normalise over the token axis, then copy the scores to the CPU once
        # per batch so the per-row argmax/.item() calls in
        # extract_span_from_predictions work on host tensors.
        start_probs = torch.softmax(start_logits, dim=-1).cpu()
        end_probs = torch.softmax(end_logits, dim=-1).cpu()

        # Walk the batch row by row; offsets and strings never left the CPU
        rows = zip(
            start_probs,
            end_probs,
            batch['offset_mapping'],
            batch['text'],
            batch['sentiment'],
        )
        predictions.extend(
            extract_span_from_predictions(row_start, row_end, row_offsets, row_text, row_sentiment)
            for row_start, row_end, row_offsets, row_text, row_sentiment in rows
        )

print(f"Generated {len(predictions)} predictions")
print(f"Sample predictions:")
# Slice instead of indexing 0..4 so a short test set does not raise IndexError
for rank, sample in enumerate(predictions[:5], start=1):
    print(f"  {rank}. {sample[:100]}...")

## Create Submission File

In [None]:
# Pair every test textID with its predicted span
submission_columns = {
    'textID': test_df['textID'],
    'selected_text': predictions,
}
submission_df = pd.DataFrame(submission_columns)

print(f"Submission shape: {submission_df.shape}")
print("Sample submission:")
submission_df.head()

In [None]:
# Save submission file
submission_path = '/home/code/submission_candidates/candidate_002_roberta_test.csv'
submission_file = Path(submission_path)
# to_csv does not create missing folders, so make sure the target exists
submission_file.parent.mkdir(parents=True, exist_ok=True)
submission_df.to_csv(submission_path, index=False)

print(f"Submission saved to: {submission_path}")

# Verify file format
# NOTE(review): 3535 is presumably the row count the competition expects for
# the test set -- confirm against the sample submission.
EXPECTED_ROWS = 3535
print(f"\nFile verification:")
print(f"- File exists: {submission_file.exists()}")
print(f"- File size: {submission_file.stat().st_size / 1024:.2f} KB")
print(f"- Number of rows: {len(submission_df)}")
print(f"- Expected rows: {EXPECTED_ROWS}")
print(f"- Match: {len(submission_df) == EXPECTED_ROWS}")

# Check for any issues
print(f"\nData quality checks:")
print(f"- Missing predictions: {submission_df['selected_text'].isna().sum()}")
print(f"- Empty predictions: {(submission_df['selected_text'] == '').sum()}")

# Show one example per sentiment; submission_df shares test_df's index, so
# the same label addresses the matching row in both frames.
print(f"\nExamples by sentiment:")
for sentiment in ['positive', 'negative', 'neutral']:
    mask = test_df['sentiment'] == sentiment
    if not mask.any():
        # Nothing to show for a class that is absent from the test set
        print(f"\n{sentiment.upper()}: no rows in test set")
        continue
    sample_idx = test_df.index[mask][0]
    print(f"\n{sentiment.upper()}:")
    print(f"  Text: {test_df.loc[sample_idx, 'text'][:100]}...")
    print(f"  Prediction: {submission_df.loc[sample_idx, 'selected_text'][:100]}...")