In [1]:
import pandas as pd
import numpy as np
from pathlib import Path
import sys

# Add code directory to path for imports
sys.path.append('/home/code')

print("Libraries imported successfully")
print(f"Pandas version: {pd.__version__}")
print(f"NumPy version: {np.__version__}")

Libraries imported successfully
Pandas version: 2.2.2
NumPy version: 1.26.4


In [2]:
# Load the fold predictions from previous experiment
preds_path = Path('/home/code/experiments/002_roberta_span/fold_predictions.csv')
print(f"Loading from: {preds_path}")
print(f"File exists: {preds_path.exists()}")

if preds_path.exists():
    preds_df = pd.read_csv(preds_path)
    print(f"Loaded {len(preds_df)} predictions")
    print(f"Columns: {list(preds_df.columns)}")
else:
    print("File not found!")

Loading from: /home/code/experiments/002_roberta_span/fold_predictions.csv
File exists: True
Loaded 27481 predictions
Columns: ['fold', 'textID', 'text', 'sentiment', 'true_selected', 'pred_selected', 'jaccard']


In [4]:
# Define space trimming function
def trim_spaces(prediction):
    """
    Remove leading and trailing spaces from prediction.
    If result is empty or NaN, return original prediction.
    """
    if pd.isna(prediction):
        return prediction
    if not isinstance(prediction, str):
        prediction = str(prediction)
    trimmed = prediction.strip()
    return trimmed if trimmed else prediction

# Apply space trimming to predictions
print("Applying space trimming to predictions...")
preds_df['pred_selected_trimmed'] = preds_df['pred_selected'].apply(trim_spaces)

# Show examples of changes
changed = preds_df[preds_df['pred_selected'] != preds_df['pred_selected_trimmed']]
print(f"Number of predictions changed by trimming: {len(changed)}")

if len(changed) > 0:
    print("\nExamples of changes:")
    sample_changed = changed.head(3)
    for idx, row in sample_changed.iterrows():
        print(f"  Original: '{row['pred_selected']}'")
        print(f"  Trimmed:  '{row['pred_selected_trimmed']}'")
        print()

Applying space trimming to predictions...
Number of predictions changed by trimming: 1

Examples of changes:
  Original: 'nan'
  Trimmed:  'nan'

