In [1]:
import pandas as pd
import numpy as np
from pathlib import Path
import sys

# Add code directory to path for imports
sys.path.append('/home/code')

print("Libraries imported successfully")
print(f"Pandas version: {pd.__version__}")
print(f"NumPy version: {np.__version__}")

Libraries imported successfully
Pandas version: 2.2.2
NumPy version: 1.26.4


In [2]:
# Load the fold predictions from previous experiment
preds_path = Path('/home/code/experiments/002_roberta_span/fold_predictions.csv')
print(f"Loading from: {preds_path}")
print(f"File exists: {preds_path.exists()}")

if preds_path.exists():
    preds_df = pd.read_csv(preds_path)
    print(f"Loaded {len(preds_df)} predictions")
    print(f"Columns: {list(preds_df.columns)}")
else:
    print("File not found!")

Loading from: /home/code/experiments/002_roberta_span/fold_predictions.csv
File exists: True
Loaded 27481 predictions
Columns: ['fold', 'textID', 'text', 'sentiment', 'true_selected', 'pred_selected', 'jaccard']


In [4]:
# Define space trimming function
def trim_spaces(prediction):
    """
    Remove leading and trailing spaces from prediction.
    If result is empty or NaN, return original prediction.
    """
    if pd.isna(prediction):
        return prediction
    if not isinstance(prediction, str):
        prediction = str(prediction)
    trimmed = prediction.strip()
    return trimmed if trimmed else prediction

# Apply space trimming to predictions
print("Applying space trimming to predictions...")
preds_df['pred_selected_trimmed'] = preds_df['pred_selected'].apply(trim_spaces)

# Show examples of changes
changed = preds_df[preds_df['pred_selected'] != preds_df['pred_selected_trimmed']]
print(f"Number of predictions changed by trimming: {len(changed)}")

if len(changed) > 0:
    print("\nExamples of changes:")
    sample_changed = changed.head(3)
    for idx, row in sample_changed.iterrows():
        print(f"  Original: '{row['pred_selected']}'")
        print(f"  Trimmed:  '{row['pred_selected_trimmed']}'")
        print()

Applying space trimming to predictions...
Number of predictions changed by trimming: 1

Examples of changes:
  Original: 'nan'
  Trimmed:  'nan'



In [5]:
# Calculate Jaccard scores - first define the function since we had import issues
def jaccard_similarity(str1, str2):
    """Calculate Jaccard similarity between two strings."""
    # Convert to lowercase and split into words
    a = set(str1.lower().split())
    b = set(str2.lower().split())
    
    # Handle empty sets
    if len(a) == 0 and len(b) == 0:
        return 1.0
    
    # Calculate intersection and union
    intersection = a.intersection(b)
    union = a.union(b)
    
    # Return Jaccard similarity
    return len(intersection) / len(union)

# Calculate new Jaccard scores with trimmed predictions
print("Calculating Jaccard scores with trimmed predictions...")

jaccard_scores = []
for idx, row in preds_df.iterrows():
    try:
        jaccard = jaccard_similarity(row['true_selected'], row['pred_selected_trimmed'])
        jaccard_scores.append(jaccard)
    except Exception as e:
        print(f"Error at index {idx}: {e}")
        jaccard_scores.append(0.0)

preds_df['jaccard_trimmed'] = jaccard_scores

# Calculate overall score
overall_score = np.mean(jaccard_scores)
print(f"\nOverall Jaccard score with trimming: {overall_score:.4f}")

# Calculate by sentiment
print("\nScores by sentiment:")
sentiment_scores = preds_df.groupby('sentiment')['jaccard_trimmed'].agg(['mean', 'count'])
print(sentiment_scores)

# Calculate improvement
original_score = preds_df['jaccard'].mean()
improvement = overall_score - original_score
print(f"\nOriginal score: {original_score:.4f}")
print(f"Trimmed score:  {overall_score:.4f}")
print(f"Improvement:    {improvement:.4f} ({improvement*100:.2f}%)")

Calculating Jaccard scores with trimmed predictions...


Error at index 22051: 'float' object has no attribute 'lower'



Overall Jaccard score with trimming: 0.7036

Scores by sentiment:
               mean  count
sentiment                 
negative   0.520631   7781
neutral    0.971700  11118
positive   0.522043   8582

Original score: 0.7036
Trimmed score:  0.7036
Improvement:    -0.0000 (-0.00%)


In [6]:
# Investigate why there's no improvement
print("Investigating predictions...")

# Check for NaN values in predictions
nan_count = preds_df['pred_selected'].isna().sum()
nan_count_trimmed = preds_df['pred_selected_trimmed'].isna().sum()
print(f"NaN values in original predictions: {nan_count}")
print(f"NaN values in trimmed predictions: {nan_count_trimmed}")

# Check data types
print(f"\nData types:")
print(preds_df['pred_selected'].apply(type).value_counts())

# Look at the problematic index
print(f"\nProblematic index 22051:")
row_22051 = preds_df.loc[22051]
print(f"  true_selected: {repr(row_22051['true_selected'])}")
print(f"  pred_selected: {repr(row_22051['pred_selected'])}")
print(f"  pred_selected_trimmed: {repr(row_22051['pred_selected_trimmed'])}")

# Check if there are any actual differences
actual_changes = preds_df[
    (preds_df['pred_selected'] != preds_df['pred_selected_trimmed']) & 
    (preds_df['pred_selected'].notna()) & 
    (preds_df['pred_selected_trimmed'].notna())
]
print(f"\nActual meaningful changes: {len(actual_changes)}")

if len(actual_changes) > 0:
    print("\nExamples of actual changes:")
    for idx, row in actual_changes.head(3).iterrows():
        print(f"  Original: {repr(row['pred_selected'])}")
        print(f"  Trimmed:  {repr(row['pred_selected_trimmed'])}")
        print(f"  Original jaccard: {row['jaccard']:.4f}")
        print(f"  Trimmed jaccard:  {row['jaccard_trimmed']:.4f}")
        print()

Investigating predictions...
NaN values in original predictions: 1
NaN values in trimmed predictions: 1

Data types:
pred_selected
<class 'str'>      27480
<class 'float'>        1
Name: count, dtype: int64

Problematic index 22051:
  true_selected: nan
  pred_selected: nan
  pred_selected_trimmed: nan

Actual meaningful changes: 0
