In [4]:
import pandas as pd
import re
import numpy as np

# Location of the dataset CSV, relative to the working directory
csv_path = 'data/OOD_dataset.csv'

# Read the full dataset into a DataFrame; later steps read its
# 'conversation_transcript' and 'label' columns
df = pd.read_csv(filepath_or_buffer=csv_path)

# One word: a run of word characters that may carry straight or curly
# apostrophes inside it (so "I'm" counts once). Compiled once and reused.
_WORD_RE = re.compile(r'\b\w+[\'’\w]*\b')

# Keys of the Series returned by compute_metrics, in output order.
_METRIC_NAMES = (
    'word_count', 'line_count', 'total_turns',
    'd_turn_count', 'p_turn_count',
    'avg_turn_len', 'avg_d_len', 'avg_p_len',
)


def compute_metrics(transcript):
    """Compute structural metrics for a single conversation transcript.

    Turns are the non-blank lines that, after stripping surrounding
    whitespace, start with a doctor tag ('D:' or 'Dr:') or a patient tag
    ('P:' or 'Pt:'). Non-blank lines without such a tag still count towards
    'line_count' and 'word_count' but are not turns.

    Parameters
    ----------
    transcript : str
        Newline-separated transcript text. Any non-string value (e.g. None,
        or the NaN pandas uses for an empty CSV cell) is treated as missing.

    Returns
    -------
    pandas.Series
        Indexed by 'word_count', 'line_count', 'total_turns',
        'd_turn_count', 'p_turn_count', 'avg_turn_len', 'avg_d_len' and
        'avg_p_len'. An average is NaN when there are no turns of that kind.
        For a missing transcript every entry is NaN, so the row is skipped
        by NaN-aware aggregations instead of raising or counting as zeros.
    """
    if not isinstance(transcript, str):
        # Missing transcript: report "unknown" rather than crash on .strip()
        return pd.Series(dict.fromkeys(_METRIC_NAMES, np.nan))

    lines = [line.strip() for line in transcript.strip().split('\n') if line.strip()]
    # total words in transcript (including speaker tags)
    total_words = len(_WORD_RE.findall(transcript))
    # split turns by speaker
    d_turns = [l for l in lines if l.startswith(('D:', 'Dr:'))]
    p_turns = [l for l in lines if l.startswith(('P:', 'Pt:'))]
    d_count = len(d_turns)
    p_count = len(p_turns)
    total_turns = d_count + p_count
    # word counts per turn; the "- 1" drops the speaker tag, which the
    # pattern always matches as exactly one word because ':' ends the match
    d_words = sum(len(_WORD_RE.findall(t)) - 1 for t in d_turns)
    p_words = sum(len(_WORD_RE.findall(t)) - 1 for t in p_turns)
    return pd.Series({
        'word_count': total_words,
        'line_count': len(lines),
        'total_turns': total_turns,
        'd_turn_count': d_count,
        'p_turn_count': p_count,
        'avg_turn_len': (d_words + p_words) / total_turns if total_turns else np.nan,
        'avg_d_len': d_words / d_count if d_count else np.nan,
        'avg_p_len': p_words / p_count if p_count else np.nan,
    })

# Metric column -> row title, in the order the rows appear in the table
metric_labels = {
    'word_count': 'Words per conversation',
    'line_count': 'Lines per conversation',
    'total_turns': 'Total turns',
    'd_turn_count': 'Doctor turns',
    'p_turn_count': 'Patient turns',
    'avg_turn_len': 'Avg turn length (words)',
    'avg_d_len': 'Avg doctor turn length',
    'avg_p_len': 'Avg patient turn length',
}

# One row of structural metrics per transcript, placed next to the raw columns
metrics = df['conversation_transcript'].apply(compute_metrics)
full_df = pd.concat([df, metrics], axis=1)

# Mean of every metric within each label group
mean_stats = full_df.groupby('label')[list(metric_labels)].mean()

# Label 0 fills the 'Human-AI' column and label 1 the 'Human-Human' column
label0_means = mean_stats.loc[0]
label1_means = mean_stats.loc[1]

# One table row per metric, means rounded to two decimals
table_rows = []
for metric, label_name in metric_labels.items():
    table_rows.append({
        'Metric': label_name,
        'Human-AI (Mean)': round(label0_means[metric], 2),
        'Human-Human (Mean)': round(label1_means[metric], 2),
    })

# Final row: how many conversations carry each label (a count, not a mean)
label0_rows = full_df[full_df['label'] == 0]
label1_rows = full_df[full_df['label'] == 1]
table_rows.append({
    'Metric': 'Number of conversations',
    'Human-AI (Mean)': len(label0_rows),
    'Human-Human (Mean)': len(label1_rows),
})

summary_df = pd.DataFrame(table_rows)
summary_df

Unnamed: 0,Metric,Human-AI (Mean),Human-Human (Mean)
0,Words per conversation,332.9,508.12
1,Lines per conversation,11.36,21.0
2,Total turns,11.36,20.92
3,Doctor turns,5.51,10.6
4,Patient turns,5.85,10.32
5,Avg turn length (words),29.44,26.36
6,Avg doctor turn length,33.81,32.09
7,Avg patient turn length,25.32,20.26
8,Number of conversations,73.0,73.0


In [None]:
# 100, 100
                            # Human | AI
# Number of conversations:      100 | 100
# Average number of turns:       -- | --
# Average turn length :          -- | --
# Average length in words:       -- | --
# Number of unique Investigator: 51 | 26
# Number of unique Witness:      13 | N/A