## Show graphs of how the IoU changes for every competitor for a given label. Also download the silver truth and show it.

In [None]:
import pandas as pd

In [None]:
# Load the preprocessed dataset. The 'competitor_name' column is derived in a
# later cell, after get_competitor_name has been defined; calling the helper
# here raises NameError on a fresh kernel because it does not exist yet.
df = pd.read_csv('preprocessed_dataset.csv')
def get_competitor_name(file_path: str):
    """Return the third '/'-separated component of ``file_path``.

    The notebook applies this to the 'Mask_file' column to obtain the
    competitor folder name.
    """
    path_parts = file_path.split('/')
    return path_parts[2]

In [None]:
# Derive the competitor folder name for every row from its mask file path.
df['competitor_name'] = df['Mask_file'].apply(get_competitor_name)
# Disabled; note that get_dataset_name is only defined in a later cell.
#df['dataset_name'] = df['Mask_file'].apply(get_dataset_name)

In [None]:
def get_dataset_name(file_path: str):
    """Return the dataset folder name given a path to a file.

    The name is taken to be the second '/'-separated component of
    ``file_path``.
    """
    return file_path.split('/')[1]

In [None]:
def get_label_comp_scores(df, source_file, label):
    """Return each competitor's Jaccard score for one label of one image.

    Args:
        df: DataFrame with the columns 'Gt_source_file', 'Label',
            'competitor_name' and 'J_value'.
        source_file: value of 'Gt_source_file' identifying the image.
        label: value of 'Label' to select within that image.

    Returns:
        A dict mapping competitor name to 'J_value'. It is empty when no row
        matches; if a competitor has several matching rows the last one wins.
    """
    # Compare the column values, not the column *name*, against source_file.
    selected = (df['Gt_source_file'] == source_file) & (df['Label'] == label)
    rows = df.loc[selected]
    return dict(zip(rows['competitor_name'], rows['J_value']))

In [None]:
# Display the DataFrame (rendered as this cell's output).
df

In [None]:
# Narrow the data to a single ground-truth image, then to label 1 within it.
is_target_image = df['Gt_source_file'] == 'inputs-2020-07/BF-C2DL-HSC/02/t0046.tif'
source_file_filtered = df[is_target_image]
label_filtered = source_file_filtered[source_file_filtered['Label'] == 1]

In [None]:
# Collect every competitor's Jaccard score for label 1 of a single image.
source_file_filtered = df[df['Gt_source_file'] == 'inputs-2020-07/BF-C2DL-HSC/02/t0046.tif']
label_filtered = source_file_filtered[source_file_filtered['Label'] == 1]
comp_results = {}
# iterrows() yields (index, row) tuples, so unpack them instead of indexing
# the tuple; only the score is stored per competitor.
for _, row in label_filtered.iterrows():
    comp_results[row['competitor_name']] = row['J_value']
    

In [None]:
# Display the competitor -> score mapping built in the previous cell.
comp_results

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from collections import defaultdict
import seaborn as sns

def analyze_jaccard_scores(df):
    """Summarise the per-label Jaccard scores of all competitors.

    Reads the columns 'Gt_source_file', 'Label', 'competitor_name' and
    'J_value' of ``df``.

    Returns a ``(interesting_cases, competitor_stats)`` tuple:

    * ``interesting_cases`` -- one dict per (image, label) pair holding the
      best and worst score, the competitors that achieved them, the spread
      between them and all scores; ordered by decreasing spread.
    * ``competitor_stats`` -- maps a competitor to the list of all its
      'scores' and to the same scores grouped 'by_label'.
    """
    # image -> label -> competitor -> score (a later duplicate row overwrites)
    scores_by_image = defaultdict(lambda: defaultdict(dict))
    for _, record in df.iterrows():
        image, label = record['Gt_source_file'], record['Label']
        competitor, score = record['competitor_name'], record['J_value']
        scores_by_image[image][label][competitor] = score

    def by_score(entry):
        # entry is a (competitor, score) pair
        return entry[1]

    interesting_cases = []
    competitor_stats = defaultdict(lambda: {'scores': [], 'by_label': defaultdict(list)})

    for image, per_label in scores_by_image.items():
        for label, per_competitor in per_label.items():
            best_name, best = max(per_competitor.items(), key=by_score)
            worst_name, worst = min(per_competitor.items(), key=by_score)

            interesting_cases.append({
                'image': image,
                'label': label,
                'difference': best - worst,
                'max_score': best,
                'min_score': worst,
                'best_competitor': best_name,
                'worst_competitor': worst_name,
                'all_scores': per_competitor,
            })

            # Feed the running per-competitor statistics
            for competitor, score in per_competitor.items():
                entry = competitor_stats[competitor]
                entry['scores'].append(score)
                entry['by_label'][label].append(score)

    # Largest spread first; ties keep their original order
    ranked_cases = sorted(interesting_cases, key=lambda c: c['difference'], reverse=True)
    return ranked_cases, competitor_stats

def plot_analysis(interesting_cases, competitor_stats):
    """Draw a 2x2 overview figure of the Jaccard-score analysis.

    Panels: (1) the cases with the largest score differences, (2) the score
    distribution per competitor, (3) the correlation between competitors'
    scores over the top 20 cases, (4) per-label mean versus standard
    deviation for every competitor.

    Args:
        interesting_cases: list of case dicts with the keys 'image', 'label',
            'difference', 'max_score', 'min_score' and 'all_scores', ordered
            by decreasing difference.
        competitor_stats: mapping competitor -> {'scores': [...],
            'by_label': {label: [...]}}.

    Returns:
        The matplotlib figure holding the four panels.
    """
    plt.style.use('seaborn-v0_8')
    fig = plt.figure(figsize=(20, 15))

    # One fixed competitor order shared by the violin plot and the heatmap.
    competitor_names = list(competitor_stats)

    # 1. Top differences plot
    ax1 = plt.subplot(2, 2, 1)
    top_cases = interesting_cases[:10]
    # Size the x axis from the cases actually available: a fixed range(10)
    # makes bar()/plot() fail when fewer than ten cases exist.
    x = list(range(len(top_cases)))

    ax1.bar(x, [case['difference'] for case in top_cases], alpha=0.3)
    ax1.plot(x, [case['max_score'] for case in top_cases], 'g^-', label='Best Score')
    ax1.plot(x, [case['min_score'] for case in top_cases], 'rv-', label='Worst Score')

    ax1.set_title('Top 10 Largest Score Differences')
    ax1.set_xlabel('Case Index')
    ax1.set_ylabel('Jaccard Score')
    ax1.legend()
    ax1.set_xticks(x)
    ax1.set_xticklabels(
        [f"{case['image'].split('/')[-1]}\nLabel {case['label']}" for case in top_cases],
        rotation=45,
        ha='right',
    )

    # 2. Competitor performance distribution
    ax2 = plt.subplot(2, 2, 2)
    if competitor_names:
        sns.violinplot(
            data=[competitor_stats[name]['scores'] for name in competitor_names],
            ax=ax2,
        )
        # Fix the tick positions before labelling them.
        ax2.set_xticks(range(len(competitor_names)))
        ax2.set_xticklabels(competitor_names, rotation=45, ha='right')
    ax2.set_title('Distribution of Jaccard Scores by Competitor')
    ax2.set_ylabel('Jaccard Score')

    # 3. Score correlation heatmap (top 20 cases)
    ax3 = plt.subplot(2, 2, 3)
    # Align every case's scores to competitor_names (NaN where a competitor
    # has no score). Taking the raw dict values would mix up columns whenever
    # cases list competitors in a different order or omit some of them.
    score_table = pd.DataFrame(
        [case['all_scores'] for case in interesting_cases[:20]],
        columns=competitor_names,
    )
    # A correlation needs at least two observations.
    if len(score_table) >= 2 and competitor_names:
        correlation_matrix = score_table.astype(float).corr()
        sns.heatmap(correlation_matrix,
                    xticklabels=competitor_names,
                    yticklabels=competitor_names,
                    ax=ax3,
                    cmap='RdYlBu',
                    center=0)
    ax3.set_title('Competitor Score Correlation')

    # 4. Performance stability plot: one point per (competitor, label)
    ax4 = plt.subplot(2, 2, 4)
    for competitor, stats in competitor_stats.items():
        by_label = stats['by_label']
        labels = sorted(by_label)
        means = [np.mean(by_label[label]) for label in labels]
        stds = [np.std(by_label[label]) for label in labels]
        ax4.errorbar(means, stds, fmt='o', label=competitor, alpha=0.6)

    ax4.set_title('Performance Stability by Competitor')
    ax4.set_xlabel('Mean Jaccard Score')
    ax4.set_ylabel('Standard Deviation')
    ax4.legend(bbox_to_anchor=(1.05, 1), loc='upper left')

    plt.tight_layout()
    return fig

def print_interesting_findings(interesting_cases, competitor_stats):
    """Print a text report of the analysis.

    Lists the first five entries of ``interesting_cases``, followed by the
    mean, standard deviation, minimum and maximum score of every competitor
    in ``competitor_stats``.
    """
    rule = "-" * 80

    print("\nTop 5 Most Interesting Cases:")
    print(rule)
    for rank, case in enumerate(interesting_cases[:5], start=1):
        file_name = case['image'].split('/')[-1]
        print(f"\n{rank}. Image: {file_name}")
        print(f"   Label: {case['label']}")
        print(f"   Score Difference: {case['difference']:.3f}")
        print(f"   Best: {case['best_competitor']} ({case['max_score']:.3f})")
        print(f"   Worst: {case['worst_competitor']} ({case['min_score']:.3f})")

    print("\nCompetitor Statistics:")
    print(rule)
    for competitor in competitor_stats:
        scores = competitor_stats[competitor]['scores']
        print(f"\n{competitor}:")
        print(f"   Mean Score: {np.mean(scores):.3f}")
        print(f"   Std Dev: {np.std(scores):.3f}")
        print(f"   Min Score: {min(scores):.3f}")
        print(f"   Max Score: {max(scores):.3f}")

def analyze_and_visualize(df):
    """Analyse ``df``, plot the results and print the findings.

    Returns the tuple ``(figure, interesting_cases, competitor_stats)``.
    """
    cases, per_competitor = analyze_jaccard_scores(df)
    figure = plot_analysis(cases, per_competitor)
    print_interesting_findings(cases, per_competitor)
    return figure, cases, per_competitor

# Usage example:
# fig, cases, stats = analyze_and_visualize(df)
# plt.show()


In [None]:
# Run the full score analysis on the loaded data and render the figure.
fig, cases, stats = analyze_and_visualize(df)
plt.show()

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from collections import defaultdict

def find_contrasting_cases(df):
    """Find competitor pairs whose ranking flips between labels of one image.

    For every image with at least two labels, each pair of competitors is
    compared label by label. A pair is reported when one competitor scores
    higher on at least one label and lower on at least one other.

    Args:
        df: DataFrame with the columns 'Gt_source_file', 'Label',
            'competitor_name' and 'J_value'.

    Returns:
        A list of dicts with the keys 'image', 'competitor1', 'competitor2',
        'contrast_magnitude', 'label_differences' and 'scores', sorted by
        decreasing 'contrast_magnitude'.
    """
    # image -> label -> competitor -> score
    image_data = defaultdict(lambda: defaultdict(dict))
    for _, row in df.iterrows():
        image = row['Gt_source_file']
        label = row['Label']
        competitor = row['competitor_name']
        image_data[image][label][competitor] = row['J_value']

    interesting_cases = []

    for image, label_data in image_data.items():
        # A contrast needs at least two labels to compare.
        if len(label_data) < 2:
            continue

        # Re-index this image's scores as competitor -> label -> score
        competitor_label_scores = defaultdict(dict)
        for label, comp_scores in label_data.items():
            for competitor, score in comp_scores.items():
                competitor_label_scores[competitor][label] = score

        for comp1 in competitor_label_scores:
            for comp2 in competitor_label_scores:
                # Visit each unordered pair exactly once.
                if comp1 >= comp2:
                    continue

                scores1 = competitor_label_scores[comp1]
                scores2 = competitor_label_scores[comp2]

                # Compare only labels scored by both competitors; indexing a
                # label one of them lacks would raise KeyError.
                performance_diff = [
                    (label, scores1[label] - scores2[label])
                    for label in label_data
                    if label in scores1 and label in scores2
                ]

                pos_diffs = [d for _, d in performance_diff if d > 0]
                neg_diffs = [d for _, d in performance_diff if d < 0]

                # Contrasting means each competitor wins on some label.
                if pos_diffs and neg_diffs:
                    interesting_cases.append({
                        'image': image,
                        'competitor1': comp1,
                        'competitor2': comp2,
                        # Largest win of comp1 plus largest win of comp2
                        'contrast_magnitude': max(pos_diffs) + abs(min(neg_diffs)),
                        'label_differences': performance_diff,
                        'scores': {
                            comp1: scores1,
                            comp2: scores2,
                        },
                    })

    interesting_cases.sort(key=lambda x: x['contrast_magnitude'], reverse=True)
    return interesting_cases

def visualize_contrasting_case(case, top_n=5):
    """Draw a grouped bar chart comparing two competitors on one image.

    Args:
        case: dict with the keys 'image', 'competitor1', 'competitor2' and
            'scores' (competitor -> {label: score}).
        top_n: unused; kept so existing callers keep working.

    Returns:
        The matplotlib figure that was drawn.
    """
    comp1 = case['competitor1']
    comp2 = case['competitor2']
    label_scores1 = case['scores'][comp1]
    label_scores2 = case['scores'][comp2]

    # Plot only labels scored by both competitors; taking the labels from
    # comp1 alone raises KeyError when comp2 lacks one of them.
    labels = sorted(set(label_scores1) & set(label_scores2))
    scores1 = [label_scores1[label] for label in labels]
    scores2 = [label_scores2[label] for label in labels]

    plt.figure(figsize=(12, 6))
    x = np.arange(len(labels))
    width = 0.35

    # Two bars per label, centred on the label's tick
    plt.bar(x - width/2, scores1, width, label=comp1, alpha=0.7)
    plt.bar(x + width/2, scores2, width, label=comp2, alpha=0.7)

    plt.xlabel('Label')
    plt.ylabel('Jaccard Score')
    plt.title(f'Contrasting Performance in Image:\n{case["image"].split("/")[-1]}')
    plt.xticks(x, labels)
    plt.legend()

    # Annotate every bar with its value
    for i, v in enumerate(scores1):
        plt.text(i - width/2, v, f'{v:.3f}', ha='center', va='bottom')
    for i, v in enumerate(scores2):
        plt.text(i + width/2, v, f'{v:.3f}', ha='center', va='bottom')

    plt.grid(True, alpha=0.3)
    return plt.gcf()

def analyze_contrasting_performance(df):
    """Print the strongest contrasting-performance cases in ``df`` and plot them.

    Prints the five cases with the largest contrast magnitude and draws a bar
    chart for the first three. The figures are created but not shown; call
    ``plt.show()`` afterwards.

    Returns:
        The full list of cases from find_contrasting_cases (empty when none
        were found).
    """
    cases = find_contrasting_cases(df)

    if not cases:
        print("No contrasting cases found!")
        # Hand back the empty result instead of None so the return type is
        # the same on every path.
        return cases

    print("\nTop Contrasting Performance Cases:")
    print("-" * 80)

    for i, case in enumerate(cases[:5], 1):
        comp1 = case['competitor1']
        comp2 = case['competitor2']
        print(f"\nCase {i}:")
        print(f"Image: {case['image'].split('/')[-1]}")
        print(f"Competitors: {comp1} vs {comp2}")
        print(f"Contrast Magnitude: {case['contrast_magnitude']:.3f}")
        print("\nLabel-wise differences:")
        for label, diff in case['label_differences']:
            print(f"  Label {label}: {diff:.3f}")
            print(f"    {comp1}: {case['scores'][comp1][label]:.3f}")
            print(f"    {comp2}: {case['scores'][comp2][label]:.3f}")

        # Plot only the top 3 cases
        if i <= 3:
            visualize_contrasting_case(case)
            plt.tight_layout()

    return cases

# Usage:
# cases = analyze_contrasting_performance(df)
# plt.show()


In [None]:
# Run the contrasting-performance analysis on df and show the figures it drew.
cases = analyze_contrasting_performance(df)
plt.show()

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.colors import to_rgba

def visualize_contrasting_case(case, df, max_labels=8):
    """Plot the per-label Jaccard scores of two contrasting competitors.

    The two competitors named in ``case`` are drawn in bright colours; every
    other competitor scored on the same image is drawn in faint gray.

    Args:
        case: dict with the keys 'image', 'competitor1' and 'competitor2'.
        df: DataFrame with the columns 'Gt_source_file', 'Label',
            'competitor_name' and 'J_value'.
        max_labels: maximum number of labels on the x axis. When the image
            has more, the labels with the largest score gap between the two
            competitors are kept.

    Returns:
        The matplotlib figure that was drawn.
    """
    comp1 = case['competitor1']
    comp2 = case['competitor2']
    image = case['image']

    image_data = df[df['Gt_source_file'] == image]
    all_competitors = image_data['competitor_name'].unique()
    all_labels = sorted(image_data['Label'].unique())

    def score_gap(label):
        """Absolute score difference between comp1 and comp2 for a label."""
        label_rows = image_data[image_data['Label'] == label]
        scores = dict(zip(label_rows['competitor_name'], label_rows['J_value']))
        # A competitor without a score for the label counts as 0.
        return abs(scores.get(comp1, 0) - scores.get(comp2, 0))

    if len(all_labels) > max_labels:
        # Keep the most contrasting labels, shown in ascending label order.
        most_contrasting = sorted(all_labels, key=score_gap, reverse=True)[:max_labels]
        selected_labels = sorted(most_contrasting)
    else:
        selected_labels = all_labels

    def scores_for(competitor):
        """First score per selected label, NaN where none exists."""
        comp_data = image_data[image_data['competitor_name'] == competitor]
        series = []
        for label in selected_labels:
            label_scores = comp_data.loc[comp_data['Label'] == label, 'J_value']
            series.append(np.nan if label_scores.empty else label_scores.iloc[0])
        return series

    plt.figure(figsize=(12, 7))

    # Background: every other competitor, muted
    for competitor in all_competitors:
        if competitor in (comp1, comp2):
            continue
        plt.plot(selected_labels, scores_for(competitor), '-o', color='gray',
                 alpha=0.2, linewidth=1, markersize=4)

    # Foreground: the two competitors of interest (red, turquoise)
    for competitor, color in ((comp1, '#FF6B6B'), (comp2, '#4ECDC4')):
        plt.plot(selected_labels, scores_for(competitor), '-o', color=color,
                 alpha=1, label=competitor, linewidth=2.5, markersize=8)

    plt.xlabel('Label')
    plt.ylabel('Jaccard Score')
    plt.title(f'Contrasting Performance in Image:\n{image.split("/")[-1]}')

    plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')

    # Pad the y range by 0.1 but stay inside [0, 1]
    plt.ylim(max(0, image_data['J_value'].min() - 0.1),
             min(1, image_data['J_value'].max() + 0.1))

    plt.xticks(selected_labels, rotation=45)

    # Light dashed background grid
    plt.grid(True, linestyle='--', alpha=0.3)

    # Prevent label cutoff
    plt.tight_layout()

    return plt.gcf()

def analyze_contrasting_performance(df):
    """Print and plot the three strongest contrasting-performance cases.

    For each of the top three cases from find_contrasting_cases the label-wise
    score differences are printed and one figure is drawn and shown.

    Returns:
        The full list of cases (empty when none were found).
    """
    cases = find_contrasting_cases(df)

    if not cases:
        print("No contrasting cases found!")
        # Hand back the empty result instead of None so the return type is
        # the same on every path.
        return cases

    print("\nTop Contrasting Performance Cases:")
    print("-" * 80)

    for i, case in enumerate(cases[:3], 1):  # Show only top 3 cases
        comp1 = case['competitor1']
        comp2 = case['competitor2']
        print(f"\nCase {i}:")
        print(f"Image: {case['image'].split('/')[-1]}")
        print(f"Competitors: {comp1} vs {comp2}")
        print(f"Contrast Magnitude: {case['contrast_magnitude']:.3f}")
        print("\nLabel-wise differences:")
        for label, diff in case['label_differences']:
            print(f"  Label {label}: {diff:.3f}")
            print(f"    {comp1}: {case['scores'][comp1][label]:.3f}")
            print(f"    {comp2}: {case['scores'][comp2][label]:.3f}")

        visualize_contrasting_case(case, df)
        plt.show()

    return cases

# Usage:
# cases = analyze_contrasting_performance(df)


In [None]:
# Run the contrasting-performance analysis on df and keep the returned cases.
cases = analyze_contrasting_performance(df)

In [None]:
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt

def _print_outliers(heading, outliers):
    """Print ``heading`` followed by one line per (name, value) outlier."""
    print(heading)
    if not outliers:
        print("No outliers found")
        return
    for name, value in outliers:
        print(f"{name}: {value:.6f}")


# One score per competitor (hard-coded sample)
data = {
    'MU-Lux-CZ': 0.861789,
    'CALT-US': 0.92033,
    'DREX-US': 0.784461,
    'KTH-SE': 0.785714,
    'KIT-Sch-GE': 0.856749
}

names = list(data.keys())
values = np.array(list(data.values()))

# Descriptive statistics
mean = np.mean(values)
median = np.median(values)
std = np.std(values)
q1 = np.percentile(values, 25)
q3 = np.percentile(values, 75)
iqr = q3 - q1

print("Basic Statistics:")
print(f"Mean: {mean:.6f}")
print(f"Median: {median:.6f}")
print(f"Standard Deviation: {std:.6f}")
print(f"Q1: {q1:.6f}")
print(f"Q3: {q3:.6f}")
print(f"IQR: {iqr:.6f}")
print("\n")

# Method 1: z-score, flags values more than 2 standard deviations from the mean
z_scores = stats.zscore(values)
z_score_outliers = [
    (name, value)
    for name, value, z in zip(names, values, z_scores)
    if abs(z) > 2
]
_print_outliers("Outliers using Z-score method (>2 standard deviations):", z_score_outliers)
print("\n")

# Method 2: IQR fences at 1.5 x IQR beyond the quartiles
iqr_lower_bound = q1 - 1.5 * iqr
iqr_upper_bound = q3 + 1.5 * iqr
iqr_outliers = [
    (name, value)
    for name, value in data.items()
    if value < iqr_lower_bound or value > iqr_upper_bound
]
_print_outliers("Outliers using IQR method (1.5 × IQR):", iqr_outliers)
print("\n")

# Method 3: modified z-score, based on the median absolute deviation
mad = stats.median_abs_deviation(values)
modified_z_scores = 0.6745 * (values - median) / mad
modified_z_outliers = [
    (name, value)
    for name, value, z in zip(names, values, modified_z_scores)
    if abs(z) > 3.5
]
_print_outliers("Outliers using Modified Z-score method (>3.5):", modified_z_outliers)

# Visualization: box plot on the left, annotated scatter on the right
plt.figure(figsize=(12, 6))

plt.subplot(121)
plt.boxplot(values)
plt.title('Box Plot')
plt.xticks([1], ['Values'])

plt.subplot(122)
positions = range(len(values))
plt.scatter(positions, values)
# Reference lines: mean, the +/-2 std band and the IQR fences
for level, line_color, caption in (
    (mean, 'r', 'Mean'),
    (mean + 2*std, 'g', '+2 Std Dev'),
    (mean - 2*std, 'g', '-2 Std Dev'),
    (q3 + 1.5*iqr, 'orange', 'IQR Upper Bound'),
    (q1 - 1.5*iqr, 'orange', 'IQR Lower Bound'),
):
    plt.axhline(y=level, color=line_color, linestyle='--', label=caption)
plt.xticks(positions, names, rotation=45)
plt.title('Values with Reference Lines')
plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')

plt.tight_layout()
plt.show()


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.colors import to_rgba

def visualize_contrasting_case(case, df, max_labels=8):
    """Plot two contrasting competitors on one image, skipping zero scores.

    Rows whose 'J_value' is not above 0.001 are ignored. The competitors
    named in ``case`` are drawn in bright colours, all others in faint gray.
    At most ``max_labels`` labels are shown; when there are more, those with
    the largest score gap between the two competitors are kept.

    Returns the matplotlib figure that was drawn.
    """
    comp1, comp2 = case['competitor1'], case['competitor2']
    image = case['image']

    # Rows of this image with a score above the zero threshold
    image_data = df[df['Gt_source_file'] == image]
    image_data = image_data[image_data['J_value'] > 0.001]

    all_competitors = image_data['competitor_name'].unique()

    # Labels for which at least one of the two main competitors has a score
    main_comp_data = image_data[image_data['competitor_name'].isin([comp1, comp2])]
    all_labels = sorted(main_comp_data['Label'].unique())

    if len(all_labels) <= max_labels:
        selected_labels = all_labels
    else:
        # Rank labels by the score gap; only labels scored by both count
        label_interests = []
        for label in all_labels:
            label_rows = image_data[image_data['Label'] == label]
            scores = {r['competitor_name']: r['J_value'] for _, r in label_rows.iterrows()}
            if comp1 not in scores or comp2 not in scores:
                continue
            label_interests.append((label, abs(scores[comp1] - scores[comp2])))

        ranked = sorted(label_interests, key=lambda pair: pair[1], reverse=True)
        # Back to ascending label order for plotting
        selected_labels = sorted(label for label, _ in ranked[:max_labels])

    def collect_points(competitor):
        """Return (labels, scores) of the competitor's valid first scores."""
        rows = image_data[image_data['competitor_name'] == competitor]
        xs, ys = [], []
        for label in selected_labels:
            label_score = rows[rows['Label'] == label]['J_value']
            if label_score.empty:
                continue
            first = label_score.iloc[0]
            if first > 0.001:
                ys.append(first)
                xs.append(label)
        return xs, ys

    plt.figure(figsize=(12, 7))

    # Background: every other competitor, muted
    for competitor in all_competitors:
        if competitor in (comp1, comp2):
            continue
        x_values, scores = collect_points(competitor)
        if scores:
            plt.plot(x_values, scores, '-o', color='gray', alpha=0.2,
                     linewidth=1, markersize=4)

    # Foreground: the two competitors of interest (red, turquoise)
    for competitor, color in ((comp1, '#FF6B6B'), (comp2, '#4ECDC4')):
        x_values, scores = collect_points(competitor)
        if scores:
            plt.plot(x_values, scores, '-o', color=color, alpha=1,
                     label=f"{competitor}", linewidth=2.5, markersize=8)

    plt.xlabel('Label')
    plt.ylabel('Jaccard Score')
    plt.title(f'Contrasting Performance in Image:\n{image.split("/")[-1]}')

    plt.grid(True, alpha=0.2)

    plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')

    # Pad the y range by 0.1 around the valid scores, clipped to [0, 1]
    valid_scores = image_data['J_value'][image_data['J_value'] > 0.001]
    if not valid_scores.empty:
        lower = max(0, valid_scores.min() - 0.1)
        upper = min(1, valid_scores.max() + 0.1)
        plt.ylim(lower, upper)

    plt.xticks(selected_labels, rotation=45)

    # Light dashed background grid
    plt.grid(True, linestyle='--', alpha=0.3)

    # Prevent label cutoff
    plt.tight_layout()

    return plt.gcf()

def find_contrasting_cases(df):
    """Return competitor pairs whose ranking flips between labels of an image.

    Rows whose 'J_value' is not above 0.001 are ignored. Within each image
    that has at least two labels, every pair of competitors is compared on
    the labels both were scored on; the pair is reported when each of them
    beats the other on at least one label. The result is sorted by
    decreasing 'contrast_magnitude'.
    """
    nonzero_rows = df[df['J_value'] > 0.001]

    # image -> label -> competitor -> score
    scores_by_image = defaultdict(lambda: defaultdict(dict))
    for _, record in nonzero_rows.iterrows():
        image, label = record['Gt_source_file'], record['Label']
        competitor, score = record['competitor_name'], record['J_value']
        scores_by_image[image][label][competitor] = score

    found = []

    for image, per_label in scores_by_image.items():
        # A flip needs at least two labels
        if len(per_label) < 2:
            continue

        # Re-index as competitor -> label -> score
        per_competitor = defaultdict(dict)
        for label, label_scores in per_label.items():
            for competitor, score in label_scores.items():
                per_competitor[competitor][label] = score

        competitors = list(per_competitor)
        for comp1 in competitors:
            for comp2 in competitors:
                # Each unordered pair once
                if comp1 >= comp2:
                    continue

                first, second = per_competitor[comp1], per_competitor[comp2]

                # Score differences on the labels both competitors have
                performance_diff = [
                    (label, first[label] - second[label])
                    for label in per_label
                    if label in first and label in second
                ]

                gains = [delta for _, delta in performance_diff if delta > 0]
                losses = [delta for _, delta in performance_diff if delta < 0]
                if not (gains and losses):
                    continue

                found.append({
                    'image': image,
                    'competitor1': comp1,
                    'competitor2': comp2,
                    # Largest win of comp1 plus largest win of comp2
                    'contrast_magnitude': max(gains) + abs(min(losses)),
                    'label_differences': performance_diff,
                    'scores': {
                        comp1: first,
                        comp2: second,
                    },
                })

    found.sort(key=lambda entry: entry['contrast_magnitude'], reverse=True)
    return found

def analyze_contrasting_performance(df):
    """Print and plot the three strongest contrasting-performance cases.

    For each of the top three cases from find_contrasting_cases the label-wise
    score differences are printed and one figure is drawn and shown.

    Returns:
        The full list of cases (empty when none were found).
    """
    cases = find_contrasting_cases(df)

    if not cases:
        print("No contrasting cases found!")
        # Hand back the empty result instead of None so the return type is
        # the same on every path.
        return cases

    print("\nTop Contrasting Performance Cases:")
    print("-" * 80)

    for i, case in enumerate(cases[:3], 1):
        comp1 = case['competitor1']
        comp2 = case['competitor2']
        print(f"\nCase {i}:")
        print(f"Image: {case['image'].split('/')[-1]}")
        print(f"Competitors: {comp1} vs {comp2}")
        print(f"Contrast Magnitude: {case['contrast_magnitude']:.3f}")
        print("\nLabel-wise differences:")
        for label, diff in case['label_differences']:
            print(f"  Label {label}: {diff:.3f}")
            print(f"    {comp1}: {case['scores'][comp1][label]:.3f}")
            print(f"    {comp2}: {case['scores'][comp2][label]:.3f}")

        visualize_contrasting_case(case, df)
        plt.show()

    return cases

# Usage:
# cases = analyze_contrasting_performance(df)


In [None]:
# Run the contrasting-performance analysis on df and keep the returned cases.
cases = analyze_contrasting_performance(df)

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.colors import to_rgba

def visualize_contrasting_case(case, df, show_all=False):
    """Plot a contrasting-performance case on a sequential x axis.

    Labels are renumbered 1..N in ascending order; each tick shows the
    sequential number with the original label in parentheses. Rows whose
    'J_value' is not above 0.001 are ignored.

    Args:
        case: dict with the keys 'image', 'competitor1' and 'competitor2'.
        df: DataFrame with the columns 'Gt_source_file', 'Label',
            'competitor_name' and 'J_value'.
        show_all: when True every competitor is drawn with equal emphasis;
            otherwise the two competitors of ``case`` are highlighted and the
            rest is drawn in faint gray.

    Returns:
        The matplotlib figure that was drawn.
    """
    comp1 = case['competitor1']
    comp2 = case['competitor2']
    image = case['image']

    # Rows of this image with a score above the zero threshold
    image_data = df[df['Gt_source_file'] == image]
    image_data = image_data[image_data['J_value'] > 0.001]

    all_competitors = sorted(image_data['competitor_name'].unique())

    # Labels for which at least one of the two main competitors has a score
    main_comp_data = image_data[image_data['competitor_name'].isin([comp1, comp2])]
    all_labels = sorted(main_comp_data['Label'].unique())

    # Original label -> sequential position (1-based)
    label_to_seq = {label: seq for seq, label in enumerate(all_labels, start=1)}

    def points_for(competitor):
        """Return (x positions, scores) of the competitor's first scores."""
        comp_data = image_data[image_data['competitor_name'] == competitor]
        x_values, scores = [], []
        for label in all_labels:
            label_score = comp_data.loc[comp_data['Label'] == label, 'J_value']
            # image_data only holds scores above the threshold, so every
            # value found here is valid.
            if not label_score.empty:
                x_values.append(label_to_seq[label])
                scores.append(label_score.iloc[0])
        return x_values, scores

    plt.figure(figsize=(12, 7))

    if show_all:
        # One distinct colour per competitor
        colors = plt.cm.tab20(np.linspace(0, 1, len(all_competitors)))
        for color, competitor in zip(colors, all_competitors):
            x_values, scores = points_for(competitor)
            if scores:
                plt.plot(x_values, scores, '-o', color=color,
                         label=competitor, linewidth=2, markersize=6)
    else:
        # Background: every other competitor, muted
        for competitor in all_competitors:
            if competitor in (comp1, comp2):
                continue
            x_values, scores = points_for(competitor)
            if scores:
                plt.plot(x_values, scores, '-o', color='gray', alpha=0.2,
                         linewidth=1, markersize=4)

        # Foreground: the two competitors of interest (red, turquoise)
        for competitor, color in ((comp1, '#FF6B6B'), (comp2, '#4ECDC4')):
            x_values, scores = points_for(competitor)
            if scores:
                plt.plot(x_values, scores, '-o', color=color, alpha=1,
                         label=competitor, linewidth=2.5, markersize=8)

    plt.xlabel('Sequential Label Number (Original Label)')
    plt.ylabel('Jaccard Score')

    if show_all:
        plt.title(f'All Competitors Performance in Image:\n{image.split("/")[-1]}')
    else:
        plt.title(f'Contrasting Performance in Image:\n{image.split("/")[-1]}')

    plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')

    # Pad the y range by 0.1 around the plotted scores, clipped to [0, 1]
    if not image_data.empty:
        plt.ylim(max(0, image_data['J_value'].min() - 0.1),
                 min(1, image_data['J_value'].max() + 0.1))

    x_ticks = list(label_to_seq.values())
    tick_text = [f'{seq} ({label})' for label, seq in label_to_seq.items()]
    plt.xticks(x_ticks, tick_text, rotation=45)

    # min()/max() raise ValueError on an empty sequence, so only pad the x
    # range when there is at least one label.
    if x_ticks:
        plt.xlim(min(x_ticks) - 0.5, max(x_ticks) + 0.5)

    # Light dashed background grid
    plt.grid(True, linestyle='--', alpha=0.3)

    # Prevent label cutoff
    plt.tight_layout()

    return plt.gcf()

def analyze_contrasting_performance(df, top_n=3):
    """Print and plot the first `top_n` contrasting-performance cases.

    Cases come from `find_contrasting_cases(df)` (defined elsewhere in this
    notebook). For each reported case a textual summary is printed and two
    figures are shown: one highlighting the two contrasting competitors and
    one drawing every competitor with equal emphasis.

    Args:
        df: DataFrame passed through to `find_contrasting_cases` and
            `visualize_contrasting_case`.
        top_n: How many cases from the start of the case list to report.
            NOTE(review): the list is presumably ordered by contrast
            magnitude -- confirm in `find_contrasting_cases`.

    Returns:
        The full case list returned by `find_contrasting_cases`, or None
        when that list is empty/falsy.
    """
    cases = find_contrasting_cases(df)

    if not cases:
        print("No contrasting cases found!")
        return None

    print("\nTop Contrasting Performance Cases:")
    print("-" * 80)

    for i, case in enumerate(cases[:top_n], 1):
        comp1 = case['competitor1']
        comp2 = case['competitor2']

        print(f"\nCase {i}:")
        print(f"Image: {case['image'].split('/')[-1]}")
        print(f"Competitors: {comp1} vs {comp2}")
        print(f"Contrast Magnitude: {case['contrast_magnitude']:.3f}")
        print("\nLabel-wise differences:")
        for label, diff in case['label_differences']:
            print(f"  Label {label}: {diff:.3f}")
            print(f"    {comp1}: {case['scores'][comp1][label]:.3f}")
            print(f"    {comp2}: {case['scores'][comp2][label]:.3f}")

        # First the highlighted view of the two contrasting competitors,
        # then the view with all competitors drawn equally.
        for show_all in (False, True):
            fig = visualize_contrasting_case(case, df, show_all=show_all)
            plt.show()
            # Release the figure so repeated runs do not accumulate open
            # figures in pyplot's registry.
            plt.close(fig)

    return cases

# Usage:
# cases = analyze_contrasting_performance(df)


In [None]:
# Run the contrasting-performance analysis on the full dataframe: prints a
# summary of the top cases and shows two plots per case. `cases` is None when
# no contrasting cases are found.
cases = analyze_contrasting_performance(df)

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.colors import to_rgba

def _select_interesting_labels(image_data, all_labels, comp1, comp2, max_labels):
    """Return the `max_labels` most "interesting" labels, in ascending order.

    A label's interestingness is the variance of its scores across all
    competitors plus the absolute score gap between `comp1` and `comp2`
    (the gap counts as 0 unless both competitors have a row for the label).
    """
    label_interests = []
    for label in all_labels:
        label_data = image_data[image_data['Label'] == label]
        # If a competitor has several rows for one label, the last one wins.
        scores_dict = dict(zip(label_data['competitor_name'], label_data['J_value']))

        scores = list(scores_dict.values())
        variance = np.var(scores) if scores else 0

        main_diff = 0
        if comp1 in scores_dict and comp2 in scores_dict:
            main_diff = abs(scores_dict[comp1] - scores_dict[comp2])

        label_interests.append((label, variance + main_diff))

    label_interests.sort(key=lambda item: item[1], reverse=True)
    return sorted(label for label, _ in label_interests[:max_labels])


def _plot_competitor_line(image_data, competitor, selected_labels, label_to_seq,
                          **line_style):
    """Plot one competitor's scores over the selected labels on the current axes.

    Labels for which the competitor has no row (or a score <= 0.001) are
    skipped; nothing is drawn if no label qualifies. `line_style` is forwarded
    to `plt.plot`.
    """
    comp_data = image_data[image_data['competitor_name'] == competitor]
    x_values = []
    scores = []
    for label in selected_labels:
        label_score = comp_data[comp_data['Label'] == label]['J_value']
        # Only the first matching row is used for a (competitor, label) pair.
        if not label_score.empty and label_score.iloc[0] > 0.001:
            scores.append(label_score.iloc[0])
            x_values.append(label_to_seq[label])

    if scores:
        plt.plot(x_values, scores, '-o', **line_style)


def visualize_contrasting_case(case, df, max_labels=10, show_all=False):
    """Plot per-label Jaccard scores of the competitors for one image.

    Rows of `df` whose 'Gt_source_file' equals `case['image']` and whose
    'J_value' exceeds 0.001 are plotted. Only labels for which at least one
    of the two main competitors (`case['competitor1']`, `case['competitor2']`)
    has such a row are considered. If there are more than `max_labels` of
    them, the labels with the largest score spread are kept. Labels are drawn
    at sequential x positions 1..k, with tick text "seq (original label)".

    Args:
        case: Mapping with keys 'image', 'competitor1' and 'competitor2'.
        df: DataFrame with columns 'Gt_source_file', 'J_value',
            'competitor_name' and 'Label'.
        max_labels: Maximum number of labels shown on the x axis.
        show_all: If True, every competitor gets its own colour and a legend
            entry. If False, the two main competitors are highlighted and
            all others are drawn as faint gray lines without legend entries.

    Returns:
        The matplotlib figure that was created. When no label qualifies the
        figure has a title and axis labels but no lines, ticks or legend.
    """
    comp1 = case['competitor1']
    comp2 = case['competitor2']
    image = case['image']

    # Keep only this image's rows with a non-zero score.
    image_data = df[df['Gt_source_file'] == image]
    image_data = image_data[image_data['J_value'] > 0.001]

    all_competitors = sorted(image_data['competitor_name'].unique())

    # Labels scored (non-zero) by at least one of the two main competitors.
    main_comp_data = image_data[image_data['competitor_name'].isin([comp1, comp2])]
    all_labels = sorted(main_comp_data['Label'].unique())

    if len(all_labels) > max_labels:
        selected_labels = _select_interesting_labels(
            image_data, all_labels, comp1, comp2, max_labels)
    else:
        selected_labels = all_labels

    # Map original labels to compact 1-based x positions and back.
    label_to_seq = {label: seq for seq, label in enumerate(selected_labels, start=1)}
    seq_to_label = {seq: label for label, seq in label_to_seq.items()}

    plt.figure(figsize=(12, 7))

    if show_all:
        # One distinct colour per competitor, all with equal emphasis.
        colors = plt.cm.tab20(np.linspace(0, 1, len(all_competitors)))
        for competitor, color in zip(all_competitors, colors):
            _plot_competitor_line(image_data, competitor, selected_labels, label_to_seq,
                                  color=color, label=competitor,
                                  linewidth=2, markersize=6)
    else:
        # Background: every other competitor, muted and without legend entry.
        for competitor in all_competitors:
            if competitor not in (comp1, comp2):
                _plot_competitor_line(image_data, competitor, selected_labels, label_to_seq,
                                      color='gray', alpha=0.2,
                                      linewidth=1, markersize=4)

        # Foreground: the two competitors of interest (red / turquoise).
        for competitor, color in [(comp1, '#FF6B6B'), (comp2, '#4ECDC4')]:
            _plot_competitor_line(image_data, competitor, selected_labels, label_to_seq,
                                  color=color, alpha=1, label=competitor,
                                  linewidth=2.5, markersize=8)

    plt.xlabel('Sequential Label Number (Original Label)')
    plt.ylabel('Jaccard Score')

    image_name = image.split("/")[-1]
    if show_all:
        plt.title(f'All Competitors Performance in Image:\n{image_name}')
    else:
        plt.title(f'Contrasting Performance in Image:\n{image_name}')

    # Light dashed background grid.
    plt.grid(True, linestyle='--', alpha=0.3)

    # Place the legend outside the axes; skip it when nothing labelled was
    # drawn, which would otherwise only produce a matplotlib warning.
    handles, _ = plt.gca().get_legend_handles_labels()
    if handles:
        plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')

    # y limits padded by 0.1 and clamped to [0, 1]; computed over all of the
    # image's non-zero scores, not only the selected labels.
    valid_scores = image_data['J_value']
    if not valid_scores.empty:
        plt.ylim(max(0, valid_scores.min() - 0.1),
                 min(1, valid_scores.max() + 0.1))

    # Ticks show both the sequential number and the original label. Guarded
    # because min()/max() raise ValueError when no label was selected.
    x_ticks = list(label_to_seq.values())
    if x_ticks:
        plt.xticks(x_ticks, [f'{seq} ({seq_to_label[seq]})' for seq in x_ticks],
                   rotation=45)
        plt.xlim(min(x_ticks) - 0.5, max(x_ticks) + 0.5)

    # Adjust layout to prevent label cutoff.
    plt.tight_layout()

    return plt.gcf()

def analyze_contrasting_performance(df, top_n=3):
    """Print and plot the first `top_n` contrasting-performance cases.

    Cases come from `find_contrasting_cases(df)` (defined elsewhere in this
    notebook). For each reported case a textual summary is printed and two
    figures are shown: one highlighting the two contrasting competitors and
    one drawing every competitor with equal emphasis.

    Args:
        df: DataFrame passed through to `find_contrasting_cases` and
            `visualize_contrasting_case`.
        top_n: How many cases from the start of the case list to report.
            NOTE(review): the list is presumably ordered by contrast
            magnitude -- confirm in `find_contrasting_cases`.

    Returns:
        The full case list returned by `find_contrasting_cases`, or None
        when that list is empty/falsy.
    """
    cases = find_contrasting_cases(df)

    if not cases:
        print("No contrasting cases found!")
        return None

    print("\nTop Contrasting Performance Cases:")
    print("-" * 80)

    for i, case in enumerate(cases[:top_n], 1):
        comp1 = case['competitor1']
        comp2 = case['competitor2']

        print(f"\nCase {i}:")
        print(f"Image: {case['image'].split('/')[-1]}")
        print(f"Competitors: {comp1} vs {comp2}")
        print(f"Contrast Magnitude: {case['contrast_magnitude']:.3f}")
        print("\nLabel-wise differences:")
        for label, diff in case['label_differences']:
            print(f"  Label {label}: {diff:.3f}")
            print(f"    {comp1}: {case['scores'][comp1][label]:.3f}")
            print(f"    {comp2}: {case['scores'][comp2][label]:.3f}")

        # First the highlighted view of the two contrasting competitors,
        # then the view with all competitors drawn equally.
        for show_all in (False, True):
            fig = visualize_contrasting_case(case, df, show_all=show_all)
            plt.show()
            # Release the figure so repeated runs do not accumulate open
            # figures in pyplot's registry.
            plt.close(fig)

    return cases

# Usage:
# cases = analyze_contrasting_performance(df)


In [None]:
# Run the contrasting-performance analysis on the full dataframe: prints a
# summary of the top cases and shows two plots per case. `cases` is None when
# no contrasting cases are found.
cases = analyze_contrasting_performance(df)