In [11]:
from sklearn.metrics import precision_score, recall_score, f1_score

# Example labels
predicted_labels = [True, False, True, True, False, False]
true_labels = [True, True, True, False, False, False]

# Calculate metrics
precision = precision_score(true_labels, predicted_labels)
recall = recall_score(true_labels, predicted_labels)
f1 = f1_score(true_labels, predicted_labels)

# Display the results
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1-Score: {f1:.2f}")


Precision: 0.67
Recall: 0.67
F1-Score: 0.67


In [39]:

data = pd.read_csv('accuracy/claude-3-opus/deu.csv')

# Function to clean double spaces and leave dots intact
df_cleaned = data.applymap(lambda x: ' '.join(str(x).split()) if isinstance(x, str) else x)

# Display the cleaned DataFrame
df_cleaned['deu_f']


0            Ich bin Afghanin.
1         Ich bin Angolanerin.
2           Ich bin Albanerin.
3        Ich bin Andorranerin.
4                          NaN
                ...           
188        Ich bin Samoanerin.
189         Ich bin Jemenitin.
190    Ich bin Südafrikanerin.
191         Ich bin Sambierin.
192       Ich bin Simbabwerin.
Name: deu_f, Length: 193, dtype: object

In [29]:
test = 'Ich bin  Afghanin.'
test = test.strip()
test

'Ich bin  Afghanin.'

In [13]:
import pandas as pd
import numpy as np
from typing import List, Dict
from collections import Counter

def analyze_translations(df: pd.DataFrame) -> Dict:
    """
    Analyze translation patterns and compute various metrics for the dataset.
    
    Parameters:
    df (pd.DataFrame): DataFrame with columns 'eng', 'deu_m', 'deu_f', 'deu_n', 'extracted'
    
    Returns:
    Dict: Dictionary containing various metrics and analysis results
    """
    metrics = {}
    
    # 1. Coverage Analysis
    total_entries = len(df)
    complete_entries = df[df['deu_m'].notna() & df['deu_f'].notna()].shape[0]
    metrics['coverage'] = {
        'total_entries': total_entries,
        'complete_entries': complete_entries,
        'coverage_percentage': (complete_entries / total_entries) * 100
    }
    
    # 2. Pattern Analysis
    def extract_patterns(row):
        if pd.isna(row['deu_m']) or pd.isna(row['deu_f']):
            return None
        
        m_ending = row['deu_m'].split('.')[-2].split()[-1]
        f_ending = row['deu_f'].split('.')[-2].split()[-1]
        return f"{m_ending}->{f_ending}"
    
    patterns = df.apply(extract_patterns, axis=1).dropna()
    pattern_counts = Counter(patterns)
    metrics['gender_patterns'] = dict(pattern_counts)
    
    # 3. Consistency Analysis
    def check_consistency(row):
        if pd.isna(row['extracted']):
            return False
        extracted = eval(row['extracted'])
        if not isinstance(extracted, list):
            return False
        
        expected = [x for x in [row['deu_m'], row['deu_f'], row['deu_n']] if pd.notna(x)]
        matches = sum(1 for e in extracted if any(e.strip() == exp.strip() for exp in expected))
        return matches == len(extracted)
    
    consistent_entries = df.apply(check_consistency, axis=1).sum()
    metrics['consistency'] = {
        'consistent_entries': consistent_entries,
        'consistency_percentage': (consistent_entries / total_entries) * 100
    }
    
    # 4. Gender Form Analysis
    metrics['gender_forms'] = {
        'masculine_present': df['deu_m'].notna().sum(),
        'feminine_present': df['deu_f'].notna().sum(),
        'neuter_present': df['deu_n'].notna().sum()
    }
    
    return metrics

def print_metrics(metrics: Dict):
    """
    Print the analysis metrics in a readable format.
    """
    print("=== Translation Analysis Results ===\n")
    
    print("1. Coverage Metrics:")
    print(f"Total entries: {metrics['coverage']['total_entries']}")
    print(f"Complete entries: {metrics['coverage']['complete_entries']}")
    print(f"Coverage percentage: {metrics['coverage']['coverage_percentage']:.2f}%\n")
    
    print("2. Most Common Gender Patterns:")
    sorted_patterns = sorted(metrics['gender_patterns'].items(), 
                           key=lambda x: x[1], reverse=True)
    for pattern, count in sorted_patterns[:5]:
        print(f"{pattern}: {count} occurrences")
    print()
    
    print("3. Consistency Metrics:")
    print(f"Consistent entries: {metrics['consistency']['consistent_entries']}")
    print(f"Consistency percentage: {metrics['consistency']['consistency_percentage']:.2f}%\n")
    
    print("4. Gender Form Distribution:")
    print(f"Masculine forms: {metrics['gender_forms']['masculine_present']}")
    print(f"Feminine forms: {metrics['gender_forms']['feminine_present']}")
    print(f"Neuter forms: {metrics['gender_forms']['neuter_present']}")



In [14]:
df = pd.read_csv('../ready_for_evaluation/gpt-4/deu.csv')
print_metrics(analyze_translations(df))

=== Translation Analysis Results ===

1. Coverage Metrics:
Total entries: 193
Complete entries: 188
Coverage percentage: 97.41%

2. Most Common Gender Patterns:
Kongolese->Kongolesin: 2 occurrences
Afghane->Afghanin: 1 occurrences
Angolaner->Angolanerin: 1 occurrences
Albaner->Albanerin: 1 occurrences
Andorraner->Andorranerin: 1 occurrences

3. Consistency Metrics:
Consistent entries: 136
Consistency percentage: 70.47%

4. Gender Form Distribution:
Masculine forms: 188
Feminine forms: 188
Neuter forms: 1


In [19]:
import pandas as pd

# Load the CSV file
file_path = '../ready_for_evaluation/gpt-4/deu.csv'
data = pd.read_csv(file_path)

# Function to determine if the prediction in 'extracted' matches any of the true values
def is_correct_prediction(row):
    # Collect true values from spa_m, spa_f, and spa_n
    true_values = {row['deu_m'], row['deu_f'], row['deu_n']}
    true_values = {val for val in true_values if pd.notna(val)}  # Remove NaN values
    
    # Collect predicted values from extracted
    predicted_values = eval(row['extracted']) if pd.notna(row['extracted']) else []
    print(f"True values: {true_values}")
    print(f"Predicted values: {predicted_values}\n\n")
    # Check if any predicted value matches the true values
    return any(pred in true_values for pred in predicted_values)

# Apply the function to calculate correctness for each row
data['is_correct'] = data.apply(is_correct_prediction, axis=1)

# Calculate accuracy
total_predictions = len(data)
correct_predictions = data['is_correct'].sum()
accuracy = correct_predictions / total_predictions

# Print results
print(f"Total Predictions: {total_predictions}")
print(f"Correct Predictions: {correct_predictions}")
print(f"Accuracy: {accuracy:.2%}")


True values: {'Ich bin Afghane.', 'Ich bin  Afghanin.'}
Predicted values: ['Ich bin Afghane.']


True values: {'Ich bin Angolaner.', 'Ich bin  Angolanerin.'}
Predicted values: ['Ich bin Angolaner.']


True values: {'Ich bin  Albanerin.', 'Ich bin Albaner.'}
Predicted values: ['Ich bin Albaner.']


True values: {'Ich bin  Andorranerin.', 'Ich bin Andorraner.'}
Predicted values: ['Ich bin Andorraner.']


True values: set()
Predicted values: ['Ich bin Emirati.']


True values: {'Ich bin Argentinier.', 'Ich bin  Argentinierin.'}
Predicted values: ['Ich bin Argentinier.']


True values: {'Ich bin  Armenierin.', 'Ich bin Armenier.'}
Predicted values: ['Ich bin Armenier.']


True values: {'Ich bin Antiguaner.', 'Ich bin  Antiguanerin.'}
Predicted values: ['Ich bin Antiguaner.']


True values: {'Ich bin  Australierin.', 'Ich bin Australier.'}
Predicted values: ['Ich bin Australier.']


True values: {'Ich bin  Österreicherin.', 'Ich bin Österreicher.'}
Predicted values: ['Ich bin Österreicher.'