# In [1]:
import pandas as pd
import numpy as np

def calculate_average_posix_score(file_path, score_column='posix_score'):
    """
    Read a CSV file and compute summary statistics for one score column.

    Cells that cannot be parsed as numbers (including empty cells) are
    treated as missing and excluded from every statistic except
    'total_entries'.

    Parameters:
    file_path (str): Path to the CSV file (passed straight to pandas.read_csv)
    score_column (str): Name of the column to analyse; defaults to
        'posix_score'

    Returns:
    dict: Dictionary containing
        'average_score'       - mean of the valid scores
        'standard_deviation'  - standard deviation of the valid scores
                                (pandas default, i.e. sample std with ddof=1)
        'total_valid_entries' - number of rows with a numeric score (int)
        'total_entries'       - number of data rows in the file (int)
        'min_score'           - smallest valid score
        'max_score'           - largest valid score
        The float statistics are NaN when there is no valid score; the
        standard deviation is also NaN when there is only one.

    Raises:
    KeyError: If the CSV has no column named `score_column`
    """
    # Read the CSV file
    df = pd.read_csv(file_path)

    # Fail with an actionable message instead of a bare KeyError('<name>');
    # the exception type is unchanged so existing handlers keep working.
    if score_column not in df.columns:
        raise KeyError(
            f"Column {score_column!r} not found in CSV; "
            f"available columns: {list(df.columns)}"
        )

    # Convert to numeric on a separate Series; invalid values become NaN,
    # which pandas skips in count/mean/std/min/max.
    scores = pd.to_numeric(df[score_column], errors='coerce')

    return {
        'average_score': scores.mean(),
        'standard_deviation': scores.std(),
        # count() yields a NumPy integer; normalise to a built-in int so both
        # entry counts have the same type (and serialise cleanly).
        'total_valid_entries': int(scores.count()),
        'total_entries': len(df),
        'min_score': scores.min(),
        'max_score': scores.max()
    }

# Usage example
if __name__ == "__main__":
    # Replace with your actual file path
    file_path = "/ephemeral/shashmi/posix_new_improved/Thesis/template_variation_results_qwenvl/all_results_summary.csv"
    results = calculate_average_posix_score(file_path)

    # Build the whole report first, then emit it line by line:
    # header, mean ± std, valid/total counts, and the observed score range.
    report = (
        "Analysis Results:",
        f"POSIX Score: {results['average_score']:.2f} ± {results['standard_deviation']:.2f}",
        f"Valid Entries: {results['total_valid_entries']} out of {results['total_entries']}",
        f"Score Range: [{results['min_score']:.2f}, {results['max_score']:.2f}]",
    )
    for line in report:
        print(line)

# Output:
# Analysis Results:
# POSIX Score: 1.20 ± 0.41
# Valid Entries: 400 out of 400
# Score Range: [0.00, 3.28]
