In [3]:
import pandas as pd
import numpy as np

# Load the raw classification report text, then echo it once the file is closed
with open('Data 1 Classification', 'r') as file:
    content = file.read()
print(content)

# Alternatively, parse it into a DataFrame for easier analysis
def parse_classification_report(file_path):
    """Parse a plain-text classification report into a pandas DataFrame.

    Every line that splits on whitespace into
    ``<label> <precision> <recall> <f1-score> <support>`` becomes one row.
    Skipped lines: blank lines, lines starting with ``Classification``,
    lines whose first token is ``accuracy``, ``macro``, ``weighted`` or
    ``precision`` (summary rows and the column header), lines with fewer
    than five tokens, and lines whose value fields are not numeric.

    Args:
        file_path: Path of the text file holding the report.

    Returns:
        DataFrame with columns ``Class``, ``Precision``, ``Recall``,
        ``F1-Score`` (float64) and ``Support`` (int64), one row per parsed
        line in file order. The numeric dtypes are kept even when no row
        was parsed.

    Raises:
        OSError: If the file cannot be opened (propagated from ``open``).
    """
    columns = ['Class', 'Precision', 'Recall', 'F1-Score', 'Support']
    skipped_first_tokens = ('accuracy', 'macro', 'weighted', 'precision')

    with open(file_path, 'r') as file:
        lines = file.readlines()

    data = []
    for line in lines:
        if line.startswith('Classification'):
            continue

        parts = line.split()
        # A class row needs a label plus four value fields; with fewer
        # tokens, reading parts[4] would raise an uncaught IndexError.
        if len(parts) < 5 or parts[0] in skipped_first_tokens:
            continue

        try:
            row = [
                parts[0],
                float(parts[1]),
                float(parts[2]),
                float(parts[3]),
                int(parts[4]),
            ]
        except ValueError:
            # Not a data row (e.g. multi-word label or stray text) - skip it
            continue
        data.append(row)

    df = pd.DataFrame(data, columns=columns)

    # Pin numeric dtypes so an empty result is not left with object columns,
    # which would break numeric operations such as nlargest/nsmallest.
    return df.astype({
        'Precision': 'float64',
        'Recall': 'float64',
        'F1-Score': 'float64',
        'Support': 'int64',
    })

# Build the per-class metrics table from the report file
df = parse_classification_report('Data 1 Classification')

# Show the parsed table
print("\nClassification Report as DataFrame:")
print(df)

# Unweighted per-class means, plus the total number of samples
print("\n=== Summary Statistics ===")
print(f"Average Precision: {df['Precision'].mean():.4f}")
print(f"Average Recall: {df['Recall'].mean():.4f}")
print(f"Average F1-Score: {df['F1-Score'].mean():.4f}")
print(f"Total Support: {df['Support'].sum()}")

# Rank classes by F1-score: the five highest, then the five lowest
best_classes = df.nlargest(5, 'F1-Score')
worst_classes = df.nsmallest(5, 'F1-Score')

print("\n=== Best Performing Classes (by F1-Score) ===")
print(best_classes[['Class', 'F1-Score']])
# Same two columns for the weakest classes
print("\n=== Worst Performing Classes (by F1-Score) ===")
print(worst_classes[['Class', 'F1-Score']])

Classification Report:
              precision    recall  f1-score   support

           A       0.97      0.97      0.97       100
           B       0.97      0.98      0.98       100
       Blank       0.98      1.00      0.99       100
           C       0.97      0.93      0.95       100
           D       0.89      0.91      0.90       100
           E       0.93      0.93      0.93       100
           F       0.79      0.92      0.85       100
           G       0.91      0.94      0.93       100
           H       0.96      0.96      0.96       100
           I       0.99      0.88      0.93       100
           J       0.94      0.94      0.94       100
           K       0.93      0.92      0.92       100
           L       0.90      0.90      0.90       100
           M       0.88      0.73      0.80       100
           N       0.87      0.90      0.89       100
           O       0.93      0.91      0.92       100
           P       0.60      0.89      0.72       100
    