In [7]:
import pandas as pd
import json
from collections import Counter

# Define paths to the provided JSON files
files = [
    'llama3.1_results_grocery.json',
    'llama3.1_results_home.json',
    'llama3.1_results_jewelry.json',
    'llama3.1_results_office.json'
]

# Column layout shared by the raw rows and the statistics table.
ROW_COLUMNS = ['Category', 'Attribute']
STATS_COLUMNS = ['Category', 'Attribute', 'Count', 'Percentage']


def category_from_filename(file):
    """Return the category token encoded in a results file name.

    The category is the third underscore-separated token of the file name,
    truncated at its first dot (``llama3.1_results_grocery.json`` ->
    ``grocery``). Any directory prefix is dropped first so underscores in
    parent directories cannot shift the token position.

    Raises:
        IndexError: if the file name has fewer than three
            underscore-separated tokens.
    """
    base_name = file.replace('\\', '/').rsplit('/', 1)[-1]
    return base_name.split('_')[2].split('.')[0]


def parse_attribute_names(response):
    """Return the attribute names found in a response's fenced JSON block.

    The text between the first ```json fence and the next ``` fence is
    decoded as JSON, and the keys of its ``'Attributes'`` object are
    returned in order. Any response that does not have that shape
    (non-string, missing fence, invalid JSON, payload that is not an object,
    missing or non-object ``'Attributes'``) yields an empty list, so one
    malformed entry cannot abort the whole run.
    """
    if not isinstance(response, str):
        return []
    try:
        fenced = response.split('```json')[1].split('```')[0]
        attributes = json.loads(fenced)['Attributes']
    except (IndexError, KeyError, TypeError, json.JSONDecodeError):
        # TypeError covers a decoded payload that is a list/scalar/null.
        return []
    if not isinstance(attributes, dict):
        return []
    return list(attributes)


def extract_attribute_rows(paths):
    """Collect one ``{'Category', 'Attribute'}`` row per extracted attribute.

    Each path is loaded as JSON and iterated; entries that are not dicts,
    or whose ``'response'`` cannot be parsed, contribute no rows.
    """
    rows = []
    for path in paths:
        # The category depends only on the file name, so derive it once.
        category = category_from_filename(path)
        # Explicit UTF-8 avoids decoding the JSON with the platform locale.
        with open(path, 'r', encoding='utf-8') as f:
            data = json.load(f)
        for entry in data:
            if not isinstance(entry, dict):
                continue
            names = parse_attribute_names(entry.get('response', ''))
            rows.extend(
                {'Category': category, 'Attribute': name} for name in names
            )
    return rows


def compute_attribute_stats(df):
    """Return per-category attribute counts and percentages.

    ``df`` must have ``'Category'`` and ``'Attribute'`` columns. The result
    has one row per (category, attribute) pair with its occurrence ``Count``
    and its ``Percentage`` of all attribute occurrences in that category.
    An empty input yields an empty table with the same four columns.
    """
    if df.empty:
        return pd.DataFrame(columns=STATS_COLUMNS)
    stats = df.groupby(ROW_COLUMNS).size().reset_index(name='Count')
    category_totals = stats.groupby('Category')['Count'].transform('sum')
    stats['Percentage'] = (stats['Count'] / category_totals) * 100
    return stats


if __name__ == "__main__":
    # Extract (category, attribute) rows from every results file
    extracted_data = extract_attribute_rows(files)

    # Create a DataFrame from the extracted data; naming the columns keeps
    # the frame well-formed even when nothing was extracted.
    df = pd.DataFrame(extracted_data, columns=ROW_COLUMNS)

    # Calculate statistics
    attribute_stats = compute_attribute_stats(df)

    # Save to a CSV file
    attribute_stats.to_csv('attribute_statistics_llama3_1.csv', index=False)

    # Display the statistics in the console
    print(attribute_stats)


    Category               Attribute  Count  Percentage
0        gro    Additional Accolades      1    0.392157
1        gro  Additional Ingredients      1    0.392157
2        gro       Allergen Friendly      2    0.784314
3        gro    Allergen Information      1    0.392157
4        gro        Alternate Item #      1    0.392157
..       ...                     ...    ...         ...
805      jew                   Style     30    5.917160
806      jew           Target Gender      1    0.197239
807      jew                    Type      6    1.183432
808      jew                   Width      4    0.788955
809      jew              Width (mm)      1    0.197239

[810 rows x 4 columns]
