# Combine JSON files into a raw table

In [9]:
import os
import json
import csv
from tqdm import tqdm

# Folder that holds the input JSON files, and the name of the CSV to write
folder = 'val'
output_file = "{}3.csv".format(folder)

# Column headers of the output CSV, in the order they are written
columns = [
    "File Name",
    "Rhetorical Devices and Figures of Speech",
    "Manipulation of Facts and Information",
    "Emotional Appeals",
    "Logical Argumentation and Reasoning",
    "Manipulative Techniques",
    "Fact-Based Arguments",
    "Criticism and Devaluation",
    "Audience Engagement Techniques",
    "Emotional Manipulation",
    "Simplification and Misrepresentation",
    "Misdirection and Distraction",
    "Cherry-Picking and Selectivity",
    "Social Influence and Group Dynamics",
    "Deceptive Framing",
    "Polarity",
    "Formality",
    "Emotional Tone",
    "Urgency and Focus",
    "Narrative Style",
    "Inspirational and Awe",
    "Euphemism and Directness",
    "Thematic Tones",
    "Literary Aesthetic",
    "Relevance Fallacies",
    "Ambiguity and Vagueness",
    "Causal Fallacies",
    "Formal Logical Errors",
    "False Comparisons",
    "Fallacies of Overgeneralization",
    "Contradictory Arguments",
    "Fallacies of Unfounded Assertions",
    "Dichotomies and Binary Thinking",
    "Contradictions",
]

def _join_types(entries, keep_missing=False):
    """Join the "Type" values of the dict entries in *entries* with ", ".

    Anything that is not a list yields an empty string, and entries that are
    not dicts are ignored. A dict whose "Type" is absent or null contributes
    an empty string when *keep_missing* is true and is skipped otherwise.
    Non-string values are converted with ``str`` so the join cannot fail.
    """
    if not isinstance(entries, list):
        return ""
    types = []
    for entry in entries:
        if not isinstance(entry, dict):
            continue
        value = entry.get("Type")
        if value is None:
            if keep_missing:
                types.append("")
            continue
        types.append(str(value))
    return ", ".join(types)


def _parse_record(data, file_name, columns):
    """Build one CSV row (a dict keyed by column name) from a decoded JSON document."""
    row = {col: "" for col in columns}
    # Only write keys that are real columns; DictWriter raises ValueError on
    # any key that is not listed in its fieldnames.
    if "File Name" in row:
        row["File Name"] = file_name

    # A document whose top level is not an object has nothing to tabulate.
    if not isinstance(data, dict):
        return row

    for key, values in data.items():
        if not isinstance(values, list):
            continue

        for item in values:
            if not isinstance(item, dict):
                continue
            classification = item.get("Classification", "")
            # "File Name" is the row identifier and must not be overwritten
            # by a classification that happens to carry the same label.
            if (
                isinstance(classification, str)
                and classification != "File Name"
                and classification in row
            ):
                # NOTE: a later item with the same classification replaces
                # the earlier value (last one wins).
                row[classification] = _join_types(
                    item.get("Details", []), keep_missing=True
                )

        # "Polarity" and "Contradictions" are stored directly as lists of
        # entries carrying a "Type"; this runs after the generic pass above
        # and therefore takes precedence for the same key.
        if key in ("Polarity", "Contradictions") and key in row:
            row[key] = _join_types(values)

    return row


def parse_json_to_csv(input_folder, output_file, columns):
    """Flatten every ``*.json`` file in *input_folder* into one CSV table.

    One row is written per JSON file. The "File Name" column receives the
    file name without its extension. For every top-level list in the JSON
    document, each dict item whose "Classification" equals a column name
    fills that column with the ", "-joined "Type" values of its "Details".
    The top-level "Polarity" and "Contradictions" lists are joined from the
    "Type" values of their own entries.

    Args:
        input_folder: Directory that is scanned (non-recursively) for files
            whose name ends in ``.json``.
        output_file: Path of the CSV file to create or overwrite (UTF-8).
        columns: Column names, in output order. Columns that are never
            matched stay empty.

    Raises:
        OSError: If the folder or a file cannot be read, or the output file
            cannot be written.
        json.JSONDecodeError: If a file does not contain valid JSON.
    """
    # os.listdir returns entries in arbitrary order; sort so that the row
    # order of the output is reproducible between runs.
    json_files = sorted(f for f in os.listdir(input_folder) if f.endswith('.json'))

    all_data = []
    for json_file in tqdm(json_files, desc="Processing files"):
        input_path = os.path.join(input_folder, json_file)
        with open(input_path, 'r', encoding='utf-8') as f:
            data = json.load(f)

        file_name = os.path.splitext(json_file)[0]  # drop the .json extension
        all_data.append(_parse_record(data, file_name, columns))

    # Rows are collected first so that a failure while reading never leaves
    # a partially written CSV behind.
    with open(output_file, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=columns)
        writer.writeheader()
        writer.writerows(all_data)

# Run the conversion for the configured folder
parse_json_to_csv(input_folder=folder, output_file=output_file, columns=columns)


Processing files: 100%|██████████| 1284/1284 [00:32<00:00, 38.93it/s]
