In [1]:
import os
import json
import pandas as pd
from pprint import pprint

In [88]:
def _category_index(filename: str):
    """Return ``N`` for a file name that starts with ``category-N``, else ``None``."""
    prefix = "category-"
    if not filename.startswith(prefix):
        return None
    tail = filename[len(prefix):]
    end = 0
    while end < len(tail) and tail[end] in "0123456789":
        end += 1
    return int(tail[:end]) if end else None


def _load_paper_categories(folder: str, hyper_param_method) -> dict:
    """Load every matching category JSON in *folder*, keyed as ``'category-N'``.

    The category number is taken from the file name itself rather than from
    the position of the file in the directory listing, because ``os.listdir``
    returns entries in arbitrary order and the folder may hold other files.
    """
    categories = {}
    # Sorted only to make the load order (and thus overwrites) deterministic.
    for filename in sorted(os.listdir(folder)):
        index = _category_index(filename)
        if index is None:
            continue
        if hyper_param_method is None:
            if not filename.startswith(f"category-{index}-paper_"):
                continue
        elif not filename.endswith(f"{hyper_param_method}.json"):
            # NOTE(review): assumes hyper-parameter result files also start
            # with "category-N"; confirm against the actual output naming.
            continue
        with open(os.path.join(folder, filename), "r", encoding="utf-8") as f:
            categories[f"category-{index}"] = json.load(f)
    return categories


def _iter_c_rate_entries(c_rate_info):
    """Yield every dict carrying a ``'C-rate'``, flattening the nested list."""
    for entry in c_rate_info:
        if 'C-rate' in entry:
            yield entry
        elif 'Other C-rates and performance' in entry:
            yield from entry['Other C-rates and performance']


def _is_usable_c_rate(value) -> bool:
    """Return False for a missing C-rate (``None`` or the text ``'none'``)."""
    return value is not None and str(value).lower() != 'none'


def _c_rate_sort_key(label: str):
    """Order numeric labels by value, then non-numeric labels alphabetically."""
    try:
        return (0, float(label), label)
    except ValueError:
        return (1, 0.0, label)


def _pick_key(mapping, spaced: str, compact: str) -> str:
    """Return *spaced* if it is a key of *mapping*, otherwise *compact*."""
    return spaced if spaced in mapping else compact


def _build_row(paper_id, sample, content: dict, all_crates: list) -> dict:
    """Flatten the four category records of one sample into a single CSV row.

    Raises whatever lookup error occurs when an expected key is absent; the
    caller reports it and skips the sample.
    """
    stoich = content['category-1'][sample]
    ratios = stoich['Stoichiometry information']
    electrolyte = content['category-2'][sample]
    first_electrolyte = electrolyte['Electrolyte'][0]

    # category-3 is keyed by property first, then by sample; the property
    # names appear either with spaces or in CamelCase.
    particle = content['category-3']
    size_key = _pick_key(particle, 'Particle size', 'ParticleSize')
    shape_key = _pick_key(particle, 'Particle shape', 'ParticleShape')
    dist_key = _pick_key(particle, 'Particle distribution', 'ParticleDistribution')
    coat_key = _pick_key(
        particle, 'Coating layer characteristics', 'CoatingLayerCharacteristics')
    crystal_key = _pick_key(
        particle,
        'Crystal structure and lattice characteristics',
        'CrystalStructureAndLatticeCharacteristics',
    )

    capacity = content['category-4'][sample][0]

    row = {
        'Paper ID': paper_id,
        'Sample': sample,

        'Li ratio': ratios['Li ratio'],
        'Ni ratio': ratios['Ni ratio'],
        'Co ratio': ratios['Co ratio'],
        'Mn ratio': ratios['Mn ratio'],
        'O ratio': ratios['O ratio'],
        'W ratio': ratios['W ratio'] if 'W ratio' in ratios else None,
        'Commercial NCM used': stoich['Commercial NCM used'],
        'Lithium source': stoich['Lithium source'],
        'Synthesis method': stoich['Synthesis method'],
        'Crystallization method': stoich['Crystallization method'],
        'Crystallization final temperature': stoich['Crystallization final temperature'],
        'Crystallization final duration (hours)': stoich['Crystallization final duration (hours)'],
        'Doping': stoich['Doping'],
        'Coating': stoich['Coating'],

        'Active material to Conductive additive to Binder ratio': electrolyte['Active material to Conductive additive to Binder ratio'],
        'Electrolyte salt': first_electrolyte['Salt'],
        'Electrolyte concentration': first_electrolyte['Concentration'],
        'Electrolyte solvent': first_electrolyte['Solvent'],
        'Electrolyte solvent ratio': first_electrolyte['Solvent ratio'],
        'Additive': electrolyte['Additive'],
        'Loading density (mass loading of NCM)': electrolyte['Loading density (mass loading of NCM)'],

        'Particle size': particle[size_key][sample],
        'Particle shape': particle[shape_key][sample],
        'Particle distribution': particle[dist_key][sample],
        'Coating layer characteristics': particle[coat_key][sample],
        'Crystal structure and lattice characteristics': particle[crystal_key][sample],

        'Voltage range': capacity['Voltage range'],
        'Temperature': capacity['Temperature'],
    }

    # Pre-create every C-rate column so all rows of a paper share one layout.
    for c in all_crates:
        row[f'C-rate {c}'] = None

    # Fill in the capacities this sample actually reports. Entries without a
    # usable C-rate are skipped, matching the filter used when the column
    # labels were collected (otherwise a stray "C-rate None" column appears).
    for entry in _iter_c_rate_entries(capacity['C-rate and Specific capacity']):
        if _is_usable_c_rate(entry['C-rate']):
            row[f'C-rate {entry["C-rate"]}'] = entry.get('Capacity')

    return row


def csv_generator(file_num_list: list, output_folder_path: str, rag_method: str, hyper_param_method: str=None) -> pd.DataFrame:
    """Flatten per-paper category JSON files into one CSV and return it.

    For each paper number the folder
    ``{output_folder_path}/json/{rag_method}/paper_{NNN}_output/`` is scanned
    for ``category-N...`` JSON files. One row is produced per sample listed in
    ``category-1``, combining fields from categories 1 to 4 plus one
    ``C-rate <value>`` column per C-rate reported in that paper.

    Args:
        file_num_list: Paper numbers to process; each is zero-padded to three
            digits to build the folder name.
        output_folder_path: Root folder holding the ``json`` input tree; the
            ``csv`` output folder is created beneath it.
        rag_method: Sub-folder name under ``json`` and stem of the CSV name.
        hyper_param_method: When given, only files ending in
            ``{hyper_param_method}.json`` are read and the CSV is named
            ``{rag_method}_{hyper_param_method}.csv``. When ``None``, files
            named ``category-N-paper_*`` are read and the CSV is named
            ``{rag_method}.csv``.

    Returns:
        The DataFrame that was written to disk.

    Papers whose folder or ``category-1`` data is missing are reported and
    skipped. Samples that fail to convert are reported and skipped as well.
    """
    all_rows = []
    for paper_id in file_num_list:
        # zfill pads short IDs like the original slicing did, without
        # truncating IDs longer than three digits.
        folder = f"{output_folder_path}/json/{rag_method}/paper_{str(paper_id).zfill(3)}_output/"
        try:
            content = _load_paper_categories(folder, hyper_param_method)
        except FileNotFoundError:
            print(f"[경고] Paper ID {paper_id} 없음.")
            continue

        if 'category-1' not in content:
            print(f"[경고] Paper ID {paper_id}: category-1 없음.")
            continue
        samples = list(content['category-1'].keys())

        # Collect every C-rate label that occurs anywhere in this paper.
        labels = set()
        for sample in samples:
            try:
                c_rate_info = content['category-4'][sample][0]['C-rate and Specific capacity']
                for entry in _iter_c_rate_entries(c_rate_info):
                    if _is_usable_c_rate(entry['C-rate']):
                        labels.add(str(entry['C-rate']))
            except (KeyError, IndexError, TypeError):
                continue
        all_crates = sorted(labels, key=_c_rate_sort_key)

        for sample in samples:
            try:
                all_rows.append(_build_row(paper_id, sample, content, all_crates))
            except Exception as e:
                # Deliberate best-effort: one malformed sample must not abort
                # the whole export, so report it and move on.
                print(f"[오류] Sample {sample} (Paper {paper_id}): {e}")

    # Build the final DataFrame.
    df = pd.DataFrame(all_rows)

    # Create the output folder if it does not exist yet.
    os.makedirs(f"{output_folder_path}/csv", exist_ok=True)
    if hyper_param_method is None:
        df.to_csv(f"{output_folder_path}/csv/{rag_method}.csv", index=False)
    else:
        df.to_csv(f"{output_folder_path}/csv/{rag_method}_{hyper_param_method}.csv", index=False)

    return df


In [89]:
# Papers to convert in this run. The original cell also listed these IDs as
# alternatives: 11, 16, 22, 35, 39, 40, 41, 42, 44, 56.
file_num_list = list(range(1, 5))
output_folder_path = "../output"
rag_method = "multiagent-rag"
# None selects the default category files; the original cell noted
# "paper56_o1" as an alternative value.
hyper_param_method = None

# Kept as the last expression so the notebook displays the returned DataFrame.
csv_generator(
    file_num_list,
    output_folder_path,
    rag_method,
    hyper_param_method,
)

Unnamed: 0,Paper ID,Sample,Li ratio,Ni ratio,Co ratio,Mn ratio,O ratio,W ratio,Commercial NCM used,Lithium source,...,Voltage range,Temperature,C-rate 0.1,C-rate 0.2,C-rate 0.5,C-rate 1.0,C-rate 2.0,C-rate 4.0,C-rate 5.0,C-rate 3.0
0,1,NCM622-600,1.0,0.6,0.2,0.2,2.0,,no,Li2CO3,...,2.8–4.5,25,190.0,,139.0,,,,,
1,1,NCM622-700,1.0,0.6,0.2,0.2,2.0,,no,Li2CO3,...,2.8–4.5,25,218.17,,164.2,,,,123.8,
2,1,NCM622-800,1.0,0.6,0.2,0.2,2.0,,no,Li2CO3,...,2.8–4.5,25,204.39,,151.3,,,,,
3,2,NCM-622,0.98,0.6,0.2,0.2,2.0,,,,...,2.8–4.3,25,163.96,,,,,,,
4,2,B-NCM,0.98,0.6,0.2,0.2,2.0,,,LiOH,...,2.8–4.3,25,195.65,,,,,,,
5,2,Ti-NCM,0.98,0.6,0.2,0.2,2.0,,,LiOH,...,2.8–4.3,25,190.45,,,,,,,
6,2,Z-NCM-622,0.98,0.6,0.2,0.2,2.0,,,LiOH,...,2.8–4.3,25,214.65,,,,,,,
7,3,NCM622,1.0,0.6,0.2,0.2,2.0,,,,...,3.0-4.2,25,203.0,,,160.0,,,,
8,3,PC-NCM622,1.0,0.6,0.2,0.2,2.0,,no,LiOH,...,3.0-4.2,25,203.0,,,160.0,,,,
9,3,SC-NCM622,1.0,0.6,0.2,0.2,2.0,,no,LiOH,...,3.0-4.2,25,195.0,,,160.0,,,,
