In [6]:
import os
import json
import pandas as pd

In [7]:
# Numeric IDs of the papers whose ground-truth JSON files are loaded below.
file_num_list = [
    11, 16, 22, 35, 39,
    40, 41, 42, 44, 56,
]

# Folder (relative to the notebook) holding the `paper_XXX_gt.json` files.
groundtruth_folder_path = "../data/ground_truth"

In [8]:
def _ground_truth_row(gt, paper_id, sample):
    """Flatten one sample of a parsed ground-truth document into a flat record.

    Args:
        gt: Parsed ground-truth JSON (a dict) with the sections
            "CAM (Cathode Active Material)", "Electrode (half-cell)",
            "Morphological Properties" and "Cathode Performance".
        paper_id: Identifier stored in the "Paper ID" column.
        sample: Sample name; must be a key of the CAM
            "Stoichiometry information" and "Commercial NCM used" mappings.

    Returns:
        dict mapping column name -> value. Key insertion order defines the
        column order of the resulting DataFrame.

    Raises:
        KeyError: if a required section or field is missing from ``gt``.
    """
    row = {"Paper ID": paper_id, "Sample": sample}

    # CAM: one column per entry of the sample's stoichiometry mapping,
    # followed by the sample-specific and paper-level synthesis fields.
    cam = gt["CAM (Cathode Active Material)"]
    row.update(cam["Stoichiometry information"][sample])
    row["Commercial NCM used"] = cam["Commercial NCM used"][sample]
    for key in (
        "Lithium source",
        "Synthesis method",
        "Crystallization method",
        "Crystallization final temperature",
        "Crystallization final duration (hours)",
        "Doping",
        "Coating",
    ):
        row[key] = cam[key]

    # Electrode: paper-level fields; only the first electrolyte entry is used.
    electrode = gt["Electrode (half-cell)"]
    row["Active material to Conductive additive to Binder ratio"] = electrode[
        "Active material to Conductive additive to Binder ratio"
    ]
    if electrode["Electrolyte"]:
        elec = electrode["Electrolyte"][0]
        row["Electrolyte salt"] = elec["Salt"]
        row["Electrolyte concentration"] = elec["Concentration"]
        row["Electrolyte solvent"] = elec["Solvent"]
        row["Electrolyte solvent ratio"] = elec["Solvent ratio"]
    row["Additive"] = electrode["Additive"]
    row["Loading density (mass loading of NCM)"] = electrode[
        "Loading density (mass loading of NCM)"
    ]

    # Morphological properties: per-sample mappings; a missing sample gives None.
    morph = gt["Morphological Properties"]
    for key in (
        "Particle size",
        "Particle shape",
        "Particle distribution",
        "Coating layer characteristics",
        "Crystal structure and lattice characteristics",
    ):
        row[key] = morph[key].get(sample)

    # Cathode performance: only the first performance entry of the sample is used.
    perf_entries = gt["Cathode Performance"].get(sample)
    if perf_entries:
        perf = perf_entries[0]
        row["Voltage range"] = perf["Voltage range"]
        row["Temperature"] = perf["Temperature"]
        for entry in perf["C-rate and Specific capacity"]:
            # Non-dict entries (e.g. None or placeholder strings) carry no
            # C-rate data; skip them instead of failing on the key lookups.
            if not isinstance(entry, dict):
                continue
            if "Other C-rates and performance" in entry:
                # Wrapper entry: the measurements are in the nested list
                # (which may be empty or None).
                measurements = entry["Other C-rates and performance"] or []
            else:
                measurements = [entry]
            for item in measurements:
                c_rate = item["C-rate"]
                row[f"C-rate {c_rate}"] = item["Capacity"]

    return row


# Build one record per (paper, sample) pair from every ground-truth file.
all_rows = []
for num in file_num_list:
    # Zero-pad to at least three digits; unlike slicing the last three
    # characters, this does not truncate IDs of 1000 and above.
    filename = f"paper_{str(num).zfill(3)}_gt.json"
    json_file_path = os.path.join(groundtruth_folder_path, filename)
    with open(json_file_path, "r", encoding="utf-8") as f:
        gt = json.load(f)

    # The keys of the stoichiometry mapping define the samples of a paper.
    for sample in gt["CAM (Cathode Active Material)"]["Stoichiometry information"]:
        all_rows.append(_ground_truth_row(gt, num, sample))

# Create DataFrame
df = pd.DataFrame(all_rows)

In [9]:
# Write the flattened ground-truth table to CSV. The target folder is created
# first because DataFrame.to_csv does not create missing parent directories.
groundtruth_csv_path = "../output/csv/groundtruth.csv"
os.makedirs(os.path.dirname(groundtruth_csv_path), exist_ok=True)
df.to_csv(groundtruth_csv_path, index=False)

In [12]:
# Read the baseline experiment CSV into `pred_df`.
baseline_csv_path = "../output/csv/experiments/baseline.csv"
pred_df = pd.read_csv(filepath_or_buffer=baseline_csv_path)

In [11]:
# Columns that appear in only one of `df` / `pred_df` (symmetric difference):
# set(df.columns) ^ set(pred_df.columns)