# JSON to Pandas Dataframe 

In [1]:
import pandas as pd
import json
import os

# Specify the directory containing your JSON files
json_dir = "./"

# Initialize an empty list to store DataFrames
dataframes = []

# Iterate through each file in the directory
for filename in os.listdir(json_dir):
    if filename.endswith(".json"):  # Process only JSON files
        file_path = os.path.join(json_dir, filename)
        try:
            # Load the JSON file
            with open(file_path, 'r') as f:
                data = json.load(f)
            
            # Ensure the JSON data is a list of dictionaries
            if isinstance(data, list):
                # Convert the JSON data to a DataFrame
                df = pd.DataFrame(data)
                df['Paperid'] = os.path.splitext(filename)[0]
                dataframes.append(df)  # Add the DataFrame to the list
            else:
                print(f"File {filename} does not contain a list of dictionaries.")
        except json.JSONDecodeError as e:
            print(f"Error decoding JSON in file {filename}: {e}")

# Combine all DataFrames into a single DataFrame
if dataframes:
    combined_df = pd.concat(dataframes, ignore_index=True)
    print("Combined DataFrame:")
    print(combined_df)
else:
    print("No valid JSON data found.")


Combined DataFrame:
                          name                 nominal_composition  \
0     CoCrFe0.75NiMo0.3Nb0.125              Co24Cr24Fe18Ni24Mo7Nb3   
1                     10HEA-Ni                  (CoCrFeNiMn)90Hf10   
2                     20HEA-Ni                  (CoCrFeNiMn)90Hf10   
3                     30HEA-Ni                  (CoCrFeNiMn)90Hf10   
4              Al17Ni34Ti17V32                     Al17Ni34Ti17V32   
...                        ...                                 ...   
4647                CoCrFeMnNi      Co20.0Cr20.0Fe20.0Mn20.0Ni20.0   
4648             CoCrFeMnNi-2N  Co19.6Cr19.6Fe19.6Mn19.6Ni19.6N2.0   
4649                CoCrFeMnNi           Co1.0Cr1.0Fe1.0Mn1.0Ni1.0   
4650             Al0.3CoCrFeNi           Al0.3Co1.0Cr1.0Fe1.0Ni1.0   
4651              Al0.1CrNbVMo            Al0.1Cr1.0Nb1.0V1.0Mo1.0   

                     measured_composition lattice_constant    phases  \
0     Co25Cr21.3Fe18Ni23.5Mo7Nb3Si1.4W0.8            3.605     FCC,

In [2]:
combined_df = combined_df.sort_values(by='Paperid')

In [3]:
combined_df

Unnamed: 0,name,nominal_composition,measured_composition,lattice_constant,phases,alloy_condition,doi,confidence_scores,Paperid,0
287,Ti0.25Ta1.75NbZrMo,Ti0.25Ta1.75Nb1.0Zr1.0Mo1.0,Not found,3.315,"BCC1,BCC2",As-Cast,10.1016/j.jallcom.2024.174408,"{'name': 100, 'nominal_composition': 95, 'meas...",1-s2,
284,TiTaNbZrMo,Ti1.0Ta1.0Nb1.0Zr1.0Mo1.0,Not found,3.285,"BCC1,BCC2",As-Cast,10.1016/j.jallcom.2024.174408,"{'name': 100, 'nominal_composition': 95, 'meas...",1-s2,
285,Ti0.75Ta1.25NbZrMo,Ti0.75Ta1.25Nb1.0Zr1.0Mo1.0,Not found,3.29,"BCC1,BCC2",As-Cast,10.1016/j.jallcom.2024.174408,"{'name': 100, 'nominal_composition': 95, 'meas...",1-s2,
286,Ti0.5Ta1.5NbZrMo,Ti0.5Ta1.5Nb1.0Zr1.0Mo1.0,Not found,3.31,"BCC1,BCC2",As-Cast,10.1016/j.jallcom.2024.174408,"{'name': 100, 'nominal_composition': 95, 'meas...",1-s2,
2685,V5,Nb25.0Ti35.0V5.0Zr35.0,Nb25.07Ti34.32V4.95Zr35.67,3.35,BCC,As-Cast,10.1016/j.matdes.2024.113260,"{'name': 100, 'nominal_composition': 100, 'mea...",1-s2-0-S026412752400635X-main,
...,...,...,...,...,...,...,...,...,...,...
2710,Al0.5Ti3Zr0.5Nb0.5Mo0.2,Al0.5Ti3.0Zr0.5Nb0.5Mo0.2,Not found,3.293,BCC,As-Cast,10.1038/s41467-025-58211-9,"{'name': 100, 'nominal_composition': 100, 'mea...",s41467-025-58211-9,
4076,HNTVA0,Hf1.0Nb1.0Ti1.0V1.0Al0.0,Not found,Not found,BCC,Annealed,10.1038/s41586-023-06894-9,"{'name': 100, 'nominal_composition': 90, 'meas...",s41586-023-06894-9,
4075,HNTVA10,Hf1.0Nb1.0Ti1.0V1.0Al1.0,Not found,3.3,BCC,Annealed,10.1038/s41586-023-06894-9,"{'name': 100, 'nominal_composition': 90, 'meas...",s41586-023-06894-9,
4077,HNTVA5,Hf1.0Nb1.0Ti1.0V1.0Al0.5,Not found,Not found,BCC,Annealed,10.1038/s41586-023-06894-9,"{'name': 100, 'nominal_composition': 90, 'meas...",s41586-023-06894-9,


In [4]:
combined_df.to_csv("Data_extracted.csv",index=False)