# Combine all JSON files produced by SciSift into a single CSV file and display it

In [1]:
# Directory that is scanned for the JSON files to combine; the combined CSV
# is written into this same directory.
json_directory = 'outputs'  # Adjust this path to your directory

In [2]:
import json
import csv
import os
import pandas as pd


def is_json_file(filename):
    """Return True when ``filename`` ends in ``.json``, ignoring letter case."""
    normalized_name = filename.lower()
    has_json_suffix = normalized_name.endswith(".json")
    return has_json_suffix

# One dict per JSON file; each dict also records the file it came from under
# the 'Filename' key.
data_list = []

# os.listdir() returns entries in arbitrary order, so sort the names to make
# the row order (and the first-seen column order) reproducible across runs.
json_files = sorted(f for f in os.listdir(json_directory) if is_json_file(f))

# If there are no JSON files, stop here.
if not json_files:
    print("No JSON files found.")
    # exit() is an interactive-shell helper and shuts down a Jupyter kernel;
    # raising SystemExit stops execution with the same exit status (0).
    raise SystemExit

# Load every JSON file the same way (no special case for the first file).
for filename in json_files:
    # JSON text is UTF-8; do not depend on the platform's locale encoding.
    with open(os.path.join(json_directory, filename), 'r', encoding='utf-8') as json_file:
        data = json.load(json_file)

    # Each file must hold a JSON object so that it maps onto one CSV row.
    if not isinstance(data, dict):
        raise TypeError(
            f"{filename}: expected a JSON object at the top level, "
            f"got {type(data).__name__}"
        )

    data['Filename'] = filename
    data_list.append(data)

# Column order = order in which keys are first seen across the files.
# dict.fromkeys() de-duplicates while preserving insertion order.
ordered_keys = list(dict.fromkeys(key for row in data_list for key in row))

# Write the collected data to a CSV file. Keys missing from a row are left
# empty by DictWriter. UTF-8 matches the default encoding of pd.read_csv below.
csv_path = os.path.join(json_directory, 'combined_data.csv')
with open(csv_path, 'w', newline='', encoding='utf-8') as csv_file:
    writer = csv.DictWriter(csv_file, fieldnames=ordered_keys)
    writer.writeheader()
    writer.writerows(data_list)


# Read the CSV file back into a DataFrame for display in the next cells.
df = pd.read_csv(csv_path)
# Uncomment to preview the first rows:
# df.head()
# df.head()

In [3]:
# !pip install plotly==5.17.0

In [4]:
# Optional: display the results as a more colorful table

import plotly.graph_objects as go


# Render the DataFrame as a Plotly table: one table column per DataFrame
# column, with the column names as the header row.
column_names = list(df.columns)
column_values = [df[name] for name in df.columns]

table_header = dict(
    values=column_names,
    fill_color='paleturquoise',
    align='left',
)
table_cells = dict(
    values=column_values,
    fill_color='lavender',
    align='left',
)
results_table = go.Table(header=table_header, cells=table_cells)

fig = go.Figure(data=[results_table])

# Fixed figure size (width 5000, height 800).
fig.update_layout(width=5000, height=800)
fig.show()