In [1]:
import json
import pandas as pd


def read_json(filename: str) -> dict:
    """Read a JSON file and return the parsed data.

    Args:
        filename: Path of the JSON file to read.

    Returns:
        The decoded JSON document (a dict for an object-rooted file).

    Raises:
        Exception: If the file cannot be opened or parsed. The message names
            the file, and the underlying error is attached as ``__cause__``.
    """
    try:
        # JSON text is UTF-8 by specification; do not rely on the platform's
        # locale-dependent default encoding.
        with open(filename, "r", encoding="utf-8") as f:
            data = json.load(f)
    except Exception as e:
        # Chain explicitly so the original traceback is kept as the cause.
        raise Exception(f"Error reading {filename}: {str(e)}") from e
    return data


def normalize_json(data: dict) -> pd.DataFrame:
    """Flatten nested article JSON into one row per (test entry, name) pair.

    Article-level fields are repeated on every row. Each element of
    ``data["test"]`` contributes one row per element of its ``names`` list;
    an entry whose ``names`` is missing or empty still contributes a single
    row with the name columns left empty, so no test entry is dropped.

    Args:
        data: Parsed article document. The ``article`` and ``test`` keys, and
            any key inside them, may be missing; missing values become None.

    Returns:
        A DataFrame with a fixed column order matching the CSV layout.
    """
    # A missing or null "article" is treated like any other absent field.
    article = data.get("article") or {}
    base_data = {
        "article_id": data.get("article_id"),
        "article_link": data.get("article_link"),
        "published_on": data.get("published_on"),
        "source": data.get("source"),
        "article_title": article.get("title"),
        "article_category": article.get("category"),
        "article_image": article.get("image"),
        "article_sentiment": article.get("sentiment"),
    }

    flattened_data = []

    for test_entry in data.get("test") or []:
        row = base_data.copy()
        row["test_test"] = test_entry.get("test")
        row["test_test2"] = test_entry.get("test2")
        # Only the first address is exported; an empty or null list yields None
        # instead of raising IndexError/TypeError.
        addresses = test_entry.get("address") or [None]
        row["test_address"] = addresses[0]
        row["Rank"] = 1

        # A single empty placeholder keeps entries without names in the output.
        for name_entry in test_entry.get("names") or [{}]:
            row_copy = row.copy()
            row_copy["test_names_name"] = name_entry.get("name")
            row_copy["test_names_age"] = name_entry.get("age")
            flattened_data.append(row_copy)

    dataframe = pd.DataFrame(flattened_data)

    column_order = [
        "article_id", "article_link", "published_on", "source", "article_title",
        "article_category", "article_image", "article_sentiment", "test_test",
        "test_test2", "test_address", "Rank", "test_names_name", "test_names_age",
    ]
    # reindex also guarantees the columns exist when there are no rows at all.
    dataframe = dataframe.reindex(columns=column_order, fill_value=None)

    return dataframe


def main(input_path: str = "article1.json", output_path: str = "article1.csv") -> None:
    """Convert a JSON article file into a flattened CSV file.

    Args:
        input_path: JSON file to read. Defaults to ``article1.json``.
        output_path: CSV file to write. Defaults to ``article1.csv``.
    """
    data = read_json(filename=input_path)

    dataframe = normalize_json(data)

    # index=False keeps the positional row index out of the CSV.
    dataframe.to_csv(output_path, index=False)
    print(f"CSV file '{output_path}' created successfully!")


# Run the conversion only when this code is executed directly, not when imported.
if __name__ == "__main__":
    main()


CSV file 'article1.csv' created successfully!


In [8]:
import json
import pandas as pd

def read_json(filename: str) -> dict:
    """Read a JSON file and return the parsed data.

    Args:
        filename: Path of the JSON file to read.

    Returns:
        The decoded JSON document (a dict for an object-rooted file).

    Raises:
        Exception: If the file cannot be opened or parsed. The message names
            the file, and the underlying error is attached as ``__cause__``.
    """
    try:
        # JSON text is UTF-8 by specification; do not rely on the platform's
        # locale-dependent default encoding.
        with open(filename, "r", encoding="utf-8") as f:
            data = json.load(f)
    except Exception as e:
        # Chain explicitly so the original traceback is kept as the cause.
        raise Exception(f"Error reading {filename}: {str(e)}") from e
    return data

def flatten_json_to_rows(data, parent_key='', sep='_'):
    """Flatten nested JSON into a list of flat row dicts for a DataFrame.

    Nested dict keys are joined with ``sep`` (``{"a": {"b": 1}}`` becomes the
    column ``a_b``). Every list multiplies the rows built so far by its
    elements, so the result is the cross product of all lists in the document.

    Args:
        data: A decoded JSON value: dict, list, or scalar.
        parent_key: Column-name prefix accumulated from the enclosing keys.
        sep: Separator placed between key segments.

    Returns:
        A list of dicts mapping flattened column names to scalar values.
        A scalar input yields ``[{parent_key: data}]``; an empty list yields
        ``[]``.
    """
    if isinstance(data, dict):
        # Start from one empty row; each key either annotates or multiplies it.
        rows = [{}]
        for k, v in data.items():
            new_key = f"{parent_key}{sep}{k}" if parent_key else k
            if isinstance(v, (dict, list)):
                sub_rows = flatten_json_to_rows(v, new_key, sep=sep)
                if not sub_rows:
                    # An empty list carries no values; keep the rows built so
                    # far instead of multiplying them by zero.
                    continue
                # Cross product. A nested dict may itself expand to several
                # rows when it contains lists, so it is combined the same way
                # as a list rather than merged into a single row.
                rows = [{**row, **sub_row} for row in rows for sub_row in sub_rows]
            else:
                for row in rows:
                    row[new_key] = v
        return rows
    if isinstance(data, list):
        rows = []
        for item in data:
            rows.extend(flatten_json_to_rows(item, parent_key, sep=sep))
        return rows
    return [{parent_key: data}]

def normalize_json(data):
    """Turn decoded JSON into a DataFrame whose columns are discovered from the data.

    A top-level list is treated as a sequence of records that are flattened
    one by one; any other value is flattened as a single document.
    """
    if not isinstance(data, list):
        return pd.DataFrame(flatten_json_to_rows(data))

    # Concatenate the rows produced by each record, preserving record order.
    collected_rows = [
        flat_row
        for record in data
        for flat_row in flatten_json_to_rows(record)
    ]
    return pd.DataFrame(collected_rows)

def main(input_path: str = "article1.json", output_path: str = "article1.csv") -> None:
    """Convert a JSON article file into a flattened CSV file.

    Args:
        input_path: JSON file to read. Defaults to ``article1.json``.
        output_path: CSV file to write. Defaults to ``article1.csv``.
    """
    data = read_json(filename=input_path)

    dataframe = normalize_json(data)

    # index=False keeps the positional row index out of the CSV.
    dataframe.to_csv(output_path, index=False)
    print(f"CSV file '{output_path}' created successfully!")

# Run the conversion only when this code is executed directly, not when imported.
if __name__ == "__main__":
    main()


CSV file 'article1.csv' created successfully!
