In [11]:
import requests
import pandas as pd
import json
import logging
from datetime import datetime

# Configure logging
# Only ERROR-and-above records are written, one timestamped line each,
# to 'parser_errors.log'.
log_settings = {
    "filename": "parser_errors.log",
    "level": logging.ERROR,
    "format": "%(asctime)s - %(levelname)s - %(message)s",
}
logging.basicConfig(**log_settings)

# Load configuration
# Reads the endpoint URL ("url") and bearer token ("token") from
# 'config.json'. Any failure is logged and aborts the run.
try:
    with open("config.json", "r", encoding="utf-8") as config_file:
        config = json.load(config_file)
    url = config.get("url")
    token = config.get("token")
    if not url:
        # Fail here with a precise message rather than letting the HTTP
        # call reject a missing URL later on.
        raise ValueError("'url' is missing or empty in config.json")
except Exception as e:
    logging.error(f"Error loading configuration: {e}")
    print("Error loading 'config.json'.")
    # exit() is a helper injected by the site module for interactive use and
    # should not be relied on in programs; raising SystemExit always stops
    # execution here, and the non-zero code marks the run as failed.
    raise SystemExit(1)

# HTTP headers
# Attach the Authorization header only when a token was configured;
# otherwise the request would carry the literal credential "Bearer None".
headers = {"Authorization": f"Bearer {token}"} if token else {}

# Fetch data from server
# (connect, read) timeouts in seconds. Without a timeout, requests waits
# indefinitely on a server that stops responding.
REQUEST_TIMEOUT = (10, 60)

try:
    response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    # Turn 4xx/5xx responses into exceptions so they are handled below.
    response.raise_for_status()
except requests.exceptions.RequestException as e:
    logging.error(f"HTTP request error: {e}")
    print("Error retrieving data.")
    # exit() is a helper injected by the site module for interactive use and
    # should not be relied on in programs; raising SystemExit always stops
    # execution here, and the non-zero code marks the run as failed.
    raise SystemExit(1)
else:
    print("Data successfully retrieved from server.")

# Parse JSON content directly from memory
try:
    data = response.json()
except ValueError as e:
    # JSON decode errors derive from ValueError; anything else is unexpected
    # and is left to propagate with its traceback.
    logging.error(f"Error parsing JSON response: {e}")
    print("Error parsing JSON response.")
    # exit() is a helper injected by the site module for interactive use and
    # should not be relied on in programs; raising SystemExit always stops
    # execution here, and the non-zero code marks the run as failed.
    raise SystemExit(1)

# Convert to DataFrame
# A top-level JSON array is passed straight to DataFrame; a top-level JSON
# object is flattened with json_normalize. Any other top-level type is
# rejected.
if not isinstance(data, (list, dict)):
    logging.error("Error converting to DataFrame: Unsupported data format.")
    print("Unsupported data format.")
    # exit() is a helper injected by the site module for interactive use and
    # should not be relied on in programs; raising SystemExit always stops
    # execution here, and the non-zero code marks the run as failed.
    raise SystemExit(1)

try:
    if isinstance(data, list):
        df = pd.DataFrame(data)
    else:
        df = pd.json_normalize(data)
except Exception as e:
    logging.error(f"Error converting to DataFrame: {e}")
    # The format itself was accepted above, so report the conversion failure
    # rather than claiming the format is unsupported.
    print("Error converting data to DataFrame.")
    raise SystemExit(1)

# Preview the first 5 rows
print("\nFirst 5 rows of data:")
preview = df.head(5)
print(preview)

# Write the whole table to 'dataset.csv' without the index column.
# A failed write is logged and reported, but the script keeps going.
try:
    df.to_csv("dataset.csv", index=False, encoding="utf-8")
except Exception as err:
    logging.error(f"Error saving CSV file: {err}")
    print("Error saving CSV file.")
else:
    print("\nData saved as 'dataset.csv'.")

# Identify complex columns
def is_complex_type(elem: object) -> bool:
    """Return True when *elem* is a dict or a list (subclasses included)."""
    return isinstance(elem, dict) or isinstance(elem, list)

def is_list(elem: object) -> bool:
    """Return True when *elem* is a list (or an instance of a list subclass)."""
    return isinstance(elem, list)

# Collect every column in which at least one cell is a dict or a list.
# NOTE: only real dict/list objects are flagged by is_complex_type; a cell
# holding JSON text as a plain string is not.
try:
    complex_columns = []
    for column_name in df.columns:
        has_nested_values = df[column_name].map(is_complex_type).any()
        if has_nested_values:
            complex_columns.append(column_name)
except Exception as err:
    logging.error(f"Error identifying complex columns: {err}")
    # Fall back to "nothing to unpack" instead of keeping a partial result.
    complex_columns = []

# Unpack complex columns
# Each nested column is flattened into its own table and written to
# '<column>_table.csv'. A failure in one column is logged and reported, and
# the remaining columns are still processed.
if complex_columns:
    print("\nFound complex columns:", complex_columns)

    for col in complex_columns:
        try:
            print(f"\nUnpacking column: {col}")
            col_non_null = df[col].dropna()
            if col_non_null.map(is_list).all():
                # Every non-null value is a list: one output row per element.
                # explode() yields NaN for empty lists, so drop those as
                # well; otherwise they end up as blank rows in the CSV.
                records = col_non_null.explode().dropna()
            else:
                # Otherwise assume dicts
                records = col_non_null
            nested_df = pd.json_normalize(records)

            # Column names originate from the server response; neutralise
            # path separators so a name cannot address another directory.
            safe_name = str(col).replace("/", "_").replace("\\", "_")
            out_path = f"{safe_name}_table.csv"
            nested_df.to_csv(out_path, index=False, encoding="utf-8")
            print(f"Column '{col}' unpacked and saved as '{out_path}'.")
        except Exception as e:
            logging.error(f"Error processing column '{col}': {e}")
            print(f"Error processing column: '{col}'. See 'parser_errors.log'.")
else:
    print("\nNo complex columns found. No unpacking needed.")

print("\nData is prepared and ready for further processing.")


Data successfully retrieved from server.

First 5 rows of data:
  status message                                               json
0    200      ok  {"index":{"0":0,"1":1,"2":2,"3":3,"4":4,"5":5,...

Data saved as 'dataset.csv'.

No complex columns found. No unpacking needed.

Data is prepared and ready for further processing.
