In [2]:
import pandas as pd
import json
import glob
import os

def load_series_records(file_paths):
    """Collect the time series objects stored in the given framework JSON files.

    Args:
        file_paths: Iterable of paths to JSON files whose top-level object may
            carry a 'data' key holding a list of series objects.

    Returns:
        A flat list with the series objects of every file, in the order the
        paths were given. Files without a 'data' key, or with a null value for
        it, contribute nothing.
    """
    records = []
    for file_path in file_paths:
        # Read as UTF-8 explicitly rather than relying on the platform's default encoding.
        with open(file_path, 'r', encoding='utf-8') as f:
            payload = json.load(f)
        # `or []` also covers an explicit `"data": null`, which would otherwise crash extend().
        records.extend(payload.get('data') or [])
    return records


# Find all .json files in the 'frameworks' directory and any subdirectories.
# glob returns paths in a filesystem-dependent order; sorting keeps the row order
# of the DataFrame stable across machines and re-runs.
framework_files = sorted(glob.glob('frameworks/**/*.json', recursive=True))

# All the time series objects from the JSON files, keeping their nested structure.
all_series_data = load_series_records(framework_files)

# Create a pandas DataFrame from the list of series objects.
# Each dictionary in all_series_data becomes a row, preserving the nested 'filters' and 'series' data.
df = pd.DataFrame(all_series_data)

# Display the first few rows of the created DataFrame.
df.head()


Unnamed: 0,entity_representation,entity_id,entity_name,filters,insight,series_name,series
0,product,33378,Brilliant Earth,{'platform': 'Desktop Site'},advertising_count,Brilliant Earth | Advertising Count | Desktop ...,"[{'date': '2022-12-03', 'value': 23.0}, {'date..."
1,title,46673,Game of Thrones (2011-2019 - TV),"{'country_id': 101, 'country': 'Afghanistan'}",ott_views,Game of Thrones (2011-2019 - TV) | OTT Views |...,"[{'date': '2022-12-03', 'value': 12.0}, {'date..."
2,retailer,1403,Amazon,"{'platform_name': 'Desktop Site', 'demographic...",average_website_traffic,Amazon | Average Website Traffic | Desktop Sit...,"[{'date': '2022-12-03', 'value': 23.6100635731..."
3,retailer,1403,Amazon,"{'platform_name': 'Desktop Site', 'demographic...",average_website_traffic,Amazon | Average Website Traffic | Desktop Sit...,"[{'date': '2022-12-03', 'value': 16.9069627150..."
4,retailer,1403,Amazon,"{'platform_name': 'Mobile Site', 'demographic_...",average_website_traffic,Amazon | Average Website Traffic | Mobile Site...,"[{'date': '2022-12-03', 'value': 5.16934349161..."


In [6]:
# Preview the first five rows of the combined DataFrame.
df.head(n=5)

Unnamed: 0,entity_representation,entity_id,entity_name,filters,insight,series_name,series
0,product,33378,Brilliant Earth,{'platform': 'Desktop Site'},advertising_count,Brilliant Earth | Advertising Count | Desktop ...,"[{'date': '2022-12-03', 'value': 23.0}, {'date..."
1,title,46673,Game of Thrones (2011-2019 - TV),"{'country_id': 101, 'country': 'Afghanistan'}",ott_views,Game of Thrones (2011-2019 - TV) | OTT Views |...,"[{'date': '2022-12-03', 'value': 12.0}, {'date..."
2,retailer,1403,Amazon,"{'platform_name': 'Desktop Site', 'demographic...",average_website_traffic,Amazon | Average Website Traffic | Desktop Sit...,"[{'date': '2022-12-03', 'value': 23.6100635731..."
3,retailer,1403,Amazon,"{'platform_name': 'Desktop Site', 'demographic...",average_website_traffic,Amazon | Average Website Traffic | Desktop Sit...,"[{'date': '2022-12-03', 'value': 16.9069627150..."
4,retailer,1403,Amazon,"{'platform_name': 'Mobile Site', 'demographic_...",average_website_traffic,Amazon | Average Website Traffic | Mobile Site...,"[{'date': '2022-12-03', 'value': 5.16934349161..."


In [8]:
def slugify(text):
    """Turn ``text`` into a lowercase, hyphen-separated endpoint slug.

    The text is lowercased, every non-alphanumeric character becomes a hyphen,
    and runs of hyphens (including leading/trailing ones) are collapsed away.
    """
    hyphenated = ''.join(c if c.isalnum() else '-' for c in text.lower())
    return '-'.join(filter(None, hyphenated.split('-')))


# One "table" widget configuration per time series.
# Only the 'series_name' column is needed, so iterate over it directly instead of
# using df.iterrows(), which builds a full Series object for every row.
# NOTE: names that differ only in case, spacing or punctuation slugify to the same
# endpoint, so endpoints are unique only if the series names differ in their
# alphanumeric characters.
widget_configs = [
    {
        "name": series_name,
        "description": series_name,
        "type": "table",
        "endpoint": slugify(series_name),
        "gridData": {"w": 12, "h": 4},
        "refetchInterval": False,
    }
    for series_name in df['series_name']
]

# Display the first 5 generated widget configurations to verify the output.
widget_configs[:5]


[{'name': 'Brilliant Earth | Advertising Count | Desktop Site | v2025.11.1 | Reinstated On: 2025-11-29',
  'description': 'Brilliant Earth | Advertising Count | Desktop Site | v2025.11.1 | Reinstated On: 2025-11-29',
  'type': 'table',
  'endpoint': 'brilliant-earth-advertising-count-desktop-site-v2025-11-1-reinstated-on-2025-11-29',
  'gridData': {'w': 12, 'h': 4},
  'refetchInterval': False},
 {'name': 'Game of Thrones (2011-2019 - TV) | OTT Views | 101 | Afghanistan | v2025.11.1 | Reinstated On: 2025-11-30',
  'description': 'Game of Thrones (2011-2019 - TV) | OTT Views | 101 | Afghanistan | v2025.11.1 | Reinstated On: 2025-11-30',
  'type': 'table',
  'endpoint': 'game-of-thrones-2011-2019-tv-ott-views-101-afghanistan-v2025-11-1-reinstated-on-2025-11-30',
  'gridData': {'w': 12, 'h': 4},
  'refetchInterval': False},
 {'name': 'Amazon | Average Website Traffic | Desktop Site | Female | Gender | v2025.11.1 | Reinstated On: 2025-12-01',
  'description': 'Amazon | Average Website Traff

In [9]:
def collect_filter_values(filter_dicts):
    """Map each filter key to the set of distinct values seen for it.

    Entries that are not dicts are skipped. Values that cannot be hashed
    (dicts, lists, ...) are stored as their ``str()`` representation so that
    they can be kept in a set; this is a simplification.
    """
    values_by_key = {}
    for filters in filter_dicts:
        if not isinstance(filters, dict):
            continue
        for key, value in filters.items():
            try:
                hash(value)
            except TypeError:
                value = str(value)
            values_by_key.setdefault(key, set()).add(value)
    return values_by_key


def sorted_filter_values(values):
    """Sort raw filter values, falling back to string order when types are mixed."""
    try:
        return sorted(values)
    except TypeError:
        # e.g. ints next to strings, or None next to anything else.
        return sorted(values, key=str)


def build_filter_param(key, raw_values):
    """Build the multi-select parameter configuration for one filter key.

    ``raw_values`` must be non-empty; the first option is used as the default.
    """
    # Options are exposed as strings; dict.fromkeys drops values that only
    # differ by type (such as 101 and '101') while keeping the sorted order.
    option_values = list(dict.fromkeys(str(v) for v in sorted_filter_values(raw_values)))
    options = [{"value": v, "label": v} for v in option_values]
    return {
        "paramName": key,
        "description": f"Filter by {key.replace('_', ' ')}.",
        "value": options[0]['value'],  # Default value
        "label": key.replace('_', ' ').title(),
        "type": "text",
        "multiSelect": True,  # Allow multiple selections for filters.
        "options": options
    }


def build_insight_widget(insight_name, group_df):
    """Build one grouped widget configuration for an insight.

    Args:
        insight_name: Value of the 'insight' column shared by all rows of the group.
        group_df: Rows of that insight; the 'entity_representation',
            'entity_name' and 'filters' columns are read.

    Returns:
        A dict with the widget's base fields plus a 'params' list holding an
        entity selector (when entity names exist) followed by one parameter per
        filter key, in sorted key order.
    """
    readable_name = insight_name.replace('_', ' ')
    widget = {
        "name": readable_name.title(),
        "description": f"Displays {readable_name} data.",
        "endpoint": insight_name.lower().replace('_', '-'),
        "gridData": {"w": 16, "h": 6},
        "type": "table",
        # Assumes the entity representation is consistent within an insight
        # group, so the first row's value is used.
        "category": group_df['entity_representation'].iloc[0],
        "params": []
    }

    # --- Parameter for Entity Name ---
    # Missing names are dropped: NaN cannot be sorted together with strings
    # and would not make a usable option.
    entity_names = sorted(group_df['entity_name'].dropna().unique())
    if entity_names:
        entity_options = [{"value": name, "label": name} for name in entity_names]
        widget["params"].append({
            "paramName": "entity_name",
            "description": "The entity to display data for.",
            "value": entity_options[0]['value'],  # Default value
            "label": "Entity",
            "type": "text",
            "options": entity_options
        })

    # --- Parameters for Filters ---
    values_by_key = collect_filter_values(group_df['filters'])
    for key in sorted(values_by_key):
        if values_by_key[key]:
            widget["params"].append(build_filter_param(key, values_by_key[key]))

    return widget


# Group the DataFrame by the 'insight' column to process each insight type separately.
grouped_by_insight = df.groupby('insight')

# One grouped widget configuration per insight.
advanced_widget_configs = [
    build_insight_widget(insight_name, group_df)
    for insight_name, group_df in grouped_by_insight
]

# Display the generated widget configurations.
advanced_widget_configs


[{'name': 'Advertising Count',
  'description': 'Displays advertising count data.',
  'endpoint': 'advertising-count',
  'gridData': {'w': 16, 'h': 6},
  'type': 'table',
  'category': 'product',
  'params': [{'paramName': 'entity_name',
    'description': 'The entity to display data for.',
    'value': 'Brilliant Earth',
    'label': 'Entity',
    'type': 'text',
    'options': [{'value': 'Brilliant Earth', 'label': 'Brilliant Earth'}]},
   {'paramName': 'platform',
    'description': 'Filter by platform.',
    'value': 'Desktop Site',
    'label': 'Platform',
    'type': 'text',
    'multiSelect': True,
    'options': [{'value': 'Desktop Site', 'label': 'Desktop Site'},
     {'value': 'Facebook', 'label': 'Facebook'},
     {'value': 'Instagram', 'label': 'Instagram'}]}]},
 {'name': 'App Downloads',
  'description': 'Displays app downloads data.',
  'endpoint': 'app-downloads',
  'gridData': {'w': 16, 'h': 6},
  'type': 'table',
  'category': 'app',
  'params': [{'paramName': 'entity_