In [1]:
import json

In [2]:
# Read the dataset records stored in results.json into memory
with open('results.json', encoding='utf-8', mode='r') as results_file:
    results = json.loads(results_file.read())

# Last expression of the cell: shows how many records were loaded
len(results)

10454

In [3]:
# Maps each modality title to the collection of data type titles seen under it
modality_data_types = dict()

In [4]:
# Traverse each dataset entry and collect, per lower-cased modality title,
# the set of data type titles that appear under that modality.
for dataset in results:
    # `.get(key, default)` only applies the default when the key is absent;
    # `or` also covers a key that is present but null in the JSON.
    tags = dataset.get('tags') or {}
    modalities = tags.get('modalities') or []

    # Process each modality
    for modality in modalities:
        raw_title = modality.get('title')
        if raw_title is None:
            # No title means no key to group under; skip rather than crash on None.lower()
            continue
        # Lower-case the title so differently-cased spellings share one entry
        modality_title = raw_title.lower()

        # Fetch the set for this modality, creating it on first sight
        data_type_titles = modality_data_types.setdefault(modality_title, set())

        # Add each data type title under the current modality
        for data_type in modality.get('data_types') or []:
            data_type_title = data_type.get('title')
            # A None title would later make sorted() fail on a None/str comparison
            if data_type_title is not None:
                data_type_titles.add(data_type_title)


In [5]:
# Swap every collected set for an ordered list: deterministic output that json can serialise
for modality, type_titles in modality_data_types.items():
    modality_data_types[modality] = sorted(type_titles)

In [6]:
# Persist the modality -> data types mapping as pretty-printed UTF-8 JSON
with open('unique_modalities_data_types.json', mode='w', encoding='utf-8') as out_handle:
    serialized = json.dumps(modality_data_types, ensure_ascii=False, indent=4)
    out_handle.write(serialized)

In [7]:
# Will hold the modality titles extracted from modality_data_types
modalities_list = list()

In [8]:
# Rebuild the list from the dictionary keys on every run. Appending to a list
# created in another cell would duplicate every entry each time this cell is
# re-executed.
modalities_list = list(modality_data_types)

# Print the extracted modality titles in alphabetical order
print(sorted(modalities_list))

['2d', '2d data', '2d keypoints', '2d models', '2d visualization', '3d', '3d coordinates', '3d data', '3d environment', '3d environments', '3d imaging', '3d landmarks', '3d layout', '3d lidar', '3d mesh', '3d model', '3d model data', '3d model parameters', '3d models', '3d motion', '3d motion capture', '3d motion data', '3d object', '3d objects', '3d point cloud', '3d pose', '3d pose annotations', '3d pose data', '3d reconstruction', '3d scan', '3d scans', '3d simulation', '3d skeletal data', '3d skeleton', '3d skeleton data', '3d structure', '3d structures', '3d virtual environment', '3d visual data', '4d', '4d data', 'acoustic', 'action', 'action log', 'action sequence', 'activity data', 'activity logs', 'actuator data', 'aerial data', 'algorithm', 'algorithmic', 'alignment', 'alignment data', 'alpha matte', 'ambient light', 'analysis', 'analytic data', 'anatomical data', 'animation', 'annotated data', 'annotation', 'annotation data', 'annotations', 'api', 'application', 'application

In [11]:
import requests
import json
import time

# Configuration
import os  # imported here so this cell stays runnable on its own

# The API key is a secret and must not be stored in the notebook. Export
# AZURE_OPENAI_API_KEY in the environment before starting the kernel.
# NOTE(review): a key was previously committed in this cell; rotate/revoke it.
API_KEY = os.environ.get("AZURE_OPENAI_API_KEY", "")
if not API_KEY:
    print("Warning: AZURE_OPENAI_API_KEY is not set; API requests will likely fail authentication.")

# HTTP headers sent with every request to ENDPOINT
headers = {
    "Content-Type": "application/json",
    "api-key": API_KEY,
}

ENDPOINT = "https://yuwa-m2oi18l3-swedencentral.openai.azure.com/openai/deployments/gpt-4o/chat/completions?api-version=2024-08-01-preview"

In [None]:
def _strip_code_fence(text):
    """Return ``text`` without a surrounding Markdown code fence (``` or ```json)."""
    cleaned = text.strip()
    if cleaned.startswith("```"):
        cleaned = cleaned[3:]
        # Drop the optional language tag that directly follows the opening fence
        if cleaned[:4].lower() == "json":
            cleaned = cleaned[4:]
        if cleaned.endswith("```"):
            cleaned = cleaned[:-3]
    return cleaned.strip()


def generate_synonym_mapping(modalities_list, timeout=600):
    """Ask the chat-completions endpoint to group modality phrases into synonym groups.

    Sends a single request to the module-level ``ENDPOINT`` using the
    module-level ``headers`` and parses the model's reply as JSON. Errors are
    reported with ``print`` and never propagate to the caller.

    Args:
        modalities_list: The modality phrases to group. The list is
            interpolated into the prompt via its ``str()`` representation.
        timeout: Seconds passed to ``requests.post`` as ``timeout``. Without
            one, ``requests`` can wait indefinitely for a response.

    Returns:
        dict: The JSON object parsed from the model's reply. The prompt asks
        for the shape ``{"groups": {standard_term: [phrase, ...]}}``, but the
        structure is not validated beyond being a JSON object. When the
        request fails, the response body has an unexpected structure, or the
        reply is not a JSON object, ``{"groups": {}}`` is returned instead.
    """
    payload = {
        "messages": [
            {
                "role": "system",
                "content": "You are an assistant that maps a list of modality phrases to their standard terms, grouping synonyms and semantically similar phrases together. Your role is to ensure consistency across datasets by generating concise and accurate groupings. Avoid creating complex structures or hierarchical relationships, and format the output strictly as a JSON object with 'standard terms' as keys and their corresponding phrases as lists of synonyms."
            },
            {
                "role": "user",
                "content": f"""Below is a list of modality phrases:

{modalities_list}

Your task:

1. Group synonyms and semantically similar phrases together.
2. Assign a single, most appropriate term as the 'standard term' for each group.
3. Ensure the groups are logical, concise, and avoid redundancy.
4. Keep the grouping simple: do not create nested or hierarchical structures.
5. Each modality phrase must belong to only one standard term group. Assign it to the group that best reflects its meaning.


Format the response as JSON:
{{
    "groups": {{
        "standard term 1":  ["original term 1"],
        "standard term 2": ["original term 2", "original term 3"]
    }}
}}
"""
            }
        ],
        "temperature": 0.1,
        "top_p": 0.95,
        "max_tokens": 10000
    }

    try:
        response = requests.post(ENDPOINT, headers=headers, json=payload, timeout=timeout)
        response.raise_for_status()

        # Extract the first completion from the response body
        choice = response.json()['choices'][0]
        synonym_map_text = choice['message']['content']
    except requests.RequestException as e:
        print(f"Error with GPT-4 API: {e}")
        return {"groups": {}}
    except (KeyError, IndexError, TypeError, ValueError) as e:
        # The HTTP call succeeded but the body is not the expected
        # {"choices": [{"message": {"content": ...}}]} structure (or not JSON).
        print(f"Unexpected response structure from GPT-4 API: {e!r}")
        return {"groups": {}}

    print("Raw Response from GPT-4:", synonym_map_text)

    if not isinstance(synonym_map_text, str):
        print("GPT-4 response contained no text content.")
        return {"groups": {}}

    if choice.get('finish_reason') == 'length':
        # The reply hit max_tokens, so the JSON below is most likely cut off
        print("Warning: GPT-4 response was truncated at max_tokens; the JSON is probably incomplete.")

    # str.strip("```json") would strip any of the characters `, j, s, o, n from
    # both ends; remove the Markdown fence as an actual prefix/suffix instead.
    synonym_map_text_clean = _strip_code_fence(synonym_map_text)

    try:
        # Attempt to parse the cleaned string as JSON
        synonym_map = json.loads(synonym_map_text_clean)
    except json.JSONDecodeError:
        print("Failed to parse JSON response from GPT-4.")
        return {"groups": {}}

    if not isinstance(synonym_map, dict):
        # Valid JSON but not an object (e.g. a list or a bare string)
        print("GPT-4 response was valid JSON but not a JSON object.")
        return {"groups": {}}

    return synonym_map


In [25]:
# Alphabetically ordered copy of the modality titles (the original list is left untouched)
sorted_modalities_list = list(modalities_list)
sorted_modalities_list.sort()

# Last expression of the cell: number of modality titles
len(sorted_modalities_list)

711

In [26]:
# Send the alphabetically sorted list computed in the previous cell, so the
# prompt does not depend on dictionary insertion order.
synonym_map = generate_synonym_mapping(sorted_modalities_list)

Raw Response from GPT-4: ```json
{
    "groups": {
        "image": ["image", "images", "imaging", "satellite imagery", "3d imaging", "medical imaging"],
        "annotation": ["annotation", "annotations", "annotation data", "manual annotations", "emotion annotations", "pose annotations", "3d pose annotations"],
        "text": ["text", "document", "bilingual text", "text-image interaction"],
        "3d model": ["3d model", "3d models", "3d model data", "3d model parameters", "3d reconstruction", "3d structure", "3d structures", "3d object", "3d objects", "3d mesh", "3d scan", "3d scans", "3d environment", "3d environments", "3d virtual environment", "3d visual data", "3d simulation", "3d data", "3d"],
        "classification": ["classification", "categorical", "categorical data"],
        "audio": ["audio", "audio data", "audio features", "audiovisual", "audio-visual"],
        "video": ["video", "motion capture", "motion capture data", "motion tracking", "motion data", "motion senso