## Section 0: Pre-check JSON structure for required USDM sections ---
### Be sure to run `usdm_validation_full_with_logging.py` first

In [None]:
import logging
from datetime import datetime

# Send log records for this run to a new file whose name carries the start time.
log_file = "upload_script_log_{}.log".format(datetime.now().strftime('%Y%m%d_%H%M%S'))
logging.basicConfig(
    force=True,  # replace any handlers left over from an earlier configuration
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
    filename=log_file,
)

logging.info("Logging initialized.")


## Study creation and study metadata patch

In [None]:
import requests
import random
import json
from typing import Dict, List, Optional, Tuple
from datetime import datetime
import copy

import logging
import csv
# Import the part of the template you want to map – here we use the study_patch template.
from import_templates import study_patch

class USDMStudyManager:
    logging.info("Logging info for section 1: Entering function __init__")
    def __init__(self, api_base_url: str, auth_token: str):
        """
        Initialize the USDMStudyManager with API configuration.
        
        Args:
            api_base_url: Base URL of the API
            auth_token: Authentication token for API access
        """
        self.api_base_url = api_base_url.rstrip('/')
        self.auth_token = auth_token
        self.headers = {
            "Authorization": f"Bearer {auth_token}",
            "Content-Type": "application/json"
        }
        self.study_uid = None  # Will be set after study creation
        
    


    logging.info("Logging info for section 2: Entering function load_usdm_data")
    def load_usdm_data(self, file_path: str) -> Dict:
        """
        Load USDM data from a JSON file.
        
        Args:
            file_path: Path to the USDM JSON file
            
        Returns:
            Dictionary containing the USDM data
        """
        with open(file_path,  encoding='utf-8') as f:
            return json.load(f)
            logging.info("Function load_usdm_data completed and returned a value.")

    logging.info("Logging info for section 3: Entering function create_study_from_usdm")
    def create_study_from_usdm(self, usdm_data: Dict) -> Optional[str]:
        """
        Create a new study from USDM data.

        The text of the "Official Study Title" found in the first study version
        is sent as both the description and the study title. The study number
        is drawn at random from 4000-9000. On success the returned UID is also
        stored in ``self.study_uid``.

        Args:
            usdm_data: Dictionary containing USDM data

        Returns:
            Study UID if successful, None otherwise
        """
        study_info = usdm_data.get("study", {})
        study_versions = study_info.get("versions", [])

        # Start from an empty title so that a document without versions or
        # without an official title does not raise before the request is made.
        official_title = ""
        study_titles = study_versions[0].get("titles", []) if study_versions else []
        for title in study_titles:
            if (title.get("type") or {}).get("decode", "") == "Official Study Title":
                official_title = title.get("text", "")
        if not official_title:
            logging.warning("No 'Official Study Title' found in USDM data; study title will be empty.")

        # Prepare study creation payload
        study_payload = {
            # NOTE(review): the acronym is sent untruncated; confirm whether the
            # API enforces a maximum length.
            "study_acronym": study_info.get("name", ""),
            "study_subpart_acronym": None,  # Can be customized or left empty
            "description": official_title,
            "study_parent": None,
            "study_parent_part_uid": ["999"],
            "study_description": {"study_title": official_title},
            "project_number": "999",
            "study_number": str(random.randint(4000, 9000))
        }

        endpoint = f"{self.api_base_url}/studies"
        response = requests.post(
            endpoint,
            headers=self.headers,
            json=study_payload
        )

        if response.status_code == 201:
            self.study_uid = response.json().get("uid")
            print(f"Successfully created study with UID: {self.study_uid}")
            created_uid = self.study_uid
        else:
            print(f"Failed to create study. Status code: {response.status_code}")
            print(f"Response: {response.text}")
            created_uid = None

        # Log before returning: a statement placed after `return` never runs.
        logging.info("Function create_study_from_usdm completed and returned a value.")
        return created_uid

    # study patch

    logging.info("Logging info for section 4: Entering function map_identification_from_identifiers")
    def map_identification_from_identifiers(self, identifiers: list, template: dict) -> dict:
        """
        Build the identification metadata using studyIdentifiers from the study version.
        For example:
          - The first identifier's 'text' is used for the study_number field.
          - An identifier starting with 'NCT' (if any) is used for registry_identifiers.ct_gov_id.
        Any keys not set from the identifiers retain their default values from the template.
        """
        result = {}

        if identifiers:
            # Use the first identifier's text as the generic study_number.
            

            # Search for an identifier starting with 'NCT' for registry_identifiers.ct_gov_id.
            ct_gov_id = ""
            eudract_id=""
            for ident in identifiers:
                ident_text = ident.get("text", "")
                if ident_text.startswith("NCT"):
                    ct_gov_id = ident_text
                elif ident_text.startswith("20"):
                    eudract_id = ident_text
#                     break
            result["registry_identifiers"] = {
                "ct_gov_id": ct_gov_id,
                "ct_gov_id_null_value_code": template.get("registry_identifiers", {}).get(
                    "ct_gov_id_null_value_code", {"term_uid": "string", "name": "string"}
                ),
                "eudract_id": eudract_id,
                "eudract_id_null_value_code": template.get("registry_identifiers", {}).get(
                    "eudract_id_null_value_code", {"term_uid": "string", "name": "string"}
                ),
                "universal_trial_number_utn": template.get("registry_identifiers", {}).get(
                    "universal_trial_number_utn", "string"
                ),
                "universal_trial_number_utn_null_value_code": template.get("registry_identifiers", {}).get(
                    "universal_trial_number_utn_null_value_code", {"term_uid": "string", "name": "string"}
                ),
            }

        # For any missing keys, fill in default values from the template.
        for key, default in template.items():
            if key not in result:
                result[key] = default
        return result
        logging.info("Function map_identification_from_identifiers completed and returned a value.")

    logging.info("Logging info for section 5: Entering function map_high_level_design")
    def map_high_level_design(self, study_designs: list, template: dict) -> dict:
        """
        Updated: Fetches study type and trial phase terms from external codelists and maps them accordingly,
        using codelist_uid from name['codelists'] structure.
        """
        result = {}

        if study_designs:
            design = study_designs[0]

            # Map study_type_code using studyType info
            studyType = design.get("studyType", {})
            study_type_code = studyType.get("code", "")
            studytypeuid = "string"
            studytypename = "string"

            if study_type_code:
                endpoint = f"{self.api_base_url}/ct/terms?codelist_uid=C99077&page_number=1&page_size=1000"
                response = requests.get(endpoint, headers=self.headers)
                if response.status_code == 200:
                    items = response.json().get("items", [])
                    for item in items:
                        codelists = item.get("attributes", {}).get("concept_id", "")
                        if codelists == study_type_code:
                            studytypeuid = item.get("term_uid", "string")
                            studytypename = item.get("name", {}).get("sponsor_preferred_name", "string")
                            break

            result["study_type_code"] = {
                "term_uid": studytypeuid,
                "name": studytypename,
            }
            result["study_type_null_value_code"] = template.get("study_type_null_value_code")

            # Map trial_phase_code using studyPhase info
            studyPhase = design.get("studyPhase", {})
            standardCode = studyPhase.get("standardCode", {})
            phase_code = standardCode.get("code", "")
            trial_phase_uid = "string"
            trial_phase_name = "string"

            if phase_code:
                endpoint = f"{self.api_base_url}/ct/terms?codelist_uid=C66737&page_number=1&page_size=1000"
                response = requests.get(endpoint, headers=self.headers)
                if response.status_code == 200:
                    items = response.json().get("items", [])
                    for item in items:
                        codelists = item.get("attributes", {}).get("concept_id", "")
                        if codelists == phase_code:
                            trial_phase_uid = item.get("term_uid", "string")
                            trial_phase_name = item.get("name", {}).get("sponsor_preferred_name", "string")
                            break

            result["trial_phase_code"] = {
                "term_uid": trial_phase_uid,
                "name": trial_phase_name,
            }
            result["trial_phase_null_value_code"] = template.get("trial_phase_null_value_code")
            
           # Map trial_type_code using studyPhase info
            
            # Initialize trial type codes list
            result["trial_type_codes"] = []

            # Ensure subTypes is a list of dictionaries with "code"
            subTypes = design.get("subTypes", [])
            trial_type_codes_list = [subtype.get("code", "") for subtype in subTypes if "code" in subtype]

            # Only proceed if there are codes
            if trial_type_codes_list:
                endpoint = f"{self.api_base_url}/ct/terms?codelist_uid=C66739&page_number=1&page_size=1000"
                response = requests.get(endpoint, headers=self.headers)
    
                if response.status_code == 200:
                    items = response.json().get("items", [])

                    for code in trial_type_codes_list:
                        for item in items:
                            codelist_code = item.get("attributes", {}).get("concept_id", "")
                            if codelist_code == code:
                                result["trial_type_codes"].append({
                                    "term_uid": item.get("term_uid", "string"),
                                    "name": item.get("name", {}).get("sponsor_preferred_name", "string")
                                })
                                break
                else:
                    # Optionally log error or handle missing response
                    logging.warning("Failed to fetch trial type terms from codelist API.")
            else:
                logging.info("No valid trial_type_codes found in subTypes.")

            # Add null value code
            result["trial_type_null_value_code"] = template.get("trial_type_null_value_code")

        # For any keys not explicitly set, keep the template's default.
        for key, default in template.items():
            if key not in result:
                result[key] = default
        return result
        logging.info("Function map_high_level_design completed and returned a value.")

    logging.info("Logging info for section 6: Entering function map_study_population")
    def map_study_population(self, study_designs: list, template: dict) -> dict:
        """
        Build the study_population metadata using studyDesigns from the study version.
        For example:
          - Map disease conditions or indications codes from the design's "indications" field,
            transforming each item by picking its "code" and "decode" values.
          - Map therapeutic area, number of expected subjects, planned sex, and planned age.
        Any keys not explicitly mapped retain their default values.
        """
        result = {}

        if study_designs:
            design = study_designs[0]

            # Map disease conditions or indication codes from indications (if present).
            indications = design.get("indications", [])
            standard_codes = indications[0].get("codes", []) if indications and indications[0].get("codes") else []
            if standard_codes:
                result["disease_conditions_or_indications_codes"] = [
                    {"term_uid": ind.get("code", "string"), "name": ind.get("decode", "string")}
                    for ind in standard_codes
                ]
            else:
                result["disease_conditions_or_indications_codes"] = template.get(
                    "disease_conditions_or_indications_codes", [{"term_uid": "string", "name": "string"}]
                )

            # Map therapeutic area.
            therapeutic_area = design.get("therapeuticAreas", [])
            if therapeutic_area:
                therapeutic_phase_code = therapeutic_area[0].get("decode", "")            
                therapeutic_area_uid = "string"
                therapeutic_area_name = "string"       
                if therapeutic_phase_code:
                    endpoint = f"{self.api_base_url}/ct/terms?codelist_name=Therapeutic%20area"
                    response = requests.get(endpoint, headers=self.headers)
                    if response.status_code == 200:
                        items = response.json().get("items", [])
                        for item in items:
                            codelists = item.get("name", {}).get("sponsor_preferred_name", "string")
                            if codelists == therapeutic_phase_code:
                                therapeutic_area_uid = item.get("term_uid", "string")
                                therapeutic_area_name = item.get("name", {}).get("sponsor_preferred_name", "string")
                                break

                result["therapeutic_area_codes"] = {
                    "term_uid": therapeutic_area_uid,
                    "name": therapeutic_area_name,
                }

            # Map number of expected subjects.
            population = design.get("population", {})
            enrolment_number = population.get("plannedEnrollmentNumberQuantity", {})
            if enrolment_number:
                result["number_of_expected_subjects"] = enrolment_number.get("value")

#             # Map planned sex.
            planned_sex = population.get("plannedSex", [])
            if planned_sex and planned_sex[0]:
                sex_of_participants_code = planned_sex[0].get("code", "")
                sex_of_participants_code_uid = "string"
                sex_of_participants_code_name = "string"

                if sex_of_participants_code:
                    endpoint = f"{self.api_base_url}/ct/terms?codelist_uid=C66732"
                    response = requests.get(endpoint, headers=self.headers)
                    if response.status_code == 200:
                        items = response.json().get("items", [])
                        for item in items:
                            codelists = item.get("attributes", {}).get("concept_id", "")
                            if codelists == sex_of_participants_code:
                                sex_of_participants_code_uid = item.get("term_uid", "string")
                                sex_of_participants_code_name = item.get("name", {}).get("sponsor_preferred_name", "string")
                                break

                result["sex_of_participants_code"] = {
                    "term_uid": sex_of_participants_code_uid,
                    "name": sex_of_participants_code_uid,
                }
                result["sex_of_participants_null_value_code"] = template.get("sex_of_participants_null_value_code")
            

            # Map planned age.
            planned_age = population.get("plannedAge", {})
            if planned_age:
                planned_age_min = planned_age.get("minValue", {})
                result["planned_minimum_age_of_subjects"] = {"duration_value": planned_age_min.get("value"),
                    "duration_unit_code": {"uid": "UnitDefinition_000370", "name": "years"}}
                    
                
                planned_age_max = planned_age.get("maxValue", {})
                result["planned_maximum_age_of_subjects"] = {"duration_value": planned_age_max.get("value"),
                    "duration_unit_code": {"uid": "UnitDefinition_000370", "name": "years"}}
            else:
                result["planned_minimum_age_of_subjects"] = template.get("planned_minimum_age_of_subjects")
                result["planned_maximum_age_of_subjects"] = template.get("planned_maximum_age_of_subjects")

        # For any keys not explicitly set, keep the template's default.
        for key, default in template.items():
            if key not in result:
                result[key] = default
        return result
        logging.info("Function map_study_population completed and returned a value.")

    logging.info("Logging info for section 7: Entering function map_study_intervention")
    def map_study_intervention(self, study_designs: list, template: dict) -> dict:
        """
        Updated: Fetches study intervention terms from external codelists and maps them accordingly,
        using codelist_uid from name['codelists'] structure.
        """
        result = {}

        if study_designs:
            design = study_designs[0]

            # Map intervention model code
            model = design.get("model", {})
            intervention_model_code = model.get("code", "")
            intervention_model_code_uid = "string"
            intervention_model_code_name = "string"

            if intervention_model_code:
                endpoint = f"{self.api_base_url}/ct/terms?codelist_uid=C99076&page_number=1&page_size=1000"
                response = requests.get(endpoint, headers=self.headers)
                if response.status_code == 200:
                    items = response.json().get("items", [])
                    for item in items:
                        codelists = item.get("attributes", {}).get("concept_id", "")
                        if codelists == intervention_model_code:
                            intervention_model_code_uid = item.get("term_uid", "string")
                            intervention_model_code_name = item.get("name", {}).get("sponsor_preferred_name", "string")
                            break

            result["intervention_model_code"] = {
                "term_uid": intervention_model_code_uid,
                "name": intervention_model_code_name,
            }
            result["intervention_model_null_value_code"] = template.get("intervention_model_null_value_code")

             
           # Map trial_blinding schema info
        
            blindingSchema = design.get("blindingSchema", {})
            standardCode = blindingSchema.get("standardCode", {})
            trial_blinding_schema_code = standardCode.get("code", "")
            trial_blinding_schema_code_uid = "string"
            trial_blinding_schema_code_name = "string"

            if trial_blinding_schema_code:
                endpoint = f"{self.api_base_url}/ct/terms?codelist_uid=C66735&page_number=1&page_size=1000"
                response = requests.get(endpoint, headers=self.headers)
                if response.status_code == 200:
                    items = response.json().get("items", [])
                    for item in items:
                        codelists = item.get("attributes", {}).get("concept_id", "")
                        if codelists == trial_blinding_schema_code:
                            trial_blinding_schema_code_uid = item.get("term_uid", "string")
                            trial_blinding_schema_code_name = item.get("name", {}).get("sponsor_preferred_name", "string")
                            break

            result["trial_blinding_schema_code"] = {
                "term_uid": trial_blinding_schema_code_uid,
                "name": trial_blinding_schema_code_name,
            }
            result["trial_blinding_schema_null_value_code"] = template.get("trial_blinding_schema_null_value_code")
            
#             # Initialize trial intent type codes list
            result["trial_intent_types_codes"] = []

            # Ensure intentTypes is a list of dictionaries with "code"
            intentTypes = design.get("intentTypes", [])
            trial_intent_types_codes_list = [intenttype.get("code", "") for intenttype in intentTypes if "code" in intenttype]

            # Only proceed if there are codes
            if trial_intent_types_codes_list:
                endpoint = f"{self.api_base_url}/ct/terms?codelist_uid=C66736&page_number=1&page_size=1000"
                response = requests.get(endpoint, headers=self.headers)
    
                if response.status_code == 200:
                    items = response.json().get("items", [])

                    for code in trial_intent_types_codes_list:
                        for item in items:
                            codelist_code = item.get("attributes", {}).get("concept_id", "")
                            if codelist_code == code:
                                result["trial_intent_types_codes"].append({
                                    "term_uid": item.get("term_uid", "string"),
                                    "name": item.get("name", {}).get("sponsor_preferred_name", "string")
                                })
                                break
                else:
                    # Optionally log error or handle missing response
                    logging.warning("Failed to fetch trial intent type terms from codelist API.")
            else:
                logging.info("No valid trial_intent_type_codes found in intentTypes.")

            # Add null value code
            result["trial_intent_types_null_value_code"] = template.get("trial_intent_types_null_value_code")

        # For any keys not explicitly set, keep the template's default.
        for key, default in template.items():
            if key not in result:
                result[key] = default
        return result
        logging.info("Function map_study_intervention completed and returned a value.")

    #title
    logging.info("Logging info for section 8: Entering function map_study_description")
    def map_study_description(self, study_titles: list, template: dict) -> dict:
        """
        Build the study title
        """
        result = {}

        if study_titles:
            for title in study_titles:
                if title.get("type", {}).get("decode", "") == "Official Study Title":
                    title1 = title.get("text", "")
                elif title.get("type", {}).get("decode", "") == "Brief Study Title":
                    title2 = title.get("text", "")
            result = {
                "study_title": title1,
                "study_short_title": title2,
            }
            

        # For any keys not explicitly set, keep the template's default.
        for key, default in template.items():
            if key not in result:
                result[key] = default
        return result
        logging.info("Function map_study_description completed and returned a value.")

    logging.info("Logging info for section 9: Entering function patch_study_metadata")
    def patch_study_metadata(self, study_patch_payload: Dict) -> bool:
        """
        Patch study metadata via API.
        
        Args:
            study_patch_payload: Dictionary containing study patch payload
            
        Returns:
            True if successful, False otherwise
        """
        if not self.study_uid:
            print("Study UID not available. Please create a study first.")
            return False
            logging.info("Function patch_study_metadata completed and returned a value.")

        endpoint = f"{self.api_base_url}/studies/{self.study_uid}"
        response = requests.patch(
            endpoint,
            headers=self.headers,
            json=study_patch_payload
        )

        if response.status_code == 200:
            print("Successfully updated study metadata")
            return True
            logging.info("Function patch_study_metadata completed and returned a value.")
        else:
            print(f"Failed to update study metadata. Status code: {response.status_code}")
            print(f"Response: {response.text}")
            return False
            logging.info("Function patch_study_metadata completed and returned a value.")

    logging.info("Logging info for section 10: Entering function extract_study_arms")
    def extract_study_arms(self, study_designs: list):
        """
        Extract study arms information from USDM data.

        Args:
            study_designs: List containing USDM study design data

        Returns:
            List of study arms in the format expected by the API
        """
        study_arms = []
        design = study_designs[0]
        arms = design.get("arms", [])

        for arm in arms:
            arm_type_uid = self._resolve_arm_type_uid(arm)

            study_arm = {
                "name": arm.get("name", ""),
                "short_name": arm.get("name", ""),
                "code": arm.get("name", ""),
                "description": arm.get("description", ""),
                "arm_colour": "",
                "randomization_group": arm.get("id", ""),
                "number_of_subjects": 0,
                "arm_type_uid": arm_type_uid
            }

            study_arms.append(study_arm)

        return study_arms
        logging.info("Function extract_study_arms completed and returned a value.")

    logging.info("Logging info for section 11: Entering function _resolve_arm_type_uid")
    def _resolve_arm_type_uid(self, arm: dict) -> str:
        """
        Resolve arm type UID dynamically using arm description and a GET request.

        Args:
            arm: Dictionary representing an arm from USDM

        Returns:
            The UID string of the matched arm type
        """
        keywords = ["placebo", "investigational", "comparator", "observational"]
        arm_type_decode = arm.get("type", {}).get("decode", "").lower()

        # Map 'treatment' to 'investigational'
        if "treatment" in arm_type_decode:
            arm_type_decode = "investigational"

        try:
            response = requests.get(
                f"{self.api_base_url}/ct/terms?codelist_name=Arm%20Type&is_sponsor=false&page_number=1&page_size=100"
            )
            response.raise_for_status()
            items = response.json().get("items", [])

            for keyword in keywords:
                if keyword in arm_type_decode:
                    for item in items:
                        sponsor_name = item.get("name", {}).get("sponsor_preferred_name", "").lower()
                        if keyword in sponsor_name:
                            return item.get("term_uid", "UNKNOWN_UID")
                            logging.info("Function _resolve_arm_type_uid completed and returned a value.")

        except requests.RequestException as e:
            print(f"Failed to retrieve arm type terms: {e}")

        return "UNKNOWN_UID"
        logging.info("Function _resolve_arm_type_uid completed and returned a value.")

    logging.info("Logging info for section 12: Entering function update_study_arms")
    def update_study_arms(self, study_arms: List[Dict]) -> bool:
        """
        Update study arms via API.
        """
        if not self.study_uid:
            print("Study UID not available. Please create a study first.")
            return False
            logging.info("Function update_study_arms completed and returned a value.")

        success = True
        endpoint = f"{self.api_base_url}/studies/{self.study_uid}/study-arms"

        for arm in study_arms:
            response = requests.post(
                endpoint,
                headers=self.headers,
                json=arm
            )

            if response.status_code == 201:
                print(f"Successfully created study arm: {arm['name']}")
            else:
                success = False
                print(f"Failed to create study arm {arm['name']}. Status code: {response.status_code}")
                print(f"Response: {response.text}")
#             if response.status_code == 201:
#                 self.arm_uid = response.json().get("uid")
#                 print(f"Successfully created study arm with UID: {self.arm_uid}")
#                 return self.arm_uid
        return success
        logging.info("Function update_study_arms completed and returned a value.")
            
    logging.info("Logging info for section 13: Entering function replace_aults_with_none")
    def replace_defaults_with_none(self, obj):
        """
        Recursively traverse obj (which may be a dict or list) and replace:
        - Any dictionary that exactly equals {"term_uid": "string", "name": "string"}
        or {"uid": "string", "name": "string"} with None.
        - Any value that is exactly "string" with None.
      - If the result of processing a list is exactly [None], return None.
         """
        default_dict = {"term_uid": "string", "name": "string"}
        default_dict_b = {"uid": "string", "name": "string"}
        target_string = "string"

        if isinstance(obj, dict):
        # If the whole dictionary equals one of the default dictionaries, return None.
            if obj == default_dict or obj == default_dict_b:
                return None
                logging.info("Function replace_aults_with_none completed and returned a value.")
        # Process each key-value pair.
            new_dict = {}
            for key, value in obj.items():
                if value == target_string:
                    new_dict[key] = None
                else:
                    new_dict[key] = self.replace_defaults_with_none(value)
            return new_dict
            logging.info("Function replace_aults_with_none completed and returned a value.")
        elif isinstance(obj, list):
        # Process each item in the list.
            new_list = [self.replace_defaults_with_none(item) for item in obj]
        # If the processed list is exactly [None], return None instead.
            if new_list == [None]:
                return None
                logging.info("Function replace_aults_with_none completed and returned a value.")
            return new_list
            logging.info("Function replace_aults_with_none completed and returned a value.")
        else:
            return None if obj == target_string else obj
            logging.info("Function replace_aults_with_none completed and returned a value.")

    logging.info("Logging info for section 14: Entering function run")
    def run(self, usdm_file_path: str) -> Tuple[bool, Optional[str]]:
        """
        Execute the complete study import process for one USDM JSON file.

        Steps, in order: load the file, create the study, map the first study
        version onto the ``study_patch`` template, patch the study metadata and
        finally post the study arms (when any are extracted).

        Args:
            usdm_file_path: Path to the USDM JSON file to import.

        Returns:
            ``(success, study_uid)``. ``(False, None)`` when the study could not
            be created or an unexpected exception was raised;
            ``(False, study_uid)`` when the study exists but the metadata patch
            or the arm update failed; ``(True, study_uid)`` otherwise.
        """
        try:
            # Load USDM data and create the study from it.
            usdm_data = self.load_usdm_data(usdm_file_path)
            study_uid = self.create_study_from_usdm(usdm_data)
            if not study_uid:
                logging.error("Function run: study creation failed.")
                return (False, None)

            # NOTE(review): the file is parsed a second time here, presumably to
            # map from an unmodified copy - confirm whether usdm_data can be reused.
            json_data = self.load_usdm_data(usdm_file_path)

            # Extract study information, including study versions.
            study_data = json_data.get("study", {})
            study_versions = study_data.get("versions", [])

            # Every section defaults to empty so that a file without versions
            # still maps cleanly instead of failing on an unbound name.
            study_identifiers = []
            study_designs = []
            study_titles = []
            if study_versions:
                first_version = study_versions[0]
                study_identifiers = first_version.get("studyIdentifiers", [])
                study_designs = first_version.get("studyDesigns", [])
                study_titles = first_version.get("titles", [])

            # Extract template sections from the study_patch template.
            templates = study_patch["current_metadata"]

            # Update a deep copy of the study_patch template with the mapped
            # sections so the imported template itself is never modified.
            mapped_study_patch = copy.deepcopy(study_patch)
            current = mapped_study_patch["current_metadata"]
            current["identification_metadata"] = self.map_identification_from_identifiers(
                study_identifiers, templates["identification_metadata"]
            )
            current["high_level_study_design"] = self.map_high_level_design(
                study_designs, templates["high_level_study_design"]
            )
            current["study_population"] = self.map_study_population(
                study_designs, templates["study_population"]
            )
            current["study_intervention"] = self.map_study_intervention(
                study_designs, templates["study_intervention"]
            )
            current["study_description"] = self.map_study_description(
                study_titles, templates["study_description"]
            )
            mapped_study_patch = self.replace_defaults_with_none(mapped_study_patch)

            if not self.patch_study_metadata(mapped_study_patch):
                # Study created but metadata update failed.
                logging.error("Function run: metadata patch failed for study %s.", study_uid)
                return (False, study_uid)

            # Extract and update study arms.
            study_arms = self.extract_study_arms(study_designs)
            if study_arms and not self.update_study_arms(study_arms):
                # Study created but arm updates failed.
                logging.error("Function run: arm update failed for study %s.", study_uid)
                return (False, study_uid)

            logging.info("Function run completed and returned a value.")
            return (True, study_uid)

        except Exception as e:
            # Top-level boundary: report the failure and keep the full traceback in the log.
            logging.exception("Function run failed with an unexpected error.")
            print(f"An error occurred: {str(e)}")
            return (False, None)


# Example usage
if __name__ == "__main__":
    # Placeholder settings: supply the real API URL, token and input file before running.
    API_BASE_URL = ""
    AUTH_TOKEN = ""
    USDM_FILE_PATH = "example.json"

    # Build the importer and execute the whole import pipeline.
    manager = USDMStudyManager(api_base_url=API_BASE_URL, auth_token=AUTH_TOKEN)
    success, study_uid = manager.run(USDM_FILE_PATH)

    if not success:
        print("There were issues during study import.")
        # A UID together with a failure means the study exists but a later step failed.
        if study_uid:
            print(f"Partial import completed. Study UID: {study_uid}")
    else:
        print(f"Study import completed successfully! Study UID: {study_uid}")

## Combine objectives with endpoints

In [None]:
import requests
import json

API_BASE_URL = ""
HEADERS = {"Content-Type": "application/json"}

logging.info("Logging info for section 15: Entering function post_objectives_with_templates")
def _create_approved_template(resource: str, name: str, study_uid: str):
    """
    Create a "User Defined" template under ``/{resource}`` and request its approval.

    Args:
        resource: API collection name, e.g. ``objective-templates``.
        name: Template text (already bracket-sanitised by the caller).
        study_uid: Study the template is created for.

    Returns:
        ``(template_uid, None)`` when creation returned HTTP 201, otherwise
        ``(None, response_text)``.
    """
    payload = {
        "name": name,
        "guidance_text": None,
        "study_uid": study_uid,
        "library_name": "User Defined",
        "indication_uids": None,
        "is_confirmatory_testing": False,
        "category_uids": None
    }
    created = requests.post(f"{API_BASE_URL}/{resource}", headers=HEADERS, json=payload)
    if created.status_code != 201:
        return None, created.text

    template_uid = created.json().get("uid")
    approval = requests.post(
        f"{API_BASE_URL}/{resource}/{template_uid}/approvals?cascade=false", headers=HEADERS
    )
    if approval.status_code >= 400:
        # Not fatal here: a later step that needs the approved template reports its own error.
        logging.warning("Approval of %s/%s failed: %s", resource, template_uid, approval.status_code)
    return template_uid, None


def post_objectives_with_templates(study_designs: list, study_id: str, study_uid: str):
    """
    Post every objective of the first study design, followed by its endpoints.

    For each objective: create and approve an objective template, create the
    study objective from it, look up the created study objective UID, then do
    the same template/creation sequence for each endpoint under the objective.

    Args:
        study_designs: USDM ``studyDesigns`` list; only the first entry is used.
        study_id: Study identifier used in the ``/studies/{study_id}/...`` URLs.
        study_uid: Study identifier written into the template payloads.

    Returns:
        A list of result dicts. Failed steps carry ``step``, ``id`` and
        ``error``; each posted endpoint carries its status code and response.
        An empty list is returned when ``study_designs`` is empty.
    """
    results = []
    if not study_designs:
        return results

    for obj in study_designs[0].get("objectives", []):
        obj_id = obj.get("id")
        objective_text = obj.get("text") or ""
        # Square brackets are replaced before posting the template name.
        template_name = objective_text.replace("[", "(").replace("]", ")")

        # Step 1: Create and Approve Objective Template
        template_uid, error = _create_approved_template("objective-templates", template_name, study_uid)
        if template_uid is None:
            results.append({"step": "objective-template", "id": obj_id, "error": error})
            continue

        # Step 2: Create Study Objective using Template
        level_decode = ((obj.get("level") or {}).get("decode") or "").lower()
        level_uid = "C85826_OBJPRIM" if level_decode == "primary objective" else "C85827_OBJSEC"
        objective_payload = {
            "objective_level_uid": level_uid,
            "objective_data": {
                "objective_template_uid": template_uid,
                "library_name": "User Defined"
            }
        }
        obj_resp = requests.post(
            f"{API_BASE_URL}/studies/{study_id}/study-objectives?create_objective=true",
            headers=HEADERS, json=objective_payload
        )
        if obj_resp.status_code >= 400:
            results.append({"step": "study-objective", "id": obj_id, "error": obj_resp.text})
            continue

        # Step 3: Get Created Study Objective UID
        listing = requests.get(f"{API_BASE_URL}/studies/{study_id}/study-objectives", headers=HEADERS)
        if listing.status_code >= 400:
            results.append({"step": "get-study-objective", "id": obj_id, "error": listing.text})
            continue
        # The template was posted with brackets replaced, so accept either
        # spelling; comparing against the raw text alone never matches
        # objectives whose text contains square brackets.
        acceptable_names = {objective_text, template_name}
        study_objective_uid = next(
            (
                existing.get("study_objective_uid")
                for existing in listing.json().get('items', [])
                if (existing.get("objective") or {}).get("name", "") in acceptable_names
            ),
            None,
        )
        if not study_objective_uid:
            results.append({"step": "get-study-objective", "id": obj_id, "error": "Study Objective not found"})
            continue

        # Step 4: For each endpoint under the objective
        for obj_end in obj.get("endpoints", []):
            end_id = obj_end.get("id")
            endpoint_name = (obj_end.get("text") or "").replace("[", "(").replace("]", ")")

            # Create and Approve Endpoint Template
            endpoint_template_uid, error = _create_approved_template(
                "endpoint-templates", endpoint_name, study_uid
            )
            if endpoint_template_uid is None:
                results.append({"step": "endpoint-template", "id": end_id, "error": error})
                continue

            # Create Study Endpoint
            end_decode = ((obj_end.get("level") or {}).get("decode") or "").lower()
            endpoint_level_uid = "C98772_OUTMSPRI" if end_decode == "primary endpoint" else "C98781_OUTMSSEC"

            endpoint_payload = {
                "study_objective_uid": study_objective_uid,
                "endpoint_level_uid": endpoint_level_uid,
                "endpoint_sublevel_uid": None,
                "endpoint_data": {
                    "parameter_terms": [],
                    "endpoint_template_uid": endpoint_template_uid,
                    "library_name": "User Defined"
                },
                "endpoint_units": {
                    "units": [],
                    "separator": None
                },
                "timeframe_uid": None
            }
            endpoint_resp = requests.post(
                f"{API_BASE_URL}/studies/{study_id}/study-endpoints?create_endpoint=true",
                headers=HEADERS, json=endpoint_payload
            )
            results.append({
                "step": "study-endpoint",
                "endpoint_id": end_id,
                "study_objective_uid": study_objective_uid,
                "template_uid": endpoint_template_uid,
                "status_code": endpoint_resp.status_code,
                "response": endpoint_resp.json() if endpoint_resp.status_code < 400 else endpoint_resp.text
            })

    logging.info("Function post_objectives_with_templates completed and returned a value.")
    return results


logging.info("Logging info for section 16: Entering function process_objectives_and_endpoints")
def process_objectives_and_endpoints(json_path: str, study_number: str):
    """
    Read a USDM JSON file and post the objectives and endpoints of its first version.

    Args:
        json_path: Path to the USDM JSON file.
        study_number: Passed to ``post_objectives_with_templates`` as both the
            study id and the study uid.

    Returns:
        Whatever ``post_objectives_with_templates`` returns.

    Raises:
        ValueError: If the file has no study versions, or the first version
            has no study designs.
    """
    with open(json_path, "r", encoding="utf-8") as handle:
        document = json.load(handle)

    versions = document.get("study", {}).get("versions", [])
    if not versions:
        raise ValueError("No study versions found in the JSON file")

    # Only the first version is imported.
    designs = versions[0].get("studyDesigns", [])
    if not designs:
        raise ValueError("No study designs found in the JSON file")

    return post_objectives_with_templates(designs, study_number, study_number)


# Example call
results = process_objectives_and_endpoints("example.json", "Study_XXXXXX")
print(json.dumps(results, indent=2))

# post study_elements works

In [None]:
import requests
import random
import json

logging.info("Logging info for section 17: Entering function get_random_color")
def get_random_color():
    """Return a random colour between ``#888888`` and ``#ffffff`` as a hex string."""
    rgb = random.randint(0x888888, 0xFFFFFF)
    return f"#{rgb:06x}"

logging.info("Logging info for section 18: Entering function post_study_elements")
def _classify_element(label: str):
    """
    Map a lower-cased element label to ``(code, element_subtype_uid)``.

    UIDs follow the sponsor element type list referenced at
    https://github.com/NovoNordisk-OpenSource/openstudybuilder-solution/blob/main/studybuilder-import/datafiles/sponsor_library/element/element_type_exp.csv
    """
    if "screening" in label:
        return "CTTerm_000143", "CTTerm_000145"
    if "check in" in label or "run-in" in label:
        return "CTTerm_000143", "CTTerm_000148"
    if "follow up" in label or "follow-up" in label:
        return "CTTerm_000143", "CTTerm_000149"
    # "washout" is accepted too, matching the spellings the visit classifier recognises.
    if "wash out" in label or "wash-out" in label or "washout" in label:
        # NOTE(review): same subtype UID as follow-up - confirm against the CSV above.
        return "CTTerm_000143", "CTTerm_000149"
    return "CTTerm_000144", "CTTerm_000147"


def post_study_elements(study_designs: list, study_uid: str, api_base_url: str):
    """
    Post every element of the first study design to ``/studies/{study_uid}/study-elements``.

    The element name is used when it is longer than three characters, otherwise
    the element label. Code and subtype are derived from keywords in that text.

    Args:
        study_designs: USDM ``studyDesigns`` list; only the first entry is used.
        study_uid: Study the elements are posted to.
        api_base_url: Base URL of the API.

    Returns:
        One dict per element with ``element``, ``status_code`` and ``response``.
        A request that raises is recorded with ``status_code`` ``"Exception"``.
        An empty list is returned when ``study_designs`` is empty.
    """
    results = []
    if not study_designs:
        return results

    endpoint = f"{api_base_url}/studies/{study_uid}/study-elements"
    headers = {"Content-Type": "application/json"}

    for elem in study_designs[0].get("elements", []):
        name = elem.get("name") or ""
        # Short names (three characters or fewer) fall back to the label.
        element_name = name if len(name) > 3 else (elem.get("label") or "")
        code, subtype_uid = _classify_element(element_name.lower())

        # A rule may be absent or explicitly null; treat both the same way.
        start_rule = (elem.get("transitionStartRule") or {}).get("text", "")
        end_rule = (elem.get("transitionEndRule") or {}).get("text")

        payload = {
            "name": element_name,
            "short_name": element_name,
            "code": code,
            "description": elem.get("description", ""),
            "planned_duration": None,
            "start_rule": start_rule,
            "end_rule": end_rule,
            "element_colour": get_random_color(),
            "element_subtype_uid": subtype_uid
        }

        try:
            resp = requests.post(endpoint, headers=headers, json=payload)
            results.append({
                "element": element_name,
                "status_code": resp.status_code,
                "response": resp.json() if resp.status_code < 400 else resp.text
            })
        except Exception as e:
            # Best effort: record the failure and carry on with the remaining elements.
            results.append({
                "element": element_name,
                "status_code": "Exception",
                "response": str(e)
            })

    logging.info("Function post_study_elements completed and returned a value.")
    return results

logging.info("Logging info for section 19: Entering function process_study_elements")
def process_study_elements(json_path: str, study_number: str, api_base_url: str):
    """
    Read a USDM JSON file and post the study elements of its first version.

    Args:
        json_path: Path to the USDM JSON file.
        study_number: Study identifier handed to ``post_study_elements``.
        api_base_url: Base URL of the API.

    Returns:
        Whatever ``post_study_elements`` returns.

    Raises:
        ValueError: If the file has no study versions, or the first version
            has no study designs.
    """
    with open(json_path, "r", encoding="utf-8") as handle:
        document = json.load(handle)

    versions = document.get("study", {}).get("versions", [])
    if not versions:
        raise ValueError("No study versions found in the JSON file")

    # Only the first version is imported.
    designs = versions[0].get("studyDesigns", [])
    if not designs:
        raise ValueError("No study designs found in the JSON file")

    return post_study_elements(designs, study_number, api_base_url)

# Example usage
results = process_study_elements("example.json", "Study_XXXXXX", "http://XX/api")
print(json.dumps(results, indent=2))

## EPOCH AND VISITS

In [None]:


import requests
import json
import re
import logging
import random
from typing import Dict, List
from collections import defaultdict
import pandas as pd

logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')

logging.info("Logging info for section 20: Entering function get_random_color")
def get_random_color():
    """Pick a random integer in [0x888888, 0xFFFFFF] and return it as ``#rrggbb``."""
    return "#" + format(random.randint(0x888888, 0xFFFFFF), "06x")

logging.info("Logging info for section 21: Entering function extract_time_unit_from_unit")
def extract_time_unit_from_unit(unit: str) -> str:
    """
    Translate a time unit name into its unit-definition UID.

    ``"week"`` maps to ``UnitDefinition_000368``; ``"day"`` and every other
    value (including ``None``) map to ``UnitDefinition_000364``.
    """
    if unit == "week":
        return "UnitDefinition_000368"
    return "UnitDefinition_000364"

logging.info("Logging info for section 22: Entering function determine_visit_type_uid")
def determine_visit_type_uid(label):
    """
    Choose a visit type UID from keywords found in the visit label.

    The label is lower-cased and checked against the keyword groups in order;
    the first group with a hit wins. Labels matching no group get
    ``CTTerm_000190``, the same UID as the washout group.
    """
    text = label.lower()
    keyword_rules = (
        (("screening",), "CTTerm_000184"),
        (("follow up", "follow-up"), "CTTerm_000179"),
        (("washout", "wash out"), "CTTerm_000190"),
        (("run-in", "check in"), "CTTerm_000186"),
    )
    for keywords, uid in keyword_rules:
        if any(keyword in text for keyword in keywords):
            return uid
    return "CTTerm_000190"

logging.info("Logging info for section 23: Entering function fetch_contact_mode_uid")
def fetch_contact_mode_uid(decode_value: str, api_base_url: str = None, headers: dict = None) -> str:
    """
    Look up the term UID of a visit contact mode in the "Visit Contact Mode" codelist.

    Args:
        decode_value: USDM contact mode decode; only ``"In person"`` and
            ``"Telephone call"`` are mapped to sponsor preferred names.
        api_base_url: Base URL of the API. When omitted, a module-level
            ``api_base_url`` variable is used if one is defined.
        headers: Optional HTTP headers for the lookup request.

    Returns:
        The ``term_uid`` of the term whose sponsor preferred name matches, or
        ``None`` when the decode is not mapped, no base URL is available, the
        request does not return HTTP 200, or no term matches.
    """
    decode_map = {
        "In person": "On Site Visit",
        "Telephone call": "Phone Contact"
    }
    preferred_name = decode_map.get(decode_value)
    if not preferred_name:
        return None

    # Fall back to a module-level setting, which is what a one-argument call relies on.
    base_url = api_base_url if api_base_url is not None else globals().get("api_base_url")
    if not base_url:
        logging.warning("fetch_contact_mode_uid: no API base URL available; contact mode not resolved.")
        return None

    # Terms are requested from "{base}/ct/terms", the same path the epoch term
    # lookup in this notebook uses with a base URL that already ends in "/api".
    url = f"{base_url.rstrip('/')}/ct/terms?codelist_name=Visit%20Contact%20Mode&is_sponsor=false&page_number=1&page_size=100"
    response = requests.get(url, headers=headers)
    if response.status_code != 200:
        return None

    for item in response.json().get("items", []):
        name = (item.get("name") or {}).get("sponsor_preferred_name", "")
        if name == preferred_name:
            return item.get("term_uid")
    return None

logging.info("Logging info for section 24: Entering function extract_day_or_week_value_dynamic_with_anchor_flag")
def extract_day_or_week_value_dynamic_with_anchor_flag(timings: list) -> dict:
    """
    Derive a ``(value, unit)`` pair per encounter from timing labels.

    NOTE: a function with this exact name is defined again further down in the
    same cell; that later definition replaces this one once the cell has run.

    For each timing the label is searched for ``day <n>`` and then
    ``week <n>``; failing that, the first integer in ``valueLabel`` is used
    with the unit ``week`` if either text mentions "week", else ``day``.
    Until a timing whose description mentions "anchor" has been seen, values
    are forced negative; from that timing onwards they keep their sign.

    Returns:
        Dict keyed by ``encounterId``; timings without any number map to
        ``(None, None)``.
    """
    label_patterns = ((r"day\s*(-?\d+)", "day"), (r"week\s*(-?\d+)", "week"))
    seen_anchor = False
    parsed = {}

    for entry in timings:
        encounter = entry.get('encounterId')
        label_text = (entry.get("label") or "").lower()
        value_text = (entry.get("valueLabel") or "").strip().lower()
        description_text = (entry.get("description") or "").lower()

        if not seen_anchor and "anchor" in description_text:
            seen_anchor = True

        # Prefer an explicit "day N" / "week N" in the label, in that order.
        hit = None
        for pattern, unit_name in label_patterns:
            found = re.search(pattern, label_text)
            if found:
                hit = (int(found.group(1)), unit_name)
                break

        # Otherwise fall back to the first integer in the value label.
        if hit is None:
            number = re.search(r"-?\d+", value_text)
            if number:
                mentions_week = "week" in label_text or "week" in value_text
                hit = (int(number.group(0)), "week" if mentions_week else "day")

        if hit is None:
            parsed[encounter] = (None, None)
        else:
            amount, unit_name = hit
            parsed[encounter] = (amount if seen_anchor else -abs(amount), unit_name)

    return parsed

logging.info("Logging info for section 25: Entering function extract_day_or_week_value_dynamic_with_anchor_flag")
def extract_day_or_week_value_dynamic_with_anchor_flag(timings: list) -> dict:
    """
    Derive a ``(value, unit)`` pair per encounter, pinning anchor timings to day 0.

    A timing whose description mentions "anchor" is recorded as ``(0, "day")``
    and switches the sign handling: values parsed before the first anchor are
    forced negative, values parsed after it keep their sign. For all other
    timings the label is searched for ``day <n>`` and then ``week <n>``;
    failing that, the first integer in ``valueLabel`` is used with the unit
    ``week`` if either text mentions "week", else ``day``.

    Returns:
        Dict keyed by ``encounterId``; timings without any number map to
        ``(None, None)``.
    """
    past_anchor = False
    by_encounter = {}

    def apply_sign(number):
        # Before the anchor every offset counts backwards in time.
        return number if past_anchor else -abs(number)

    for item in timings:
        key = item.get('encounterId')
        label_text = (item.get("label") or "").lower()
        value_text = (item.get("valueLabel") or "").strip().lower()
        description_text = (item.get("description") or "").lower()

        if "anchor" in description_text:
            past_anchor = True
            by_encounter[key] = (0, "day")
            continue

        in_days = re.search(r"day\s*(-?\d+)", label_text)
        in_weeks = re.search(r"week\s*(-?\d+)", label_text)
        if in_days:
            by_encounter[key] = (apply_sign(int(in_days.group(1))), "day")
            continue
        if in_weeks:
            by_encounter[key] = (apply_sign(int(in_weeks.group(1))), "week")
            continue

        bare_number = re.search(r"-?\d+", value_text)
        if not bare_number:
            by_encounter[key] = (None, None)
            continue

        if "week" in label_text or "week" in value_text:
            chosen_unit = "week"
        else:
            chosen_unit = "day"
        by_encounter[key] = (apply_sign(int(bare_number.group(0))), chosen_unit)

    return by_encounter


logging.info("Logging info for section 26: Entering function finalize_timing_integration")
def finalize_timing_integration(schedule: dict, encounters: list) -> dict:
    """
    Attach encounters to the schedule's timings and compute a timing per encounter.

    Each timing is linked to an encounter through the scheduled instance named
    by its ``relativeFromScheduledInstanceId``. The resolved id (or ``None``)
    is written into the timing dict itself under ``encounterId``, so the
    ``schedule`` argument is modified in place. Timings that resolved to an
    encounter are then parsed by
    ``extract_day_or_week_value_dynamic_with_anchor_flag``.

    Args:
        schedule: A schedule timeline dict with ``instances`` and ``timings``.
        encounters: Encounter dicts; only their ``id`` is read.

    Returns:
        ``{encounter_id: {"value": int, "unit": str}}`` for every encounter
        that received both a value and a unit.
    """
    scheduled_instances = schedule.get("instances", [])
    all_timings = schedule.get("timings", [])

    # Scheduled instance id -> encounter id, for instances that reference an encounter.
    encounter_by_instance = {}
    for instance in scheduled_instances:
        if instance.get("encounterId"):
            encounter_by_instance[instance.get("id")] = instance.get("encounterId")

    logging.info("Instance to Encounter mapping:")
    for instance_id, encounter_id in encounter_by_instance.items():
        logging.info(f"  {instance_id} → {encounter_id}")

    for timing in all_timings:
        source_instance = timing.get("relativeFromScheduledInstanceId")
        resolved = encounter_by_instance.get(source_instance)
        if resolved:
            logging.info(f"Timing {timing.get('id')} → Encounter {resolved}")
        else:
            logging.warning(f"No encounterId found for timing with relativeFromScheduledInstanceId: {source_instance}")
        # Written even when unresolved (None), which overwrites any existing value.
        timing["encounterId"] = resolved

    linked_timings = list(filter(lambda t: t.get("encounterId"), all_timings))
    timing_by_encounter = extract_day_or_week_value_dynamic_with_anchor_flag(linked_timings)

    assigned = {}
    for encounter in encounters:
        encounter_id = encounter.get("id")
        amount, unit_name = timing_by_encounter.get(encounter_id, (None, None))
        if amount is None or not unit_name:
            logging.warning(f"No timing value found for Encounter {encounter_id}")
            continue
        logging.info(f"Encounter {encounter_id} assigned timing: {amount} {unit_name}")
        assigned[encounter_id] = {"value": amount, "unit": unit_name}
    return assigned



logging.info("Logging info for section 27: Entering function post_epochs_and_visits_from_json")
def post_epochs_and_visits_from_json(study_designs, study_uid: str, api_base_url: str, mapping_df: pd.DataFrame):
    headers = {"Content-Type": "application/json"}
    design = study_designs[0]
    epochs = design.get("epochs", [])
    elements = design.get("elements", [])
    encounters = design.get("encounters", [])
    schedule = design.get("scheduleTimelines", [])[0]

    encounter_timing_map = finalize_timing_integration(schedule, encounters)
    mapping_dict = {
        subtype.lower(): etype.replace(" EPOCH TYPE", "")
        for etype, subtype in zip(mapping_df["GEN_EPOCH_TYPE"], mapping_df["GEN_EPOCH_SUB_TYPE"])
    }

    epoch_uid_map = {}
    epoch_endpoint = f"{api_base_url}/studies/{study_uid}/study-epochs"

    for index, epoc in enumerate(sorted(epochs, key=lambda e: int(re.search(r'\d+', e.get("id", "0")).group()))):
        epoch_id = epoc.get("id")
        epoch_order = index + 1  # Sequential order: 1, 2, 3, ...
        label = epoc.get("name", "").strip().lower()

        idx = next((i for i, elem in enumerate(elements) if elem.get("id") == epoch_id), None)
        start_rule = elements[idx].get("transitionStartRule", {}).get("text") if idx is not None else None
        end_rule = elements[idx].get("transitionEndRule", {}).get("text") if idx is not None else None

        epoch_type_codes = epoc.get("type", {}).get("code", "")
        matched_row = mapping_df[mapping_df["CT_CD"] == epoch_type_codes]
        if not matched_row.empty:
            epoch_type_codes = matched_row.iloc[0]["CT_CD_NEW"]

        epoch_subtype = ""
        epoch_type_name = "UNKNOWN"

        if epoch_type_codes:
            endpoint = f"{api_base_url}/ct/terms?codelist_uid=C99079&page_number=1&page_size=1000"
            response = requests.get(endpoint, headers=headers)
            if response.status_code == 200:
                items = response.json().get("items", [])
                for item in items:
                    if item.get("attributes", {}).get("concept_id", "") == epoch_type_codes:
                        epoch_subtype = item.get("term_uid", "")
                        sponsor_name = item.get("name", {}).get("sponsor_preferred_name", "").lower()
                        epoch_type_name = next((etype for keyword, etype in mapping_dict.items() if keyword in sponsor_name), "UNKNOWN")
                        break

        payload = {
            "study_uid": study_uid,
            "epoch": epoch_subtype,
            "epoch_type_name": epoch_type_name,
            "epoch_subtype": epoch_subtype,
            "description": epoc.get("description", ""),
            "start_rule": start_rule,
            "end_rule": end_rule,
            "color_hash": get_random_color(),
            "duration_unit": None,
            "order": epoch_order,
            "duration": 0
        }

        resp = requests.post(epoch_endpoint, headers=headers, json=payload)
        if resp.status_code < 400:
            uid = resp.json().get("uid")
            if uid:
                epoch_uid_map[epoch_id] = uid
                logging.info(f"Posted epoch '{label}' with UID {uid}")
        else:
            logging.warning(f"Failed to post epoch '{label}': {resp.status_code} {resp.text}")

    visit_endpoint = f"{api_base_url}/studies/{study_uid}/study-visits"
    visit_results = []
    epoch_first_visit_flag = defaultdict(lambda: True)

    # Determine global visit window unit UID based on first valid timing unit
    first_unit = None
    for timing_data in encounter_timing_map.values():
        if timing_data.get("unit") in ["day", "week"]:
            first_unit = timing_data["unit"]
            break

    global_visit_window_unit_uid = extract_time_unit_from_unit(first_unit) if first_unit else "UnitDefinition_000364"
    logging.info(f"Global visit window unit determined: {first_unit} → {global_visit_window_unit_uid}")

    for enc in encounters:
        enc_id = enc.get("id")
        epoch_id = next((inst.get("epochId") for inst in schedule["instances"] if inst.get("encounterId") == enc_id), None)
        epoch_uid = epoch_uid_map.get(epoch_id)
        if not epoch_uid:
            continue

        timing_data = encounter_timing_map.get(enc_id, {})
        time_val = timing_data.get("value")
        unit = timing_data.get("unit")
        time_unit_uid = extract_time_unit_from_unit(unit)

        label = enc.get("label", "")

        contact_modes = enc.get("contactModes", [])
        contact_mode_decode = contact_modes[0].get("decode") if contact_modes else ""
        contact_mode_uid = fetch_contact_mode_uid(contact_mode_decode) or "CTTerm_000082"

        is_milestone = epoch_first_visit_flag[epoch_uid]
        epoch_first_visit_flag[epoch_uid] = False

        visit_payload = {
            "study_epoch_uid": epoch_uid,
            "visit_type_uid": determine_visit_type_uid(label),
            "time_reference_uid": "CTTerm_000124",
            "time_value": time_val,
            "time_unit_uid": time_unit_uid,
            "visit_window_unit_uid": global_visit_window_unit_uid,
            "description": None,
            "start_rule": None,
            "end_rule": None,
            "visit_contact_mode_uid": contact_mode_uid,
            "epoch_allocation_uid": None,
            "visit_class": "SINGLE_VISIT",
            "visit_subclass": "SINGLE_VISIT",
            "is_global_anchor_visit": time_val == 0,
            "is_soa_milestone": is_milestone,
            "visit_name": None,
            "visit_short_name": None,
            "visit_number": None,
            "unique_visit_number": None,
            "repeating_frequency_uid": None,
            "visit_sublabel_reference": None,
            "consecutive_visit_group": None,
            "show_visit": True,
            "min_visit_window_value": 0,
            "max_visit_window_value": 0
        }

        try:
            resp = requests.post(visit_endpoint, headers=headers, json=visit_payload)
            status = resp.status_code
            logging.info(f"Posted visit for Encounter {enc_id} (Timing: {label}) → {status}")
            visit_results.append({
                "encounter": enc_id,
                "timing_label": label,
                "time_value": time_val,
                "status": resp.status_code
            })
        except Exception as e:
            logging.error(f"Error posting visit for {enc_id}: {str(e)}")
            visit_results.append({
                "encounter": enc_id,
                "status": "Error",
                "error": str(e)
            })

    return {
    logging.info("Function post_epochs_and_visits_from_json completed and returned a value.")
        "epoch_uid_map": epoch_uid_map,
        "visits_posted": visit_results
    }


#final call
logging.info("Logging info for section 28: Entering function process_epochs_and_visits")
def process_epochs_and_visits(json_path: str, study_uid: str, api_base_url: str, mapping_csv_path: str):
    """
    Read a USDM JSON file plus an epoch mapping CSV and post epochs and visits.

    Args:
        json_path: Path to the USDM JSON file.
        study_uid: Study the epochs and visits are posted to.
        api_base_url: Base URL of the API.
        mapping_csv_path: CSV file loaded with ``pandas.read_csv`` and handed
            to ``post_epochs_and_visits_from_json`` as the mapping frame.

    Returns:
        Whatever ``post_epochs_and_visits_from_json`` returns.

    Raises:
        ValueError: If the file has no study versions, or the first version
            has no study designs.
    """
    with open(json_path, "r", encoding="utf-8") as handle:
        document = json.load(handle)

    versions = document.get("study", {}).get("versions", [])
    if not versions:
        raise ValueError("No study versions found in the JSON file")

    # Only the first version is imported.
    designs = versions[0].get("studyDesigns", [])
    if not designs:
        raise ValueError("No study designs found in the JSON file")

    # The CSV is read only after the JSON has passed both checks.
    epoch_mapping = pd.read_csv(mapping_csv_path)
    return post_epochs_and_visits_from_json(designs, study_uid, api_base_url, epoch_mapping)


results = process_epochs_and_visits(
    json_path="example.json",
    study_uid="Study_XXXXXX",
    api_base_url="http://example/api",
    mapping_csv_path="epoch_mapping.csv" #do not replace
)
print(json.dumps(results, indent=2))

## post criteria

In [None]:
import requests
import json
from bs4 import BeautifulSoup

logging.info("Logging info for section 29: Entering function post_criteria_with_templates")
def post_criteria_with_templates(mapped_criteria: list, study_id: str, study_uid: str, api_base_url: str):
    """
    Create a criteria template and a study criterion for every mapped criterion.

    For each entry the HTML text is reduced to plain text, posted as a
    "User Defined" criteria template, approved, and then used to create the
    study criterion.

    Args:
        mapped_criteria: Dicts with the keys ``id``, ``type`` (``"inclusion"``
            selects the inclusion type; anything else is treated as exclusion)
            and ``text`` (HTML).
        study_id: Study identifier used in the ``/studies/{study_id}/...`` URL.
        study_uid: Study identifier written into the template payload.
        api_base_url: Base URL of the API.

    Returns:
        One result dict per criterion: ``step``, ``id`` and either ``error``
        (response text) or ``status: "success"``.
    """
    results = []
    headers = {"Content-Type": "application/json"}

    for crit in mapped_criteria:
        crit_id = crit.get("id")
        type_uid = "CTTerm_000028" if crit["type"] == "inclusion" else "CTTerm_000029"

        raw_html = crit.get("text") or ""
        # get_text() returns a str, so the brackets are replaced on the string
        # itself; calling .get() on it raised AttributeError for every criterion.
        plain_text = BeautifulSoup(raw_html, "html.parser").get_text(separator="\n")
        template_name = plain_text.replace("[", "(").replace("]", ")")

        # Step 1: Post template
        template_payload = {
            "name": template_name,
            "guidance_text": None,
            "study_uid": study_uid,
            "library_name": "User Defined",
            "type_uid": type_uid,
            "indication_uids": None,
            "category_uids": None,
            "sub_category_uids": None
        }

        template_resp = requests.post(f"{api_base_url}/criteria-templates", headers=headers, json=template_payload)
        if template_resp.status_code != 201:
            results.append({"step": "criteria-template", "id": crit_id, "error": template_resp.text})
            continue

        template_uid = template_resp.json().get("uid")
        approval = requests.post(
            f"{api_base_url}/criteria-templates/{template_uid}/approvals?cascade=false", headers=headers
        )
        if approval.status_code >= 400:
            # Not fatal here: the study-criteria request below reports its own error.
            logging.warning("Approval of criteria template %s failed: %s", template_uid, approval.status_code)

        # Step 2: Post study criteria using the template
        criteria_payload = {
            "criteria_data": {
                "parameter_terms": [],
                "criteria_template_uid": template_uid,
                "library_name": "User Defined"
            }
        }

        crit_resp = requests.post(f"{api_base_url}/studies/{study_id}/study-criteria?create_criteria=true",
                                  headers=headers, json=criteria_payload)
        if crit_resp.status_code >= 400:
            results.append({"step": "study-criteria", "id": crit_id, "error": crit_resp.text})
        else:
            results.append({"step": "study-criteria", "id": crit_id, "status": "success"})

    logging.info("Function post_criteria_with_templates completed and returned a value.")
    return results

logging.info("Logging info for section 30: Entering function process_study_criteria")
def process_study_criteria(json_path: str, study_number: str, api_base_url: str):
    """Read eligibility criteria from a USDM JSON file and post them to the API.

    The first study version is used. Every entry in each study design's
    ``eligibilityCriteria`` is mapped to ``{"id", "type", "text"}`` where the
    text is looked up in the version's ``eligibilityCriterionItems`` by
    ``criterionItemId``.

    Args:
        json_path: Path to the UTF-8 encoded USDM JSON file.
        study_number: Study identifier forwarded to
            ``post_criteria_with_templates``.
        api_base_url: Base URL of the API.

    Returns:
        Whatever ``post_criteria_with_templates`` returns for the mapped
        criteria.

    Raises:
        ValueError: If the file contains no study versions or no study designs.
    """
    with open(json_path, "r", encoding="utf-8") as f:
        json_data = json.load(f)

    # `or {}` / `or []` also cover keys that are present but JSON null.
    versions = (json_data.get("study") or {}).get("versions") or []
    if not versions:
        raise ValueError("No study versions found")

    version = versions[0]
    study_designs = version.get("studyDesigns") or []
    if not study_designs:
        raise ValueError("No study designs found")

    # Items without an id cannot be referenced by a criterion, so skip them
    # instead of failing the whole import with a KeyError.
    text_map = {
        item["id"]: item.get("text") or ""
        for item in version.get("eligibilityCriterionItems") or []
        if "id" in item
    }

    mapped_criteria = []
    for design in study_designs:
        for crit in design.get("eligibilityCriteria") or []:
            category = ((crit.get("category") or {}).get("decode") or "").lower()
            # Anything whose category does not start with "in" (including a
            # missing category) is treated as an exclusion criterion.
            crit_type = "inclusion" if category.startswith("in") else "exclusion"
            item_id = crit.get("criterionItemId")
            mapped_criteria.append({
                "id": item_id,
                "type": crit_type,
                "text": text_map.get(item_id, ""),
            })

    # NOTE(review): study_number is passed as both the second and third
    # positional argument - confirm against post_criteria_with_templates.
    return post_criteria_with_templates(mapped_criteria, study_number, study_number, api_base_url)

# ✅ Example usage
results = process_study_criteria("example.json", "Study_XXXXXX", "http://XX/api")
print(json.dumps(results, indent=2))




## Posting activities

In [None]:
import json
import requests
import logging
from difflib import get_close_matches

# NOTE: without force=True, logging.basicConfig does nothing when the root
# logger already has handlers (e.g. if an earlier cell configured logging),
# so this call only takes effect when this cell runs first.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Base URL of the API. It is empty here and must be filled in before running
# this cell; every endpoint below is built as f"{BASE_URL}/...".
BASE_URL = ""
# Headers sent with every request in this cell; replace YOUR_TOKEN with a real
# bearer token.
HEADERS = {
    "Content-Type": "application/json",
    "Authorization": "Bearer YOUR_TOKEN"
}

logging.info("Logging info for section 31: Entering function get_json_from_file")
def get_json_from_file(file_path):
    """Load and return the parsed JSON content of ``file_path``.

    Args:
        file_path: Path to a UTF-8 encoded JSON file.

    Returns:
        The deserialized JSON document, as produced by ``json.load``.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        return json.load(f)

logging.info("Logging info for section 32: Entering function get_activity_by_id")
def get_activity_by_id(activities, act_id):
    """Return the first activity whose ``"id"`` equals ``act_id``.

    Args:
        activities: Iterable of activity dicts.
        act_id: Identifier to look for.

    Returns:
        The first matching activity dict, or ``None`` when nothing matches.
    """
    return next((a for a in activities if a.get("id") == act_id), None)

logging.info("Logging info for section 33: Entering function search_frontend_activity")
def search_frontend_activity(name):
    """Find the library activity whose name is the closest fuzzy match to ``name``.

    The first page (up to 1000 entries) of library activities is fetched and
    compared case-insensitively with ``difflib.get_close_matches`` using a
    cutoff of 0.6.

    Args:
        name: Activity name to search for. ``None`` or an empty string yields
            ``None`` without contacting the API.

    Returns:
        The first activity dict whose lower-cased name equals the best match,
        or ``None`` if the request fails or nothing is close enough.
    """
    if not name:
        # Nothing to match against; also avoids calling .lower() on None.
        return None

    response = requests.get(f"{BASE_URL}/concepts/activities/activities?page_number=1&page_size=1000", headers=HEADERS)
    if response.status_code != 200:
        logging.error("Could not retrieve activities from frontend")
        return None

    # Map lower-cased name -> first item carrying that name, so the best match
    # resolves to an item without a second scan. Items with a null name are
    # stored under "" and can never reach the cutoff for a non-empty query.
    by_name = {}
    for item in response.json().get("items") or []:
        by_name.setdefault((item.get("name") or "").lower(), item)

    match = get_close_matches(name.lower(), list(by_name), n=1, cutoff=0.6)
    return by_name[match[0]] if match else None

logging.info("Logging info for section 34: Entering function match_synonym_to_activity")
def match_synonym_to_activity(synonyms):
    """Return the first library activity whose name fuzzily matches a synonym.

    The first page (up to 1000 entries) of library activities is fetched; each
    activity name is compared case-insensitively against the synonyms with
    ``difflib.get_close_matches`` (cutoff 0.6).

    Args:
        synonyms: Iterable of synonym strings. ``None``, an empty iterable or
            only empty strings yields ``None`` without contacting the API.

    Returns:
        The first activity dict (in API order) that matches any synonym, or
        ``None`` if the request fails or nothing matches.
    """
    # Deduplicate and drop empty entries; sorting keeps the candidate order
    # deterministic.
    candidates = sorted({s.lower() for s in (synonyms or []) if s})
    if not candidates:
        return None

    response = requests.get(f"{BASE_URL}/concepts/activities/activities?page_number=1&page_size=1000", headers=HEADERS)
    if response.status_code != 200:
        logging.error("Failed to fetch frontend activities for synonym match")
        return None

    for item in response.json().get("items") or []:
        item_name = (item.get("name") or "").lower()
        if get_close_matches(item_name, candidates, n=1, cutoff=0.6):
            return item
    return None

logging.info("Logging info for section 35: Entering function get_posted_study_activity_uids")
def get_posted_study_activity_uids(study_uid):
    """Return the activity UIDs already attached to a study.

    Only the first page (up to 1000 entries) of the study's activities is read.

    Args:
        study_uid: UID of the study to query.

    Returns:
        A set of ``activity_uid`` values. The set is empty when the request
        does not return HTTP 200 (a warning is logged in that case).
    """
    endpoint = f"{BASE_URL}/studies/{study_uid}/study-activities?page_number=1&page_size=1000"
    response = requests.get(endpoint, headers=HEADERS)
    if response.status_code != 200:
        logging.warning(f"Could not fetch posted study activities for study {study_uid}")
        return set()

    items = response.json().get("items", [])
    return {item["activity_uid"] for item in items if "activity_uid" in item}

logging.info("Logging info for section 36: Entering function post_study_activity")
def post_study_activity(study_uid, group_uid, subgroup_uid, activity_uid, posted_uids):
    """Attach a library activity to a study unless it was already posted.

    Args:
        study_uid: UID of the target study.
        group_uid: Activity group UID sent with the request.
        subgroup_uid: Activity subgroup UID sent with the request.
        activity_uid: UID of the activity to attach.
        posted_uids: Mutable set of activity UIDs already attached.
            ``activity_uid`` is added to it after a successful POST.

    Returns:
        None. Failures are logged, not raised.
    """
    if activity_uid in posted_uids:
        logging.info(f"Skipping posting activity {activity_uid} as this is already posted.")
        return

    post_payload = {
        "soa_group_term_uid": "CTTerm_000067",
        "activity_uid": activity_uid,
        "activity_subgroup_uid": subgroup_uid,
        "activity_group_uid": group_uid,
        "activity_instance_uid": None
    }
    endpoint = f"{BASE_URL}/studies/{study_uid}/study-activities"
    response = requests.post(endpoint, json=post_payload, headers=HEADERS)
    if response.ok:
        logging.info(f"Posted activity {activity_uid} to study.")
        # Remember the UID so later calls in the same run skip it.
        posted_uids.add(activity_uid)
    else:
        logging.error(f"Failed to post activity {activity_uid}: {response.text}")

logging.info("Logging info for section 37: Entering function get_or_create_group")
def get_or_create_group(group_name):
    """Return the UID of the activity group for ``group_name``, creating it if needed.

    The text "grouping activity" is stripped from the lower-cased name. Names
    that then start with "tbd" use the upper-cased original as the target
    name; all others use the cleaned lower-case name. Existing groups (first
    page, up to 1000) are compared case-insensitively.

    Args:
        group_name: Raw group name or description.

    Returns:
        UID of the existing or newly created group.

    Raises:
        requests.HTTPError: If listing or creating groups fails.
    """
    clean_name = group_name.lower().replace("grouping activity", "").strip()
    target_name = group_name.upper() if clean_name.startswith("tbd") else clean_name
    wanted = target_name.lower().strip()

    response = requests.get(f"{BASE_URL}/concepts/activities/activity-groups?page_number=1&page_size=1000", headers=HEADERS)
    response.raise_for_status()

    for group in response.json().get("items") or []:
        if (group.get("name") or "").lower().strip() == wanted:
            group_uid = group.get("uid")
            logging.info(f"Found existing group: {target_name} with UID: {group_uid}")
            return group_uid

    # No existing group matched: create one and request its approval.
    payload = {
        "name": target_name,
        "name_sentence_case": clean_name.lower(),
        "definition": f"Auto-generated group for {clean_name}",
        "abbreviation": clean_name[:3].upper(),
        "library_name": "Requested"
    }

    response = requests.post(f"{BASE_URL}/concepts/activities/activity-groups", json=payload, headers=HEADERS)
    response.raise_for_status()
    group_uid = response.json().get("uid")
    logging.info(f"Created new group: {target_name} with UID: {group_uid}")

    approval = requests.post(f"{BASE_URL}/concepts/activities/activity-groups/{group_uid}/approvals?cascade=false", headers=HEADERS)
    if not approval.ok:
        # Approval stays best-effort, but a failure is now logged.
        logging.warning(f"Approval of group {group_uid} failed: {approval.text}")
    return group_uid




logging.info("Logging info for section 38: Entering function get_or_create_subgroup")
def get_or_create_subgroup(subgroup_name, group_uid):
    """Return the UID of the activity subgroup for ``subgroup_name``, creating it if needed.

    The name is normalised as for groups: "grouping activity" is stripped from
    the lower-cased name, and names that then start with "tbd" use the
    upper-cased original. Existing subgroups (first page, up to 1000) are
    matched by name only.

    Args:
        subgroup_name: Raw subgroup name or description.
        group_uid: UID of the parent group. Used when creating a subgroup;
            it is not checked when matching an existing one.

    Returns:
        UID of the existing or newly created subgroup.

    Raises:
        requests.HTTPError: If listing or creating subgroups fails.
    """
    clean_name = subgroup_name.lower().replace("grouping activity", "").strip()
    target_name = subgroup_name.upper() if clean_name.startswith("tbd") else clean_name
    wanted = target_name.lower().strip()

    response = requests.get(f"{BASE_URL}/concepts/activities/activity-sub-groups?page_number=1&page_size=1000", headers=HEADERS)
    response.raise_for_status()

    for sg in response.json().get("items") or []:
        name = (sg.get("name") or "").lower().strip()
        linked_groups = sg.get("activity_groups", [])
        logging.debug(f"Checking subgroup: {name} (UID: {sg.get('uid')}) linked to groups: {linked_groups}")

        # NOTE(review): the parent-group check (group_uid in linked_groups)
        # was disabled in the original code, so a same-named subgroup under a
        # different group is reused - confirm this is intended.
        if name == wanted:
            subgroup_uid = sg.get("uid")
            logging.info(f"Found existing subgroup: {target_name} with UID: {subgroup_uid} linked to group UID: {group_uid}")
            return subgroup_uid

    # No existing subgroup matched: create one under group_uid and request approval.
    payload = {
        "name": target_name,
        "name_sentence_case": clean_name.lower(),
        "definition": f"Auto-generated subgroup for {clean_name}",
        "abbreviation": clean_name[:3].upper(),
        "library_name": "Requested",
        "activity_groups": [group_uid]
    }

    response = requests.post(f"{BASE_URL}/concepts/activities/activity-sub-groups", json=payload, headers=HEADERS)
    response.raise_for_status()
    subgroup_uid = response.json().get("uid")
    logging.info(f"Created new subgroup: {target_name} with UID: {subgroup_uid}")

    approval = requests.post(f"{BASE_URL}/concepts/activities/activity-sub-groups/{subgroup_uid}/approvals?cascade=false", headers=HEADERS)
    if not approval.ok:
        # Approval stays best-effort, but a failure is now logged.
        logging.warning(f"Approval of subgroup {subgroup_uid} failed: {approval.text}")
    return subgroup_uid



logging.info("Logging info for section 39: Entering function create_activity_if_not_exist")
def create_activity_if_not_exist(name, label, group_uid, subgroup_uid, study_number):
    """Create an activity request in the "Requested" library and ask for its approval.

    Despite the name, no existence check is made here; callers are expected
    to have searched for an existing activity first.

    Args:
        name: Activity name; must be a non-blank string.
        label: Text stored as the activity definition.
        group_uid: UID of the activity group to link.
        subgroup_uid: UID of the activity subgroup to link.
        study_number: Study number quoted in the request rationale.

    Returns:
        UID of the newly created activity.

    Raises:
        ValueError: If ``name`` is missing or blank.
        requests.HTTPError: If the creation request fails.
    """
    if not isinstance(name, str) or not name.strip():
        # Fail before any request, with a clearer error than the
        # AttributeError that name.lower() would raise on None.
        raise ValueError("Activity name must be a non-empty string")

    payload = {
        "name": name,
        "name_sentence_case": name.lower(),
        "definition": label,
        "abbreviation": None,
        "library_name": "Requested",
        "activity_groupings": [
            {
                "activity_group_uid": group_uid,
                "activity_subgroup_uid": subgroup_uid
            }
        ],
        "synonyms": [],
        "request_rationale": f"Needed for study {study_number}",
        "is_request_final": False,
        "is_data_collected": False,
        "is_multiple_selection_allowed": False
    }
    response = requests.post(f"{BASE_URL}/concepts/activities/activities", json=payload, headers=HEADERS)
    response.raise_for_status()
    activity_uid = response.json().get("uid")
    logging.info(f"Created new activity: {name} (UID: {activity_uid})")

    approval = requests.post(f"{BASE_URL}/concepts/activities/activities/{activity_uid}/approvals?cascade=false", headers=HEADERS)
    if not approval.ok:
        # Approval stays best-effort, but a failure is now logged.
        logging.warning(f"Approval of activity {activity_uid} failed: {approval.text}")
    return activity_uid

                                
logging.info("Logging info for section 40: Entering function main")
def _post_matched_activity(study_uid, matched, posted_uids):
    """Post an existing library activity to the study using its first grouping."""
    groupings = matched.get("activity_groupings") or []
    if not groupings:
        # Without a group/subgroup the payload cannot be built; skip instead
        # of raising IndexError and aborting the whole import.
        logging.warning(f"Activity {matched.get('uid')} has no activity groupings; skipping.")
        return
    grouping = groupings[0]
    post_study_activity(study_uid, grouping["activity_group_uid"], grouping["activity_subgroup_uid"], matched["uid"], posted_uids)


def _post_biomedical_concept_matches(study_uid, bc_ids, biomedical_concepts, posted_uids):
    """Post the library activity matched by synonym for each referenced biomedical concept."""
    for bc_id in bc_ids:
        bc = next((b for b in biomedical_concepts if b.get("id") == bc_id), None)
        if not bc:
            continue
        match = match_synonym_to_activity(bc.get("synonyms") or [])
        if match:
            _post_matched_activity(study_uid, match, posted_uids)


def _post_by_label(study_uid, study_number, label, group_source, posted_uids):
    """Post the activity matching ``label``, creating it under ``group_source`` if none matches."""
    if not label:
        logging.warning("Activity without label, description or name; skipping.")
        return

    matched_act = search_frontend_activity(label)
    if matched_act:
        _post_matched_activity(study_uid, matched_act, posted_uids)
        return

    group_uid = get_or_create_group(group_source)
    subgroup_uid = get_or_create_subgroup(group_source, group_uid)
    new_uid = create_activity_if_not_exist(label, label, group_uid, subgroup_uid, study_number)
    post_study_activity(study_uid, group_uid, subgroup_uid, new_uid, posted_uids)


def main(file_path, study_uid, study_number):
    """Post the activities of a USDM study file to a study.

    Only the first study version is used. For every activity in every study design:

    * If its description contains "grouping activity", each child activity
      is processed, and the description names the group/subgroup for any
      child that has to be created.
    * Otherwise the activity itself is processed, and newly created
      activities go under a ``TBD_<study_number>`` group/subgroup.

    An activity with biomedical concept ids is matched through the concepts'
    synonyms. Without them it is matched by label and created when no match
    is found.

    Args:
        file_path: Path to the USDM JSON file.
        study_uid: UID of the study that receives the activities.
        study_number: Study number used in request rationales and TBD names.
    """
    usdm = get_json_from_file(file_path)
    study_version = usdm["study"]["versions"][0]
    study_designs = study_version.get("studyDesigns", [])
    # Biomedical concepts live on the version, so read them once.
    biomedical_concepts = study_version.get("biomedicalConcepts") or []
    posted_uids = get_posted_study_activity_uids(study_uid)

    for design in study_designs:
        activities = design.get("activities") or []

        for act in activities:
            name = act.get("name")
            # Label and description fall back to each other, then to the name.
            label = act.get("label") or act.get("description") or name
            description = act.get("description") or act.get("label") or name
            bc_ids = act.get("biomedicalConceptIds")

            if "grouping activity" in (description or "").lower():
                logging.info(f"Processing grouping activity: {description}")
                for child_id in act.get("childIds") or []:
                    child_act = get_activity_by_id(activities, child_id)
                    if not child_act:
                        logging.warning(f"Child ID {child_id} not found in activities")
                        continue
                    child_label = child_act.get("label") or child_act.get("description") or child_act.get("name")
                    child_bc_ids = child_act.get("biomedicalConceptIds")
                    if child_bc_ids:
                        _post_biomedical_concept_matches(study_uid, child_bc_ids, biomedical_concepts, posted_uids)
                    else:
                        _post_by_label(study_uid, study_number, child_label, description, posted_uids)
            elif bc_ids:
                _post_biomedical_concept_matches(study_uid, bc_ids, biomedical_concepts, posted_uids)
            else:
                _post_by_label(study_uid, study_number, label, f"TBD_{study_number}", posted_uids)

main("example.json", "Study_xxxxxx", "999-8807") #replace 999-8807 with study number in OSB
