## 1. Prepare the environment

### 1.1 Install SynthPops
   Clone the repository: https://github.com/synthpops/synthpops
   Then, from the repository root, install it in development mode: python setup.py develop

### 1.2 Add target location information
a. Go to the ./synthpops/synthpops folder of the SynthPops source tree \
b. Open defaults.py \
c. Manually add an entry for the target location to the default_data dictionary\
    (e.g., default_data =     {'australia': {
            'country_location': 'australia',
            'state_location': 'aus',
            'location': 'aus',
            'sheet_name': 'Australia',
            'nbrackets': 16,},
            ... 
            }
    )

## 2. Generate Target Region JSON File (Using Australia as Example)

In [31]:
"""
Example demonstrating how to create a location file for Australia using the synthpops data API and save it as a JSON file (if the data is complete).
"""
import synthpops as sp


def create_location_data():
    """
    Build the synthpops Location object describing the example "australia-aus-aus" location.

    Every value is hard-coded below; the provenance notice stored on the object
    states that the data is made up, so treat it as a template rather than as
    real statistics.

    The object is filled with: descriptive metadata, a 16-bin population age
    distribution, per-age employment and enrollment rates (ages 0-100),
    household size and household-head age tables, empty long-term care
    facility (LTCF) tables, school size/type tables and a workplace size table.

    Returns:
        sp.Location: The populated Location object, ready to be written out
                     with sp.save_location_to_filepath.
    """
    location_data = sp.Location()

    # Basic location information
    location_data.location_name = "australia-aus-aus"  # country-state-location format
    location_data.data_provenance_notices = ["This data was completely made up."]
    location_data.reference_links = ["n/a"]
    location_data.citations = ["n/a"]
    location_data.notes = ["The country in the southern hemisphere."]

    # Population age distribution (16 age groups)
    # Each row is [min_age, max_age, fraction of population]; the fractions sum to 1.0
    location_data.population_age_distributions = []
    location_data.population_age_distributions.append(sp.PopulationAgeDistribution())
    location_data.population_age_distributions[0].num_bins = 16
    location_data.population_age_distributions[0].distribution = [
        [0, 4, 0.066],    # Ages 0-4: 6.6% of population
        [5, 9, 0.064],    # Ages 5-9: 6.4% of population
        [10, 14, 0.064],  # Ages 10-14: 6.4% of population
        [15, 19, 0.06],   # Ages 15-19: 6.0% of population
        [20, 24, 0.061],  # Ages 20-24: 6.1% of population
        [25, 29, 0.068],  # Ages 25-29: 6.8% of population
        [30, 34, 0.073],  # Ages 30-34: 7.3% of population
        [35, 39, 0.072],  # Ages 35-39: 7.2% of population
        [40, 44, 0.064],  # Ages 40-44: 6.4% of population
        [45, 49, 0.065],  # Ages 45-49: 6.5% of population
        [50, 54, 0.061],  # Ages 50-54: 6.1% of population
        [55, 59, 0.06],   # Ages 55-59: 6.0% of population
        [60, 64, 0.056],  # Ages 60-64: 5.6% of population
        [65, 69, 0.049],  # Ages 65-69: 4.9% of population
        [70, 74, 0.043],  # Ages 70-74: 4.3% of population
        [75, 100, 0.074], # Ages 75+: 7.4% of population
    ]

    # Employment rates by age (age 0-100), one [age, rate] pair per single year of age
    # Children (0-14): 0% employment rate
    # Youth (15-19): 46.7% employment rate
    # Adults (20-64): varying employment rates peaking at 83.3%
    # Seniors (65+): 0% employment rate (retirement)
    # NOTE: the zero rates mix int (0) and float (0.0) literals; they are kept as-is
    location_data.employment_rates_by_age = [
        [0, 0.0], [1, 0.0], [2, 0.0], [3, 0], [4, 0], [5, 0], [6, 0], [7, 0], [8, 0], [9, 0],
        [10, 0], [11, 0], [12, 0], [13, 0], [14, 0],
        [15, 0.467], [16, 0.467], [17, 0.467], [18, 0.467], [19, 0.467],
        [20, 0.739], [21, 0.739], [22, 0.739], [23, 0.739], [24, 0.739],
        [25, 0.801], [26, 0.801], [27, 0.801], [28, 0.801], [29, 0.801],
        [30, 0.817], [31, 0.817], [32, 0.817], [33, 0.817], [34, 0.817],
        [35, 0.833], [36, 0.833], [37, 0.833], [38, 0.833], [39, 0.833],
        [40, 0.832], [41, 0.832], [42, 0.832], [43, 0.832], [44, 0.832],
        [45, 0.829], [46, 0.829], [47, 0.829], [48, 0.829], [49, 0.829],
        [50, 0.801], [51, 0.801], [52, 0.801], [53, 0.801], [54, 0.801],
        [55, 0.734], [56, 0.734], [57, 0.734], [58, 0.734], [59, 0.734],
        [60, 0.569], [61, 0.569], [62, 0.569], [63, 0.569], [64, 0.569],
        [65, 0.0], [66, 0.0], [67, 0.0], [68, 0.0], [69, 0.0],
        [70, 0.0], [71, 0.0], [72, 0.0], [73, 0.0], [74, 0.0],
        [75, 0.0], [76, 0.0], [77, 0.0], [78, 0.0], [79, 0.0],
        [80, 0.0], [81, 0.0], [82, 0.0], [83, 0.0], [84, 0.0],
        [85, 0.0], [86, 0.0], [87, 0.0], [88, 0.0], [89, 0.0],
        [90, 0.0], [91, 0.0], [92, 0.0], [93, 0.0], [94, 0.0],
        [95, 0.0], [96, 0.0], [97, 0.0], [98, 0.0], [99, 0.0], [100, 0.0],
    ]

    # School enrollment rates by age (age 0-100), one [age, rate] pair per single year of age
    # Infants (0-2): 0% enrollment
    # Early childhood (3-4): 65-82% enrollment
    # Primary/Secondary (5-17): 93-100% enrollment
    # Tertiary (18-24): declining from 71% to 39%
    # Adult education (25-65): low rates around 6-32%
    # Seniors (66+): 0% enrollment
    location_data.enrollment_rates_by_age = [
        [0, 0.0], [1, 0.0], [2, 0.0], [3, 0.65], [4, 0.82],
        [5, 0.97], [6, 1.0], [7, 0.99], [8, 1.0], [9, 1.0],
        [10, 1.0], [11, 1.0], [12, 1.0], [13, 1.0], [14, 1.0],
        [15, 1.0], [16, 0.99], [17, 0.93], [18, 0.71], [19, 0.68],
        [20, 0.68], [21, 0.63], [22, 0.54], [23, 0.46], [24, 0.39],
        [25, 0.32], [26, 0.28], [27, 0.25], [28, 0.22], [29, 0.21],
        [30, 0.16], [31, 0.16], [32, 0.16], [33, 0.16], [34, 0.16],
        [35, 0.13], [36, 0.13], [37, 0.13], [38, 0.13], [39, 0.13],
        [40, 0.06], [41, 0.06], [42, 0.06], [43, 0.06], [44, 0.06],
        [45, 0.06], [46, 0.06], [47, 0.06], [48, 0.06], [49, 0.06],
        [50, 0.06], [51, 0.06], [52, 0.06], [53, 0.06], [54, 0.06],
        [55, 0.06], [56, 0.06], [57, 0.06], [58, 0.06], [59, 0.06],
        [60, 0.06], [61, 0.06], [62, 0.06], [63, 0.06], [64, 0.06], [65, 0.06],
        [66, 0.0], [67, 0.0], [68, 0.0], [69, 0.0], [70, 0.0],
        [71, 0.0], [72, 0.0], [73, 0.0], [74, 0.0], [75, 0.0],
        [76, 0.0], [77, 0.0], [78, 0.0], [79, 0.0], [80, 0.0],
        [81, 0.0], [82, 0.0], [83, 0.0], [84, 0.0], [85, 0.0],
        [86, 0.0], [87, 0.0], [88, 0.0], [89, 0.0], [90, 0.0],
        [91, 0.0], [92, 0.0], [93, 0.0], [94, 0.0], [95, 0.0],
        [96, 0.0], [97, 0.0], [98, 0.0], [99, 0.0], [100, 0.0],
    ]

    # Overall household size distribution: [household size, fraction of households]
    # The fractions sum to 1.0
    location_data.household_size_distribution = [
        [1, 0.256],  # 25.6% single-person households
        [2, 0.335],  # 33.5% two-person households
        [3, 0.159],  # 15.9% three-person households
        [4, 0.154],  # 15.4% four-person households
        [5, 0.063],  # 6.3% five-person households
        [6, 0.033]   # 3.3% six-person households
    ]

    # Household head age brackets (17 age groups from 15-99), each [min_age, max_age]
    location_data.household_head_age_brackets = [
        [15, 19], [20, 24], [25, 29], [30, 34], [35, 39],
        [40, 44], [45, 49], [50, 54], [55, 59], [60, 64],
        [65, 69], [70, 74], [75, 79], [80, 84], [85, 89],
        [90, 94], [95, 99],
    ]

    # Household head age distribution by family size
    # One row per household size (1-6 people)
    # The first entry of each row is the household size; the 17 values after it
    # line up with the 17 household head age brackets defined above
    location_data.household_head_age_distribution_by_family_size = [
        # Single-person household
        [1, 10068, 72145, 104068, 90093, 79295, 94253, 106912, 106125, 
         96886, 100966, 97532, 108013, 83253, 85452, 64647, 20660, 3919],
        # Two-person households
        [2, 1539, 23950, 53685, 56607, 49603, 61094, 79923, 96736, 
         105028, 121537, 115099, 131243, 79927, 61624, 32333, 8766, 1417],
        # Three-person households
        [3, 510, 8030, 27060, 45668, 53944, 70564, 85047, 80689, 
         61935, 45518, 28543, 24816, 13364, 8649, 3914, 996, 160],
        # Four-person households
        [4, 128, 2139, 13076, 39484, 68134, 95425, 94718, 63032, 
         33778, 16707, 7460, 5209, 2319, 1393, 622, 159, 30],
        # Five-person households
        [5, 34, 427, 3181, 12961, 25574, 35903, 35676, 23086, 
         12143, 5227, 1826, 1076, 476, 269, 121, 36, 8],
        # Six-person households
        [6, 22, 182, 1050, 6541, 13484, 18584, 17864, 12700, 
         8472, 3094, 901, 410, 173, 99, 67, 21, 3]
    ]

    # Long-term care facility (LTCF) data - empty for this location
    location_data.ltcf_resident_to_staff_ratio_distribution = []
    location_data.ltcf_num_residents_distribution = []
    location_data.ltcf_num_staff_distribution = []
    location_data.ltcf_use_rate_distribution = []

    # School size brackets (number of students), each [min_size, max_size]
    location_data.school_size_brackets = [
        [1, 100],        # Small schools: 1-100 students
        [101, 500],      # Medium schools: 101-500 students
        [501, 2000],     # Large schools: 501-2000 students
        [2001, 10000]    # Very large schools: 2001-10000 students
    ]

    # Distribution of schools by size: one fraction per school size bracket above
    location_data.school_size_distribution = [
        0.623,  # 62.3% small schools
        0.305,  # 30.5% medium schools
        0.070,  # 7.0% large schools
        0.002   # 0.2% very large schools
    ]

    # School size distribution by school type (if available)
    # Every school type reuses the same four fractions as the overall distribution
    location_data.school_size_distribution_by_type = [
        sp.SchoolSizeDistributionByType(
            school_type="pk",  # Pre-kindergarten
            size_distribution=[0.623, 0.305, 0.07, 0.002]
        ),
        sp.SchoolSizeDistributionByType(
            school_type="es",  # Elementary school
            size_distribution=[0.623, 0.305, 0.07, 0.002]
        ),
        sp.SchoolSizeDistributionByType(
            school_type="ms",  # Middle school
            size_distribution=[0.623, 0.305, 0.07, 0.002]
        ),
        sp.SchoolSizeDistributionByType(
            school_type="hs",  # High school
            size_distribution=[0.623, 0.305, 0.07, 0.002]
        ),
        sp.SchoolSizeDistributionByType(
            school_type="uv",  # University
            size_distribution=[0.623, 0.305, 0.07, 0.002]
        )
    ]

    # School types by age ranges (if available); the ranges do not overlap and cover ages 3-100
    location_data.school_types_by_age = [
        sp.SchoolTypeByAge(school_type="pk", age_range=[3, 5]),     # Pre-K: ages 3-5
        sp.SchoolTypeByAge(school_type="es", age_range=[6, 10]),    # Elementary: ages 6-10
        sp.SchoolTypeByAge(school_type="ms", age_range=[11, 13]),   # Middle: ages 11-13
        sp.SchoolTypeByAge(school_type="hs", age_range=[14, 17]),   # High: ages 14-17
        sp.SchoolTypeByAge(school_type="uv", age_range=[18, 100])   # University: ages 18+
    ]

    # Workplace size distribution by number of personnel
    # Each row is [min_personnel, max_personnel, value]; the values here are
    # fractions summing to 1.0 even though the attribute name says "counts"
    location_data.workplace_size_counts_by_num_personnel = [
        [1, 4, 0.883],      # 88.3% very small workplaces (1-4 employees)
        [5, 19, 0.092],     # 9.2% small workplaces (5-19 employees)
        [20, 199, 0.023],   # 2.3% medium workplaces (20-199 employees)
        [200, 1000, 0.002]  # 0.2% large workplaces (200-1000 employees)
    ]

    return location_data


def main(output_location_filepath=None):
    """
    Create the example Australia location data and save it as a JSON file.

    Parameters:
        output_location_filepath (str, optional): Destination path of the JSON
            file. When omitted, the original hard-coded path is used so that
            existing calls to main() behave exactly as before. That default
            is an absolute path on one particular Windows machine, so pass a
            path explicitly when running anywhere else.

    Returns:
        None. The file is written by sp.save_location_to_filepath and progress
        messages are printed to stdout.
    """
    if output_location_filepath is None:
        # Backward-compatible default: the path the example was originally run with
        output_location_filepath = r"D:\HK_Backup\PAPER\SciDataCode\SynConNet\location\australia-aus-aus.json"

    print('Creating location data...')
    location_data = create_location_data()
    print('Location data creation completed.')

    print(f'Saving location data to [{output_location_filepath}]')
    sp.save_location_to_filepath(location_data, output_location_filepath)
    print('Location data saved successfully.')


if __name__ == "__main__":
    main()

Creating location data...
Location data creation completed.
Saving location data to [D:\HK_Backup\PAPER\SciDataCode\SynConNet\location\australia-aus-aus.json]
Location data saved successfully.


## 3. Generate Australia Contact Network (as an example)

In [44]:
import synthpops as sp
import sciris as sc
import numpy as np
import random
import pandas as pd
import copy
import os
import time
from collections import Counter
import warnings
warnings.filterwarnings('ignore')

### 3.1 Main functions for Contact network generation

In [None]:
#  Main function to generate initial contact network
def con_net_init(n, rand_seed, location, state_location, country_location, sheet_name, use_default):
    """
    Build a synthpops population and flatten it into attribute and contact tables.

    Parameters:
      n                : Total population
      rand_seed        : Random seed handed to synthpops
      location         : Location parameter
      state_location   : State parameter
      country_location : Country parameter
      sheet_name       : sheet name in MUestimates dataset
      use_default      : Whether to use default configuration

    Returns:
      PopAttr         : DataFrame with one row per person (every population
                        field except 'contacts') and a leading 'uid' column
      contact_network : DataFrame stacking the four contact layers, each row
                        labelled through the 'ctype' column
      pop             : Synthpops population object
    """
    # Mixing pattern applied inside each school type
    mixing_by_school_type = {
        'pk': 'age_and_class_clustered',
        'es': 'age_and_class_clustered',
        'ms': 'age_and_class_clustered',
        'hs': 'random',
        'uv': 'random'
    }

    # Keyword arguments forwarded to sp.Pop
    pars = sc.objdict(
        n=n,
        rand_seed=rand_seed,
        household_method='fixed_ages',
        smooth_ages=1,
        location=location,
        state_location=state_location,
        country_location=country_location,
        sheet_name=sheet_name,
        with_school_types=1,
        with_non_teaching_staff=True,
        use_default=use_default,
        do_make=True,
    )
    pars.school_mixing_type = mixing_by_school_type

    # Generate the synthetic population and tabulate it, one row per person
    pop = sp.Pop(**pars)
    people_frame = pd.DataFrame.from_dict(pop.to_dict()).transpose()

    # Keep every attribute except the nested contact structure
    attribute_columns = [name for name in people_frame.columns if name != 'contacts']
    PopAttr = people_frame[attribute_columns].copy()
    PopAttr.insert(0, 'uid', PopAttr.index)

    # Contact layers are read by position and labelled
    # 'h' (household), 's' (school), 'w' (workplace), 'c' (community)
    layers = pop.to_people().contacts
    tagged_layers = []
    for position, layer_code in enumerate(('h', 's', 'w', 'c')):
        layer_frame = pd.DataFrame(layers[position])
        layer_frame['ctype'] = layer_code
        tagged_layers.append(layer_frame)

    # Stack the layers and discard the beta column when one is present
    contact_network = pd.concat(tagged_layers, axis=0)
    if 'beta' in contact_network.columns:
        contact_network = contact_network.drop(columns='beta')

    print(Counter(contact_network['ctype']))
    return PopAttr, contact_network, pop

In [None]:
def con_net_gen(PopAttr, distribution, br, dr, n, rand_seed, country_location):
    """
    Label every individual with a demographic status and make households that
    contain an age-0 individual plausible (a female aged 18-49 as the mother).

    The input frame is NOT modified: all work happens on a copy, which is
    returned. Randomness comes from the global `random` and `numpy.random`
    generators; this function does not seed them itself.

    Steps, in order:
        1. Everyone starts as 'alive'; age-0 individuals become 'newborn'.
        2. For each age interval in `distribution`, floor(count * rate)
           individuals are picked at random and marked 'dead'.
        3. In households with an age-0 member whose 18-65 year olds all share
           one sex, one of them has their sex flipped; the same number of
           single-person households are flipped the other way to compensate.
        4. Adults in newborn households who cannot be the mother swap age and
           sex with randomly drawn females aged 18-49 from other households.
        5. One eligible female per newborn household is marked 'new mother'.
        6. Ages below 1 are replaced by a random value in {0.0, 0.1, ..., 0.9}.

    Parameters:
        PopAttr (DataFrame): Population attributes. The columns 'age', 'sex',
            'hhid' and 'uid' are read. Sex is treated as 0 = female, 1 = male.
        distribution (dict): Death rate per age interval. Keys look like
            "[00-04]": characters 1-2 and 4-5 are parsed as the inclusive
            lower and upper age bounds.
        br (float): Birth rate. Currently unused.
        dr (float): Death rate. Currently unused.
        n (int): Total population size. Currently unused.
        rand_seed (int): Random seed. Currently unused (no seeding happens here).
        country_location (str): Country information. Currently unused.

    Returns:
        DataFrame: Copy of the population attributes with the added 'status'
        column ('alive', 'dead', 'newborn' or 'new mother'), 'sex' as int and
        'age' as float, sorted by index.
    """

    # Work on a private copy so the caller's DataFrame is left untouched
    PopAttr = PopAttr.copy()

    # Initialize population status - all start as 'alive'
    PopAttr['status'] = 'alive'

    # Extract age groups and death probabilities from distribution
    age_groups = list(distribution.keys())
    death_prob = list(distribution.values())

    # Calculate people count in each age group and corresponding death count
    people_count = [
        len(PopAttr[PopAttr['age'].between(int(age_group[1:3]), int(age_group[4:6]))])
        for age_group in age_groups
    ]
    death_count = np.floor(np.array(people_count) * np.array(death_prob)).astype(int)

    # Mark age 0 individuals as newborns
    PopAttr.loc[PopAttr['age'] == 0, 'status'] = 'newborn'
    nb = len(PopAttr[PopAttr['age'] == 0])  # Count of newborns

    # Randomly select individuals to die based on age-specific death rates
    for age_group, count in zip(age_groups, death_count):
        age_min, age_max = int(age_group[1:3]), int(age_group[4:6])
        indices = PopAttr[PopAttr['age'].between(age_min, age_max)].index
        if len(indices) >= count:
            selected_indices = random.sample(indices.tolist(), count)
            PopAttr.loc[selected_indices, 'status'] = 'dead'

    # Process households with newborns - ensure gender diversity for reproduction
    hhid_list = list(set(PopAttr[PopAttr['age'] == 0]['hhid']))
    newborn_households = []
    gender_changes = {'female_to_male': 0, 'male_to_female': 0}

    for hhid in hhid_list:
        household = PopAttr[PopAttr['hhid'] == hhid].copy()
        adults = household[(household['age'] >= 18) & (household['age'] <= 65)]

        # Ensure gender diversity in households with only one gender among adults
        if len(adults) > 0 and len(set(adults['sex'])) == 1:
            index = random.choice(list(adults.index))
            if adults['sex'].iloc[0] == 0:  # All female -> change one to male
                household.loc[index, 'sex'] = 1
                gender_changes['female_to_male'] += 1
            else:  # All male -> change one to female
                household.loc[index, 'sex'] = 0
                gender_changes['male_to_female'] += 1

        newborn_households.append(household)

    # Combine all processed households
    newborn_combined = pd.concat(newborn_households) if newborn_households else pd.DataFrame()

    # Re-apply the 'newborn' status inside the processed households.
    # NOTE(review): sample_size equals the number of age-0 rows, so every
    # age-0 individual ends up 'newborn' again, including any that the death
    # step above marked 'dead'. Confirm that this is intended.
    if not newborn_combined.empty:
        age_zero_indices = newborn_combined[newborn_combined['age'] == 0].index
        sample_size = min(nb, len(age_zero_indices))
        if sample_size > 0:
            selected_newborns = np.random.choice(age_zero_indices, sample_size, replace=False)
            newborn_combined.loc[selected_newborns, 'status'] = 'newborn'

        # Write the processed households back into the working frame
        PopAttr.loc[newborn_combined.index] = newborn_combined

    # Balance gender changes by modifying single-person households
    single_households = PopAttr.groupby('hhid').filter(lambda x: len(x) == 1)

    # Change gender for single females (to balance male additions)
    single_females = single_households[single_households['sex'] == 0]
    if len(single_females) >= gender_changes['female_to_male']:
        indices_to_change = np.random.choice(
            single_females.index, gender_changes['female_to_male'], replace=False
        )
        PopAttr.loc[indices_to_change, 'sex'] = 1

    # Change gender for single males (to balance female additions).
    # single_households is a snapshot taken before the flips above, so the
    # individuals just turned male are not candidates here.
    single_males = single_households[single_households['sex'] == 1]
    if len(single_males) >= gender_changes['male_to_female']:
        indices_to_change = np.random.choice(
            single_males.index, gender_changes['male_to_female'], replace=False
        )
        PopAttr.loc[indices_to_change, 'sex'] = 0

    # Ensure sex column is integer type
    PopAttr['sex'] = PopAttr['sex'].astype(int)

    # Separate households with and without newborns. Explicit copies: both
    # frames are assigned into below and must not be treated as views.
    newborn_hhids = set(PopAttr[PopAttr['age'] == 0]['hhid'])
    hh_with_newborn = PopAttr[PopAttr['hhid'].isin(newborn_hhids)].copy()
    hh_without_newborn = PopAttr[~PopAttr['hhid'].isin(newborn_hhids)].copy()

    # Identify adults in newborn households who need age/gender adjustment
    # ("adult" here means every household member older than 0)
    adults_to_replace = []

    for hhid, household in hh_with_newborn.groupby('hhid'):
        adults = household[household['age'] > 0]

        if len(adults) == 1:  # Single parent household
            adult = adults.iloc[0]
            # Single female parent outside childbearing age, or any single male parent
            if (adult['sex'] == 0 and (adult['age'] < 18 or adult['age'] > 49)) or adult['sex'] == 1:
                adults_to_replace.append(adult['uid'])

        elif len(adults) == 2:  # Two-parent household
            genders = adults['sex'].tolist()
            if sum(genders) == 1:  # One male, one female
                female = adults[adults['sex'] == 0].iloc[0]
                if female['age'] < 18 or female['age'] > 49:  # Female outside childbearing age
                    adults_to_replace.append(female['uid'])
            elif sum(genders) == 2:  # Both male
                adults_to_replace.append(adults.iloc[0]['uid'])
            else:  # Both female - check childbearing age
                for _, adult in adults.iterrows():
                    if adult['age'] < 18 or adult['age'] > 49:
                        adults_to_replace.append(adult['uid'])
                        break
                else:  # If both females are in childbearing age, replace first one
                    adults_to_replace.append(adults.iloc[0]['uid'])

        else:  # Extended family (3+ adults); also reached when there are no adults
            females = adults[adults['sex'] == 0]
            if len(females) == 0:  # No females - replace first male
                adults_to_replace.append(adults.iloc[0]['uid'])
            else:  # Find female outside childbearing age
                for _, female in females.iterrows():
                    if female['age'] < 18 or female['age'] > 49:
                        adults_to_replace.append(female['uid'])
                        break

    # Find replacement adults (childbearing-age females) from households without newborns.
    # When there are fewer candidates than adults to replace, no swap happens at all.
    if adults_to_replace:
        replacement_candidates = hh_without_newborn.query("18 <= age <= 49 & sex == 0")
        if len(replacement_candidates) >= len(adults_to_replace):
            replacements = replacement_candidates.sample(n=len(adults_to_replace))

            # Store original attributes for swapping
            to_replace_data = hh_with_newborn[hh_with_newborn['uid'].isin(adults_to_replace)][['age', 'sex']].copy()
            replacement_data = replacements[['age', 'sex']].copy()

            # Perform the swap: only 'age' and 'sex' move, every other column
            # (including 'status') stays with its row
            hh_with_newborn.loc[hh_with_newborn['uid'].isin(adults_to_replace), ['age', 'sex']] = replacement_data.values
            hh_without_newborn.loc[hh_without_newborn['uid'].isin(replacements['uid']), ['age', 'sex']] = to_replace_data.values

    # Assign "new mother" status to eligible females in newborn households.
    # NOTE(review): this overwrites whatever status the chosen row had, so an
    # individual marked 'dead' earlier can become 'new mother'. Confirm intent.
    new_mothers = []
    for hhid, household in hh_with_newborn.groupby('hhid'):
        if 0 in household['age'].values and len(household) > 1:  # Has newborn and multiple members
            eligible_mothers = household[(household['age'].between(18, 49)) & (household['sex'] == 0)]
            if not eligible_mothers.empty:
                # Fixed random_state: the pick within a household is deterministic
                selected_mother = eligible_mothers.sample(n=1, random_state=1)
                new_mothers.append(int(selected_mother['uid'].iloc[0]))

    hh_with_newborn.loc[hh_with_newborn['uid'].isin(new_mothers), 'status'] = 'new mother'

    # Recombine all households and sort by original index
    PopAttr = pd.concat([hh_without_newborn, hh_with_newborn]).sort_index()

    # Assign fractional ages to newborns (0.0-0.9 years)
    PopAttr['age'] = PopAttr['age'].astype(float)
    newborn_mask = PopAttr['age'] < 1
    PopAttr.loc[newborn_mask, 'age'] = [
        float(random.randint(0, 9) / 10) for _ in range(newborn_mask.sum())
    ]

    # Print status distribution summary
    print("Population status distribution:", Counter(PopAttr['status']))

    return PopAttr

### 3.2 Generation function

In [45]:
def main_generation(location, state_location, country_location, sheet_name, distribution, br, dr, rand_seed, experiment_count, n, max_consecutive_failures=None):
    """
    Generate synthetic populations and contact networks for a given location
    and write each successful run to CSV.

    Seeds are tried one after another starting at `rand_seed`. A seed that
    raises any exception is reported and skipped; the loop continues until
    `experiment_count` runs have succeeded. Output goes to
    ./net/<sheet_name>/<sheet_name>_net_<k>.csv and
    ./pop/<sheet_name>/<sheet_name>_pop_<k>.csv, where k counts successful
    runs from 0.

    Parameters:
        location (str): Location code
        state_location (str): State location code
        country_location (str): Country location name
        sheet_name (str): Sheet name in input data; also used as the output
            folder and file-name prefix
        distribution (dict): Age-specific death rate distribution
        br (float): Birth rate
        dr (float): Death rate
        rand_seed (int): Random seed to start with
        experiment_count (int): Number of successful experiments to run
        n (int): Population size
        max_consecutive_failures (int, optional): Give up with RuntimeError
            after this many failed seeds in a row. The default None keeps the
            original behaviour of retrying without limit, which never
            terminates if every seed fails for the same reason (for example a
            missing location configuration).

    Raises:
        RuntimeError: Only when `max_consecutive_failures` is set and reached.
    """
    successful_experiments = 0
    consecutive_failures = 0
    location_name = sheet_name
    columns_to_remove = ['loc', 'ltcf_res', 'ltcf_staff', 'ltcfid', 'wpindcode']

    # Synthetic Population and Contact Network Generation
    while successful_experiments < experiment_count:
        # Pin the seed of this attempt before advancing, so every message
        # below reports the seed that was actually used
        current_seed = rand_seed
        rand_seed += 1
        start_time = time.time()
        try:
            # NOTE: use_default is always True here, as in the original call
            PopAttr, contact_network, _pop = con_net_init(
                n, current_seed, location, state_location, country_location, sheet_name, use_default=True
            )
            PopAttr = con_net_gen(PopAttr, distribution, br, dr, n, current_seed, country_location)
            print('rand_seed:', current_seed)

            # Create output folders if they don't exist
            country_folder_net = os.path.join('./net', location_name)
            country_folder_pop = os.path.join('./pop', location_name)
            os.makedirs(country_folder_net, exist_ok=True)
            os.makedirs(country_folder_pop, exist_ok=True)

            # Save contact network and population attributes to CSV files
            print(f"Saving {location_name} contact network and population attributes")
            net_csv = os.path.join(country_folder_net, f"{location_name}_net_{successful_experiments}.csv")
            pop_csv = os.path.join(country_folder_pop, f"{location_name}_pop_{successful_experiments}.csv")
            contact_network.to_csv(net_csv, index=False)
            PopAttr = PopAttr.drop(columns=[col for col in columns_to_remove if col in PopAttr.columns])
            PopAttr.to_csv(pop_csv, index=False)

            successful_experiments += 1
            consecutive_failures = 0

        except Exception as e:
            # Deliberate best effort: a failing seed is reported and the next one is tried
            print(f"rand_seed: {current_seed} encountered error: {e}, skipping")
            consecutive_failures += 1
            if max_consecutive_failures is not None and consecutive_failures >= max_consecutive_failures:
                raise RuntimeError(
                    f"Aborting after {consecutive_failures} consecutive failed seeds "
                    f"(last seed: {current_seed})"
                ) from e

        finally:
            elapsed_time = time.time() - start_time
            if elapsed_time > 30:
                # NOTE(review): despite the wording, nothing is skipped here; a
                # slow but successful run has already been saved above
                print(f"rand_seed: {current_seed} took {elapsed_time:.2f} seconds, exceeding 30 seconds, skipping")

    print(f"Completed {experiment_count} successful random experiments")

### 3.3 Generate a sample dataset

In [None]:
# Example run: generate 10 synthetic populations / contact networks of 10000
# people each for Australia, using consecutive random seeds starting at 0.
if __name__ == "__main__":

    # Location parameters
    location = 'aus' # location
    state_location = 'aus' # State
    country_location = 'australia' # Country
    sheet_name       = 'Australia' # sheet name in MUestimates dataset
    use_default      = 0 # whether to use default configuration (NOTE: not included in `pars` below)

    # Age-specific death rate distribution for Australia, keyed by "[lo-hi]"
    # age interval (this is the `distribution` argument, which con_net_gen
    # uses as death probabilities per age group)
    distribution = {
        "[00-04]": 0.006844,
        "[05-09]": 0.000533,
        "[10-14]": 0.000862,
        "[15-19]": 0.002951,
        "[20-24]": 0.004526,
        "[25-29]": 0.0056978,
        "[30-34]": 0.00695,
        "[35-39]": 0.009051,
        "[40-44]": 0.011234,
        "[45-49]": 0.017954,
        "[50-54]": 0.024644,
        "[55-59]": 0.036541,
        "[60-64]": 0.050775,
        "[65-69]": 0.065728,
        "[70-74]": 0.096237,
        "[75-79]": 0.114619,
        "[80-84]": 0.145152,
        "[85-89]": 0.166398,
        "[90-99]": 0.233261,
    }

    # Overall birth rate (br) and death rate (dr) for Australia
    br = 0.0115
    dr = 0.0063

    # Keyword arguments for main_generation
    pars = {
        'location': location,
        'state_location': state_location,
        'country_location': country_location,
        'sheet_name': sheet_name,
        'distribution': distribution,
        'br': br,
        'dr': dr,
        'rand_seed': 0,  # first seed to try
        'experiment_count': 10, # number of contact networks / population attribute tables to generate
        'n': 10000 # population size of contact network
    }

    main_generation(**pars)



Counter({'s': 32279, 'c': 30104, 'w': 26535, 'h': 11313})
Population status distribution: Counter({'alive': 9435, 'dead': 296, 'newborn': 137, 'new mother': 132})
rand_seed: 0
Saving Australia contact network and population attributes
Counter({'s': 31998, 'c': 30031, 'w': 28846, 'h': 11281})
Population status distribution: Counter({'alive': 9436, 'dead': 304, 'newborn': 132, 'new mother': 128})
rand_seed: 1
Saving Australia contact network and population attributes
Counter({'s': 33298, 'c': 30077, 'w': 24577, 'h': 11288})
Population status distribution: Counter({'alive': 9409, 'dead': 298, 'newborn': 147, 'new mother': 146})
rand_seed: 2
Saving Australia contact network and population attributes
Counter({'s': 32358, 'c': 29973, 'w': 29401, 'h': 11309})
Population status distribution: Counter({'alive': 9432, 'dead': 297, 'newborn': 137, 'new mother': 134})
rand_seed: 3
Saving Australia contact network and population attributes
Counter({'s': 31863, 'c': 29776, 'w': 26415, 'h': 11315})
Po