### TODO
- The special cases we added to `info_grupos` can now be moved back into `frac_vars.xlsx`, since they are already excluded through `cols_to_avoid`.
- The `HelperFunctions` methods could raise errors or warnings to help us identify null values, mismatched variables, and similar problems.

In [93]:
import copy
import datetime as dt
import importlib # needed so that we can reload packages
import matplotlib.pyplot as plt
import os, os.path
import numpy as np
import pandas as pd
import pathlib
import sys
import time
import pickle
from typing import Union
import warnings
from datetime import datetime
warnings.filterwarnings("ignore")
from info_grupos import empirical_vars_to_avoid, frac_vars_special_cases_list
from genera_muestra import GenerateLHS
from utils import HelperFunctions

##  IMPORT SISEPUEDE EXAMPLES AND TRANSFORMERS

from sisepuede.manager.sisepuede_examples import SISEPUEDEExamples
from sisepuede.manager.sisepuede_file_structure import SISEPUEDEFileStructure
import sisepuede.core.support_classes as sc
import sisepuede.transformers as trf
import sisepuede.utilities._plotting as spu
import sisepuede.utilities._toolbox as sf
import sisepuede as si

In [94]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [95]:
# Region passed to SISEPUEDE through `regions=[target_country]`.
target_country = 'croatia'
# Row of the LHS sampling matrix applied in this run; also used in the names of the exported CSV files.
experiment_id = 1


In [96]:

# Every project directory is resolved relative to the notebook's working directory.
FILE_PATH = os.getcwd()


def build_path(PATH):
    """Join the components in ``PATH`` and return the resulting absolute path."""
    return os.path.abspath(os.path.join(*PATH))


# Input data and top-level output locations.
DATA_PATH = build_path([FILE_PATH, "..", "data"])
OUTPUT_PATH = build_path([FILE_PATH, "..", "output"])
REAL_DATA_FILE_PATH = build_path([DATA_PATH, "real_data.csv"])

# Directory handed to the SISEPUEDE strategy/transformation setup.
SSP_OUTPUT_PATH = build_path([OUTPUT_PATH, "ssp"])

# Per-experiment stressed inputs and simulation outputs.
SALIDAS_EXPERIMENTOS_PATH = build_path([OUTPUT_PATH, "experiments"])
INPUTS_ESTRESADOS_PATH = build_path([SALIDAS_EXPERIMENTOS_PATH, "sim_inputs"])
OUTPUTS_ESTRESADOS_PATH = build_path([SALIDAS_EXPERIMENTOS_PATH, "sim_outputs"])

helper_functions = HelperFunctions()

# Make sure both experiment directories exist before anything is written to them.
for experiment_dir in (INPUTS_ESTRESADOS_PATH, OUTPUTS_ESTRESADOS_PATH):
    helper_functions.ensure_directory_exists(experiment_dir)

Directory already exists: /home/tony-ubuntu/decision_sciences/lhs_ssp_sampling/output/experiments/sim_inputs
Directory already exists: /home/tony-ubuntu/decision_sciences/lhs_ssp_sampling/output/experiments/sim_outputs


In [97]:
### Load the example data (original note: Costa Rica example data)

examples = SISEPUEDEExamples()
cr = examples("input_data_frame")

In [98]:
# Read the real input data from REAL_DATA_FILE_PATH and preview the first rows.
df_input = pd.read_csv(REAL_DATA_FILE_PATH)
df_input.head()

Unnamed: 0,region,iso_code3,period,area_gnrl_country_ha,avgload_trns_freight_tonne_per_vehicle_aviation,avgload_trns_freight_tonne_per_vehicle_rail_freight,avgload_trns_freight_tonne_per_vehicle_road_heavy_freight,avgload_trns_freight_tonne_per_vehicle_water_borne,avgmass_lvst_animal_buffalo_kg,avgmass_lvst_animal_cattle_dairy_kg,...,yf_agrc_fruits_tonne_ha,yf_agrc_herbs_and_other_perennial_crops_tonne_ha,yf_agrc_nuts_tonne_ha,yf_agrc_other_annual_tonne_ha,yf_agrc_other_woody_perennial_tonne_ha,yf_agrc_pulses_tonne_ha,yf_agrc_rice_tonne_ha,yf_agrc_sugar_cane_tonne_ha,yf_agrc_tubers_tonne_ha,yf_agrc_vegetables_and_vines_tonne_ha
0,croatia,HRV,0,8807000,70,2923,31.751466,6468,315,508,...,5.546667,28.8742,0.602367,1.930675,0.5205,2.638183,0,0,35.7648,21.067738
1,croatia,HRV,1,8807000,70,2923,31.751466,6468,315,508,...,5.555383,29.6558,0.4799,2.30405,0.7342,3.256933,0,0,47.5766,20.412554
2,croatia,HRV,2,8807000,70,2923,31.751466,6468,315,508,...,4.304906,30.039,0.3604,2.07595,0.8615,3.199083,0,0,41.0978,22.329531
3,croatia,HRV,3,8807000,70,2923,31.751466,6468,315,508,...,6.272229,30.039,0.1603,2.662133,0.9852,3.26668,0,0,37.42445,21.5058
4,croatia,HRV,4,8807000,70,2923,31.751466,6468,315,508,...,5.878853,30.039,0.196325,2.543567,0.9045,4.06804,0,0,39.8149,21.874247


In [99]:
# Bring the raw data into the column layout used by the example frame `cr`.
# NOTE(review): add_missing_cols presumably adds the columns that exist in `cr` but not in
# the raw data — confirm against HelperFunctions.
df_input = df_input.rename(columns={'period': 'time_period'})
df_input = helper_functions.add_missing_cols(cr, df_input.copy())
# errors='ignore' keeps the cell re-runnable: on a second execution `iso_code3` is already
# gone and a plain drop would raise KeyError.
df_input = df_input.drop(columns='iso_code3', errors='ignore')

In [100]:
df_input.head()

Unnamed: 0,region,time_period,area_gnrl_country_ha,avgload_trns_freight_tonne_per_vehicle_aviation,avgload_trns_freight_tonne_per_vehicle_rail_freight,avgload_trns_freight_tonne_per_vehicle_road_heavy_freight,avgload_trns_freight_tonne_per_vehicle_water_borne,avgmass_lvst_animal_buffalo_kg,avgmass_lvst_animal_cattle_dairy_kg,avgmass_lvst_animal_cattle_nondairy_kg,...,nemomod_entc_input_activity_ratio_fuel_production_fp_hydrogen_electrolysis_water,nemomod_entc_input_activity_ratio_fuel_production_fp_hydrogen_reformation_ccs_electricity,energydensity_gravimetric_enfu_gj_per_tonne_fuel_ammonia,energydensity_gravimetric_enfu_gj_per_tonne_fuel_water,frac_trns_fuelmix_water_borne_ammonia,nemomod_entc_output_activity_ratio_fuel_production_fp_ammonia_production_ammonia,nemomod_entc_output_activity_ratio_fuel_production_fp_hydrogen_reformation_ccs_hydrogen,nemomod_entc_frac_min_share_production_fp_hydrogen_reformation_ccs,nemomod_entc_input_activity_ratio_fuel_production_fp_hydrogen_reformation_ccs_natural_gas,nemomod_entc_input_activity_ratio_fuel_production_fp_hydrogen_reformation_ccs_oil
0,croatia,0,8807000,70,2923,31.751466,6468,315,508,303,...,4e-06,0,18.6,5e-05,0.0,1,1,0.0,1.315,0.0
1,croatia,1,8807000,70,2923,31.751466,6468,315,508,303,...,4e-06,0,18.6,5e-05,0.0,1,1,0.0,1.315,0.0
2,croatia,2,8807000,70,2923,31.751466,6468,315,508,303,...,4e-06,0,18.6,5e-05,0.0,1,1,0.0,1.315,0.0
3,croatia,3,8807000,70,2923,31.751466,6468,315,508,303,...,4e-06,0,18.6,5e-05,0.0,1,1,0.0,1.315,0.0
4,croatia,4,8807000,70,2923,31.751466,6468,315,508,303,...,4e-06,0,18.6,5e-05,0.0,1,1,0.0,1.315,0.0


In [101]:
# Sanity check: `cr` and `df_input` must have exactly the same columns.
# Both "Columns in df1 but not in df2" and "Columns in df2 but not in df1" must be empty before continuing.
helper_functions.compare_dfs(cr, df_input)

Columns in df1 but not in df2: set()
Columns in df2 but not in df1: set()
Columns shared in both df1 and df2: {'frac_lsmm_n_loss_leaching_poultry_manure', 'frac_trww_n_removed_untreated_with_sewerage', 'avgmass_lvst_animal_chickens_kg', 'physparam_lvst_bo_cattle_dairy_kg_ch4_kg_manure', 'frac_lvst_livestock_demand_imported_horses', 'elecfuelefficiency_trns_water_borne_km_per_kwh', 'prodinit_ippu_recycled_glass_tonne', 'ef_ippu_tonne_ch2f2_per_tonne_production_electronics', 'nemomod_entc_total_annual_min_capacity_investment_pp_gas_ccs_gw', 'nemomod_entc_input_activity_ratio_fuel_production_me_natural_gas_diesel', 'ef_ippu_tonne_c3h3f5_per_mmm_gdp_product_use_ods_refrigeration', 'consumpinit_inen_energy_tj_per_tonne_production_mining', 'frac_trns_fuelmix_road_heavy_regional_diesel', 'fuelefficiency_trns_road_light_hydrocarbon_gas_liquids_km_per_litre', 'frac_agrc_other_annual_cl1_tropical', 'ef_fgtv_production_flaring_tonne_co2_per_m3_fuel_natural_gas', 'nemomod_entc_capital_cost_pp_coal

In [102]:
# Checking if there are any columns with null values in it
helper_functions.get_cols_with_nans(df_input)

[]


[]

In [103]:
# Columns that contain the value -999 in at least one row (note `.any()`, despite the
# "all" in the variable name). These columns are excluded from stressing further below.
# NOTE(review): -999 is presumably a sentinel value — confirm.
columns_all_999 = df_input.columns[(df_input == -999).any()].tolist()
columns_all_999

['frac_entc_max_elec_production_increase_to_satisfy_msp_pp_biogas',
 'frac_entc_max_elec_production_increase_to_satisfy_msp_pp_biomass',
 'frac_entc_max_elec_production_increase_to_satisfy_msp_pp_coal',
 'frac_entc_max_elec_production_increase_to_satisfy_msp_pp_coal_ccs',
 'frac_entc_max_elec_production_increase_to_satisfy_msp_pp_gas',
 'frac_entc_max_elec_production_increase_to_satisfy_msp_pp_gas_ccs',
 'frac_entc_max_elec_production_increase_to_satisfy_msp_pp_geothermal',
 'frac_entc_max_elec_production_increase_to_satisfy_msp_pp_hydropower',
 'frac_entc_max_elec_production_increase_to_satisfy_msp_pp_nuclear',
 'frac_entc_max_elec_production_increase_to_satisfy_msp_pp_ocean',
 'frac_entc_max_elec_production_increase_to_satisfy_msp_pp_oil',
 'frac_entc_max_elec_production_increase_to_satisfy_msp_pp_solar',
 'frac_entc_max_elec_production_increase_to_satisfy_msp_pp_waste_incineration',
 'frac_entc_max_elec_production_increase_to_satisfy_msp_pp_wind',
 'limit_gnrl_annual_emissions_mt_ch

In [104]:
empirical_vars_to_avoid

['area_gnrl_country_ha',
 'gdp_mmm_usd',
 'occrateinit_gnrl_occupancy',
 'population_gnrl_rural',
 'population_gnrl_urban',
 'lndu_reallocation_factor']

In [105]:
df_input['lndu_reallocation_factor'].head()

0    0
1    0
2    0
3    0
4    0
Name: lndu_reallocation_factor, dtype: int64

In [106]:
# Columns excluded from stressing: the land-use `pij*` columns, the frac special cases,
# the columns containing -999, and the empirical variables listed in info_grupos.
pij_cols = list(filter(lambda name: name.startswith('pij'), df_input.columns))
cols_to_avoid = [
    *pij_cols,
    *frac_vars_special_cases_list,
    *columns_all_999,
    *empirical_vars_to_avoid,
]
cols_to_stress = helper_functions.get_indicators_col_names(
    df_input,
    cols_with_issue=cols_to_avoid,
)

In [107]:
df_input['ef_lndu_conv_croplands_to_forests_secondary_gg_co2_ha'].head()

0    0
1    0
2    0
3    0
4    0
Name: ef_lndu_conv_croplands_to_forests_secondary_gg_co2_ha, dtype: int64

In [108]:
# # Add a small epsilon to variables that are full of zeros but not frac_ columns
# epsilon = 1e-6
# non_frac_cols_to_stress = [col for col in cols_to_stress if not col.startswith('frac_')]
# for col in non_frac_cols_to_stress:
#     if (df_input[col] == 0).all():  # Check if the column is full of zeros
#         df_input[col] += epsilon  # Add epsilon to the entire column

In [109]:
df_input['ef_lndu_conv_croplands_to_forests_secondary_gg_co2_ha'].head()

0    0
1    0
2    0
3    0
4    0
Name: ef_lndu_conv_croplands_to_forests_secondary_gg_co2_ha, dtype: int64

In [110]:
# Upper bound passed to GenerateLHS
u_bound = 2

# Number of sample vectors that GenerateLHS will create
n_arrays = 100
sampling_file_path = os.path.join('sampling_files', f'sample_scaled_{n_arrays}_{u_bound}.pickle')

# Generate the sampling matrix only when no cached file exists.
# NOTE(review): this assumes GenerateLHS.generate_sample() writes to `sampling_file_path` — confirm.
if not os.path.exists(sampling_file_path):
    generate_sample = GenerateLHS(n_arrays, n_var=len(cols_to_stress), u_bound=u_bound)
    generate_sample.generate_sample()

# Load the sampling matrix.
# NOTE(review): pickle.load can execute arbitrary code; only load files generated locally.
with open(sampling_file_path, 'rb') as handle:
    sample_scaled = pickle.load(handle)

# The cache file name only encodes n_arrays and u_bound, so a file generated for a different
# set of columns would be reused and only fail later with an opaque broadcasting error.
# Fail here with an actionable message instead.
sample_shape = np.shape(sample_scaled)
if len(sample_shape) != 2 or sample_shape[1] != len(cols_to_stress):
    raise ValueError(
        f"Cached sampling matrix {sampling_file_path!r} has shape {sample_shape}, "
        f"but {len(cols_to_stress)} columns are selected for stressing. "
        "Delete the file so it is regenerated."
    )

In [111]:
# Sampling vector for this experiment: row `experiment_id` of the sampling matrix.
lhs_sampling_array = sample_scaled[experiment_id]

In [112]:
print(lhs_sampling_array)
print(lhs_sampling_array.shape)

[0.68131578 0.56667986 1.26215516 ... 1.01133995 0.57521847 1.51992531]
(2033,)


In [113]:
# # Plotting the histogram
# plt.figure(figsize=(8, 5))
# plt.hist(sample_scaled[1], bins=30, edgecolor='black', alpha=0.7)
# plt.title('Histogram of Sampled Random Values')
# plt.xlabel('Value')
# plt.ylabel('Frequency')
# plt.grid(axis='y', alpha=0.75)
# plt.show()

In [114]:
# Start from a copy of the inputs; the stressed (scaled) values are written into it below.
stressed_df = df_input.copy()

In [115]:
# frac_ columns that are displayed before stressing, after stressing and after normalization
# so their values can be compared by eye.
cols_to_review = [
 'frac_enfu_minimum_fuel_to_electricity_fuel_biogas',
 'frac_enfu_minimum_fuel_to_electricity_fuel_waste',
 'frac_inen_energy_plastic_coal',
 'frac_inen_energy_plastic_coke',
 'frac_inen_energy_plastic_diesel',
 'frac_inen_energy_plastic_electricity',
 'frac_inen_energy_plastic_furnace_gas',
 'frac_inen_energy_plastic_gasoline',
 'frac_inen_energy_plastic_hydrocarbon_gas_liquids',
 'frac_inen_energy_plastic_hydrogen',
 'frac_inen_energy_plastic_kerosene',
 'frac_inen_energy_plastic_natural_gas',
 'frac_inen_energy_plastic_oil',
 'frac_inen_energy_plastic_solar',
 'frac_inen_energy_plastic_solid_biomass',
 'frac_inen_energy_recycled_plastic_coal',
 'frac_inen_energy_recycled_plastic_coke',
 'frac_inen_energy_recycled_plastic_diesel',
 'frac_inen_energy_recycled_plastic_electricity',
 'frac_inen_energy_recycled_plastic_furnace_gas',
 'frac_inen_energy_recycled_plastic_gasoline',
 'frac_inen_energy_recycled_plastic_hydrocarbon_gas_liquids',
 'frac_inen_energy_recycled_plastic_hydrogen',
 'frac_inen_energy_recycled_plastic_kerosene',
 'frac_inen_energy_recycled_plastic_natural_gas',
 'frac_inen_energy_recycled_plastic_oil',
 'frac_inen_energy_recycled_plastic_solar',
 'frac_inen_energy_recycled_plastic_solid_biomass',
 'frac_ippu_production_with_co2_capture_cement',
 'frac_ippu_production_with_co2_capture_chemicals',
 'frac_ippu_production_with_co2_capture_glass',
 'frac_ippu_production_with_co2_capture_lime_and_carbonite',
 'frac_ippu_production_with_co2_capture_metals',
 'frac_ippu_production_with_co2_capture_plastic',
 'frac_trww_n_removed_untreated_no_sewerage',
 'frac_trww_n_removed_untreated_with_sewerage',
 'frac_trww_p_removed_untreated_no_sewerage',
 'frac_trww_p_removed_untreated_with_sewerage',
 'frac_trww_tow_removed_untreated_no_sewerage',
 'frac_trww_tow_removed_untreated_with_sewerage']

In [116]:
stressed_df[cols_to_review].head()

Unnamed: 0,frac_enfu_minimum_fuel_to_electricity_fuel_biogas,frac_enfu_minimum_fuel_to_electricity_fuel_waste,frac_inen_energy_plastic_coal,frac_inen_energy_plastic_coke,frac_inen_energy_plastic_diesel,frac_inen_energy_plastic_electricity,frac_inen_energy_plastic_furnace_gas,frac_inen_energy_plastic_gasoline,frac_inen_energy_plastic_hydrocarbon_gas_liquids,frac_inen_energy_plastic_hydrogen,...,frac_ippu_production_with_co2_capture_glass,frac_ippu_production_with_co2_capture_lime_and_carbonite,frac_ippu_production_with_co2_capture_metals,frac_ippu_production_with_co2_capture_plastic,frac_trww_n_removed_untreated_no_sewerage,frac_trww_n_removed_untreated_with_sewerage,frac_trww_p_removed_untreated_no_sewerage,frac_trww_p_removed_untreated_with_sewerage,frac_trww_tow_removed_untreated_no_sewerage,frac_trww_tow_removed_untreated_with_sewerage
0,0.0,0.0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0.0,0.0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0.01,0.01,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0.02,0.02,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0.03,0.03,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [117]:
stressed_df[cols_to_review].sum(axis=1)

0     0.00
1     0.00
2     0.02
3     0.04
4     0.06
5     0.08
6     0.10
7     0.10
8     0.10
9     0.10
10    0.10
11    0.10
12    0.10
13    0.10
14    0.10
15    0.10
16    0.10
17    0.10
18    0.10
19    0.10
20    0.10
21    0.10
22    0.10
23    0.10
24    0.10
25    0.10
26    0.10
27    0.10
28    0.10
29    0.10
30    0.10
31    0.10
32    0.10
33    0.10
34    0.10
35    0.10
dtype: float64

In [118]:

# Multiply every column selected for stressing by its sampled factor for this experiment.
scaling_factors = sample_scaled[experiment_id]
stressed_values = df_input[cols_to_stress].mul(scaling_factors)
stressed_df[cols_to_stress] = stressed_values.to_numpy()

In [119]:
stressed_df[cols_to_review].head()

Unnamed: 0,frac_enfu_minimum_fuel_to_electricity_fuel_biogas,frac_enfu_minimum_fuel_to_electricity_fuel_waste,frac_inen_energy_plastic_coal,frac_inen_energy_plastic_coke,frac_inen_energy_plastic_diesel,frac_inen_energy_plastic_electricity,frac_inen_energy_plastic_furnace_gas,frac_inen_energy_plastic_gasoline,frac_inen_energy_plastic_hydrocarbon_gas_liquids,frac_inen_energy_plastic_hydrogen,...,frac_ippu_production_with_co2_capture_glass,frac_ippu_production_with_co2_capture_lime_and_carbonite,frac_ippu_production_with_co2_capture_metals,frac_ippu_production_with_co2_capture_plastic,frac_trww_n_removed_untreated_no_sewerage,frac_trww_n_removed_untreated_with_sewerage,frac_trww_p_removed_untreated_no_sewerage,frac_trww_p_removed_untreated_with_sewerage,frac_trww_tow_removed_untreated_no_sewerage,frac_trww_tow_removed_untreated_with_sewerage
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.016366,0.001487,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.032732,0.002974,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.049098,0.004461,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [120]:
stressed_df['lndu_reallocation_factor'].head()

0    0
1    0
2    0
3    0
4    0
Name: lndu_reallocation_factor, dtype: int64

In [121]:
stressed_df['ef_lndu_conv_croplands_to_forests_secondary_gg_co2_ha'].head()

0    0.0
1    0.0
2    0.0
3    0.0
4    0.0
Name: ef_lndu_conv_croplands_to_forests_secondary_gg_co2_ha, dtype: float64

In [122]:
stressed_df['frac_agrc_bevs_and_spices_cl1_temperate'].head()

0    0.722487
1    0.722487
2    0.722487
3    0.722487
4    0.722487
Name: frac_agrc_bevs_and_spices_cl1_temperate, dtype: float64

In [123]:
stressed_df['frac_agrc_bevs_and_spices_cl1_tropical'].head()

0    0.0
1    0.0
2    0.0
3    0.0
4    0.0
Name: frac_agrc_bevs_and_spices_cl1_tropical, dtype: float64

In [124]:
# Load the fraction variables and their group prefixes; the prefixes define the groups
# that are normalized further below.
df_frac_vars = pd.read_excel('frac_vars.xlsx', sheet_name='frac_vars_no_special_cases')
df_frac_vars.head()

Unnamed: 0,frac_var_name,frac_var_name_prefix
0,frac_agrc_bevs_and_spices_cl1_temperate,frac_agrc_bevs_and_spices_cl1
1,frac_agrc_bevs_and_spices_cl1_tropical,frac_agrc_bevs_and_spices_cl1
2,frac_agrc_bevs_and_spices_cl2_dry,frac_agrc_bevs_and_spices_cl2
3,frac_agrc_bevs_and_spices_cl2_wet,frac_agrc_bevs_and_spices_cl2
4,frac_agrc_cereals_cl1_temperate,frac_agrc_cereals_cl1


In [125]:
# Unique group prefixes; the columns matching one prefix are normalized together.
need_norm_prefix = df_frac_vars.frac_var_name_prefix.unique()
random_scale = 1e-2  # Upper bound of the uniform noise written into zero-sum rows
epsilon = 1e-6  # Rows whose group sum is below this value are treated as zero-sum

In [126]:
# Seeded generator so the noise written into zero-sum groups is reproducible for a given experiment.
noise_rng = np.random.default_rng(experiment_id)

# Set for constant-time membership tests in the skip check below.
cols_to_avoid_lookup = set(cols_to_avoid)


def _softmax_rows(frame):
    """Return ``frame`` with a softmax applied independently to every row (rows sum to 1)."""
    exp_frame = np.exp(frame)
    return exp_frame.div(exp_frame.sum(axis=1), axis=0)


# NOTE(review): softmax makes every group sum to 1 but does not preserve the relative
# proportions of the inputs (the displayed 0.72 / 0.00 pair becomes roughly 0.67 / 0.33).
# If the goal is only a unit sum, dividing by the row sum would keep the proportions —
# confirm which behaviour is intended before changing it.
for subgroup in need_norm_prefix:
    # NOTE(review): this is a substring match, so a prefix also captures any column that
    # merely contains it; verify that no prefix is contained in another group's column names.
    subgroup_cols = [col for col in stressed_df.columns if subgroup in col]

    # Nothing to normalize when the prefix matches no column.
    if not subgroup_cols:
        continue

    # Skip normalization for groups that contain a column in cols_to_avoid
    if any(col in cols_to_avoid_lookup for col in subgroup_cols):
        continue

    # Rows where the group sums to (almost) zero
    group_sum = stressed_df[subgroup_cols].sum(axis=1)
    is_zero_sum = group_sum < epsilon

    # Replace zero-sum rows with small random values so the group is not exactly uniform
    if is_zero_sum.any():
        noise = noise_rng.uniform(
            0,
            random_scale,
            size=(int(is_zero_sum.sum()), len(subgroup_cols)),
        )
        stressed_df.loc[is_zero_sum, subgroup_cols] = noise

    # Apply softmax normalization to the whole group at once
    stressed_df[subgroup_cols] = _softmax_rows(stressed_df[subgroup_cols])

# Special case for ce_problematic
ce_problematic = [
    'frac_waso_biogas_food',
    'frac_waso_biogas_sludge',
    'frac_waso_biogas_yard',
    'frac_waso_compost_food',
    'frac_waso_compost_methane_flared',
    'frac_waso_compost_sludge',
    'frac_waso_compost_yard'
]

# Apply softmax normalization for ce_problematic.
# NOTE(review): all seven columns are treated as a single group here, so each row sums
# to 1 across all of them — confirm that this grouping is intended.
stressed_df[ce_problematic] = _softmax_rows(stressed_df[ce_problematic])

In [127]:
print(stressed_df['frac_agrc_bevs_and_spices_cl1_temperate'].head())
print(stressed_df['frac_agrc_bevs_and_spices_cl1_tropical'].head())

0    0.673154
1    0.673154
2    0.673154
3    0.673154
4    0.673154
Name: frac_agrc_bevs_and_spices_cl1_temperate, dtype: float64
0    0.326846
1    0.326846
2    0.326846
3    0.326846
4    0.326846
Name: frac_agrc_bevs_and_spices_cl1_tropical, dtype: float64


In [128]:
stressed_df['frac_agrc_bevs_and_spices_cl1_tropical'].head() + stressed_df['frac_agrc_bevs_and_spices_cl1_temperate'].head()

0    1.0
1    1.0
2    1.0
3    1.0
4    1.0
dtype: float64

####################

In [131]:
stressed_df[cols_to_review].head()

Unnamed: 0,frac_enfu_minimum_fuel_to_electricity_fuel_biogas,frac_enfu_minimum_fuel_to_electricity_fuel_waste,frac_inen_energy_plastic_coal,frac_inen_energy_plastic_coke,frac_inen_energy_plastic_diesel,frac_inen_energy_plastic_electricity,frac_inen_energy_plastic_furnace_gas,frac_inen_energy_plastic_gasoline,frac_inen_energy_plastic_hydrocarbon_gas_liquids,frac_inen_energy_plastic_hydrogen,...,frac_ippu_production_with_co2_capture_glass,frac_ippu_production_with_co2_capture_lime_and_carbonite,frac_ippu_production_with_co2_capture_metals,frac_ippu_production_with_co2_capture_plastic,frac_trww_n_removed_untreated_no_sewerage,frac_trww_n_removed_untreated_with_sewerage,frac_trww_p_removed_untreated_no_sewerage,frac_trww_p_removed_untreated_with_sewerage,frac_trww_tow_removed_untreated_no_sewerage,frac_trww_tow_removed_untreated_with_sewerage
0,0.497975,0.502025,0.076733,0.077053,0.076842,0.0768,0.077189,0.077069,0.07718,0.077243,...,0.16652,0.166087,0.166543,0.166846,0.500196,0.499804,0.500095,0.499905,0.499357,0.500643
1,0.499912,0.500088,0.07692,0.076633,0.077036,0.076981,0.076772,0.077121,0.076972,0.076982,...,0.16651,0.167026,0.166639,0.167153,0.499455,0.500545,0.50101,0.49899,0.499561,0.500439
2,0.50372,0.49628,0.076803,0.0772,0.077001,0.077251,0.07686,0.076648,0.077107,0.076739,...,0.167101,0.166999,0.16684,0.166332,0.498523,0.501477,0.499095,0.500905,0.500529,0.499471
3,0.507439,0.492561,0.076776,0.076683,0.076589,0.076642,0.077109,0.076844,0.077159,0.077278,...,0.16653,0.167304,0.166996,0.166296,0.50146,0.49854,0.500109,0.499891,0.497778,0.502222
4,0.511157,0.488843,0.077352,0.077193,0.076938,0.077168,0.076639,0.076934,0.077119,0.076595,...,0.166331,0.166708,0.166965,0.166535,0.500024,0.499976,0.499567,0.500433,0.50071,0.49929


In [132]:
# # Assuming df_estresado is defined and contains columns
# vars_grupo = [i for i in df_estresado.columns if i.startswith('frac_')]

# df_frac_vars = pd.DataFrame(vars_grupo, columns=['frac_var_name'])
# df_frac_vars.sort_values(by='frac_var_name', inplace=True)

# # Extract prefix by removing the last '_{word}' segment
# df_frac_vars['frac_var_name_prefix'] = df_frac_vars['frac_var_name'].apply(lambda x: '_'.join(x.split('_')[:-1]))

# df_frac_vars.to_csv('frac_vars.csv', index=False)

###########

In [133]:
# Replace any NaNs left in the stressed inputs with 0, then check again for columns with NaNs.
# NOTE(review): filling with 0 hides where the NaNs came from — consider reporting the
# affected columns before overwriting them.
columns_with_na = helper_functions.get_cols_with_nans(stressed_df)
if columns_with_na:
    stressed_df[columns_with_na] = stressed_df[columns_with_na].fillna(0)

helper_functions.get_cols_with_nans(stressed_df)

[]
[]


[]

In [134]:
# Build the SISEPUEDE transformers on top of the stressed input data
transformers = trf.transformers.Transformers(
    {},
    df_input = stressed_df,
)

##  SET UP SISEPUEDE SUPPORT OBJECTS

file_struct = SISEPUEDEFileStructure()

matt = file_struct.model_attributes
regions = sc.Regions(matt)
time_periods = sc.TimePeriods(matt)

# Instantiate the default strategy directory under SSP_OUTPUT_PATH

trf.instantiate_default_strategy_directory(
        transformers,
        SSP_OUTPUT_PATH,
    )

# Load the transformations back from that directory (they can be modified on disk before this step)
transformations = trf.Transformations(
        SSP_OUTPUT_PATH,
        transformers = transformers,
    )

# Build the strategies from the loaded transformations
strategies = trf.Strategies(
        transformations,
        export_path = "transformations",
        prebuild = True,
    )

In [135]:


# Load the example variable trajectory group specification
df_vargroups = examples("variable_trajectory_group_specification")

# Build the templates for strategies 0 and 1000 using that specification
strategies.build_strategies_to_templates(
        df_trajgroup = df_vargroups,
        include_simplex_group_as_trajgroup = True,
        strategies = [0, 1000],
    )



0

In [137]:
# Instantiate SISEPUEDE for the target country with the strategies built above.
# `si` comes from the notebook's import cell, so it is not re-imported here.
ssp = si.SISEPUEDE(
        "calibrated",
        initialize_as_dummy = False, # no connection to Julia is initialized if set to True
        regions = [target_country],
        db_type = "csv",
        strategies = strategies,
        try_exogenous_xl_types_in_variable_specification = True,
    )

2024-11-26 12:29:12,171 - INFO - Successfully initialized SISEPUEDEFileStructure.
2024-11-26 12:29:12,174 - INFO - 	Setting export engine to 'csv'.
2024-11-26 12:29:12,176 - INFO - Successfully instantiated table ANALYSIS_METADATA
2024-11-26 12:29:12,177 - INFO - Successfully instantiated table ATTRIBUTE_DESIGN
2024-11-26 12:29:12,179 - INFO - Successfully instantiated table ATTRIBUTE_LHC_SAMPLES_EXOGENOUS_UNCERTAINTIES
2024-11-26 12:29:12,180 - INFO - Successfully instantiated table ATTRIBUTE_LHC_SAMPLES_LEVER_EFFECTS
2024-11-26 12:29:12,182 - INFO - Successfully instantiated table ATTRIBUTE_PRIMARY
2024-11-26 12:29:12,183 - INFO - Successfully instantiated table ATTRIBUTE_STRATEGY
2024-11-26 12:29:12,185 - INFO - Successfully instantiated table MODEL_BASE_INPUT_DATABASE
2024-11-26 12:29:12,186 - INFO - Successfully instantiated table MODEL_INPUT
2024-11-26 12:29:12,189 - INFO - Successfully instantiated table MODEL_OUTPUT
2024-11-26 12:29:12,191 - INFO - SISEPUEDEOutputDatabase succe

Detected IPython. Loading juliacall extension. See https://juliapy.github.io/PythonCall.jl/stable/compat/#IPython


Precompiling NemoMod...
Info Given NemoMod was explicitly requested, output will be shown live [0K
[0KERROR: Method overwriting is not permitted during Module precompilation. Use `__precompile__(false)` to opt-out of precompilation.
   2480.9 ms  ? NemoMod
[ Info: Precompiling NemoMod [a3c327a0-d2f0-11e8-37fd-d12fd35c3c72] 
ERROR: Method overwriting is not permitted during Module precompilation. Use `__precompile__(false)` to opt-out of precompilation.
┌ Info: Skipping precompilation due to precompilable error. Importing NemoMod [a3c327a0-d2f0-11e8-37fd-d12fd35c3c72].
└   exception = Error when precompiling module, potentially caused by a __precompile__(false) declaration in the module.
2024-11-26 12:30:12,830 - INFO - Successfully initialized JuMP optimizer from solver module HiGHS.
2024-11-26 12:30:12,861 - INFO - Successfully initialized SISEPUEDEModels.
2024-11-26 12:30:12,870 - INFO - Table ANALYSIS_METADATA successfully written to /home/tony-ubuntu/anaconda3/envs/ssp_env/lib/py

In [138]:
# Checks if the land use reallocation factor is set to 0.0
helper_functions.check_land_use_factor(ssp_object=ssp, target_country=target_country)

In [139]:
# Create the parameters dict for the model run: future 0, design 0, and the two
# strategies (0 and 1000) whose templates were built earlier in the notebook.
dict_run = {
        ssp.key_future: [0],
        ssp.key_design: [0],
        ssp.key_strategy: [
            0,
            1000,
        ],
    }


In [None]:
# we'll save inputs since we're doing a small set of runs
ssp.project_scenarios(
        dict_run,
        save_inputs = True,
    )

2024-11-26 12:30:16,855 - INFO - 
***	STARTING REGION croatia	***

2024-11-26 12:30:20,675 - INFO - Trying run primary_id = 0 in region croatia
2024-11-26 12:30:20,676 - INFO - Running AFOLU model
2024-11-26 12:30:20,885 - INFO - AFOLU model run successfully completed
2024-11-26 12:30:20,886 - INFO - Running CircularEconomy model
2024-11-26 12:30:20,944 - INFO - CircularEconomy model run successfully completed
2024-11-26 12:30:20,945 - INFO - Running IPPU model
2024-11-26 12:30:21,040 - INFO - IPPU model run successfully completed
2024-11-26 12:30:21,041 - INFO - Running Energy model (EnergyConsumption without Fugitive Emissions)
2024-11-26 12:30:21,065 - DEBUG - Missing elasticity information found in 'project_energy_consumption_by_fuel_from_effvars': using specified future demands.
2024-11-26 12:30:21,182 - INFO - EnergyConsumption without Fugitive Emissions model run successfully completed
2024-11-26 12:30:21,183 - INFO - Running Energy model (Electricity and Fuel Production: trying

2024-26-Nov 12:30:21.777 Opened SQLite database at /home/tony-ubuntu/anaconda3/envs/ssp_env/lib/python3.11/site-packages/sisepuede/tmp/nemomod_intermediate_database.sqlite.
2024-26-Nov 12:30:22.139 Added NEMO structure to SQLite database at /home/tony-ubuntu/anaconda3/envs/ssp_env/lib/python3.11/site-packages/sisepuede/tmp/nemomod_intermediate_database.sqlite.
2024-26-Nov 12:30:39.627 Started modeling scenario. NEMO version = 2.0.0, solver = HiGHS.


└ @ NemoMod ~/.julia/packages/NemoMod/p49Bn/src/scenario_calculation.jl:479
└ @ NemoMod ~/.julia/packages/NemoMod/p49Bn/src/scenario_calculation.jl:6112


2024-26-Nov 12:31:46.155 Finished modeling scenario.


2024-11-26 12:31:46,416 - INFO - NemoMod ran successfully with the following status: OPTIMAL
2024-11-26 12:31:46,442 - INFO - EnergyProduction model run successfully completed
2024-11-26 12:31:46,444 - INFO - Running Energy (Fugitive Emissions)
2024-11-26 12:31:46,501 - INFO - Fugitive Emissions from Energy model run successfully completed
2024-11-26 12:31:46,502 - INFO - Appending Socioeconomic outputs
2024-11-26 12:31:46,513 - INFO - Socioeconomic outputs successfully appended.
2024-11-26 12:31:46,520 - INFO - Model run for primary_id = 0 successfully completed in 85.84 seconds (n_tries = 1).
2024-11-26 12:31:46,528 - INFO - Trying run primary_id = 1001 in region croatia
2024-11-26 12:31:46,530 - INFO - Running AFOLU model
2024-11-26 12:31:46,744 - INFO - AFOLU model run successfully completed
2024-11-26 12:31:46,745 - INFO - Running CircularEconomy model
2024-11-26 12:31:46,802 - INFO - CircularEconomy model run successfully completed
2024-11-26 12:31:46,803 - INFO - Running IPPU mo

2024-26-Nov 12:31:48.933 Started modeling scenario. NEMO version = 2.0.0, solver = HiGHS.


└ @ NemoMod ~/.julia/packages/NemoMod/p49Bn/src/scenario_calculation.jl:479
└ @ NemoMod ~/.julia/packages/NemoMod/p49Bn/src/scenario_calculation.jl:6112


2024-26-Nov 12:32:36.623 Finished modeling scenario.


2024-11-26 12:32:36,832 - INFO - NemoMod ran successfully with the following status: OPTIMAL
2024-11-26 12:32:36,845 - INFO - EnergyProduction model run successfully completed
2024-11-26 12:32:36,846 - INFO - Running Energy (Fugitive Emissions)
2024-11-26 12:32:36,902 - INFO - Fugitive Emissions from Energy model run successfully completed
2024-11-26 12:32:36,903 - INFO - Appending Socioeconomic outputs
2024-11-26 12:32:36,915 - INFO - Socioeconomic outputs successfully appended.
2024-11-26 12:32:36,920 - INFO - Model run for primary_id = 1001 successfully completed in 50.39 seconds (n_tries = 1).
2024-11-26 12:32:36,948 - INFO - 
***	 REGION croatia COMPLETE	***

2024-11-26 12:32:37,064 - INFO - Table MODEL_OUTPUT successfully written to /home/tony-ubuntu/anaconda3/envs/ssp_env/lib/python3.11/site-packages/sisepuede/out/sisepuede_run_2024-11-26T12;29;11.519660/sisepuede_run_2024-11-26T12;29;11.519660_output_database/MODEL_OUTPUT.csv.
2024-11-26 12:32:37,068 - INFO - Table ATTRIBUTE_PR

{'croatia': [0, 1001]}

: 

In [40]:
# Export this experiment's simulation outputs together with the stressed inputs that produced them.
INPUTS_ESTRESADOS_FILE_PATH = build_path(
    [INPUTS_ESTRESADOS_PATH, f"sim_input_{experiment_id}.csv"]
)
OUTPUTS_ESTRESADOS_FILE_PATH = build_path(
    [OUTPUTS_ESTRESADOS_PATH, f"sim_output_{experiment_id}.csv"]
)

df_out = ssp.read_output(None)

# Outputs are written first, then the inputs; neither file keeps the index.
for frame, csv_path in (
    (df_out, OUTPUTS_ESTRESADOS_FILE_PATH),
    (stressed_df, INPUTS_ESTRESADOS_FILE_PATH),
):
    frame.to_csv(csv_path, index=False)