In [15]:
import os
import sys
import numpy as np
import pandas as pd
from sisepuede.core.support_classes import Regions, TimePeriods
from sisepuede.core.model_attributes import ModelAttributes

from sisepuede.manager.sisepuede_examples import SISEPUEDEExamples
from sisepuede.manager.sisepuede_file_structure import SISEPUEDEFileStructure
import sisepuede.core.support_classes as sc
import sisepuede.transformers as trf
import sisepuede.utilities._plotting as spu
import sisepuede.utilities._toolbox as sf
import sisepuede as si
import yaml
import sys
sys.path.append('../')

from scripts.utilities.air_transport_freight import AirTransportFreight
from scripts.utilities.general_utils import GeneralUtils

In [16]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules (e.g. the local `scripts` package added to sys.path above) are picked
# up on the next cell execution without restarting the kernel.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [17]:
def load_formula_config_from_yaml(yaml_file):
    """
    Load a column -> formula configuration from a YAML file.

    The YAML document is expected to be a mapping whose keys are output column
    names and whose values are Python expression strings written in terms of a
    variable named ``row``. Each expression is compiled into
    ``lambda row: <expression>``.

    Parameters:
    yaml_file (str): Path to the YAML configuration file.

    Returns:
    dict: A dictionary with column names as keys and lambda functions as values.
          An empty dictionary is returned when the YAML file is empty.

    Raises:
    ValueError: If the top level of the YAML document is not a mapping.
    """
    # YAML is UTF-8 by specification; do not depend on the platform default encoding.
    with open(yaml_file, 'r', encoding='utf-8') as file:
        config_yaml = yaml.safe_load(file)

    # safe_load returns None for an empty document; treat that as "no formulas"
    # instead of failing later with an AttributeError on `.items()`.
    if config_yaml is None:
        return {}
    if not isinstance(config_yaml, dict):
        raise ValueError(
            f"Expected a mapping of column name to formula in {yaml_file!r}, "
            f"got {type(config_yaml).__name__}"
        )

    # SECURITY: eval() executes arbitrary Python taken from the YAML file, so this
    # function must only ever be pointed at trusted, version-controlled config.
    # It is intentionally left unsandboxed because formulas may reference names
    # available in this module's globals.
    config = {
        col: eval(f"lambda row: {formula}") for col, formula in config_yaml.items()
    }
    return config

In [18]:
def merge_all_freight_data(df_aviation, df_freight):
    """
    Attach per-mode freight values to the aviation table and compute a total.

    For every row of ``df_aviation`` whose ``iso_alpha_3`` also appears in
    ``df_freight['LOCATION']`` and whose ``year`` is in 2011-2020, the columns
    'RAIL', 'ROAD', 'INLAND' and 'COAST' are filled from the matching
    (LOCATION, TIME, SUBJECT) row of ``df_freight`` ('Value' column). When no
    such freight row exists the value is 0; when several exist the first one
    wins. Rows outside that country/year scope, and rows containing NaN in any
    column, are dropped.

    Parameters:
    df_aviation (pd.DataFrame): Must contain 'iso_alpha_3', 'year' and
        'Aviation (mtkm)'. It is not modified.
    df_freight (pd.DataFrame): Must contain 'LOCATION', 'TIME', 'SUBJECT' and
        'Value'.

    Returns:
    pd.DataFrame: Copy of the retained aviation rows with the four subject
        columns and a 'Total' column (aviation + rail + road + inland + coast).
        Empty (but with all columns present) when nothing is in scope.
    """
    subjects = ['RAIL', 'ROAD', 'INLAND', 'COAST']
    years_hist = set(range(2011, 2021))
    shared_codes = set(df_freight['LOCATION']).intersection(df_aviation['iso_alpha_3'])

    # Index the freight table once instead of re-filtering it with boolean masks
    # for every country/year/subject combination. setdefault keeps the first
    # occurrence of a duplicated key, matching a `.values[0]` lookup.
    freight_lookup = {}
    freight_keys = zip(df_freight['LOCATION'], df_freight['TIME'], df_freight['SUBJECT'])
    for key, value in zip(freight_keys, df_freight['Value']):
        freight_lookup.setdefault(key, value)

    df_merged = df_aviation.copy()
    row_keys = list(zip(df_merged['iso_alpha_3'], df_merged['year']))

    # Always create the subject columns, even when no row is in scope, so the
    # Total computation below cannot fail on a missing column.
    for subject in subjects:
        df_merged[subject] = [
            freight_lookup.get((code, year, subject), 0)
            if code in shared_codes and year in years_hist
            else np.nan  # out-of-scope rows are removed by the dropna below
            for code, year in row_keys
        ]

    df_merged = df_merged.dropna(axis=0, how='any')

    # Column-wise addition (same operand order as a row-by-row sum) also works
    # on an empty frame, where DataFrame.apply(axis=1) does not yield a Series.
    df_merged['Total'] = (
        df_merged['Aviation (mtkm)']
        + df_merged['RAIL']
        + df_merged['ROAD']
        + df_merged['INLAND']
        + df_merged['COAST']
    )

    return df_merged


In [19]:
def create_historical_df(df, config):
    """
    Create new columns in a copy of the DataFrame based on the provided configuration.

    Each formula is evaluated row by row; rows whose 'Total' is 0 get 0 for every
    configured column instead of evaluating the formula. Columns are added in
    the order of ``config``, so a later formula may read a column produced by
    an earlier one.

    Parameters:
    df (pd.DataFrame): Input DataFrame. Must contain 'iso_alpha_3', 'year' and
        'Total' plus whatever columns the formulas read. It is not modified.
    config (dict): Configuration dictionary where keys are new column names and
        values are callables taking a row and returning the value for that row.

    Returns:
    pd.DataFrame: New DataFrame with 'iso_alpha_3', 'year' and one column per
        key of ``config``.
    """
    # Work on a copy so the caller's frame is not silently extended with the
    # derived columns.
    result = df.copy()

    for new_col, formula in config.items():
        if result.empty:
            # With no rows, DataFrame.apply(axis=1) may hand back an empty
            # DataFrame instead of a Series, which cannot be assigned to a
            # single column; create the empty column explicitly.
            result[new_col] = pd.Series(index=result.index, dtype='float64')
            continue
        result[new_col] = result.apply(
            lambda row: formula(row) if row['Total'] != 0 else 0, axis=1
        )

    return result[['iso_alpha_3', 'year'] + list(config.keys())]



In [20]:
def main():
    """
    Run the freight-fraction pipeline end to end and persist the result.

    Steps: read the imputed freight CSV and the aviation freight data from
    ``../data/raw``, merge them, evaluate the formulas configured in
    ``../scripts/config/frac_trns_mtkm_dem_freight_formulas_config.yaml``, and
    write the result to
    ``../data/processed/historical/frac_trns_mtkm_dem_freight.csv``.

    All paths are resolved relative to the parent of the current working
    directory, so this is expected to be run from the notebook's own folder.

    Returns:
    pd.DataFrame: The frame that was written to disk.
    """
    parent_dir = os.path.abspath('..')
    raw_data_dir_path = os.path.join(parent_dir, 'data', 'raw')
    config_files_dir_path = os.path.join(parent_dir, 'scripts', 'config')
    processed_data_dir_path = os.path.join(parent_dir, 'data', 'processed')
    historical_dir_path = os.path.join(processed_data_dir_path, 'historical')

    gu = GeneralUtils()
    socioeconomic_vars_paths = gu.read_yaml_file(
        os.path.join(config_files_dir_path, 'socioeconomic_vars_paths.yaml')
    )

    # Missing freight entries are treated as zero before merging.
    df_freight = pd.read_csv(
        os.path.join(raw_data_dir_path, 'imputed_freight_data.csv')
    ).fillna(0)

    atf = AirTransportFreight(
        socioeconomic_vars_paths,
        os.path.join(raw_data_dir_path, 'API_IS.AIR.GOOD.MT.K1_DS2_en_csv_v2_5358418.csv'),
    )
    df_aviation = atf.get_aviation_freight_data()

    df_merged = merge_all_freight_data(df_aviation, df_freight)

    config = load_formula_config_from_yaml(
        os.path.join(config_files_dir_path, 'frac_trns_mtkm_dem_freight_formulas_config.yaml')
    )
    historical_df = create_historical_df(df_merged, config)

    # to_csv does not create missing parent directories; make sure the target
    # folder exists so a fresh checkout does not fail at the very last step.
    os.makedirs(historical_dir_path, exist_ok=True)
    historical_df.to_csv(
        os.path.join(historical_dir_path, 'frac_trns_mtkm_dem_freight.csv'), index=False
    )

    return historical_df

In [21]:
# Run the whole pipeline (this also writes the output CSV as a side effect of
# main()) and show the resulting frame as the cell output.
df = main()
df

Unnamed: 0,iso_alpha_3,year,frac_trns_mtkm_dem_freight_water_borne,frac_trns_mtkm_dem_freight_road_heavy_freight,frac_trns_mtkm_dem_freight_rail_freight,frac_trns_mtkm_dem_freight_aviation
0,ABW,2011,0.107268,0.722273,0.164478,0.005981
1,ABW,2012,0.107268,0.722272,0.164478,0.005983
2,ABW,2013,0.237736,0.387709,0.364529,0.010026
3,ABW,2014,0.237735,0.387701,0.364526,0.010038
4,ABW,2015,0.237728,0.387706,0.364516,0.010050
...,...,...,...,...,...,...
1815,ZWE,2016,0.107912,0.726610,0.165466,0.000012
1816,ZWE,2017,0.107913,0.726612,0.165466,0.000010
1817,ZWE,2018,0.107913,0.726612,0.165466,0.000010
1818,ZWE,2019,0.107889,0.726454,0.165430,0.000226
