In [1]:
import pandas as pd
import numpy as np

In [2]:
def ef_ch4_methane_commitment(DOC, f_rec, management_level):
    """
    CH4 emission factor formula for methane commitment methodology, based on DOC, f_rec and management level.
    Source: IPCC 2006
    """

    MCF_dic = {
    'managed': 1,
    'unmanaged': 0.8,
    'uncategorized': 0.6
    }

    OX_dic = {
    'managed': 0.1,
    'unmanaged': 0,
    'uncategorized': 0
    }

    mcf = MCF_dic.get(management_level)
    ox = OX_dic.get(management_level)

    Lo = mcf*DOC*0.6*0.5*16/12

    return Lo*(1-f_rec)*(1-ox)

In [3]:
treatment_types_translations = {
    'Unidade de triagem (galpão ou usina)': 'sorting unit',
    'Lixão': 'open dump',
    'Aterro controlado': 'controlled landfill',
    'Unidade de compostagem (pátio ou usina)': 'composting unit',
    'Aterro sanitário': 'controlled landfill',
    'Unidade de transbordo': 'transfer unit',
    'Unid. tratamento por microondas ou autoclave': 'autoclave treatment unit',
    'Área de transb e triagem de RCC e volumosos (=ATT)': 'sorting unit',
    'Área de reciclagem de RCC (=un reciclagem entulho)': 'waste recycling area',
    'Unidade de manejo de galhadas e podas': 'pruning management unit',
    'Aterro de Resíduos da Construção Civil (=inertes)': 'construction waste landfill',
    'Vala especifica de RSS': 'clinical waste trench',
    'Outra': 'other',
    'Unidade de tratamento por incineração': 'incineration',
    'Coprocessamento': 'other',
    'Área em recuperação': 'other'
}

In [4]:
file_path = './Planilha_Unidades_Fluxos_RS_2022.xlsx'

In [5]:
df = pd.read_excel(file_path)

In [6]:
# list with the correct columns names
col_names = ['municipality_code', 'IBGE_code', 'municipality_where_the_Unit_is', 'UF', 'region_name', 'region_code', 'id_population', 'year', 
             'unit_code', 'unit_name', 'unit_type', 'municipality_sending', 'total_SW', 'dom_plus_pub', 'clinical', 'construction', 'pruning', 'others'] 

# assign the correct columns names
df.columns = col_names

# drop the first 11 rows
df = df[11:]

# drop unnecessary columns
df = df.drop(columns=['municipality_code', 'IBGE_code','region_name', 'region_code', 'id_population', 'unit_code', 'unit_name', 'UF', 'dom_plus_pub', 'clinical', 'construction', 'pruning', 'others'])

In [7]:
# extraction of the name of the actor, which in this case is the municipality that is sending the waste to the unit (treatment side)
df['actor_name'] = df['municipality_sending'].str.split('/').str[0]

In [8]:
# check if the municipality where the unit is located is the same as the actor name
df['columns_match'] = df['municipality_where_the_Unit_is'] == df['actor_name']

In [9]:
# apply the translation to the treatment type
df['treatment_type'] = df['unit_type'].map(treatment_types_translations)

### Solid waste

emissions_CH4 = MSW * Lo * (1-f_rec) * (1-OX)

Lo = MCF * DOC * 0.6 * 0.5 * 16/12

In [10]:
# filter the df only for the treatment types that are valid for soil waste disposal
df_sw = df[df['treatment_type'].isin(['open dump', 'controlled landfill'])]

In [11]:
## DOC = degradable organic carbon [source = IPCC 2006]
## units = kg C / t waste
df_sw['DOC'] = 120 ## (region = world)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_sw['DOC'] = 120 ## (region = world)


In [12]:
# Assign the management level based on the treatment type (managed for controlled landfill and unmanaged for open dump)
df_sw.loc[:,'management_level'] = np.where(df_sw['treatment_type'] == 'controlled landfill', 'managed', 'unmanaged')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_sw.loc[:,'management_level'] = np.where(df_sw['treatment_type'] == 'controlled landfill', 'managed', 'unmanaged')


In [13]:
# Apply the function to each row
df_sw.loc[:,'emissionfactor_value'] = df_sw.apply(lambda row: ef_ch4_methane_commitment(
    DOC=row['DOC'],
    f_rec=0,  
    management_level=row['management_level']
), axis=1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_sw.loc[:,'emissionfactor_value'] = df_sw.apply(lambda row: ef_ch4_methane_commitment(


In [14]:
# assign the emission factor units
df_sw['emissionfactor_units'] = 'kg/t'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_sw['emissionfactor_units'] = 'kg/t'


In [15]:
# calculate the emissions value
df_sw['emissions_value'] = df_sw['emissionfactor_value']*df_sw['total_SW']

# assign the emissions units
df_sw['emissions_units'] = 'kg'

# assign the gas name and the activity name
df_sw['gas_name'] = 'CH4'
df_sw['activity_name'] = 'solid waste disposal'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_sw['emissions_value'] = df_sw['emissionfactor_value']*df_sw['total_SW']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_sw['emissions_units'] = 'kg'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_sw['gas_name'] = 'CH4'
A value is trying to be set on a copy of a slice from a DataFrame.
Try u

In [16]:
# assign the GPC reference number based on where the waste is treated
df_sw.loc[:, 'GPC_refno'] = np.where(df_sw.loc[:,'columns_match'] == True, 'III.1.1', 'III.1.2')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_sw.loc[:, 'GPC_refno'] = np.where(df_sw.loc[:,'columns_match'] == True, 'III.1.1', 'III.1.2')


In [17]:
# create the metadata column to store the subcategory information
df_sw["metadata"] = df_sw.apply(
    lambda row: {
        "activity_subcategory_type1": 'waste_type',
        "activity_subcategory_typename1": 'municipal solid waste',
        "activity_subcategory_type2": 'treatment_type',
        "activity_subcategory_typename2": row['treatment_type'],
        "activity_subcategory_type3": 'management_level',
        "activity_subcategory_typename3": row['management_level'],
        "activity_subcategory_type4": 'DOC',
        "activity_subcategory_typename4": 150,
        "activity_subcategory_type5": 'f_rec',
        "activity_subcategory_typename5": 0
    },
    axis=1,
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_sw["metadata"] = df_sw.apply(


In [18]:
# drop unnecessary columns
df_sw.drop(columns=['municipality_where_the_Unit_is', 'unit_type', 'municipality_sending', 'columns_match', 'treatment_type', 'DOC', 'management_level'], inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_sw.drop(columns=['municipality_where_the_Unit_is', 'unit_type', 'municipality_sending', 'columns_match', 'treatment_type', 'DOC', 'management_level'], inplace=True)


### Clinical waste

In [19]:
# filter the df only for the treatment types that are valid for clinical waste disposal
df_clinical = df[df['treatment_type']=='clinical waste trench']

In [20]:
## DOC = degradable organic carbon [source = IPCC 2006]
## units = kg C / t waste
## (region = world)
df_clinical.loc[:,'DOC'] = 150 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_clinical.loc[:,'DOC'] = 150


In [21]:
# assign the management level
df_clinical.loc[:,'management_level'] = 'managed'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_clinical.loc[:,'management_level'] = 'managed'


In [22]:
# Apply the function to each row
df_clinical.loc[:,'emissionfactor_value'] = df_clinical.apply(lambda row: ef_ch4_methane_commitment(
    DOC=row['DOC'],
    f_rec=0,  # Applying f_rec = 0 for all rows
    management_level=row['management_level']
), axis=1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_clinical.loc[:,'emissionfactor_value'] = df_clinical.apply(lambda row: ef_ch4_methane_commitment(


In [23]:
# assign the emission factor units
df_clinical['emissionfactor_units'] = 'kg/t'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_clinical['emissionfactor_units'] = 'kg/t'


In [24]:
# calculate the emissions value
df_clinical['emissions_value'] = df_clinical['emissionfactor_value']*df_clinical['total_SW']

# assign the emissions units
df_clinical['emissions_units'] = 'kg'

# assign the gas name and the activity name
df_clinical['gas_name'] = 'CH4'
df_clinical['activity_name'] = 'clinical waste disposal'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_clinical['emissions_value'] = df_clinical['emissionfactor_value']*df_clinical['total_SW']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_clinical['emissions_units'] = 'kg'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_clinical['gas_name'] = 'CH4'
A value is trying to be set on a copy of a

In [25]:
# assign the GPC reference number based on where the waste is treated
df_clinical.loc[:, 'GPC_refno'] = np.where(df_clinical.loc[:,'columns_match'] == True, 'III.1.1', 'III.1.2')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_clinical.loc[:, 'GPC_refno'] = np.where(df_clinical.loc[:,'columns_match'] == True, 'III.1.1', 'III.1.2')


In [26]:
# create the metadata column to store the subcategory information
df_clinical["metadata"] = df_clinical.apply(
    lambda row: {
        "activity_subcategory_type1": 'waste_type',
        "activity_subcategory_typename1": 'clinical waste',
        "activity_subcategory_type2": 'treatment_type',
        "activity_subcategory_typename2": row['treatment_type'],
        "activity_subcategory_type3": 'management_level',
        "activity_subcategory_typename3": row['management_level'],
        "activity_subcategory_type4": 'DOC',
        "activity_subcategory_typename4": 150,
        "activity_subcategory_type5": 'f_rec',
        "activity_subcategory_typename5": 0
    },
    axis=1,
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_clinical["metadata"] = df_clinical.apply(


In [27]:
# drop unnecessary columns
df_clinical.drop(columns=['municipality_where_the_Unit_is', 'unit_type', 'municipality_sending', 'columns_match', 'treatment_type', 'DOC', 'management_level'], inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_clinical.drop(columns=['municipality_where_the_Unit_is', 'unit_type', 'municipality_sending', 'columns_match', 'treatment_type', 'DOC', 'management_level'], inplace=True)


### Incineration

In [28]:
# filter the df only for the treatment types that are valid for incineration
df_incineration = df[df['treatment_type']=='incineration']

In [29]:
# Emission factor for N2O from incineration
#Source IPCC 2006
df_incineration['N20'] = 50*1e-3 ## parameter = continuous and semi-continuous incinerators
## 'ef_units' 'kg/t'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_incineration['N20'] = 50*1e-3 ## parameter = continuous and semi-continuous incinerators


In [30]:
## Source IPCC 2006
df_incineration['CH4'] = 0.2*1e-3 ## parameter = stoke
## 'ef_units' 'kg/t'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_incineration['CH4'] = 0.2*1e-3 ## parameter = stoke


In [31]:
## for clinical waste [source = IPCC 2006]
wf = 1
dm = 0.9   ## type of waste = Other, inert waste
cf = 0.6   ## carbon fraction
fcf = 0.25 ## fossil carbon content
of = 1     ## oxidation factor for incineration

ef_co2_value = wf*dm*cf*fcf*of*(44/12)

In [32]:
## Source IPCC 2006
df_incineration['CO2'] = ef_co2_value*1e3        # 'ef_units' 'kg/t'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_incineration['CO2'] = ef_co2_value*1e3        # 'ef_units' 'kg/t'


In [33]:
# reformat the df
df_incineration = df_incineration.melt(
    id_vars=['municipality_where_the_Unit_is', 'year', 'unit_type', 'municipality_sending', 'total_SW', 'actor_name', 'columns_match', 'treatment_type'], 
    value_vars=['N20', 'CH4', 'CO2'], 
    var_name='gas_name', 
    value_name='emissionfactor_value')

In [34]:
# assign the emission factor units
df_incineration['emissionfactor_units'] = 'kg/t'

In [35]:
# calculate the emissions value
df_incineration['emissions_value'] = df_incineration['emissionfactor_value']*df_incineration['total_SW']

# assign the emissions units and the activity name
df_incineration['emissions_units'] = 'kg'
df_incineration['activity_name'] = 'clinical waste incineration'

In [36]:
# assign the GPC reference number based on where the waste is incinerated
df_incineration['GPC_refno'] = np.where(df_incineration['columns_match'] == True, 'III.3.1', 'III.3.2')

In [37]:
# create the metadata column to store the subcategory information
df_incineration["metadata"] = df_incineration.apply(
    lambda row: {
        "activity_subcategory_type1": 'waste_type',
        "activity_subcategory_typename1": 'clinical waste',
        "activity_subcategory_type2": 'treatment_type',
        "activity_subcategory_typename2": row['treatment_type'],
        "activity_subcategory_type3": 'management_level',
        "activity_subcategory_typename3": 'managed',
        "activity_subcategory_type4": 'technology_type',
        "activity_subcategory_typename4": 'continuous and semi-continuous incinerators',
        "activity_subcategory_type5": 'boiler_type',
        "activity_subcategory_typename5": 'stoke'
    },
    axis=1,
)

In [38]:
# drop unnecessary columns
df_incineration.drop(columns=['municipality_where_the_Unit_is', 'unit_type', 'municipality_sending', 'columns_match', 'treatment_type'], inplace=True)

### Biological treatment

In [39]:
# filter the df only for the treatment types that are valid for biological treatment
df_bio = df[df['treatment_type'].isin(['composting unit', 'pruning management unit'])]

In [40]:
## Source IPCC
## ef units = kg ch4 / t of waste
## composting - dry waste
df_bio['CH4'] = 10
df_bio['N2O'] = 0.6

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_bio['CH4'] = 10
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_bio['N2O'] = 0.6


In [41]:
# reformat the df
df_bio = df_bio.melt(
    id_vars=['municipality_where_the_Unit_is', 'year', 'unit_type', 'municipality_sending', 'total_SW', 'actor_name', 'columns_match', 'treatment_type'], 
    value_vars=['CH4', 'N2O'], 
    var_name='gas_name', 
    value_name='emissionfactor_value')

In [42]:
# assign the emission factor units
df_bio['emissionfactor_units'] = 'kg/t'

In [43]:
# calculate the emissions value
df_bio['emissions_value'] = df_bio['emissionfactor_value']*df_bio['total_SW']

# assign the emissions units and the activity name
df_bio['emissions_units'] = 'kg'
df_bio['activity_name'] = 'composting of organic waste'

In [44]:
# assign the GPC reference number based on where the waste is treated
df_bio['GPC_refno'] = np.where(df_bio['columns_match'] == True, 'III.2.1', 'III.2.2')

In [45]:
# create the metadata column to store the subcategory information
df_bio["metadata"] = df_bio.apply(
    lambda row: {
        "activity_subcategory_type1": 'waste_type',
        "activity_subcategory_typename1": 'organic waste',
        "activity_subcategory_type2": 'treatment_type',
        "activity_subcategory_typename2": row['treatment_type'],
        "activity_subcategory_type3": 'management_level',
        "activity_subcategory_typename3": 'managed',
        "activity_subcategory_type4": 'waste_state',
        "activity_subcategory_typename4": 'dry waste'
    },
    axis=1,
)

In [46]:
# drop unnecessary columns
df_bio.drop(columns=['municipality_where_the_Unit_is', 'unit_type', 'municipality_sending', 'columns_match', 'treatment_type'], inplace=True)

### Joining dfs

In [47]:
# concatenate the dataframes
df_final = pd.concat([df_sw, df_clinical, df_incineration, df_bio], ignore_index=True)

In [48]:
df_final.rename(columns={'total_SW': 'activity_value'}, inplace=True)

In [50]:
# drop the rows with zero emissions
df_final = df_final[df_final['emissions_value'] != 0]

# drop the rows with NaN values
df_final.dropna(subset=['emissions_value'], inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_final.dropna(subset=['emissions_value'], inplace=True)
