In [199]:
import pandas as pd

# Auxiliary Functions

In [200]:
def new_HS2ID(df_input, col_hs2='HS2 ID', col_section='Section ID'):
    """Strip the leading section-ID characters from a product-code column.

    Each value of ``col_hs2`` is converted to ``str`` and its first
    ``len(str(section_id))`` characters are dropped, where ``section_id`` is
    the value of ``col_section`` on the same row (e.g. code ``101`` with
    section ``1`` becomes ``'01'``).

    Parameters
    ----------
    df_input : pandas.DataFrame
        Frame containing both ``col_hs2`` and ``col_section``.
    col_hs2 : str, default 'HS2 ID'
        Column to rewrite; any prefixed code column (e.g. 'HS4 ID') works.
    col_section : str, default 'Section ID'
        Column whose string length decides how many characters are dropped.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df_input`` with ``col_hs2`` replaced by the stripped
        strings. The input frame is left untouched.

    Notes
    -----
    The prefix is removed by length only; it is not checked that the code
    actually starts with the section ID. Calling the function twice on the
    same column therefore strips characters twice.
    """
    df_output = df_input.copy()
    # Walk the two columns directly instead of `apply(axis=1)`: a row-wise
    # apply builds every row as a single-dtype Series, so on an all-numeric
    # frame containing a float column the integer IDs become floats
    # ('101.0') and the slice is wrong. This form is also faster and works
    # on an empty frame.
    df_output[col_hs2] = [
        str(code)[len(str(section)):]
        for code, section in zip(df_output[col_hs2], df_output[col_section])
    ]
    return df_output

# Export

In [201]:
# Raw OEC export tables: one CSV per country (Russia, Ukraine) and year (2021, 2023).
RUS_export_2021, RUS_export_2023 = map(
    pd.read_csv,
    (
        'raw_data/OEC_data/2021_Russia_exports.csv',
        'raw_data/OEC_data/2023_Russia_exports.csv',
    ),
)

UKR_export_2021, UKR_export_2023 = map(
    pd.read_csv,
    (
        'raw_data/OEC_data/2021_Ukraine_exports.csv',
        'raw_data/OEC_data/2023_Ukraine_exports.csv',
    ),
)

## Fix HS2 ID column


In [202]:
# Drop the section-ID prefix from 'HS2 ID' in every export table.
# Caution: each name is rebound to the stripped result, so running this cell
# a second time strips characters again.
RUS_export_2021, RUS_export_2023 = map(new_HS2ID, (RUS_export_2021, RUS_export_2023))

UKR_export_2021, UKR_export_2023 = map(new_HS2ID, (UKR_export_2021, UKR_export_2023))

In [203]:
# Distinct labels found in the 'Section' column of the 2021 Russian exports.
unique_values = RUS_export_2021.loc[:, 'Section'].unique()
print(unique_values)

['Animal Products' 'Vegetable Products' 'Animal and Vegetable Bi-Products'
 'Foodstuffs' 'Mineral Products' 'Chemical Products'
 'Plastics and Rubbers' 'Animal Hides' 'Wood Products' 'Paper Goods'
 'Textiles' 'Footwear and Headwear' 'Stone And Glass' 'Precious Metals'
 'Metals' 'Machines' 'Transportation' 'Instruments' 'Weapons'
 'Miscellaneous' 'Arts and Antiques']


In [204]:
# Main sections of interest for the economies of the two countries;
# every other section is later relabelled as 'Other'.
list_section = [
    'Chemical Products',
    'Vegetable Products',
    'Metals',
    'Mineral Products',
    'Animal and Vegetable Bi-Products',
    'Machines',
    'Wood Products',
    'Precious Metals',
]

In [205]:
# Keep only the sections of interest: any row whose 'Section' is not in
# `list_section` is relabelled 'Other'. The frames are modified in place.
for export_frame in (RUS_export_2021, RUS_export_2023, UKR_export_2021, UKR_export_2023):
    not_of_interest = ~export_frame['Section'].isin(list_section)
    export_frame.loc[not_of_interest, 'Section'] = 'Other'

In [206]:
# Same prefix stripping as for 'HS2 ID', now applied to the 'HS4 ID' column.
# Caution: re-running this cell strips characters again.
RUS_export_2021, RUS_export_2023, UKR_export_2021, UKR_export_2023 = (
    new_HS2ID(export_frame, col_hs2='HS4 ID')
    for export_frame in (RUS_export_2021, RUS_export_2023, UKR_export_2021, UKR_export_2023)
)


In [207]:
# HS2 and HS4 item codes (as strings) used for the project's visualization.
wanted_HS2 = [
    '71',
    '72', '74', '75', '76', '73', '78', '79', '80', '81', '83', '82', '59',
    '84', '85',
    '31', '28', '29', '38', '30', '33', '34', '32', '36', '35', '37',
    '10', '12', '26', '25', '44',
]
wanted_HS4 = ['2709', '2710', '2711', '2701', '1512']

In [208]:
def standardizza_section(df):
    """Relabel rows outside the wanted HS2/HS4 codes as 'Other'.

    A row keeps its 'Section' when its 'HS2 ID' is in ``wanted_HS2`` or its
    'HS4 ID' is in ``wanted_HS4``; every other row gets 'Section' set to
    'Other'. Works on a copy, so the frame passed in is not modified.
    """
    result = df.copy()
    is_wanted = result['HS2 ID'].isin(wanted_HS2) | result['HS4 ID'].isin(wanted_HS4)
    result.loc[~is_wanted, 'Section'] = 'Other'
    return result

In [209]:
# Relabel the unwanted HS2/HS4 rows as 'Other' in all four export tables.
RUS_export_2021, RUS_export_2023, UKR_export_2021, UKR_export_2023 = map(
    standardizza_section,
    (RUS_export_2021, RUS_export_2023, UKR_export_2021, UKR_export_2023),
)

In [210]:
# Category label -> list of HS4 codes (strings) that belong to it.
dict_HS4 = {
    'Crude/Refined Petroleum': ['2709', '2710'],
    'Natural Gas': ['2711'],
    'Coal': ['2701'],
    'Seed Oils': ['1512'],
}

# Category label -> list of HS2 codes (strings) that belong to it.
dict_HS2 = {
    # NOTE(review): '59' is grouped with the metals codes here - confirm this is intended.
    'Metals': ['72', '74', '75', '76', '73', '78', '79', '80', '81', '83', '82', '59'],
    'Precious stones, metals, & pearls': ['71'],
    'Machines': ['84', '85'],
    'Chemicals': ['31', '28', '29', '38', '30', '33', '34', '32', '36', '35', '37'],
    'Cereals': ['10'],
    'Oils Seeds & Oleaginous Fruits': ['12'],
    'Minerals': ['26', '25'],
    'Wood': ['44'],
}

In [211]:
def flatten_dict(d):
    """Invert a ``{key: [values]}`` dict into a flat ``{value: key}`` dict.

    If the same value appears under several keys, the key encountered last
    (in the dict's iteration order) wins.
    """
    flattened = {}
    for key, val_list in d.items():
        for val in val_list:
            flattened[val] = key
    return flattened

# Code -> category lookups derived from the two category dictionaries.
map_hs4, map_hs2 = map(flatten_dict, (dict_HS4, dict_HS2))

def assegna_section(df):
    """Overwrite 'Section' with the category derived from the item codes.

    The 'HS4 ID' lookup in ``map_hs4`` takes precedence; rows without an HS4
    match fall back to the 'HS2 ID' lookup in ``map_hs2``; rows matching
    neither are labelled 'Other'. Returns a modified copy of ``df``.
    """
    result = df.copy()
    section = result['HS4 ID'].map(map_hs4)
    section = section.fillna(result['HS2 ID'].map(map_hs2))
    result['Section'] = section.fillna('Other')
    return result

In [212]:
# Assign the final category to every row of the four export tables.
RUS_export_2021, RUS_export_2023, UKR_export_2021, UKR_export_2023 = map(
    assegna_section,
    (RUS_export_2021, RUS_export_2023, UKR_export_2021, UKR_export_2023),
)

In [213]:
# Total 'Trade Value' per 'Section' for each dataset, with 'Section' kept as a column.
(
    grouped_RUS_export_2021,
    grouped_RUS_export_2023,
    grouped_UKR_export_2021,
    grouped_UKR_export_2023,
) = (
    export_frame.groupby('Section')['Trade Value'].sum().reset_index()
    for export_frame in (RUS_export_2021, RUS_export_2023, UKR_export_2021, UKR_export_2023)
)

In [214]:
# Categories kept as separate entries for Russia; anything else is folded into "Other".
keep_sections = [
    "Cereals", "Chemicals", "Coal", "Crude/Refined Petroleum",
    "Machines", "Metals", "Minerals", "Natural Gas",
    "Other", "Precious stones, metals, & pearls", "Wood",
]

# Fold every category not listed in `keep_sections` into 'Other', then re-sum
# 'Trade Value' so that each remaining category appears exactly once.
grouped_RUS_export_2021, grouped_RUS_export_2023 = (
    grouped
    .assign(Section=grouped['Section'].mask(~grouped['Section'].isin(keep_sections), 'Other'))
    .groupby('Section', as_index=False)['Trade Value']
    .sum()
    for grouped in (grouped_RUS_export_2021, grouped_RUS_export_2023)
)

In [215]:
# Categories kept as separate entries for Ukraine; anything else is folded into "Other".
keep_sections_ukr = [
    "Cereals", "Chemicals", "Machines", "Metals", "Minerals",
    "Oils Seeds & Oleaginous Fruits", "Other", "Seed Oils", "Wood",
]

# Fold every category not listed in `keep_sections_ukr` into 'Other', then
# re-sum 'Trade Value' so that each remaining category appears exactly once.
grouped_UKR_export_2021, grouped_UKR_export_2023 = (
    grouped
    .assign(Section=grouped['Section'].mask(~grouped['Section'].isin(keep_sections_ukr), 'Other'))
    .groupby('Section', as_index=False)['Trade Value']
    .sum()
    for grouped in (grouped_UKR_export_2021, grouped_UKR_export_2023)
)

In [216]:
# Write each grouped table as a JSON object keyed by 'Section' with the
# summed 'Trade Value' as value.
for grouped_frame, json_path in (
    (grouped_RUS_export_2021, '../../data/final/trade-data/final_datasets/RUS_export_2021_grouped_v2.json'),
    (grouped_RUS_export_2023, '../../data/final/trade-data/final_datasets/RUS_export_2023_grouped_v2.json'),
    (grouped_UKR_export_2021, '../../data/final/trade-data/final_datasets/UKR_export_2021_grouped_v2.json'),
    (grouped_UKR_export_2023, '../../data/final/trade-data/final_datasets/UKR_export_2023_grouped_v2.json'),
):
    grouped_frame.set_index('Section')['Trade Value'].to_json(json_path)