In [189]:
import pandas as pd
import chardet
import pycountry

In [190]:
with open("raw_data/trade-register.csv", "rb") as f:
    result = chardet.detect(f.read())
    print(result["encoding"])

ISO-8859-1


## Investigating countries not recognized by Tableau 
Put the source data in tableau and made two csvs of countries it couldn't recognize/map. Then making a list of unique countries between recipients and suppliers

In [191]:
with open("int_data/unidentified_suppliers.csv", "rb") as f:
    supp_encoding = chardet.detect(f.read())
    print(supp_encoding["encoding"])

UTF-16


In [192]:
unid_supp_df = pd.read_csv('int_data/unidentified_suppliers.csv', encoding = supp_encoding['encoding'], sep='\t').reset_index()
unid_rec_df = pd.read_csv('int_data/unidentified_recipients.csv', encoding = supp_encoding['encoding'],sep='\t').reset_index()

unid_supp_df.head()

Unnamed: 0,index,Supplier,Number ordered
0,0,Yugoslavia,2198.0
1,1,unknown supplier(s),5390.0
2,2,United Nations**,4.0
3,3,Soviet Union,845420.0
4,4,South Yemen,12.0


In [193]:
unique_unidentified_countries = list(set(unid_supp_df['Supplier']).union(set(unid_rec_df['Recipient'])))
unique_unidentified_countries

[nan,
 'LRA (Uganda)*',
 'LTTE (Sri Lanka)*',
 '0.25',
 'PLO (Israel)*',
 'NATO**',
 'United Nations**',
 'Provisional IRA (UK)*',
 'OSCE**',
 'SLA (Lebanon)*',
 'Biafra',
 'UNITA (Angola)*',
 'European Union**',
 'GUNT (Chad)*',
 'Houthi rebels (Yemen)*',
 'Libya GNC',
 'Yemen Arab Republic (North Yemen)',
 'RUF (Sierra Leone)*',
 'PIJ (Israel/Palestine)*',
 'Viet Cong (South Vietnam)*',
 'Contras (Nicaragua)*',
 'Yugoslavia',
 'Mujahedin (Afghanistan)*',
 'Soviet Union',
 'East Germany (GDR)',
 'MNLF (Philippines)*',
 'Khmer Rouge (Cambodia)*',
 'unknown rebel group*',
 'Armas (Guatemala)*',
 'Indonesia rebels*',
 'UIC (Somalia)*',
 'South Vietnam',
 'EPLF (Ethiopia)*',
 'LF (Lebanon)*',
 'MPLA (Portugal)*',
 'RPF (Rwanda)*',
 'PRC (Israel/Palestine)*',
 'PAIGC (Portugal)*',
 'Syria rebels*',
 'FNLA (Angola)*',
 'NTC (Libya)*',
 '3',
 'MTA (Myanmar)*',
 'FAN (Chad)*',
 'Kurdistan Regional Government (Iraq)*',
 'ELF (Ethiopia)*',
 'Hamas (Palestine)*',
 'unknown recipient(s)',
 'Czech

In [194]:
df = pd.read_csv('raw_data/trade-register.csv', encoding=result["encoding"])


In [195]:
unique_values_source_data = pd.unique(df[['Supplier', 'Recipient']].values.ravel())

# Convert to list if needed
unique_values_source_data = unique_values_source_data.tolist()
unique_values_source_data

['Brazil',
 'Afghanistan',
 'Russia',
 'Soviet Union',
 'United Kingdom',
 'Czechoslovakia',
 'United States',
 'Norway',
 'India',
 'Canada',
 'unknown supplier(s)',
 'Slovakia',
 'Ukraine',
 'Czechia',
 'Switzerland',
 'Belarus',
 'Bosnia-Herzegovina',
 'China',
 'Italy',
 'Turkiye',
 'South Africa',
 'African Union**',
 'Egypt',
 'Israel',
 'Albania',
 'East Germany (GDR)',
 'Germany',
 'France',
 'Algeria',
 'Finland',
 'UAE',
 'Spain',
 'Romania',
 'Netherlands',
 'Morocco',
 'Sweden',
 'Denmark',
 'Poland',
 'Syria',
 'Amal (Lebanon)*',
 'ANC (South Africa)*',
 'Angola',
 'Hungary',
 'Moldova',
 'Peru',
 'Bulgaria',
 'Kazakhstan',
 'Portugal',
 'Lithuania',
 'Cuba',
 'Anti-Castro rebels (Cuba)*',
 'Antigua and Barbuda',
 'Dominican Republic',
 'Argentina',
 'Austria',
 'Ireland',
 'Belgium',
 'Armas (Guatemala)*',
 'Armenia',
 'Montenegro',
 'Jordan',
 'Australia',
 'South Korea',
 'New Zealand',
 'Azerbaijan',
 'Georgia',
 'Pakistan',
 'Bahamas',
 'Bahrain',
 'Oman',
 'Malta',
 

In [196]:
tableau_identified_countries = [item for item in unique_values_source_data if item not in unique_unidentified_countries]
tableau_identified_countries

['Brazil',
 'Afghanistan',
 'Russia',
 'United Kingdom',
 'United States',
 'Norway',
 'India',
 'Canada',
 'Slovakia',
 'Ukraine',
 'Czechia',
 'Switzerland',
 'Belarus',
 'Bosnia-Herzegovina',
 'China',
 'Italy',
 'Turkiye',
 'South Africa',
 'Egypt',
 'Israel',
 'Albania',
 'Germany',
 'France',
 'Algeria',
 'Finland',
 'UAE',
 'Spain',
 'Romania',
 'Netherlands',
 'Morocco',
 'Sweden',
 'Denmark',
 'Poland',
 'Syria',
 'Angola',
 'Hungary',
 'Moldova',
 'Peru',
 'Bulgaria',
 'Kazakhstan',
 'Portugal',
 'Lithuania',
 'Cuba',
 'Antigua and Barbuda',
 'Dominican Republic',
 'Argentina',
 'Austria',
 'Ireland',
 'Belgium',
 'Armenia',
 'Montenegro',
 'Jordan',
 'Australia',
 'South Korea',
 'New Zealand',
 'Azerbaijan',
 'Georgia',
 'Pakistan',
 'Bahamas',
 'Bahrain',
 'Oman',
 'Malta',
 'Bangladesh',
 'Singapore',
 'Serbia',
 'Malaysia',
 'Barbados',
 'Belize',
 'Taiwan',
 'Benin',
 'Gabon',
 "Cote d'Ivoire",
 'Indonesia',
 'Bhutan',
 'Thailand',
 'Bolivia',
 'Uruguay',
 'Venezuela',


In [197]:
no_tableau_matches = df[df[['Supplier','Recipient']].isin(unique_unidentified_countries).any(axis=1)]
no_tableau_matches.sort_values('Year of order', ascending=False)

Unnamed: 0,Recipient,Supplier,Year of order,Unnamed: 4,Number ordered,.1,Weapon designation,Weapon description,Number delivered,.2,Year(s) of delivery,status,Comments,SIPRI TIV per unit,SIPRI TIV for total order,SIPRI TIV of delivered weapons
27827,unknown recipient(s),United States,2023.0,,6.0,,MD-500E,light helicopter,0.0,,,New,Cayuse Warrior Plus armed version; recipient i...,0.70,4.2,0.0
827,Antigua and Barbuda,unknown supplier(s),2023.0,,1.0,,PA-31 Navajo,light transport aircraft,1.0,,2023,Second hand,Probably second-hand; PA-31-350 Chieftain version,0.30,0.3,0.3
27878,unknown recipient(s),Israel,2023.0,,,,Barak-MX,SAM system,0.0,,,New,$1.2 b deal; designation uncertain (reported a...,47.00,0.0,0.0
15350,Mali,unknown supplier(s),2023.0,?,1.0,,Il-76M,heavy transport aircraft,1.0,,2023,Second hand,Second-hand; Il-76TD version,34.00,34.0,34.0
9793,Honduras,unknown supplier(s),2023.0,,2.0,?,Bell-412,helicopter,0.0,,,Second hand but modernized,Second-hand but modernized before delivery; de...,3.30,6.6,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7729,Ethiopia,Czechoslovakia,1948.0,,20.0,,AH-IV-Hb,light tank,20.0,,1950,New,,0.08,1.6,1.6
9923,Hungary,Soviet Union,1947.0,?,50.0,?,SU-100,tank destroyer,50.0,?,1950; 1951,New,,1.00,50.0,50.0
23902,Switzerland,Czechoslovakia,1946.0,,90.0,?,PzJ-38(t) Hetzer,tank destroyer,90.0,?,1950; 1951; 1952,New,ST-1 version; Swiss designation Panzerjäger G-13,0.50,45.0,45.0
27505,,3,9.0,9,,,,,,,,,,,,


In [198]:
# Filter rows where 'Year of order' > 1995
filtered_df = no_tableau_matches[no_tableau_matches['Year of order'] > 1995]

# Get distinct values from both 'Recipient' and 'Supplier' columns
distinct_values = pd.concat([filtered_df['Recipient'], filtered_df['Supplier']]).dropna().unique()

# Convert to a list
distinct_list = list(distinct_values)

print(distinct_list)

['Afghanistan', 'African Union**', 'Angola', 'Antigua and Barbuda', 'Argentina', 'Azerbaijan', 'Benin', 'Bolivia', 'Botswana', 'Burkina Faso', 'Cambodia', 'Cameroon', 'Canada', 'Central African Republic', 'Chile', 'Colombia', 'Congo', "Cote d'Ivoire", 'Czechia', 'Darfur rebels (Sudan)*', 'Denmark', 'Djibouti', 'DR Congo', 'Ecuador', 'Equatorial Guinea', 'Ethiopia', 'Ghana', 'Guinea', 'Hamas (Palestine)*', 'Hezbollah (Lebanon)*', 'Honduras', 'Houthi rebels (Yemen)*', 'India', 'Indonesia', 'Iran', 'Iraq', 'Jamaica', 'Jordan', 'Kurdistan Regional Government (Iraq)*', 'Kyrgyzstan', 'Laos', 'Latvia', 'Liberia', 'Libya GNC', 'Libya HoR*', 'LRA (Uganda)*', 'LTTE (Sri Lanka)*', 'Madagascar', 'Malaysia', 'Mali', 'Mozambique', 'Myanmar', 'NATO**', 'New Zealand', 'Niger', 'NLA (Macedonia)*', 'Northern Alliance (Afghanistan)*', 'NTC (Libya)*', 'OSCE**', 'Peru', 'Philippines', 'PIJ (Israel/Palestine)*', 'PKK (Turkiye)*', 'PRC (Israel/Palestine)*', 'Regional Security System**', 'RUF (Sierra Leone)*'

In [199]:
set(unique_unidentified_countries).intersection(distinct_list)

{'African Union**',
 'Darfur rebels (Sudan)*',
 'European Union**',
 'Hamas (Palestine)*',
 'Hezbollah (Lebanon)*',
 'Houthi rebels (Yemen)*',
 'Kurdistan Regional Government (Iraq)*',
 'LRA (Uganda)*',
 'LTTE (Sri Lanka)*',
 'Libya GNC',
 'Libya HoR*',
 'NATO**',
 'NLA (Macedonia)*',
 'NTC (Libya)*',
 'Northern Alliance (Afghanistan)*',
 'OSCE**',
 'PIJ (Israel/Palestine)*',
 'PKK (Turkiye)*',
 'PRC (Israel/Palestine)*',
 'RUF (Sierra Leone)*',
 'Regional Security System**',
 'SNA (Somalia)*',
 'Syria rebels*',
 'UIC (Somalia)*',
 'Ukraine Rebels*',
 'United Nations**',
 'United Wa State (Myanmar)*',
 'unknown rebel group*',
 'unknown recipient(s)',
 'unknown supplier(s)'}

In [200]:
since_2000 = set(unique_unidentified_countries) - set(distinct_list)

print(since_2000) 

{nan, 'MTA (Myanmar)*', 'FAN (Chad)*', '0.25', 'ELF (Ethiopia)*', 'PLO (Israel)*', 'Provisional IRA (UK)*', 'Czechoslovakia', 'North Yemen', 'SLA (Lebanon)*', 'Southern rebels (Yemen)*', 'SPLA (Sudan)*', 'ANC (South Africa)*', 'Biafra', 'UNITA (Angola)*', 'GUNT (Chad)*', 'Viet Minh (France)*', 'Yemen Arab Republic (North Yemen)', 'Viet Cong (South Vietnam)*', 'Contras (Nicaragua)*', 'Amal (Lebanon)*', 'Yugoslavia', 'Mujahedin (Afghanistan)*', 'Soviet Union', 'East Germany (GDR)', 'MNLF (Philippines)*', 'Khmer Rouge (Cambodia)*', 'Armas (Guatemala)*', 'South Yemen', 'Indonesia rebels*', 'Haiti rebels*', 'Anti-Castro rebels (Cuba)*', 'South Vietnam', 'ZAPU (Zimbabwe)*', 'EPLF (Ethiopia)*', 'LF (Lebanon)*', 'MPLA (Portugal)*', 'Katanga', 'RPF (Rwanda)*', 'PAIGC (Portugal)*', 'FNLA (Angola)*', 'FMLN (El Salvador)*', 'Lebanon Palestinian rebels*', 'FRELIMO (Portugal)*', 'Pathet Lao (Laos)*', '3'}


In [201]:
# 10 most valuable per unit items
df.sort_values('SIPRI TIV per unit', ascending=False).head(10)


Unnamed: 0,Recipient,Supplier,Year of order,Unnamed: 4,Number ordered,.1,Weapon designation,Weapon description,Number delivered,.2,Year(s) of delivery,status,Comments,SIPRI TIV per unit,SIPRI TIV for total order,SIPRI TIV of delivered weapons
10670,India,Russia,2004.0,,1.0,,Gorshkov,aircraft carrier,1.0,,2013,New,$2.3 b deal (originally $625-750 m deal but in...,1250.0,1250.0,1250.0
10050,India,Russia,2019.0,,1.0,,Project-971I,nuclear submarine,0.0,,,New,$3 b deal for 10-year lease; Indian designatio...,1000.0,1000.0,0.0
10540,India,Russia,2004.0,?,1.0,,Project-971I,nuclear submarine,1.0,,2012,New,$650 m 10-year lease (returned to Russia 2021)...,1000.0,1000.0,1000.0
4643,China,Ukraine,1998.0,,1.0,,Kuznetsov,aircraft carrier,1.0,,2012,Second hand,Second-hand (production stopped 1992 with end ...,631.6,631.6,631.6
4782,China,Russia,2002.0,,2.0,,Project-956/Sovremenny,destroyer,2.0,,2005; 2006,New,$1-1.5 b deal; Type-956EM version; option on 2...,500.0,1000.0,1000.0
4780,China,Russia,1998.0,,2.0,,Project-956/Sovremenny,destroyer,2.0,,1999; 2000,New,$667 m deal; Project-956E version; originally ...,500.0,1000.0,1000.0
1292,Australia,Spain,2007.0,,3.0,,Hobart,destroyer,3.0,,2017; 2018; 2020,New,AUD8.5-9.7 b ($7.2-8.2 b) 'Project Sea-4000' o...,499.0,1497.0,1497.0
7328,Egypt,Italy,2020.0,,2.0,,FREMM,frigate,2.0,,2020; 2021,New,Originally produced for Italy but sold to Egypt,483.0,966.0,966.0
16093,Morocco,France,2008.0,,1.0,,FREMM,frigate,1.0,,2014,New,EUR470 m deal; Moroccan designation Mohammed VI,482.95,482.95,482.95
10771,Indonesia,Italy,2021.0,?,6.0,,FREMM,frigate,0.0,,,New,Selected 2021 but not yet ordered by end-2023,482.95,2897.7,0.0


In [202]:
len(df['Weapon description'].unique())

222

## Categorize weapon descriptions
Make a high level category and a sub-category for each weapon description 

In [203]:
category_mapping = {
    'Aircraft': [
        'fighter aircraft', 'bomber aircraft', 'transport aircraft', 'reconnaissance aircraft', 
        'trainer aircraft', 'AEW&C aircraft', 'SIGINT aircraft', 'trainer/combat aircraft', 
        'AGS aircraft', 'FGA aircraft', 'FGA/EW aircraft', 'airship', 'target tow aircraft', 'light aircraft', 'ground attack aircraft',
        'ASW aircraft', 'AEW aircraft', 'AEW/AGS aircraft', 'reconnaissance/SIGINT aircraft', 'transport ac/helicopter', 'transport/trainer aircraft', 'airship', 
        'light transport aircraft', 'heavy transport aircraft', 'tanker/transport aircraft', 'trainer/light aircraft', 'light/trainer aircraft'
    ],
    'Helicopters': [
        'combat helicopter', 'transport helicopter', 'ASW helicopter', 'light helicopter', 
        'AEW helicopter', 'anti-ship helicopter', 'SIGINT helicopter', 'helicopter', 'ASW Helicopter'
    ],
    'Unmanned Vehicles': [
        'UAV', 'armed UAV', 'reconnaissance AV', 'maritime patrol UAV', 'light aircraft/UAV', 'loitering munition' 
    ],
    'Naval Vessels': [
        'frigate', 'destroyer', 'corvette', 'patrol craft', 'submarine', 'cargo ship', 'tanker',
        'support ship', 'training ship', 'MCM ship', 'replenishment ship', 'minehunter', 'icebreaker', 
        'OPV', 'OPV/training ship', 'OPV/support ship', 'gunboat', 'landing ship', 'salvage ship', 
        'survey ship', 'cruiser', 'aircraft carrier', 'minelayer', 'replenishment tanker', 
        'corvette/minesweeper', 'OPV/transport ship', 'support/landing ship', 'landing/patrol craft',
        'transport ship', 'FAC (Fast Attack Craft)', 'minesweeper', 'landing craft', 'tug', 'nuclear submarine', 'OPV/tug',
        'frigate/landing ship', 'SSB (Ballistic Missile Submarine)', 'SIGINT ship', 'patrol craft/transport craft', 'transport craft',
        'support craft', 'cargo craft', 'midget submarine', 'icebreaker/OPV', 'FAC', 'SSB'
    ],
    'Ground Vehicles/Artillery': [
        'tank', 'light tank', 'armoured car', 'APC', 'IFV', 'AFSV', 'APV', 'AMV', 'ARV', 'AEV/ARV', 
        'armoured bridgelayer', 'training tank', 'tank turret', 'self-propelled gun chassis', 'tank chassis',
        'APC/APV', 'APC/IFV', 'APC/CP', 'IFV/AFSV', 'IFV/APC turret', 'APC turret', 'IFV turret', 'self-propelled AD system',
        'self-propelled gun', 'self-propelled MRL', 'towed gun', 'self-propelled AA gun', 'coastal defence system', 'AA gun (Anti-Aircraft Gun)',
        'towed MRL', 'ALV (Armored Logistics Vehicle)', 'naval gun', 'tank destroyer', 'mortar', 'self-propelled mortar', 'anti-aircraft gun',
        'SPG turret', 'ASW mortar', 'AEV (Armored Engineering Vehicle)', 'helicopter turret', 'anti-tank AV turret', 'self-propelled AA gun turret',
        'apc (Armored Personnel Carrier)', 'AA gun/SAM system', 'AA gun system', 'coastal defence gun', 'mortar turret', 'AA gun', 'ALV', 'AEV', 'apc'
    ],
    'Missiles/Rockets/Bombs': [
        'SAM', 'portable SAM', 'mobile SAM system', 'naval SAM system', 'SAM system', 'SAM/ABM', 'ABM missile',
        'SSM', 'SSM launcher', 'SSM TEL', 'anti-ship missile', 'anti-radar missile', 'anti-tank missile', 
        'guided bomb', 'guided rocket', 'guided rocket/SSM', 'guided rocket/ASM', 'SSM/ASM', 'anti-ship missile/SSM',
        'anti-ship/land-attack missile', 'land-attack missile', 'anti-tank missile/ASM', 'guided glide bomb',
        'anti-ship/ASW torpedo', 'anti-ship torpedo', 'SSM/anti-ship missile', 'ASW torpedo', 'ASW MRL', 
        'anti-ship missile/ASM', 'BVRAAM', 'BVRAAM/SAM', 'SRAAM', 'ASM', 'ABM/SAM system', 'naval MRL', 'ASM (Anti-Ship Missile)',
        'guided shell', 'ASW missile', 'anti-ship/anti-radar missile', 'naval mine/torpedo', 'ABM system', 'ASW rocket launcher',
        'anti-ship missile/ASM/SAM', 'SLBM (Submarine-Launched Ballistic Missile)', 'ASM', 'SLBM', 'anti-radar missile/ASM'
    ],
    'Radar/Detection Systems': [
        'air search radar', 'AGS radar', 'sea search radar', 'multi-function radar', 'air search system', 
        'fire control radar', 'air/sea search radar', 'artillery locating radar', 'ground surv radar', 
        'ground/sea search radar', 'AEW radar', 'MP aircraft radar', 'aircraft EO system', 
        'aircraft EO/radar system', 'air/ground surv radar', 'SAM system radar', 'multi-role radar', 'radar',
        'combat aircraft radar', 'combat heli radar', 'SONAR', 'submarine sonar', 'ASW sonar', 'SONAR system',
        'aircraft recce system', 'height-finding radar', 'ASW sonar', 'SONAR', 'recce satellite', 'EO search/fire control', 'EO system',
        'AGS/MP aircraft radar', 'Naval EO system', 'AGS/SIGINT system', 'surveillance satellite'
    ],
    'Specialized Equipment': [
        'aircraft engine', 'turbofan', 'turbojet', 'gas turbine', 'vehicle engine', 'AIP engine', 
        'air refuel system', 'maritime patrol aircraft', 'SIGINT system', 
        'SIGINT aircraft', 'AALS', 'VEHICLE ENGINE', 'turbojet', 'nuclear reactor', 'ship engine', 'turboprop'
    ],
    'Other': [
        'SSB' , 'training equipment'
    ]
}

# Expanded sub-category mappings
sub_category_mapping = {
    # Aircraft Sub-categories
    'Aircraft': {
        'Fighter/Combat Aircraft': ['fighter aircraft', 'trainer/combat aircraft', 'FGA aircraft', 'FGA/EW aircraft', 'ground attack aircraft', 'ASW aircraft'],
        'Bomber/Heavy Aircraft': ['bomber aircraft', 'heavy transport aircraft'],
        'Reconnaissance/Surveillance Aircraft': ['reconnaissance aircraft', 'AEW&C aircraft', 'SIGINT aircraft', 'AGS aircraft', 'light aircraft', 'AEW aircraft',
                                                'AEW/AGS aircraft', 'reconnaissance/SIGINT aircraft', 'airship'],
        'Trainer Aircraft': ['trainer aircraft', 'trainer/light aircraft', 'light/trainer aircraft', 'target tow aircraft'],
        'Transport Aircraft': ['transport aircraft', 'light transport aircraft', 'tanker/transport aircraft', 'transport ac/helicopter', 'transport/trainer aircraft']
    },
    # Helicopter Sub-categories
    'Helicopters': {
        'Combat Helicopters': ['combat helicopter', 'ASW helicopter', 'anti-ship helicopter', 'SIGINT helicopter'],
        'Transport/Utility Helicopters': ['transport helicopter', 'light helicopter'],
        'Specialized Helicopters': ['AEW helicopter', 'ASW Helicopter', 'helicopter']
    },
    # Unmanned Vehicles Sub-categories
    'Unmanned Vehicles': {
        'UAV/Drone': ['UAV', 'armed UAV', 'light aircraft/UAV', 'loitering munition'],
        'Reconnaissance/Surveillance UAV': ['reconnaissance AV', 'maritime patrol UAV']
    },
    'Naval Vessels' :{
        'Combat Vessel': ['frigate', 'destroyer', 'corvette', 'submarine', 'gunboat', 'cruiser', 'aircraft carrier', 
                         'corvette/minesweeper', 'FAC (Fast Attack Craft)', 'minesweeper', 'nuclear submarine', 'SSB (Ballistic Missile Submarine)',
                         'midget submarine', 'FAC', 'SSB'],
        'Support Vessel' : [ 'patrol craft', 'cargo ship', 'tanker',
        'support ship', 'training ship', 'MCM ship', 'replenishment ship', 'minehunter', 'icebreaker', 
        'OPV', 'OPV/training ship', 'OPV/support ship', 'landing ship', 'salvage ship', 
        'survey ship', 'minelayer', 'replenishment tanker', 'OPV/transport ship', 'support/landing ship', 
        'landing/patrol craft', 'transport ship', 'landing craft', 'tug', 'OPV/tug', 'frigate/landing ship', 'SIGINT ship', 'patrol craft/transport craft',
        'transport craft', 'support craft', 'cargo craft', 'icebreaker/OPV']
    },
    'Ground Vehicles/Artillery' : {
    'Main Battle Tanks': ['tank', 'light tank', 'training tank', 'tank chassis', 'tank destroyer'],
    'Armored Personnel Carriers (APC)': ['APC', 'APC/APV', 'APC/IFV', 'APC/CP', 'apc (Armored Personnel Carrier)', 'apc'],
    'Infantry Fighting Vehicles (IFV)': ['IFV', 'IFV/AFSV', 'IFV/APC turret', 'IFV turret'],
    'Armored Reconnaissance Vehicles (ARV)': ['ARV', 'AEV/ARV'],
    'Armored Fighting Support Vehicles (AFSV)': ['AFSV', 'AMV', 'ALV (Armored Logistics Vehicle)', 'ALV'],
    'Light Armored Vehicles (APV)': ['APV', 'armoured car'],
    'Self-Propelled Artillery': ['self-propelled gun chassis', 'self-propelled AD system', 'self-propelled gun', 'self-propelled MRL', 'self-propelled AA gun',
                                'AA gun (Anti-Aircraft Gun)', 'self-propelled mortar', 'SPG turret', 'self-propelled AA gun turret'],
    'Engineering Vehicles': ['armoured bridgelayer', 'AEV (Armored Engineering Vehicle)', 'AEV'],
    'Tank Components': ['tank turret'],
    'Artillery' : ['towed gun', 'mortar', 'coastal defence system', 'towed MRL', 'naval gun', 'anti-aircraft gun', 'ASW mortar', 'helicopter turret', 'anti-tank AV turret',
                  'APC turret', 'AA gun/SAM system', 'AA gun system', 'coastal defence gun', 'mortar turret', 'AA gun']
    },
    'Missiles/Rockets/Bombs' : {
    'Surface-to-Air Missiles (SAM)': [
        'SAM', 'portable SAM', 'mobile SAM system', 'naval SAM system', 'SAM system', 'SAM/ABM', 'ABM missile', 'ABM/SAM system', 'ASM (Anti-Ship Missile)', 'ABM system'
    ],
    'Surface-to-Surface Missiles (SSM)': [
        'SSM', 'SSM launcher', 'SSM TEL', 'SSM/ASM', 'SSM/anti-ship missile', 'anti-ship missile/SSM', 'land-attack missile'
    ],
    'Air-to-Air Missiles (AAM)': [
        'BVRAAM', 'SRAAM', 'BVRAAM/SAM'
    ],
    'Anti-Ship Missiles (ASM)': [
        'anti-ship missile', 'anti-ship missile/ASM', 'anti-ship/ASW torpedo', 'anti-ship torpedo', 'SSM/anti-ship missile', 'anti-ship/anti-radar missile',
        'naval mine/torpedo', 'anti-ship missile/ASM/SAM', 'ASM'
    ],
    'Anti-Radar Missiles': [
        'anti-radar missile', 'anti-radar missile/ASM'
    ],
    'Anti-Tank Missiles': [
        'anti-tank missile', 'anti-tank missile/ASM'
    ],
    'Guided Bombs and Rockets': [
        'guided bomb', 'guided rocket', 'guided glide bomb', 'guided rocket/SSM', 'guided rocket/ASM', 'guided shell'
    ],
    'Land-Attack Missiles': [
        'anti-ship/land-attack missile', 'land-attack missile'
    ],
    'Anti-Submarine Warfare (ASW) Weapons': [
        'ASW torpedo', 'ASW MRL', 'anti-ship/ASW torpedo', 'ASW missile', 'ASW rocket launcher'
    ],
    'Naval Rocket Launchers': [
        'naval MRL'
    ],
    'Ballistic Missiles' : ['SLBM (Submarine-Launched Ballistic Missile)', 'SLBM']    
    
    },
    'Radar/Detection Systems' : {
    'Air Search Radar': [
        'air search radar', 'AEW radar', 'air search system', 'air/sea search radar', 'air/ground surv radar'
    ],
    'Sea Search Detection Systems': [
        'sea search radar', 'ground/sea search radar', 'ASW sonar', 'SONAR', 'submarine sonar', 'AGS/MP aircraft radar'
    ],
    'Ground Surveillance Radar': [
        'ground surv radar', 'ground/sea search radar', 'air/ground surv radar', 'AGS/SIGINT system'
    ],
    'Multi-function Radar': [
        'multi-function radar', 'multi-role radar'
    ],
    'Fire Control Radar': [
        'fire control radar', 'SAM system radar', 'AGS radar', 'EO system', 'EO search/fire control', 'Naval EO system'
    ],
    'Artillery/Weapon Locating Radar': [
        'artillery locating radar'
    ],
    'Aircraft-Mounted Systems': [
        'MP aircraft radar', 'aircraft EO system', 'aircraft EO/radar system', 'combat aircraft radar', 'combat heli radar',
        'aircraft recce system'
    ],
    'Other Detection Systems': ['height-finding radar', 'recce satellite', 'surveillance satellite'
                               ]
    },
    'Specialized Equipment' : {
    'Aircraft Engines': [
        'aircraft engine', 'turbofan', 'turbojet', 'turboprop'
    ],
    'Vehicle Engines': [
        'vehicle engine', 'VEHICLE ENGINE', 'gas turbine', 'AIP engine', 'nuclear reactor', 'ship engine'
    ],
    'Refueling and Logistics Systems': [
        'air refuel system', 'AALS'
    ],
    'Intelligence and Surveillance Systems': [
        'SIGINT system', 'SIGINT aircraft', 'maritime patrol aircraft'
    ]
}
    # Add more sub-categories as needed in similar structure...
}

# The mapping functions will work the same, now with the expanded mappings.

def map_category(description):
    for category, items in category_mapping.items():
        if description in items:
            return category
    return 'Unknown'

def map_sub_category(description):
    for category, sub_categories in sub_category_mapping.items():
        for sub_category, items in sub_categories.items():
            if description in items:
                return sub_category
    return 'Unknown'

# Applying the mapping functions
df['Category'] = df['Weapon description'].apply(map_category)
df['Sub-Category'] = df['Weapon description'].apply(map_sub_category)



In [204]:
# Creating a DataFrame with three columns: 'Description', 'Category', and 'Sub-category' for exporting to csv for sharing 
csv_mapping_data = {'Category': [], 'Sub-category': [], 'Description': []}

for category, subcategories in sub_category_mapping.items():
    for sub_category, descriptions in subcategories.items():
        for description in descriptions:
            csv_mapping_data['Category'].append(category)
            csv_mapping_data['Sub-category'].append(sub_category)
            csv_mapping_data['Description'].append(description)

# Generating the DataFrame
df_three_columns = pd.DataFrame(csv_mapping_data)
df_three_columns.head()

Unnamed: 0,Category,Sub-category,Description
0,Aircraft,Fighter/Combat Aircraft,fighter aircraft
1,Aircraft,Fighter/Combat Aircraft,trainer/combat aircraft
2,Aircraft,Fighter/Combat Aircraft,FGA aircraft
3,Aircraft,Fighter/Combat Aircraft,FGA/EW aircraft
4,Aircraft,Fighter/Combat Aircraft,ground attack aircraft


In [205]:
df_three_columns.to_csv('final_data/description_mapping.csv')

In [206]:
df[df['Sub-Category']=='Unknown']['Weapon description'].unique()

array([nan], dtype=object)

In [207]:
# drop nonsense columns
df.drop([' .1', ' .2', ' '], axis=1, inplace=True)
df.head()

Unnamed: 0,Recipient,Supplier,Year of order,Number ordered,Weapon designation,Weapon description,Number delivered,Year(s) of delivery,status,Comments,SIPRI TIV per unit,SIPRI TIV for total order,SIPRI TIV of delivered weapons,Category,Sub-Category
0,Afghanistan,Brazil,2017.0,6.0,EMB-314 Super Tucano,trainer/combat aircraft,6.0,2018,New,A-29B version; financed by USA; ordered via US...,4.5,27.0,27.0,Aircraft,Fighter/Combat Aircraft
1,Afghanistan,Russia,2004.0,6.0,aircraft engine,aircraft engine,6.0,2005,New,Klimov TV-3-117 turboshaft; spare engines for ...,0.5,3.0,3.0,Specialized Equipment,Aircraft Engines
2,Afghanistan,Soviet Union,1977.0,26.0,AI-25,turbofan,26.0,1977; 1978,New,For 26 L-39 trainer aircraft from Czechoslovak...,0.6,15.6,15.6,Specialized Equipment,Aircraft Engines
3,Afghanistan,Soviet Union,1988.0,2300.0,R-17 Elbrus,SSM,2300.0,1988; 1989; 1990; 1991,New,Mainly for use against Mujahideen rebel forces,1.25,2875.0,2875.0,Missiles/Rockets/Bombs,Surface-to-Surface Missiles (SSM)
4,Afghanistan,United Kingdom,2009.0,2.0,Mi-17,transport helicopter,2.0,2010,Second hand,Second-hand Mi-17 version bought by UK for 'Pr...,2.9,5.8,5.8,Helicopters,Transport/Utility Helicopters


In [208]:
filtered_df = df[df['Year(s) of delivery'].str.len() > 4]
print(filtered_df['Year(s) of delivery'].unique())



['1977; 1978' '1988; 1989; 1990; 1991' '1980; 1981; 1982' ...
 '1968; 1969; 1970; 1971; 1972; 1973; 1974; 1975; 1976; 1977; 1978; 1979; 1980; 1981; 1982'
 '1963; 1964; 1965; 1966; 1967; 1968; 1969; 1970; 1971; 1972; 1973; 1974; 1975; 1976; 1977; 1978; 1979; 1980; 1981; 1982; 1983; 1984; 1985'
 '1964; 1965; 1966; 1967; 1968; 1969; 1970; 1971; 1972; 1973; 1974; 1975; 1976; 1977; 1978; 1979; 1980; 1981; 1982; 1983']


In [209]:
def count_years_delivered(years_of_delivery):
    # Count semicolons and return one less than the count to count number of unique years weapons delivered in
    if isinstance(years_of_delivery, str):
        # Count semicolons if the entry is a string
        return years_of_delivery.count(';')+1
    else:
        # Return 0 for non-string entries (like NaN)
        return 0

In [210]:
df['Num years Delivered In'] = df['Year(s) of delivery'].apply(count_years_delivered)
df.head()

Unnamed: 0,Recipient,Supplier,Year of order,Number ordered,Weapon designation,Weapon description,Number delivered,Year(s) of delivery,status,Comments,SIPRI TIV per unit,SIPRI TIV for total order,SIPRI TIV of delivered weapons,Category,Sub-Category,Num years Delivered In
0,Afghanistan,Brazil,2017.0,6.0,EMB-314 Super Tucano,trainer/combat aircraft,6.0,2018,New,A-29B version; financed by USA; ordered via US...,4.5,27.0,27.0,Aircraft,Fighter/Combat Aircraft,1
1,Afghanistan,Russia,2004.0,6.0,aircraft engine,aircraft engine,6.0,2005,New,Klimov TV-3-117 turboshaft; spare engines for ...,0.5,3.0,3.0,Specialized Equipment,Aircraft Engines,1
2,Afghanistan,Soviet Union,1977.0,26.0,AI-25,turbofan,26.0,1977; 1978,New,For 26 L-39 trainer aircraft from Czechoslovak...,0.6,15.6,15.6,Specialized Equipment,Aircraft Engines,2
3,Afghanistan,Soviet Union,1988.0,2300.0,R-17 Elbrus,SSM,2300.0,1988; 1989; 1990; 1991,New,Mainly for use against Mujahideen rebel forces,1.25,2875.0,2875.0,Missiles/Rockets/Bombs,Surface-to-Surface Missiles (SSM),4
4,Afghanistan,United Kingdom,2009.0,2.0,Mi-17,transport helicopter,2.0,2010,Second hand,Second-hand Mi-17 version bought by UK for 'Pr...,2.9,5.8,5.8,Helicopters,Transport/Utility Helicopters,1


In [211]:
# Get first and last year of delivery in seperate cols

In [212]:
def get_first_year(years_of_delivery):
    if isinstance(years_of_delivery, str):
        # Split the string by semicolons and get the first year, stripping any spaces
        return years_of_delivery.split(';')[0].strip()
    else:
        # Return NaN for non-string entries (like NaN)
        return float('nan')

def get_last_year(years_of_delivery):
    if isinstance(years_of_delivery, str):
        # Split the string by semicolons and get the last year, stripping any spaces
        return years_of_delivery.split(';')[-1].strip()
    else:
        # Return NaN for non-string entries (like NaN)
        return float('nan')

In [213]:
df['First Year'] = df['Year(s) of delivery'].apply(get_first_year)
df['Last Year'] = df['Year(s) of delivery'].apply(get_last_year)
df.head(10)

Unnamed: 0,Recipient,Supplier,Year of order,Number ordered,Weapon designation,Weapon description,Number delivered,Year(s) of delivery,status,Comments,SIPRI TIV per unit,SIPRI TIV for total order,SIPRI TIV of delivered weapons,Category,Sub-Category,Num years Delivered In,First Year,Last Year
0,Afghanistan,Brazil,2017.0,6.0,EMB-314 Super Tucano,trainer/combat aircraft,6.0,2018,New,A-29B version; financed by USA; ordered via US...,4.5,27.0,27.0,Aircraft,Fighter/Combat Aircraft,1,2018,2018
1,Afghanistan,Russia,2004.0,6.0,aircraft engine,aircraft engine,6.0,2005,New,Klimov TV-3-117 turboshaft; spare engines for ...,0.5,3.0,3.0,Specialized Equipment,Aircraft Engines,1,2005,2005
2,Afghanistan,Soviet Union,1977.0,26.0,AI-25,turbofan,26.0,1977; 1978,New,For 26 L-39 trainer aircraft from Czechoslovak...,0.6,15.6,15.6,Specialized Equipment,Aircraft Engines,2,1977,1978
3,Afghanistan,Soviet Union,1988.0,2300.0,R-17 Elbrus,SSM,2300.0,1988; 1989; 1990; 1991,New,Mainly for use against Mujahideen rebel forces,1.25,2875.0,2875.0,Missiles/Rockets/Bombs,Surface-to-Surface Missiles (SSM),4,1988,1991
4,Afghanistan,United Kingdom,2009.0,2.0,Mi-17,transport helicopter,2.0,2010,Second hand,Second-hand Mi-17 version bought by UK for 'Pr...,2.9,5.8,5.8,Helicopters,Transport/Utility Helicopters,1,2010,2010
5,Afghanistan,Soviet Union,1980.0,1000.0,Fleyta/Skorpion,anti-tank missile,1000.0,1980; 1981; 1982,New,For Mi-24A helicopters,0.02,20.0,20.0,Missiles/Rockets/Bombs,Anti-Tank Missiles,3,1980,1982
6,Afghanistan,Soviet Union,1988.0,250.0,R-60,SRAAM,250.0,1988; 1989; 1990,New,For MiG-23MF and MiG-21bis combat aircraft,0.09,22.5,22.5,Missiles/Rockets/Bombs,Air-to-Air Missiles (AAM),3,1988,1990
7,Afghanistan,Soviet Union,1987.0,40.0,MiG-21MF,fighter aircraft,40.0,1987; 1988; 1989; 1990,Second hand,Second-hand; probably incl some MiG-21UM,4.28,171.2,171.2,Aircraft,Fighter/Combat Aircraft,4,1987,1990
8,Afghanistan,Soviet Union,1985.0,100.0,Strela-3,portable SAM,100.0,1986; 1987; 1988; 1989,New,Probably incl for Mi-24 combat helicopters,0.06,6.0,6.0,Missiles/Rockets/Bombs,Surface-to-Air Missiles (SAM),4,1986,1989
9,Afghanistan,Soviet Union,1979.0,50.0,Mi-8T,transport helicopter,50.0,1979; 1980; 1981,New,,6.0,300.0,300.0,Helicopters,Transport/Utility Helicopters,3,1979,1981


Add WDI indicators

In [214]:
wdi_df = pd.read_csv('raw_data/WDICSV.csv') 
country_df = pd.read_csv('raw_data/WDICountry.csv')


In [215]:
wdi_df = wdi_df.merge(country_df[['Table Name','Region','Income Group', 'Long Name']], how='left', left_on='Country Name', right_on = 'Table Name')
wdi_df = wdi_df.drop('Table Name', axis=1)

In [216]:
wdi_df_long = pd.melt(wdi_df, id_vars=['Country Name', 'Country Code', 'Indicator Name', 'Indicator Code', 'Region', 'Income Group'],
                      var_name = 'Year',
                      value_name = 'indicator_value')
wdi_df_long.head()

Unnamed: 0,Country Name,Country Code,Indicator Name,Indicator Code,Region,Income Group,Year,indicator_value
0,Africa Eastern and Southern,AFE,Access to clean fuels and technologies for coo...,EG.CFT.ACCS.ZS,,,1960,
1,Africa Eastern and Southern,AFE,Access to clean fuels and technologies for coo...,EG.CFT.ACCS.RU.ZS,,,1960,
2,Africa Eastern and Southern,AFE,Access to clean fuels and technologies for coo...,EG.CFT.ACCS.UR.ZS,,,1960,
3,Africa Eastern and Southern,AFE,Access to electricity (% of population),EG.ELC.ACCS.ZS,,,1960,
4,Africa Eastern and Southern,AFE,"Access to electricity, rural (% of rural popul...",EG.ELC.ACCS.RU.ZS,,,1960,


In [217]:
indicators = [
    "Military expenditure (% of GDP)",
    "Military expenditure (% of general government expenditure)",
    "Military expenditure (current LCU)",
    "Armed Forces Personnel (% of total labor force)",
    "Armed Forces personnel, total",
    "Arms exports (SIPRI trend indicator values)",
    "Arms Imports (SIPRI trend indicator values))",
    "Military expenditure (Current USD)",

]

In [218]:
def pivot_and_save_indicators(df, indicators, csv_name=None):
    # Filter the DataFrame to include only the specified indicators
    filtered_df = df[df['Indicator Name'].isin(indicators)]
    
    # Pivot the DataFrame
    pivoted_df = filtered_df.pivot(
        index=["Country Name", "Country Code", "Year", "Region", "Income Group"],
        columns="Indicator Name",
        values="indicator_value"
    ).reset_index()
    
    if csv_name != None:
    # Save the pivoted DataFrame to a CSV file
        pivoted_df.to_csv(csv_name, index=False)
    
    # Return the pivoted DataFrame
    return pivoted_df

In [219]:
arms_indicators = pivot_and_save_indicators(wdi_df_long, indicators)

In [220]:
non_region_df = arms_indicators[arms_indicators['Region'].notna()]
non_region_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 13845 entries, 0 to 17289
Data columns (total 9 columns):
 #   Column                                                      Non-Null Count  Dtype 
---  ------                                                      --------------  ----- 
 0   Country Name                                                13845 non-null  object
 1   Country Code                                                13845 non-null  object
 2   Year                                                        13845 non-null  object
 3   Region                                                      13845 non-null  object
 4   Income Group                                                13780 non-null  object
 5   Arms exports (SIPRI trend indicator values)                 2453 non-null   object
 6   Military expenditure (% of GDP)                             7665 non-null   object
 7   Military expenditure (% of general government expenditure)  4482 non-null   object
 8   Military exp

In [221]:
# Convert 'Column1' to numeric with errors coerced to NaN
non_region_df['Year'] = pd.to_numeric(non_region_df['Year'], errors='coerce')

# Drop rows where 'Column1' is NaN
non_region_df = non_region_df.dropna(subset=['Year'])

# Convert the column to integer type
non_region_df['Year'] = non_region_df['Year'].astype(int)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  non_region_df['Year'] = pd.to_numeric(non_region_df['Year'], errors='coerce')


In [222]:
non_region_df = non_region_df[non_region_df['Year']>=2000]

In [223]:
# Filter rows where 'Region' is not NaN
filtered_df = non_region_df[non_region_df['Region'].notna()]

# Get unique values from the 'Country Name' column
unique_countries_wdi = filtered_df['Country Name'].unique()

# Convert to a list (optional)
unique_countries_list = unique_countries_wdi.tolist()

print(unique_countries_list)

['Afghanistan', 'Albania', 'Algeria', 'American Samoa', 'Andorra', 'Angola', 'Antigua and Barbuda', 'Argentina', 'Armenia', 'Aruba', 'Australia', 'Austria', 'Azerbaijan', 'Bahamas, The', 'Bahrain', 'Bangladesh', 'Barbados', 'Belarus', 'Belgium', 'Belize', 'Benin', 'Bermuda', 'Bhutan', 'Bolivia', 'Bosnia and Herzegovina', 'Botswana', 'Brazil', 'British Virgin Islands', 'Brunei Darussalam', 'Bulgaria', 'Burkina Faso', 'Burundi', 'Cabo Verde', 'Cambodia', 'Cameroon', 'Canada', 'Cayman Islands', 'Central African Republic', 'Chad', 'Channel Islands', 'Chile', 'China', 'Colombia', 'Comoros', 'Congo, Dem. Rep.', 'Congo, Rep.', 'Costa Rica', 'Croatia', 'Cuba', 'Cyprus', 'Czechia', 'Denmark', 'Djibouti', 'Dominica', 'Dominican Republic', 'Ecuador', 'Egypt, Arab Rep.', 'El Salvador', 'Equatorial Guinea', 'Eritrea', 'Estonia', 'Eswatini', 'Ethiopia', 'Faroe Islands', 'Fiji', 'Finland', 'France', 'French Polynesia', 'Gabon', 'Gambia, The', 'Georgia', 'Germany', 'Ghana', 'Gibraltar', 'Greece', 'Gre

In [224]:
tableau_countries_not_in_indicators_data = [item for item in tableau_identified_countries if item not in unique_countries_list]
tableau_countries_not_in_indicators_data

['Russia',
 'Slovakia',
 'Bosnia-Herzegovina',
 'Turkiye',
 'Egypt',
 'UAE',
 'Syria',
 'South Korea',
 'Bahamas',
 'Taiwan',
 "Cote d'Ivoire",
 'Venezuela',
 'Iran',
 'Brunei',
 'Kyrgyzstan',
 'Congo',
 'DR Congo',
 'North Korea',
 'eSwatini',
 'Gambia',
 'Laos',
 'Micronesia',
 'Northern Cyprus',
 'Palestine',
 'Saint Kitts and Nevis',
 'Saint Vincent',
 'Western Sahara',
 'Yemen']

In [225]:
mapping_wbi_to_sipri = {
    'Russian Federation': 'Russia',
 'Slovak Republic': 'Slovakia',
 'Bosnia and Herzegovina': 'Bosnia-Herzegovina',
 'Turkey': 'Turkiye',
 'Egypt, Arab Rep.': 'Egypt',
 'United Arab Emirates': 'UAE',
 'Syrian Arab Republic': 'Syria',
 'Korea, Rep.': 'South Korea',
 'Bahamas, The': 'Bahamas',
 'Macao SAR, China': 'Taiwan',
 'Côte d’Ivoire': "Cote d'Ivoire",
 'Venezuela, RB': 'Venezuela',
 'Iran, Islamic Rep.': 'Iran',
 'Brunei Darussalam': 'Brunei',
 'Kyrgyz Republic': 'Kyrgyzstan',
 'Congo, Rep.': 'Congo',
 'Congo, Dem. Rep.': 'DR Congo',
 "Korea, Dem. People's Rep.": 'North Korea',
 'Eswatini': 'eSwatini',
 'Gambia, The': 'Gambia',
 'Lao PDR': 'Laos',
 'Micronesia, Fed. Sts.': 'Micronesia',
 'Cyprus': 'Northern Cyprus',
 'West Bank and Gaza': 'Palestine',
 'St. Kitts and Nevis': 'Saint Kitts and Nevis',
 'St. Vincent and the Grenadines': 'Saint Vincent',
 'Yemen, Rep.': 'Yemen'}
# {
#     "Russia": "Russian Federation",
#     "Slovakia": "Slovak Republic",
#     "Bosnia-Herzegovina": "Bosnia and Herzegovina",
#     "Turkiye": "Turkey",  # Adjusted for Turkiye as it isn't explicitly listed
#     "Egypt": "Egypt, Arab Rep.",
#     "UAE": "United Arab Emirates",
#     "Syria": "Syrian Arab Republic",
#     "South Korea": "Korea, Rep.",
#     "Bahamas": "Bahamas, The",
#     "Taiwan": "Macao SAR, China",  # Proxy since Taiwan isn't listed
#     "Cote d'Ivoire": "Côte d’Ivoire",  # Adjusted for naming
#     "Venezuela": "Venezuela, RB",
#     "Iran": "Iran, Islamic Rep.",
#     "Brunei": "Brunei Darussalam",
#     "Kyrgyzstan": "Kyrgyz Republic",
#     "Congo": "Congo, Rep.",
#     "DR Congo": "Congo, Dem. Rep.",
#     "North Korea": "Korea, Dem. People's Rep.",
#     "eSwatini": "Eswatini",
#     "Gambia": "Gambia, The",
#     "Laos": "Lao PDR",
#     "Micronesia": "Micronesia, Fed. Sts.",
#     "Northern Cyprus": "Cyprus",  # Manual mapping, as Northern Cyprus is not explicitly listed
#     "Palestine": "West Bank and Gaza",
#     "Saint Kitts and Nevis": "St. Kitts and Nevis",
#     "Saint Vincent": "St. Vincent and the Grenadines",
#     "Western Sahara": None,  # No clear match
#     "Yemen": "Yemen, Rep."
# }

In [226]:
non_region_df['sipri_country_mapping'] = non_region_df['Country Name'].apply(
    lambda x: x if x not in mapping_wbi_to_sipri else mapping_wbi_to_sipri[x]
)

In [227]:
non_region_df.head()

Indicator Name,Country Name,Country Code,Year,Region,Income Group,Arms exports (SIPRI trend indicator values),Military expenditure (% of GDP),Military expenditure (% of general government expenditure),Military expenditure (current LCU),sipri_country_mapping
40,Afghanistan,AFG,2000,South Asia,Low income,,,,,Afghanistan
41,Afghanistan,AFG,2001,South Asia,Low income,,,,,Afghanistan
42,Afghanistan,AFG,2002,South Asia,Low income,,,,,Afghanistan
43,Afghanistan,AFG,2003,South Asia,Low income,,,,,Afghanistan
44,Afghanistan,AFG,2004,South Asia,Low income,,2.431254,16.134336,5986000000.0,Afghanistan


In [228]:
non_region_df.to_csv('final_data/arms_indicator_data.csv')

In [229]:
df.head()

Unnamed: 0,Recipient,Supplier,Year of order,Number ordered,Weapon designation,Weapon description,Number delivered,Year(s) of delivery,status,Comments,SIPRI TIV per unit,SIPRI TIV for total order,SIPRI TIV of delivered weapons,Category,Sub-Category,Num years Delivered In,First Year,Last Year
0,Afghanistan,Brazil,2017.0,6.0,EMB-314 Super Tucano,trainer/combat aircraft,6.0,2018,New,A-29B version; financed by USA; ordered via US...,4.5,27.0,27.0,Aircraft,Fighter/Combat Aircraft,1,2018,2018
1,Afghanistan,Russia,2004.0,6.0,aircraft engine,aircraft engine,6.0,2005,New,Klimov TV-3-117 turboshaft; spare engines for ...,0.5,3.0,3.0,Specialized Equipment,Aircraft Engines,1,2005,2005
2,Afghanistan,Soviet Union,1977.0,26.0,AI-25,turbofan,26.0,1977; 1978,New,For 26 L-39 trainer aircraft from Czechoslovak...,0.6,15.6,15.6,Specialized Equipment,Aircraft Engines,2,1977,1978
3,Afghanistan,Soviet Union,1988.0,2300.0,R-17 Elbrus,SSM,2300.0,1988; 1989; 1990; 1991,New,Mainly for use against Mujahideen rebel forces,1.25,2875.0,2875.0,Missiles/Rockets/Bombs,Surface-to-Surface Missiles (SSM),4,1988,1991
4,Afghanistan,United Kingdom,2009.0,2.0,Mi-17,transport helicopter,2.0,2010,Second hand,Second-hand Mi-17 version bought by UK for 'Pr...,2.9,5.8,5.8,Helicopters,Transport/Utility Helicopters,1,2010,2010


In [230]:
df = df.drop(columns=['.1', '.2'], errors='ignore')

df['Year of order'] = df['Year of order'].astype(int)

In [231]:
df

Unnamed: 0,Recipient,Supplier,Year of order,Number ordered,Weapon designation,Weapon description,Number delivered,Year(s) of delivery,status,Comments,SIPRI TIV per unit,SIPRI TIV for total order,SIPRI TIV of delivered weapons,Category,Sub-Category,Num years Delivered In,First Year,Last Year
0,Afghanistan,Brazil,2017,6.0,EMB-314 Super Tucano,trainer/combat aircraft,6.0,2018,New,A-29B version; financed by USA; ordered via US...,4.50,27.0,27.0,Aircraft,Fighter/Combat Aircraft,1,2018,2018
1,Afghanistan,Russia,2004,6.0,aircraft engine,aircraft engine,6.0,2005,New,Klimov TV-3-117 turboshaft; spare engines for ...,0.50,3.0,3.0,Specialized Equipment,Aircraft Engines,1,2005,2005
2,Afghanistan,Soviet Union,1977,26.0,AI-25,turbofan,26.0,1977; 1978,New,For 26 L-39 trainer aircraft from Czechoslovak...,0.60,15.6,15.6,Specialized Equipment,Aircraft Engines,2,1977,1978
3,Afghanistan,Soviet Union,1988,2300.0,R-17 Elbrus,SSM,2300.0,1988; 1989; 1990; 1991,New,Mainly for use against Mujahideen rebel forces,1.25,2875.0,2875.0,Missiles/Rockets/Bombs,Surface-to-Surface Missiles (SSM),4,1988,1991
4,Afghanistan,United Kingdom,2009,2.0,Mi-17,transport helicopter,2.0,2010,Second hand,Second-hand Mi-17 version bought by UK for 'Pr...,2.90,5.8,5.8,Helicopters,Transport/Utility Helicopters,1,2010,2010
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29053,Zimbabwe,Russia,1998,6.0,Mi-24P/Mi-35P,combat helicopter,6.0,1999,Second hand,Second-hand; bought for use in DRC against reb...,3.60,21.6,21.6,Helicopters,Combat Helicopters,1,1999,1999
29054,Zimbabwe,Italy,1967,20.0,Model-56 105mm,towed gun,20.0,1967,New,Supplier uncertain,0.45,9.0,9.0,Ground Vehicles/Artillery,Artillery,1,1967,1967
29055,Zimbabwe,Soviet Union,1975,15.0,T-34-85,tank,15.0,1975,Second hand,Second-hand; supplier uncertain,0.38,5.7,5.7,Ground Vehicles/Artillery,Main Battle Tanks,1,1975,1975
29056,Zimbabwe,China,2004,5.0,Type-89/ZSD-89,APC,5.0,2004,New,ARV version,0.30,1.5,1.5,Ground Vehicles/Artillery,Armored Personnel Carriers (APC),1,2004,2004


In [232]:
df.to_csv('final_data/cleaned_transfer_data.csv')

In [233]:
sipri_wdi_merge = df.merge(non_region_df, how='left', left_on=['Recipient','Year of order'], right_on=['sipri_country_mapping','Year'])

In [234]:
sipri_wdi_merge[(sipri_wdi_merge['sipri_country_mapping'].isna())&(sipri_wdi_merge['Year of order']>2000)]

Unnamed: 0,Recipient,Supplier,Year of order,Number ordered,Weapon designation,Weapon description,Number delivered,Year(s) of delivery,status,Comments,...,Country Name,Country Code,Year,Region,Income Group,Arms exports (SIPRI trend indicator values),Military expenditure (% of GDP),Military expenditure (% of general government expenditure),Military expenditure (current LCU),sipri_country_mapping
150,African Union**,South Africa,2021,24.0,Puma M-36,APC,24.0,2022,New,For AU peacekeeping forces in Somalia (ATMIS);...,...,,,,,,,,,,
151,African Union**,South Africa,2005,60.0,Mamba,APC/APV,60.0,2006,New,For use by AU/AMIS peacekeeping forces in Darf...,...,,,,,,,,,,
152,African Union**,South Africa,2007,68.0,Casspir,APC,68.0,2008,Second hand but modernized,Second-hand but modernized before delivery; fo...,...,,,,,,,,,,
153,African Union**,unknown supplier(s),2004,15.0,Mi-17,transport helicopter,15.0,2005,Second hand,Leased from and operated by civilian company f...,...,,,,,,,,,,
154,African Union**,Egypt,2019,10.0,Fahd,APC,10.0,2019,New,For Burundi forces with African Union Mission ...,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
27896,unknown recipient(s),Israel,2007,4.0,CoMPASS,EO system,4.0,2010; 2011,New,$37 m deal; for combat and transport helicopte...,...,,,,,,,,,,
27898,unknown recipient(s),Czechia,2022,8.0,One 150,UAV,8.0,2023,New,EUR6.8 m deal; recipient is non-European state...,...,,,,,,,,,,
27899,unknown recipient(s),South Africa,2021,,Maatla,APC,0.0,,New,,...,,,,,,,,,,
27900,unknown recipient(s),Israel,2021,,Hermes-900,UAV,0.0,,New,$300 m deal; recipient is Asian state,...,,,,,,,,,,


In [235]:
sipri_wdi_merge[(sipri_wdi_merge['sipri_country_mapping'].isna())&(sipri_wdi_merge['Year of order']>2000)]['Recipient'].value_counts()

Recipient
Turkiye                                  141
unknown recipient(s)                      61
Cote d'Ivoire                             44
Libya HoR*                                24
Cyprus                                    23
NATO**                                    13
Northern Alliance (Afghanistan)*          12
African Union**                           11
United Nations**                           9
Hezbollah (Lebanon)*                       8
Syria rebels*                              8
Libya GNC                                  6
Houthi rebels (Yemen)*                     5
Hamas (Palestine)*                         4
Ukraine Rebels*                            4
Darfur rebels (Sudan)*                     3
unknown rebel group*                       3
LTTE (Sri Lanka)*                          2
NTC (Libya)*                               2
United Wa State (Myanmar)*                 1
UIC (Somalia)*                             1
PKK (Turkiye)*                             1


In [236]:
non_region_df[(non_region_df['Year']==2007)&(non_region_df['sipri_country_mapping']=='Turkey')]

Indicator Name,Country Name,Country Code,Year,Region,Income Group,Arms exports (SIPRI trend indicator values),Military expenditure (% of GDP),Military expenditure (% of general government expenditure),Military expenditure (current LCU),sipri_country_mapping


## Create csv with one row per year delivered

In [237]:
delivered_df = df.dropna(subset=['First Year'])

delivered_df['First Year'] = delivered_df['First Year'].astype(int)

# Expand rows based on `Num years Delivered In`
annual_delivery_df = delivered_df.loc[delivered_df.index.repeat(delivered_df['Num years Delivered In'])].copy()

# Calculate the `year_delivered` for each expanded row
annual_delivery_df['year_delivered'] = annual_delivery_df.groupby(level=0).cumcount() + annual_delivery_df['First Year']
annual_delivery_df['num_delivered_in_year'] = annual_delivery_df['Number delivered']/annual_delivery_df['Num years Delivered In']
annual_delivery_df['SIPRI_TIV_delivered_in_year'] = annual_delivery_df['num_delivered_in_year']*annual_delivery_df['SIPRI TIV per unit']


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  delivered_df['First Year'] = delivered_df['First Year'].astype(int)


In [238]:
annual_delivery_df = annual_delivery_df.rename(columns={'Number delivered':'Total number delivered'})
annual_delivery_df.head()

Unnamed: 0,Recipient,Supplier,Year of order,Number ordered,Weapon designation,Weapon description,Total number delivered,Year(s) of delivery,status,Comments,...,SIPRI TIV for total order,SIPRI TIV of delivered weapons,Category,Sub-Category,Num years Delivered In,First Year,Last Year,year_delivered,num_delivered_in_year,SIPRI_TIV_delivered_in_year
0,Afghanistan,Brazil,2017,6.0,EMB-314 Super Tucano,trainer/combat aircraft,6.0,2018,New,A-29B version; financed by USA; ordered via US...,...,27.0,27.0,Aircraft,Fighter/Combat Aircraft,1,2018,2018,2018,6.0,27.0
1,Afghanistan,Russia,2004,6.0,aircraft engine,aircraft engine,6.0,2005,New,Klimov TV-3-117 turboshaft; spare engines for ...,...,3.0,3.0,Specialized Equipment,Aircraft Engines,1,2005,2005,2005,6.0,3.0
2,Afghanistan,Soviet Union,1977,26.0,AI-25,turbofan,26.0,1977; 1978,New,For 26 L-39 trainer aircraft from Czechoslovak...,...,15.6,15.6,Specialized Equipment,Aircraft Engines,2,1977,1978,1977,13.0,7.8
2,Afghanistan,Soviet Union,1977,26.0,AI-25,turbofan,26.0,1977; 1978,New,For 26 L-39 trainer aircraft from Czechoslovak...,...,15.6,15.6,Specialized Equipment,Aircraft Engines,2,1977,1978,1978,13.0,7.8
3,Afghanistan,Soviet Union,1988,2300.0,R-17 Elbrus,SSM,2300.0,1988; 1989; 1990; 1991,New,Mainly for use against Mujahideen rebel forces,...,2875.0,2875.0,Missiles/Rockets/Bombs,Surface-to-Surface Missiles (SSM),4,1988,1991,1988,575.0,718.75


In [239]:
annual_delivery_df.to_csv('final_data/annual_delivery_data.csv')