**This is the simplified Opensantions data in csv format**

In [1]:
#import packages
import requests
import pandas as pd
import numpy as np
import datetime
import os
import re

# Retrieval of the data

In [2]:
#I can access the data via pandas very easily, output already in tabular form
#data = pd.read_csv('https://data.opensanctions.org/datasets/20240121/us_ofac_sdn/targets.simple.csv')
#data

In [3]:
#create urls for all relevant dates
date_list = pd.date_range(start='20230717',end='20231231',freq='D').strftime('%Y%m%d')
date_list

Index(['20230717', '20230718', '20230719', '20230720', '20230721', '20230722',
       '20230723', '20230724', '20230725', '20230726',
       ...
       '20231222', '20231223', '20231224', '20231225', '20231226', '20231227',
       '20231228', '20231229', '20231230', '20231231'],
      dtype='object', length=168)

In [4]:
#get all the urls for all the dates we need the data for
#make each day an individual list so we can compare and match them
websites = []

for i in date_list:
    test = 'https://data.opensanctions.org/datasets/'+(i)+'/default/targets.simple.csv'
    websites.append(test)
print(websites)

['https://data.opensanctions.org/datasets/20230717/default/targets.simple.csv', 'https://data.opensanctions.org/datasets/20230718/default/targets.simple.csv', 'https://data.opensanctions.org/datasets/20230719/default/targets.simple.csv', 'https://data.opensanctions.org/datasets/20230720/default/targets.simple.csv', 'https://data.opensanctions.org/datasets/20230721/default/targets.simple.csv', 'https://data.opensanctions.org/datasets/20230722/default/targets.simple.csv', 'https://data.opensanctions.org/datasets/20230723/default/targets.simple.csv', 'https://data.opensanctions.org/datasets/20230724/default/targets.simple.csv', 'https://data.opensanctions.org/datasets/20230725/default/targets.simple.csv', 'https://data.opensanctions.org/datasets/20230726/default/targets.simple.csv', 'https://data.opensanctions.org/datasets/20230727/default/targets.simple.csv', 'https://data.opensanctions.org/datasets/20230728/default/targets.simple.csv', 'https://data.opensanctions.org/datasets/20230729/d

#first check for missing days
for site in websites:
    try:
        response = requests.get(site)
        response.raise_for_status()  # Raises an HTTPError for bad response
    except requests.exceptions.RequestException as e:
        print(f"Error accessing {site}: {e}")
        continue

In [5]:
#now loop over the batches to get all the data and append the dataframes to one another
#this gives us a list of dataframes
#the loop must contain a date identifyer as a new column so each dataframe is marked with the retrieval date that is then needed to match the different days against each other
entities_list = []
date_pattern = r'/datasets/(\d{8})/'
for site in websites:
    response = requests.get(site)
    if response.status_code != 200:
        continue
    data = pd.read_csv(site, low_memory=False)
    match = re.search(date_pattern, site) #extract the date from the url
    if match:
        date = match.group(1)
        data['date_stamp'] = date #set an individual date stamp for each dataframe matching the url date
        #ata =  data.set_index('date_stamp') #set as an index
    entities_list.append(data)
res = pd.concat(entities_list)  # concatenate list of dataframes

In [6]:
res

Unnamed: 0,id,schema,name,aliases,birth_date,countries,addresses,identifiers,sanctions,phones,emails,dataset,first_seen,last_seen,last_change,date_stamp
0,NK-224TRezPqwzhQZ37exWxtX,Person,SANAVBARI NIKITENKO,,1992-06-28,ru;tj,,,"""Red Notice - participation in the activity of...",,,INTERPOL Red Notices,2023-05-06T06:10:15,2023-07-17T18:02:42,2023-06-06T16:26:42,20230717
1,NK-228ZdYZVXaZBSBgVwapnks,Company,"Private enterprise ""Master-SG""","""ПП """"МАГІСТАР-СГ"""""";""ПРИВАТНЕ ПІДПРИЄМСТВО """"...",,ua,"79034, Ukraine, Lviv region, Lviv city, str. N...",42206417,169/2021 - valid - 2021-04-15 - 2024-04-21;Ука...,,,Ukraine Consolidated State Registry;Ukraine NA...,2023-04-20T10:50:14,2023-07-17T18:13:52,2023-07-07T12:23:04,20230717
2,NK-228jBYSTdUSvbZvsKsiHh6,Company,"АКЦИОНЕРНОЕ ОБЩЕСТВО ""ЭЛЕКТРОАГРЕГАТ""","""Joint-stock company """"Electroagregat"""""";""Акци...",,ru,"305022, Russian Federation, Kursk region, Kurs...",1024600965531;1025400524313;4631005223;4632010...,192/2023 - valid - 2023-04-01 - 2033-04-01;Ука...,,,Russian Unified State Register of Legal Entiti...,2023-04-20T10:50:14,2023-07-17T18:13:52,2023-05-22T10:22:17,20230717
3,NK-22FBSypYXKBCkX2JoWwgrk,Company,"GRUPO MECANICA DEL VUELO SISTEMAS, S.A.U.","GRUPO MECÁNICA DEL VUELO SISTEMAS, S.A.U.;GRUP...",,es;vn,"ISAAC NEWTON 11, PARQUE TECNOLOGICO DE MADRID,...",A83135111,CROSS-DEBARMENT: WBG - 2021-03-02 - 2024-09-01...,,,African Development Bank Debarred Entities;Asi...,2023-04-20T12:18:15,2023-07-17T14:59:43,2023-04-20T12:52:02,20230717
4,NK-22HtK7WrxZ2sU3rmhz6PuZ,Person,Michael Kuajien,"KUAJIEN, Michael;Michael Kuajian;Michael Kuaji...",1979-01-01,ke;ss,Nairobi Kenya,,SDN List - Executive Order 13818 (Global Magni...,,,US OFAC Specially Designated Nationals (SDN) L...,2023-04-20T09:52:21,2023-07-17T18:13:53,2023-04-20T10:27:20,20230717
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
550942,wbdeb-811412,LegalEntity,MULTI-TECH CONSULT (PTY) LTD.,,,bw,"PLOT 20746, BLOCK 3, INDUSTRIAL, GABORONE, Bot...",,Fraudulent Practices - 2023 - 2027,,,WorldBank Debarred Providers,2023-12-06T15:03:01,2023-12-31T15:03:01,2023-12-06T15:03:01,20231231
550943,wbdeb-811413,LegalEntity,MR. PETER LAMBILEKI,,,bw,"PLOT 20746, BLOCK 3, INDUSTRIAL, GABORONE, Bot...",,Fraudulent Practices - 2023 - 2027,,,WorldBank Debarred Providers,2023-12-06T15:03:01,2023-12-31T15:03:01,2023-12-06T15:03:01,20231231
550944,wbdeb-816302,LegalEntity,LEI SITAO,,,cn,"NO.6 EPANG 1ST ROAD, WEIYANG DISTRICT, XI'AN C...",,Cross Debarment: ADB - 2020 - 2999-12-31,,,WorldBank Debarred Providers,2023-12-22T15:03:01,2023-12-31T15:03:01,2023-12-22T15:03:01,20231231
550945,wbdeb-83317,LegalEntity,SEYDOU IDANI,,,bf,,,Consultant Guidelines 1.25(a)(i) - 2004 - 2999...,,,WorldBank Debarred Providers,2023-04-20T12:18:15,2023-12-31T15:03:01,2023-04-20T12:18:15,20231231


# Match the dates against each other

In [7]:
#now match the different dates against each other and return an indicator per row for a new listing or delisting
# Sort the DataFrame based on date_stamp and id
res.sort_values(by=['id', 'date_stamp'], inplace=True)

# Identify new entries and deletions based on the index and columns you want to compare
new_entries = ~res.duplicated(subset=['id'], keep='first')
deletions = ~res.duplicated(subset=['id'], keep='last')

# Identify unchanged rows by checking for duplicates based on the id and date_stamp
unchanged_rows = ~new_entries & ~deletions

# Set the values of new_entry, deletion, and unchanged columns
res['new_entry'] = new_entries
res['deletion'] = deletions
res['unchanged'] = unchanged_rows

# Reset index for the final result
res.reset_index(drop=True, inplace=True)

In [8]:
res

Unnamed: 0,id,schema,name,aliases,birth_date,countries,addresses,identifiers,sanctions,phones,emails,dataset,first_seen,last_seen,last_change,date_stamp,new_entry,deletion,unchanged
0,NK-223yQP6hRaMuiALDCJ6xbY,Organization,"Limited Liability Company ""Zelinsky Group""","""Общество с ограниченной ответственностью """"Зе...",,ru,"115054, Russian Federation, Moscow, 57 Dubinin...",1187746408761;7725491052,"813/2023 - valid - 2023-12-07 - 2033-12-07;""Th...",,,Ukraine NABC Sanctions Tracker;Ukraine Nationa...,2023-12-08T12:30:01,2023-12-13T18:30:01,2023-12-13T09:22:01,20231213,True,False,False
1,NK-223yQP6hRaMuiALDCJ6xbY,Organization,"Limited Liability Company ""Zelinsky Group""","""Общество с ограниченной ответственностью """"Зе...",,ru,"115054, Russian Federation, Moscow, 57 Dubinin...",1187746408761;7725491052,"813/2023 - valid - 2023-12-07 - 2033-12-07;""Яв...",,,Ukraine NABC Sanctions Tracker;Ukraine Nationa...,2023-12-08T12:30:01,2023-12-14T18:30:01,2023-12-13T09:22:01,20231214,False,False,True
2,NK-223yQP6hRaMuiALDCJ6xbY,Organization,"Limited Liability Company ""Zelinsky Group""","""Общество с ограниченной ответственностью """"Зе...",,ru,"115054, Russian Federation, Moscow, 57 Dubinin...",1187746408761;7725491052,"813/2023 - valid - 2023-12-07 - 2033-12-07;""Th...",,,Ukraine NABC Sanctions Tracker;Ukraine Nationa...,2023-12-08T12:30:01,2023-12-15T18:30:01,2023-12-13T09:22:01,20231215,False,False,True
3,NK-223yQP6hRaMuiALDCJ6xbY,Organization,"Limited Liability Company ""Zelinsky Group""","""Общество с ограниченной ответственностью """"Зе...",,ru,"115054, Russian Federation, Moscow, 57 Dubinin...",1187746408761;7725491052,"813/2023 - valid - 2023-12-07 - 2033-12-07;""Th...",,,Ukraine NABC Sanctions Tracker;Ukraine Nationa...,2023-12-08T12:30:01,2023-12-16T18:30:01,2023-12-13T09:22:01,20231216,False,False,True
4,NK-223yQP6hRaMuiALDCJ6xbY,Organization,"Limited Liability Company ""Zelinsky Group""","""Общество с ограниченной ответственностью """"Зе...",,ru,"115054, Russian Federation, Moscow, 57 Dubinin...",1187746408761;7725491052,"813/2023 - valid - 2023-12-07 - 2033-12-07;""Є ...",,,Ukraine NABC Sanctions Tracker;Ukraine Nationa...,2023-12-08T12:30:01,2023-12-17T18:30:01,2023-12-13T09:22:01,20231217,False,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
70256000,zafic-710-renel-destina,Person,RENEL DESTINA,,,ht,,,"""""",,,South African Targeted Financial Sanctions,2023-12-09T12:45:01,2023-12-10T18:45:01,2023-12-09T12:45:01,20231210,False,True,False
70256001,zafic-711-wilson-joseph,Person,WILSON JOSEPH,,,ht,,,"""""",,,South African Targeted Financial Sanctions,2023-12-09T12:45:01,2023-12-09T18:45:01,2023-12-09T12:45:01,20231209,True,False,False
70256002,zafic-711-wilson-joseph,Person,WILSON JOSEPH,,,ht,,,"""""",,,South African Targeted Financial Sanctions,2023-12-09T12:45:01,2023-12-10T18:45:01,2023-12-09T12:45:01,20231210,False,True,False
70256003,zafic-712-vitelhomme-innocent,Person,VITELHOMME INNOCENT,,,ht,"64, Soisson, Tabarre 49 i Port-au-Prince Haiti","National Identification Number, Haiti 004-341-...","""""",,,South African Targeted Financial Sanctions,2023-12-09T12:45:01,2023-12-09T18:45:01,2023-12-09T12:45:01,20231209,True,False,False


In [9]:
#test thematching function
test = res.query('new_entry == True')
test

Unnamed: 0,id,schema,name,aliases,birth_date,countries,addresses,identifiers,sanctions,phones,emails,dataset,first_seen,last_seen,last_change,date_stamp,new_entry,deletion,unchanged
0,NK-223yQP6hRaMuiALDCJ6xbY,Organization,"Limited Liability Company ""Zelinsky Group""","""Общество с ограниченной ответственностью """"Зе...",,ru,"115054, Russian Federation, Moscow, 57 Dubinin...",1187746408761;7725491052,"813/2023 - valid - 2023-12-07 - 2033-12-07;""Th...",,,Ukraine NABC Sanctions Tracker;Ukraine Nationa...,2023-12-08T12:30:01,2023-12-13T18:30:01,2023-12-13T09:22:01,20231213,True,False,False
19,NK-224TRezPqwzhQZ37exWxtX,Person,SANAVBARI NIKITENKO,,1992-06-28,ru;tj,,,"""Red Notice - participation in the activity of...",,,INTERPOL Red Notices,2023-05-06T06:10:15,2023-07-17T18:02:42,2023-06-06T16:26:42,20230717,True,False,False
187,NK-226GXBdQ5p6NjgrTpTQNVW,Company,"Открытое акционерное общество ""Электростальски...","""Open Joint-Stock Company """"Elektrostal Chemic...",,ru,"144001, Russian, Federation, Moscow region, м....",1025007108390;5053002307;505301001,"""ВАТ """"ЕХМЗ ім. М.Д. Зелінського"""" розробляє т...",,,"Russian National Settlement Depository (NSD, I...",2022-01-01T00:00:00,2023-12-08T18:30:01,2023-12-08T12:30:01,20231208,True,False,False
211,NK-228ZdYZVXaZBSBgVwapnks,Company,"Private enterprise ""Master-SG""","""ПП """"МАГІСТАР-СГ"""""";""ПРИВАТНЕ ПІДПРИЄМСТВО """"...",,ua,"79034, Ukraine, Lviv region, Lviv city, str. N...",42206417,169/2021 - valid - 2021-04-15 - 2024-04-21;Ука...,,,Ukraine Consolidated State Registry;Ukraine NA...,2023-04-20T10:50:14,2023-07-17T18:13:52,2023-07-07T12:23:04,20230717,True,False,False
379,NK-228jBYSTdUSvbZvsKsiHh6,Company,"АКЦИОНЕРНОЕ ОБЩЕСТВО ""ЭЛЕКТРОАГРЕГАТ""","""Joint-stock company """"Electroagregat"""""";""Акци...",,ru,"305022, Russian Federation, Kursk region, Kurs...",1024600965531;1025400524313;4631005223;4632010...,192/2023 - valid - 2023-04-01 - 2033-04-01;Ука...,,,Russian Unified State Register of Legal Entiti...,2023-04-20T10:50:14,2023-07-17T18:13:52,2023-05-22T10:22:17,20230717,True,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
70255996,zafic-708-protogene-ruvugayimikore,Person,PROTOGÈNE RUVUGAYIMIKORE,,,rw,Not Known Nyiragongo North Kivu Democratic Rep...,,"""""",,,South African Targeted Financial Sanctions,2023-10-27T06:45:01,2023-10-27T18:45:01,2023-10-27T06:45:01,20231027,True,True,False
70255997,zafic-709-johnson-andre,Person,JOHNSON ANDRE,,,ht,,,"""""",,,South African Targeted Financial Sanctions,2023-12-09T12:45:01,2023-12-09T18:45:01,2023-12-09T12:45:01,20231209,True,False,False
70255999,zafic-710-renel-destina,Person,RENEL DESTINA,,,ht,,,"""""",,,South African Targeted Financial Sanctions,2023-12-09T12:45:01,2023-12-09T18:45:01,2023-12-09T12:45:01,20231209,True,False,False
70256001,zafic-711-wilson-joseph,Person,WILSON JOSEPH,,,ht,,,"""""",,,South African Targeted Financial Sanctions,2023-12-09T12:45:01,2023-12-09T18:45:01,2023-12-09T12:45:01,20231209,True,False,False


# Deduplicate

In [10]:
res_deduplicated = res.copy()
res_deduplicated.sort_values(by=['id', 'date_stamp'], inplace=True)

# Identify new entries and deletions based on the index and columns you want to compare
new_entries = ~res_deduplicated.duplicated(subset=['id'], keep='first')
deletions = ~res_deduplicated.duplicated(subset=['id'], keep='last')

# Identify unchanged rows by checking for duplicates based on the id and date_stamp
unchanged_rows = ~new_entries & ~deletions

# Create new columns for first seen and last seen dates
res_deduplicated['listing_date'] = res_deduplicated['date_stamp'].where(new_entries)
res_deduplicated['delisting_date'] = res_deduplicated['date_stamp'].where(deletions)

# Add a 'month' column
res_deduplicated['month'] = pd.to_datetime(res_deduplicated['date_stamp']).dt.to_period('M')

# Deduplicate the entries (keep the first occurrence for each entity)
res_deduplicated = res_deduplicated.drop_duplicates(subset=['id'], keep='first')

# Reset index for the final result
res_deduplicated.reset_index(drop=True, inplace=True)

# Print or further analyze the deduplicated DataFrame 'res_deduplicated'
print(res_deduplicated)

                                        id        schema  \
0                NK-223yQP6hRaMuiALDCJ6xbY  Organization   
1                NK-224TRezPqwzhQZ37exWxtX        Person   
2                NK-226GXBdQ5p6NjgrTpTQNVW       Company   
3                NK-228ZdYZVXaZBSBgVwapnks       Company   
4                NK-228jBYSTdUSvbZvsKsiHh6       Company   
...                                    ...           ...   
599860  zafic-708-protogene-ruvugayimikore        Person   
599861             zafic-709-johnson-andre        Person   
599862             zafic-710-renel-destina        Person   
599863             zafic-711-wilson-joseph        Person   
599864       zafic-712-vitelhomme-innocent        Person   

                                                     name  \
0              Limited Liability Company "Zelinsky Group"   
1                                     SANAVBARI NIKITENKO   
2       Открытое акционерное общество "Электростальски...   
3                          Private 

In [11]:
res_deduplicated

Unnamed: 0,id,schema,name,aliases,birth_date,countries,addresses,identifiers,sanctions,phones,...,first_seen,last_seen,last_change,date_stamp,new_entry,deletion,unchanged,listing_date,delisting_date,month
0,NK-223yQP6hRaMuiALDCJ6xbY,Organization,"Limited Liability Company ""Zelinsky Group""","""Общество с ограниченной ответственностью """"Зе...",,ru,"115054, Russian Federation, Moscow, 57 Dubinin...",1187746408761;7725491052,"813/2023 - valid - 2023-12-07 - 2033-12-07;""Th...",,...,2023-12-08T12:30:01,2023-12-13T18:30:01,2023-12-13T09:22:01,20231213,True,False,False,20231213,,2023-12
1,NK-224TRezPqwzhQZ37exWxtX,Person,SANAVBARI NIKITENKO,,1992-06-28,ru;tj,,,"""Red Notice - participation in the activity of...",,...,2023-05-06T06:10:15,2023-07-17T18:02:42,2023-06-06T16:26:42,20230717,True,False,False,20230717,,2023-07
2,NK-226GXBdQ5p6NjgrTpTQNVW,Company,"Открытое акционерное общество ""Электростальски...","""Open Joint-Stock Company """"Elektrostal Chemic...",,ru,"144001, Russian, Federation, Moscow region, м....",1025007108390;5053002307;505301001,"""ВАТ """"ЕХМЗ ім. М.Д. Зелінського"""" розробляє т...",,...,2022-01-01T00:00:00,2023-12-08T18:30:01,2023-12-08T12:30:01,20231208,True,False,False,20231208,,2023-12
3,NK-228ZdYZVXaZBSBgVwapnks,Company,"Private enterprise ""Master-SG""","""ПП """"МАГІСТАР-СГ"""""";""ПРИВАТНЕ ПІДПРИЄМСТВО """"...",,ua,"79034, Ukraine, Lviv region, Lviv city, str. N...",42206417,169/2021 - valid - 2021-04-15 - 2024-04-21;Ука...,,...,2023-04-20T10:50:14,2023-07-17T18:13:52,2023-07-07T12:23:04,20230717,True,False,False,20230717,,2023-07
4,NK-228jBYSTdUSvbZvsKsiHh6,Company,"АКЦИОНЕРНОЕ ОБЩЕСТВО ""ЭЛЕКТРОАГРЕГАТ""","""Joint-stock company """"Electroagregat"""""";""Акци...",,ru,"305022, Russian Federation, Kursk region, Kurs...",1024600965531;1025400524313;4631005223;4632010...,192/2023 - valid - 2023-04-01 - 2033-04-01;Ука...,,...,2023-04-20T10:50:14,2023-07-17T18:13:52,2023-05-22T10:22:17,20230717,True,False,False,20230717,,2023-07
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
599860,zafic-708-protogene-ruvugayimikore,Person,PROTOGÈNE RUVUGAYIMIKORE,,,rw,Not Known Nyiragongo North Kivu Democratic Rep...,,"""""",,...,2023-10-27T06:45:01,2023-10-27T18:45:01,2023-10-27T06:45:01,20231027,True,True,False,20231027,20231027,2023-10
599861,zafic-709-johnson-andre,Person,JOHNSON ANDRE,,,ht,,,"""""",,...,2023-12-09T12:45:01,2023-12-09T18:45:01,2023-12-09T12:45:01,20231209,True,False,False,20231209,,2023-12
599862,zafic-710-renel-destina,Person,RENEL DESTINA,,,ht,,,"""""",,...,2023-12-09T12:45:01,2023-12-09T18:45:01,2023-12-09T12:45:01,20231209,True,False,False,20231209,,2023-12
599863,zafic-711-wilson-joseph,Person,WILSON JOSEPH,,,ht,,,"""""",,...,2023-12-09T12:45:01,2023-12-09T18:45:01,2023-12-09T12:45:01,20231209,True,False,False,20231209,,2023-12


# Descriptives and clean up

In [12]:
#the countries are coded as iso2 codes -> transform to iso 3
#keep both countries? could search for "ru" over regex functions
print(res_deduplicated.countries.unique())
print(res_deduplicated.schema.unique())
print(res_deduplicated.listing_date.unique())

['ru' 'ru;tj' 'ua' ... 'ch;nl' 'es;no' 'um']
['Organization' 'Person' 'Company' 'LegalEntity' 'Airplane' 'Vessel'
 'Security' 'CryptoWallet']
['20231213' '20230717' '20231208' '20230721' '20230912' '20231206'
 '20230830' '20230722' '20230916' '20231121' '20231027' '20231106'
 '20231223' '20231214' '20230921' '20230915' '20231019' '20230904'
 '20230807' '20230817' '20231020' '20231120' '20231026' '20231212'
 '20231123' '20231128' '20231112' '20231218' '20231130' '20230911'
 '20231201' '20230924' '20231007' '20231008' '20230928' '20231101'
 '20230815' '20230902' '20231211' '20231217' '20230925' '20231109'
 '20231016' '20230723' '20231221' '20230824' '20231222' '20230729'
 '20231117' '20230923' '20231110' '20230918' '20230803' '20231118'
 '20231229' '20230825' '20231207' '20231129' '20231119' '20230818'
 '20230808' '20230930' '20231220' '20230821' '20231031' '20230917'
 '20231127' '20230919' '20231005' '20231010' '20230929' '20230801'
 '20230908' '20231108' '20230901' '20230724' '20230814

In [13]:
list_regimes = list[res_deduplicated.sanctions.unique()]
print(list_regimes)
#but it doesnt show me all

list[array(['813/2023 - valid - 2023-12-07 - 2033-12-07;"The company is the management company of Zelinsky Group Corporation, which is the main supplier of personal and collective protection equipment for the Russian army, the Russian Ministry of Defence, the Ministry of Internal Affairs, the Ministry of Emergency Situations, the Russian Guard, the Roscosmos State Corporation and Russian military-industrial complex enterprises (respirators, gas masks, chemical components of protective equipment, etc.). In addition, the company\'s activities are a source of significant revenue for the budget of the Russian Federation, and thus for the conduct of the aggressive war. in 2021, the company paid RUB 64,723,574 in taxes. Thus, the company\'s activities pose real threats to the national interests, national security, sovereignty and territorial integrity of Ukraine, facilitate terrorist activities and/or violate human and civil liberties, interests of society and the state, lead to the occupati

In [14]:
#get missingness
is_null = res_deduplicated.isnull().sum()
display(is_null)

id                     0
schema                 0
name                   0
aliases           406424
birth_date        408209
countries          30788
addresses         457545
identifiers       254687
sanctions         429475
phones            593239
emails            588881
dataset                0
first_seen        154317
last_seen              0
last_change            1
date_stamp             0
new_entry              0
deletion               0
unchanged              0
listing_date           0
delisting_date    595519
month                  0
dtype: int64

In [15]:
res_deduplicated.columns

Index(['id', 'schema', 'name', 'aliases', 'birth_date', 'countries',
       'addresses', 'identifiers', 'sanctions', 'phones', 'emails', 'dataset',
       'first_seen', 'last_seen', 'last_change', 'date_stamp', 'new_entry',
       'deletion', 'unchanged', 'listing_date', 'delisting_date', 'month'],
      dtype='object')

In [16]:
res_deduplicated = res_deduplicated.drop(['last_seen', 'first_seen','new_entry', 'deletion','unchanged'],axis = 1)

In [17]:
res_deduplicated.to_csv("default_batch5.csv")