**This is the simplified Opensantions data in csv format**

In [1]:
#import packages
import requests
import pandas as pd
import numpy as np
import datetime
import os
import re

# Retrieval of the data

In [2]:
#I can access the data via pandas very easily, output already in tabular form
#data = pd.read_csv('https://data.opensanctions.org/datasets/20240121/us_ofac_sdn/targets.simple.csv')
#data

In [3]:
#create urls for all relevant dates
date_list = pd.date_range(start='20210820',end='20231231',freq='D').strftime('%Y%m%d')
date_list

Index(['20210820', '20210821', '20210822', '20210823', '20210824', '20210825',
       '20210826', '20210827', '20210828', '20210829',
       ...
       '20231222', '20231223', '20231224', '20231225', '20231226', '20231227',
       '20231228', '20231229', '20231230', '20231231'],
      dtype='object', length=864)

In [4]:
#get all the urls for all the dates we need the data for
#make each day an individual list so we can compare and match them
websites = []

for i in date_list:
    test = 'https://data.opensanctions.org/datasets/'+(i)+'/us_ofac_cons/targets.simple.csv'
    websites.append(test)
print(websites)

['https://data.opensanctions.org/datasets/20210820/us_ofac_cons/targets.simple.csv', 'https://data.opensanctions.org/datasets/20210821/us_ofac_cons/targets.simple.csv', 'https://data.opensanctions.org/datasets/20210822/us_ofac_cons/targets.simple.csv', 'https://data.opensanctions.org/datasets/20210823/us_ofac_cons/targets.simple.csv', 'https://data.opensanctions.org/datasets/20210824/us_ofac_cons/targets.simple.csv', 'https://data.opensanctions.org/datasets/20210825/us_ofac_cons/targets.simple.csv', 'https://data.opensanctions.org/datasets/20210826/us_ofac_cons/targets.simple.csv', 'https://data.opensanctions.org/datasets/20210827/us_ofac_cons/targets.simple.csv', 'https://data.opensanctions.org/datasets/20210828/us_ofac_cons/targets.simple.csv', 'https://data.opensanctions.org/datasets/20210829/us_ofac_cons/targets.simple.csv', 'https://data.opensanctions.org/datasets/20210830/us_ofac_cons/targets.simple.csv', 'https://data.opensanctions.org/datasets/20210831/us_ofac_cons/targets.simp

#first check for missing days
for site in websites:
    try:
        response = requests.get(site)
        response.raise_for_status()  # Raises an HTTPError for bad response
    except requests.exceptions.RequestException as e:
        print(f"Error accessing {site}: {e}")
        continue

In [5]:
#now loop over the batches to get all the data and append the dataframes to one another
#this gives us a list of dataframes
#the loop must contain a date identifyer as a new column so each dataframe is marked with the retrieval date that is then needed to match the different days against each other
entities_list = []
date_pattern = r'/datasets/(\d{8})/'
for site in websites:
    response = requests.get(site)
    if response.status_code != 200:
        continue
    data = pd.read_csv(site)
    match = re.search(date_pattern, site) #extract the date from the url
    if match:
        date = match.group(1)
        data['date_stamp'] = date #set an individual date stamp for each dataframe matching the url date
        #ata =  data.set_index('date_stamp') #set as an index
    entities_list.append(data)
res = pd.concat(entities_list)  # concatenate list of dataframes

# Match the dates against each other

In [6]:
res

Unnamed: 0,id,schema,name,aliases,birth_date,countries,addresses,identifiers,sanctions,phones,emails,dataset,last_seen,first_seen,date_stamp,last_change
0,ofac-cons-15268,Organization,BANK OF KUNLUN CO LTD,KARAMAY CITY COMMERCIAL BANK CO LTD.;KARAMAY U...,,cn,"172 Xibin Rd, Ranghulu District, (Daqing, Heil...",CKLBCNBJ,CAPTA List - 561 Full Restrictions - Program -...,,,US OFAC Consolidated (non-SDN) List,2021-07-26 11:55:45,2021-08-20 20:39:54,20210820,
1,ofac-cons-17013,Organization,VTB BANK OAO,"BANK VNESHNEY TORGOVLI ROSSIYSKOY FEDERATSII, ...",,ru,"29, Bolshaya Morskaya str., St. Petersburg, Ru...",1027739609391;7702070139;VTBRRUMM,Consolidated List - 13662 Sectoral Directive 1...,,,US OFAC Consolidated (non-SDN) List,2021-07-26 11:55:45,2021-08-20 20:39:54,20210820,
2,ofac-cons-17014,Organization,Russian Agricultural Bank,OTKRYTOE AKTSIONERNOE ROSSISKI SELSKOKHOZYAIST...,,ru,"3 Gagarinsky per., Moscow, Russia, 119034;3, G...",1027700342890;RUAGRUMM,Consolidated List - 13662 Sectoral Directive 1...,,office@rshb.ru,US OFAC Consolidated (non-SDN) List,2021-07-26 11:55:45,2021-08-20 20:39:54,20210820,
3,ofac-cons-17015,Company,Vnesheconombank,Bank Razvitiya I Vneshneekonomicheskoi Deyatel...,,ru,"9 Akademika Sakharova prospekt, Moscow, Russia...",BFEARUMM,Consolidated List - 13662 Sectoral Directive 1...,,info@veb.ru,US OFAC Consolidated (non-SDN) List,2021-07-26 11:55:45,2021-08-20 20:39:54,20210820,
4,ofac-cons-17016,Organization,GAZPROMBANK OAO,"GPB, OAO;GPB, OJSC;Gazprombank Gas Industry OJ...",,ru,"16, Building 1, Nametkina St., Moscow, Russia,...",1027700167110;7744001497;GAZPRUMM,Consolidated List - 13662 Sectoral Directive 1...,,mailbox@gazprombank.ru,US OFAC Consolidated (non-SDN) List,2021-07-26 11:55:45,2021-08-20 20:39:54,20210820,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
439,Q400735,Person,Ahmad SA'DAT,,,,,,Consolidated List - General License 4 Under EO...,,,US OFAC Consolidated (non-SDN) List,2023-12-31T22:35:01,2023-04-20T10:56:14,20231231,2023-04-20T10:56:14
440,Q428203,Person,Jamil MAJDALAWI,,,,,,Consolidated List - General License 4 Under EO...,,,US OFAC Consolidated (non-SDN) List,2023-12-31T22:35:01,2023-04-20T10:56:14,20231231,2023-04-20T10:56:14
441,Q465542,Person,Mohammed ABU TEIR,Mohammad Mahmoud ABOU TAYR;Mohammed Mahmud ABU...,1951,,,,Consolidated List - General License 4 Under EO...,,,US OFAC Consolidated (non-SDN) List,2023-12-31T22:35:01,2023-04-20T10:56:14,20231231,2023-04-20T10:56:14
442,Q53641533,Person,Salah AL-BARDAWIL,Salah Mohammad EL BARDAWIL,1959,,,,Consolidated List - General License 4 Under EO...,,,US OFAC Consolidated (non-SDN) List,2023-12-31T22:35:01,2023-04-20T10:56:14,20231231,2023-04-20T10:56:14


In [7]:
#now match the different dates against each other and return an indicator per row for a new listing or delisting
# Sort the DataFrame based on date_stamp and id
res.sort_values(by=['id', 'date_stamp'], inplace=True)

# Identify new entries and deletions based on the index and columns you want to compare
new_entries = ~res.duplicated(subset=['id'], keep='first')
deletions = ~res.duplicated(subset=['id'], keep='last')

# Identify unchanged rows by checking for duplicates based on the id and date_stamp
unchanged_rows = ~new_entries & ~deletions

# Set the values of new_entry, deletion, and unchanged columns
res['new_entry'] = new_entries
res['deletion'] = deletions
res['unchanged'] = unchanged_rows

# Reset index for the final result
res.reset_index(drop=True, inplace=True)

In [8]:
res

Unnamed: 0,id,schema,name,aliases,birth_date,countries,addresses,identifiers,sanctions,phones,emails,dataset,last_seen,first_seen,date_stamp,last_change,new_entry,deletion,unchanged
0,NK-28ZcFDmHBF9L3WkDBBwH6H,Company,GAZPROMBANK LEASING ZAO,CLOSED JOINT-STOCK COMPANY GAZPROMBANK LIZING,,ru,"D.40 Ulitsa Miklukho-Maklaya, Moscow, 117342;P...",1037728033606;7728294503,13662 Sectoral Directive 1 - Sectoral Sanction...,,,US OFAC Consolidated (non-SDN) List,2021-09-26 14:52:11,2021-09-28 03:03:19,20210928,,True,False,False
1,NK-28ZcFDmHBF9L3WkDBBwH6H,Company,GAZPROMBANK LEASING ZAO,CLOSED JOINT-STOCK COMPANY GAZPROMBANK LIZING,,ru,"D.40 Ulitsa Miklukho-Maklaya, Moscow, 117342;P...",1037728033606;7728294503,13662 Sectoral Directive 1 - Program - Consoli...,,,US OFAC Consolidated (non-SDN) List,2021-09-26 14:52:11,2021-09-29 03:02:51,20210929,,False,False,True
2,NK-28ZcFDmHBF9L3WkDBBwH6H,Company,GAZPROMBANK LEASING ZAO,CLOSED JOINT-STOCK COMPANY GAZPROMBANK LIZING,,ru,"D.40 Ulitsa Miklukho-Maklaya, Moscow, 117342;P...",1037728033606;7728294503,Program - 13662 Sectoral Directive 1 - Consoli...,,,US OFAC Consolidated (non-SDN) List,2021-09-30 11:39:21,2021-09-30 11:39:21,20210930,,False,False,True
3,NK-28ZcFDmHBF9L3WkDBBwH6H,Company,GAZPROMBANK LEASING ZAO,CLOSED JOINT-STOCK COMPANY GAZPROMBANK LIZING,,ru,"D.40 Ulitsa Miklukho-Maklaya, Moscow, 117342;P...",1037728033606;7728294503,Consolidated List - Program - 13662 Sectoral D...,,,US OFAC Consolidated (non-SDN) List,2021-09-30 11:39:21,2021-10-01 03:02:43,20211001,,False,False,True
4,NK-28ZcFDmHBF9L3WkDBBwH6H,Company,GAZPROMBANK LEASING ZAO,CLOSED JOINT-STOCK COMPANY GAZPROMBANK LIZING,,ru,"D.40 Ulitsa Miklukho-Maklaya, Moscow, 117342;P...",1037728033606;7728294503,13662 Sectoral Directive 1 - Program - Consoli...,,,US OFAC Consolidated (non-SDN) List,2021-09-30 11:39:21,2021-10-02 03:03:02,20211002,,False,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
382195,ofac-cons-9716,Person,Qais Abdul KARIM,,,,,,Consolidated List - Reject - Program - General...,,,US OFAC Consolidated (non-SDN) List,2021-07-26 11:55:45,2021-09-24 03:02:55,20210925,,False,False,True
382196,ofac-cons-9716,Person,Qais Abdul KARIM,,,,,,Consolidated List - Program - Reject - General...,,,US OFAC Consolidated (non-SDN) List,2021-09-26 14:52:11,2021-09-26 14:52:11,20210926,,False,False,True
382197,ofac-cons-9716,Person,Qais Abdul KARIM,,,,,,Program - Reject - Consolidated List - General...,,,US OFAC Consolidated (non-SDN) List,2021-09-26 14:52:11,2021-09-27 09:09:30,20210927,,False,False,True
382198,ofac-cons-9716,Person,Qais Abdul KARIM,,,,,,Program - Consolidated List - Reject - General...,,,US OFAC Consolidated (non-SDN) List,2021-09-26 14:52:11,2021-09-28 03:03:19,20210928,,False,False,True


In [9]:
#test thematching function
test = res.query('new_entry == True')
test

Unnamed: 0,id,schema,name,aliases,birth_date,countries,addresses,identifiers,sanctions,phones,emails,dataset,last_seen,first_seen,date_stamp,last_change,new_entry,deletion,unchanged
0,NK-28ZcFDmHBF9L3WkDBBwH6H,Company,GAZPROMBANK LEASING ZAO,CLOSED JOINT-STOCK COMPANY GAZPROMBANK LIZING,,ru,"D.40 Ulitsa Miklukho-Maklaya, Moscow, 117342;P...",1037728033606;7728294503,13662 Sectoral Directive 1 - Sectoral Sanction...,,,US OFAC Consolidated (non-SDN) List,2021-09-26 14:52:11,2021-09-28 03:03:19,20210928,,True,False,False
822,NK-2BsamqL437nSYMzH9KHktg,Organization,"SZ DJI Technology Co., Ltd.",深圳市大疆创新科技有限公司,,cn,"14 F, West Block of Skyworth Semiconductor Des...",914403007954257495,Consolidated List - Program - CMIC Executive O...,,,US OFAC Consolidated (non-SDN) List,2021-12-17 03:03:07,2022-03-19 18:17:19,20220319,,True,False,False
1473,NK-2FBeAKkRfBHbfVLpuEpNZv,Person,Yahia Abdul Aziz AL-ABADSAH,Yehia Abdul Aziz EL ABADSA,1958,ps,Khan Younis,,Reject - Program - Consolidated List - General...,,,US OFAC Consolidated (non-SDN) List,2021-09-30 11:39:21,2021-09-30 11:39:21,20210930,,True,False,False
2293,NK-2FbKDZw9xDh29GoQicYG8f,Company,SBERBANK INSURANCE COMPANY LTD,LLC INSURANCE COMPANY SBERBANK INSURANCE;OBSHC...,,ru,"42 Bolshaya Yakimanka St., b. 1-2, office 209,...",1147746683479,13662 Sectoral Directive 1 - Consolidated List...,,,US OFAC Consolidated (non-SDN) List,2021-07-26 11:55:45,2021-09-18 12:23:43,20210918,,True,False,False
3125,NK-2JCKKtyfmLdhLbRRvconU8,Company,KORUS CONSULTING CIS LIMITED LIABILITY COMPANY,"KORUS KONSALTING SNG, OOO;LLL KORUS CONSULTING...",,ru,"68 Sampsonievsky Avenue, letter N, Room 1N, Sa...",1057812752502,13662 Sectoral Directive 1 - Sectoral Sanction...,,,US OFAC Consolidated (non-SDN) List,2021-09-26 14:52:11,2021-09-28 03:03:19,20210928,,True,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
381995,ofac-cons-9712,Person,Mohammed SHIHAB,Mohamed Abed Hadi Rahman SHEHAB,1956-01-01;1956-12-31,,undetermined,,Non-SDN Palestinian Legislative Council List -...,,,US OFAC Consolidated (non-SDN) List,2021-07-26 11:55:45,2021-08-20 20:39:54,20210820,,True,False,False
382036,ofac-cons-9713,Person,Ahmad SA'DAT,,,,undetermined,,Non-SDN Palestinian Legislative Council List -...,,,US OFAC Consolidated (non-SDN) List,2021-07-26 11:55:45,2021-08-20 20:39:54,20210820,,True,False,False
382077,ofac-cons-9714,Person,Jamil MAJDALAWI,,,,undetermined,,Non-SDN Palestinian Legislative Council List -...,,,US OFAC Consolidated (non-SDN) List,2021-07-26 11:55:45,2021-08-20 20:39:54,20210820,,True,False,False
382118,ofac-cons-9715,Person,Khaledah JARRAR,,,,undetermined,,Non-SDN Palestinian Legislative Council List -...,,,US OFAC Consolidated (non-SDN) List,2021-07-26 11:55:45,2021-08-20 20:39:54,20210820,,True,False,False


# Deduplicate

In [10]:
res_deduplicated = res.copy()
res_deduplicated.sort_values(by=['id', 'date_stamp'], inplace=True)

# Identify new entries and deletions based on the index and columns you want to compare
new_entries = ~res_deduplicated.duplicated(subset=['id'], keep='first')
deletions = ~res_deduplicated.duplicated(subset=['id'], keep='last')

# Identify unchanged rows by checking for duplicates based on the id and date_stamp
unchanged_rows = ~new_entries & ~deletions

# Create new columns for first seen and last seen dates
res_deduplicated['listing_date'] = res_deduplicated['date_stamp'].where(new_entries)
res_deduplicated['delisting_date'] = res_deduplicated['date_stamp'].where(deletions)

# Add a 'month' column
res_deduplicated['month'] = pd.to_datetime(res_deduplicated['date_stamp']).dt.to_period('M')

# Deduplicate the entries (keep the first occurrence for each entity)
res_deduplicated = res_deduplicated.drop_duplicates(subset=['id'], keep='first')

# Reset index for the final result
res_deduplicated.reset_index(drop=True, inplace=True)

# Print or further analyze the deduplicated DataFrame 'res_deduplicated'
print(res_deduplicated)

                            id        schema  \
0    NK-28ZcFDmHBF9L3WkDBBwH6H       Company   
1    NK-2BsamqL437nSYMzH9KHktg  Organization   
2    NK-2FBeAKkRfBHbfVLpuEpNZv        Person   
3    NK-2FbKDZw9xDh29GoQicYG8f       Company   
4    NK-2JCKKtyfmLdhLbRRvconU8       Company   
..                         ...           ...   
951             ofac-cons-9712        Person   
952             ofac-cons-9713        Person   
953             ofac-cons-9714        Person   
954             ofac-cons-9715        Person   
955             ofac-cons-9716        Person   

                                               name  \
0                           GAZPROMBANK LEASING ZAO   
1                       SZ DJI Technology Co., Ltd.   
2                       Yahia Abdul Aziz AL-ABADSAH   
3                    SBERBANK INSURANCE COMPANY LTD   
4    KORUS CONSULTING CIS LIMITED LIABILITY COMPANY   
..                                              ...   
951                                 Mo

In [11]:
res_deduplicated

Unnamed: 0,id,schema,name,aliases,birth_date,countries,addresses,identifiers,sanctions,phones,...,last_seen,first_seen,date_stamp,last_change,new_entry,deletion,unchanged,listing_date,delisting_date,month
0,NK-28ZcFDmHBF9L3WkDBBwH6H,Company,GAZPROMBANK LEASING ZAO,CLOSED JOINT-STOCK COMPANY GAZPROMBANK LIZING,,ru,"D.40 Ulitsa Miklukho-Maklaya, Moscow, 117342;P...",1037728033606;7728294503,13662 Sectoral Directive 1 - Sectoral Sanction...,,...,2021-09-26 14:52:11,2021-09-28 03:03:19,20210928,,True,False,False,20210928,,2021-09
1,NK-2BsamqL437nSYMzH9KHktg,Organization,"SZ DJI Technology Co., Ltd.",深圳市大疆创新科技有限公司,,cn,"14 F, West Block of Skyworth Semiconductor Des...",914403007954257495,Consolidated List - Program - CMIC Executive O...,,...,2021-12-17 03:03:07,2022-03-19 18:17:19,20220319,,True,False,False,20220319,,2022-03
2,NK-2FBeAKkRfBHbfVLpuEpNZv,Person,Yahia Abdul Aziz AL-ABADSAH,Yehia Abdul Aziz EL ABADSA,1958,ps,Khan Younis,,Reject - Program - Consolidated List - General...,,...,2021-09-30 11:39:21,2021-09-30 11:39:21,20210930,,True,False,False,20210930,,2021-09
3,NK-2FbKDZw9xDh29GoQicYG8f,Company,SBERBANK INSURANCE COMPANY LTD,LLC INSURANCE COMPANY SBERBANK INSURANCE;OBSHC...,,ru,"42 Bolshaya Yakimanka St., b. 1-2, office 209,...",1147746683479,13662 Sectoral Directive 1 - Consolidated List...,,...,2021-07-26 11:55:45,2021-09-18 12:23:43,20210918,,True,False,False,20210918,,2021-09
4,NK-2JCKKtyfmLdhLbRRvconU8,Company,KORUS CONSULTING CIS LIMITED LIABILITY COMPANY,"KORUS KONSALTING SNG, OOO;LLL KORUS CONSULTING...",,ru,"68 Sampsonievsky Avenue, letter N, Room 1N, Sa...",1057812752502,13662 Sectoral Directive 1 - Sectoral Sanction...,,...,2021-09-26 14:52:11,2021-09-28 03:03:19,20210928,,True,False,False,20210928,,2021-09
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
951,ofac-cons-9712,Person,Mohammed SHIHAB,Mohamed Abed Hadi Rahman SHEHAB,1956-01-01;1956-12-31,,undetermined,,Non-SDN Palestinian Legislative Council List -...,,...,2021-07-26 11:55:45,2021-08-20 20:39:54,20210820,,True,False,False,20210820,,2021-08
952,ofac-cons-9713,Person,Ahmad SA'DAT,,,,undetermined,,Non-SDN Palestinian Legislative Council List -...,,...,2021-07-26 11:55:45,2021-08-20 20:39:54,20210820,,True,False,False,20210820,,2021-08
953,ofac-cons-9714,Person,Jamil MAJDALAWI,,,,undetermined,,Non-SDN Palestinian Legislative Council List -...,,...,2021-07-26 11:55:45,2021-08-20 20:39:54,20210820,,True,False,False,20210820,,2021-08
954,ofac-cons-9715,Person,Khaledah JARRAR,,,,undetermined,,Non-SDN Palestinian Legislative Council List -...,,...,2021-07-26 11:55:45,2021-08-20 20:39:54,20210820,,True,False,False,20210820,,2021-08


# Descriptives and clean up

In [12]:
#the countries are coded as iso2 codes -> transform to iso 3
#keep both countries? could search for "ru" over regex functions
print(res_deduplicated.countries.unique())
print(res_deduplicated.schema.unique())
print(res_deduplicated.listing_date.unique())

['ru' 'cn' 'ps' nan 'cy' 'vg' 'at' 'cn;hk' 'nl' 'am' 'ch' 'cn;ky' 'by'
 'ge' 'kz' 'ky' 'mm' 'ua' 'tr' 'az' 'lu' 'gg;ru' 'cz' 'cy;vg' 'rs' 'ao']
['Company' 'Organization' 'Person']
['20210928' '20220319' '20210930' '20210918' '20220226' '20220413'
 '20220418' '20220529' '20220408' '20220608' '20230318' '20220722'
 '20220305' '20220228' '20220724' '20231031' '20220712' '20231120'
 '20220903' '20220302' '20220316' '20220411' '20220318' '20220203'
 '20220406' '20211003' '20220721' '20220421' '20220324' '20220705'
 '20220602' '20221210' '20220304' '20220623' '20220612' '20221212'
 '20220727' '20231221' '20220223' '20211217' '20211211' '20220225'
 '20210820']


In [13]:
list_regimes = list[res_deduplicated.sanctions.unique()]
print(list_regimes)
#but it doesnt show me all

list[array(['13662 Sectoral Directive 1 - Sectoral Sanctions Identifications List - Program - Executive Order 13662 (Ukraine) - 2016-09-01;Program - Consolidated List - 13662 Sectoral Directive 1 - Executive Order 13662 (Ukraine) - 2016-09-01',
       'Consolidated List - Program - CMIC Executive Order - Executive Order 14032 (CMIC) - 2022-12-16 - 2022-02-14;Non-SDN CMIC List - Program - CMIC Executive Order - Executive Order 14032 (CMIC) - 2022-12-16 - 2022-02-14',
       'Reject - Program - Consolidated List - General License 4 Under EO 13224 (Terrorism) - 2014-10-10;Reject - Program - Non-SDN Palestinian Legislative Council List - General License 4 Under EO 13224 (Terrorism) - 2006-04-12',
       '13662 Sectoral Directive 1 - Consolidated List - Program - Executive Order 13662 (Ukraine) - 2015-12-22;13662 Sectoral Directive 1 - Sectoral Sanctions Identifications List - Program - Executive Order 13662 (Ukraine) - 2015-12-22',
       '13662 Sectoral Directive 1 - Sectoral Sanctions Id

In [14]:
#get missingness
is_null = res_deduplicated.isnull().sum()
display(is_null)

id                  0
schema              0
name                0
aliases            82
birth_date        801
countries         115
addresses          61
identifiers       359
sanctions           0
phones            956
emails            662
dataset             0
last_seen           0
first_seen          0
date_stamp          0
last_change       951
new_entry           0
deletion            0
unchanged           0
listing_date        0
delisting_date    950
month               0
dtype: int64

In [15]:
res_deduplicated.columns

Index(['id', 'schema', 'name', 'aliases', 'birth_date', 'countries',
       'addresses', 'identifiers', 'sanctions', 'phones', 'emails', 'dataset',
       'last_seen', 'first_seen', 'date_stamp', 'last_change', 'new_entry',
       'deletion', 'unchanged', 'listing_date', 'delisting_date', 'month'],
      dtype='object')

In [16]:
res_deduplicated = res_deduplicated.drop([ 'last_seen', 'first_seen','new_entry', 'deletion','unchanged'],axis = 1)

In [17]:
res_deduplicated.to_csv("us_bis_batch5.csv")