In [1]:
import re
import pandas as pd
import numpy as np

In [2]:
# Read data excel file 
df = pd.read_excel('Example_Data.xlsx',sheet_name='Example_Data',header=1)
df.rename(columns={'Company ID':'Company_ID','Company Name':'Company_Name','Fiscal Year':'Fiscal_Year','SIC Code':'SIC_Code','Trading Currency':'Trading_Currency'}, inplace=True)
df.head()

Unnamed: 0,Company_ID,Company_Name,Fiscal_Year,Industry,SIC_Code,Trading_Currency,SP,CDS,APD,ARD,ADA
0,4993687,A Company,2010,Machinery,6085,USD,75,564.0,109,44,0
1,4993687,A Company,2011,Machinery,6085,USD,66,721.0,102,47,0
2,4993687,A Company,2012,Machinery,6085,USD,29,1048.0,97,45,0
3,4993687,A Company,2013,Machinery,6085,USD,56,914.0,96,40,0
4,4993687,A Company,2014,Machinery,6085,3$,55,950.0,91,34,0


In [3]:
df.describe()

Unnamed: 0,Company_ID,SP,CDS,APD,ARD,ADA
count,111.0,111.0,59.0,111.0,111.0,111.0
mean,4269428.0,92.144144,293.305085,86.900901,80.837838,0.234234
std,1666143.0,65.770655,292.946697,40.086034,41.616112,0.571361
min,498856.0,0.0,51.0,37.0,22.0,0.0
25%,4976728.0,44.5,103.0,65.0,66.5,0.0
50%,4991368.0,68.0,149.0,84.0,81.0,0.0
75%,4994275.0,123.0,349.5,98.0,91.0,0.0
max,5081187.0,277.0,1048.0,396.0,397.0,2.0


In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 111 entries, 0 to 110
Data columns (total 11 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Company_ID        111 non-null    int64  
 1   Company_Name      111 non-null    object 
 2   Fiscal_Year       111 non-null    object 
 3   Industry          111 non-null    object 
 4   SIC_Code          111 non-null    object 
 5   Trading_Currency  111 non-null    object 
 6   SP                111 non-null    int64  
 7   CDS               59 non-null     float64
 8   APD               111 non-null    int64  
 9   ARD               111 non-null    int64  
 10  ADA               111 non-null    int64  
dtypes: float64(1), int64(5), object(5)
memory usage: 9.7+ KB


### Data Cleaning and Data Processing

In [5]:
# Delete duplicate data
df.drop_duplicates(inplace=True)

In [6]:
## Method to get index of invalid data 
# Get unique company_id
def get_uniqueCompanyID_index(company_id):
    uniCompanyID_index = []
    
    for i in range(0,len(company_id)):
        uniCompanyID_index.append(df[df['Company_ID']==company_id[i]].index.tolist())
    return uniCompanyID_index

# Fiscal year
def get_fiscalYear_index(values,year_list):
    fy_index = []
    for year in values:
         if year not in year_list:
                fy_index.append(df[df['Fiscal_Year']==year].index.tolist())
    return fy_index

# Sic Code
def get_siccode_index(values):
    value_ls = []
    for value in values:
        if len(str(value)) != 4:
            value_ls.append(value)
        elif not str(value).isdigit():
            value_ls.append(value)
            
    sc_index = []
    for i in range(0,len(value_ls)):
        sc_index.append(df[df['SIC_Code']==value_ls[i]].index.tolist())
    return sc_index

# Trading Currency
def get_currency_index(values,currency_list):
    tc_index = []
    for value in values:
         if value not in currency_list:
                tc_index.append(df[df['Trading_Currency']==value].index.tolist())
    return tc_index

## Delete method to delete invalid data with index
def delete_index(index):
    index = np.array(index).flatten()
    for i in range(0,len(index)):
        df.drop(index[i],inplace=True)


- Clean data that Company ID and Company Name not corresponding

In [7]:
# filter those ID's that appear with multiple names
non_unique = df.groupby('Company_ID').Company_Name.transform('nunique').ne(1)
df_unique = df[non_unique]
unique_id = df_unique.Company_ID.unique()

# get index of those's ID that appear with multiple names
unique_index = get_uniqueCompanyID_index(unique_id)

# delete data
delete_index(unique_index)

In [8]:
# df_unique # to check result

- Fiscal Year

In [9]:
# Fiscal Year
df['Fiscal_Year'].fillna(0)

f_year = [i for i in range(1999,2022)]
fy_values = df['Fiscal_Year'].values

fy_index = get_fiscalYear_index(fy_values,f_year) # call function to get index
delete_index(fy_index) # call function to delete invalid sic code data


In [10]:
df.iloc[fy_index[0][0]] # check delete function work or not

Company_ID            4994275
Company_Name        D Company
Fiscal_Year              2012
Industry            Machinery
SIC_Code                 5722
Trading_Currency          USD
SP                        120
CDS                       NaN
APD                        68
ARD                        77
ADA                         0
Name: 31, dtype: object

- SIC Code

In [11]:
# SIC code
siccode_value = df['SIC_Code'].values
sc_index = get_siccode_index(siccode_value) # call function to get index
delete_index(sc_index) # call function to delete invalid sic code data

In [12]:
df.iloc[sc_index[0][0]] # check delete function work or not

Company_ID            5081187
Company_Name        M Company
Fiscal_Year              2011
Industry            Machinery
SIC_Code                 5886
Trading_Currency          GBP
SP                         31
CDS                       NaN
APD                        87
ARD                       123
ADA                         2
Name: 103, dtype: object

- Trading currency

In [13]:
# Trading currency
# Trading currency only should be USD,GBP
currency_list = ['USD','GBP']
tc_values = df['Trading_Currency'].values

tc_index = get_currency_index(tc_values,currency_list) # call function to get index
delete_index(tc_index) # call function to delete invalid sic code data

In [14]:
df.iloc[tc_index[0][0]] # check delete function work or not

Company_ID            4993687
Company_Name        A Company
Fiscal_Year              2015
Industry            Machinery
SIC_Code                 6085
Trading_Currency          USD
SP                         19
CDS                       956
APD                        96
ARD                        28
ADA                         0
Name: 5, dtype: object

In [15]:
# Transform Metric info data type to integer number
df[['SP','CDS','APD','ARD','ADA']] = df[['SP','CDS','APD','ARD','ADA']].fillna(0).astype(int)

### Data Comparison

In [16]:
df.to_csv('./New_Example_Data',index=False, header=True)
df_DATA = pd.read_csv('./New_Example_Data')

In [17]:
df_DATA.head()

Unnamed: 0,Company_ID,Company_Name,Fiscal_Year,Industry,SIC_Code,Trading_Currency,SP,CDS,APD,ARD,ADA
0,4993687,A Company,2010,Machinery,6085,USD,75,564,109,44,0
1,4993687,A Company,2011,Machinery,6085,USD,66,721,102,47,0
2,4993687,A Company,2012,Machinery,6085,USD,29,1048,97,45,0
3,4993687,A Company,2013,Machinery,6085,USD,56,914,96,40,0
4,4993687,A Company,2015,Machinery,6085,USD,19,956,96,28,0


In [18]:
# Read Example_DB data
df_DB = pd.read_excel('Example_Data.xlsx',sheet_name='Example_DB',header=0)
df_DB.rename(columns={'Company ID':'Company_ID','Company Name':'Company_Name','Fiscal Year':'Fiscal_Year','SIC Code':'SIC_Code','Trading Currency':'Trading_Currency','Metric Name':'Metric_Name'}, inplace=True)
df_DB.head()

Unnamed: 0,Company_ID,Company_Name,Fiscal_Year,Industry,SIC_Code,Trading_Currency,Metric_Name,Value
0,4993687,A Company,2010,Machinery,6085,USD,SP,75.0
1,4993687,A Company,2011,Machinery,6085,USD,SP,66.0
2,4993687,A Company,2012,Machinery,6085,USD,SP,29.0
3,4993687,A Company,2013,Machinery,6085,USD,SP,56.0
4,4993687,A Company,2015,Machinery,6085,USD,SP,19.0


In [19]:
df_DB.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 429 entries, 0 to 428
Data columns (total 8 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Company_ID        429 non-null    int64  
 1   Company_Name      429 non-null    object 
 2   Fiscal_Year       429 non-null    int64  
 3   Industry          429 non-null    object 
 4   SIC_Code          429 non-null    int64  
 5   Trading_Currency  429 non-null    object 
 6   Metric_Name       429 non-null    object 
 7   Value             381 non-null    float64
dtypes: float64(1), int64(3), object(4)
memory usage: 26.9+ KB


In [20]:
def check_sp(info_data,info_db_sp_value):   
    if not(info_db_sp_value):
        if not(info_data['SP'].values):
            print('Not_in_DB',company_id,year,'SP')
        else:
            if not(info_db_sp_value == info_data['SP'].values):
                print('UnEqual',company_id,year,'SP',info_db_sp_value,info_data['SP'].values)
    else:
        if not(info_data['SP'].values):
            print(info_data['SP'].values,'...Not_in_File',company_id,year,'SP') 
        else:
            if not(info_db_sp_value == info_data['SP'].values):
                print('UnEqual',company_id,year,'SP',info_db_sp_value,info_data['SP'].values)
                                   
def check_cds(info_data,info_db_cds_value):                     
    if not(info_db_cds_value):
        if not(info_data['CDS'].values):
            print('Not_in_DB',company_id,year,'CDS')
        else:
            if not(info_db_cds_value == info_data['CDS'].values):
                print('UnEqual',company_id,year,'CDS',info_db_cds_value,info_data['CDS'].values)
    else:
        if not(info_data['CDS'].values):
            print(info_data['CDS'].values,'...Not_in_File',company_id,year,'CDS') 
        else:
            if not(info_db_cds_value == info_data['CDS'].values):
                print('UnEqual',company_id,year,'CDS',info_db_cds_value,info_data['CDS'].values)

def check_apd(info_data,info_db_apd_value): 
    if not(info_db_apd_value):
        if not(info_data['APD'].values):
            print('Not_in_DB',company_id,year,'APD')
        else:
            if not(info_db_apd_value == info_data['APD'].values):
                print('UnEqual',company_id,year,'APD',info_db_apd_value,info_data['APD'].values)
    else:
        if not(info_data['APD'].values):
            print(info_data['APD'].values,'...Not_in_File',company_id,year,'APD') 
        else:
            if not(info_db_apd_value == info_data['APD'].values):
                print('UnEqual',company_id,year,'APD',info_db_apd_value,info_data['APD'].values)                   

def check_ard(info_data,info_db_sp_value): 
    if not(info_db_ard_value):
        if not(info_data['ARD'].values):
            print('Not_in_DB',company_id,year,'ARD')
        else:
            if not(info_db_ard_value == info_data['ARD'].values):
                print('UnEqual',company_id,year,'ARD',info_db_ard_value,info_data['ARD'].values)
    else:
        if not(info_data['ARD'].values):
            print(info_data['ARD'].values,'...Not_in_File',company_id,year,'ARD') 
        else:
            if not(info_db_ard_value == info_data['ARD'].values):
                print('UnEqual',company_id,year,'ARD',info_db_ard_value,info_data['ARD'].values)

def check_ada(info_data,info_db_ada_value): 
    if not(info_db_ada_value):
        if not(info_data['ADA'].values):
            print('Not_in_DB',company_id,year,'ADA')
        else:
            if not(info_db_ada_value == info_data['ADA'].values):
                print('UnEqual',company_id,year,'ADA',info_db_ada_value,info_data['ADA'].values)
    else:
        if not(info_data['ADA'].values):
            print(info_data['ADA'].values,'...Not_in_File',company_id,year,'ADA') 
        else:
            if not(info_db_ada_value == info_data['ADA'].values):
                print('UnEqual',company_id,year,'ADA',info_db_ada_value,info_data['ADA'].values)


In [21]:
all_Company_ID= df_DB['Company_ID'].unique()

In [22]:
df = pd.DataFrame(columns=['Company_ID','Company_Name','Fiscal_Year','Industry','SIC_Code','Trading_Currency',
                          'Metric_Name','Data_in_DB','Data_in_File','ERROR_Type'])
all_metric_name = ['SP','CDS','APD','ARD','ADA']
for company_id in all_Company_ID:
    for year in range(1999,2022):
        info_data = df_DATA.loc[(df_DATA['Company_ID']==company_id) & (df_DATA['Fiscal_Year']==year)]
        info_db_sp_value = df_DB.loc[(df_DB['Company_ID']==company_id) & (df_DB['Fiscal_Year']==year) & (df_DB['Metric_Name']=='SP')]['Value'].values
        info_db_cds_value = df_DB.loc[(df_DB['Company_ID']==company_id) & (df_DB['Fiscal_Year']==year) & (df_DB['Metric_Name']=='CDS')]['Value'].values
        info_db_apd_value = df_DB.loc[(df_DB['Company_ID']==company_id) & (df_DB['Fiscal_Year']==year) & (df_DB['Metric_Name']=='APD')]['Value'].values
        info_db_ard_value = df_DB.loc[(df_DB['Company_ID']==company_id) & (df_DB['Fiscal_Year']==year) & (df_DB['Metric_Name']=='ARD')]['Value'].values
        info_db_ada_value = df_DB.loc[(df_DB['Company_ID']==company_id) & (df_DB['Fiscal_Year']==year) & (df_DB['Metric_Name']=='ADA')]['Value'].values
        
        check_sp(info_data,info_db_sp_value)
        check_cds(info_data,info_db_cds_value)
        check_apd(info_data,info_db_apd_value)
        check_ard(info_data,info_db_ard_value)
        check_ada(info_data,info_db_ada_value)
        
df

Not_in_DB 4993687 1999 SP
Not_in_DB 4993687 1999 CDS
Not_in_DB 4993687 1999 APD
Not_in_DB 4993687 1999 ARD
Not_in_DB 4993687 1999 ADA
Not_in_DB 4993687 2000 SP
Not_in_DB 4993687 2000 CDS
Not_in_DB 4993687 2000 APD
Not_in_DB 4993687 2000 ARD
Not_in_DB 4993687 2000 ADA
Not_in_DB 4993687 2001 SP
Not_in_DB 4993687 2001 CDS
Not_in_DB 4993687 2001 APD
Not_in_DB 4993687 2001 ARD
Not_in_DB 4993687 2001 ADA
Not_in_DB 4993687 2002 SP
Not_in_DB 4993687 2002 CDS
Not_in_DB 4993687 2002 APD
Not_in_DB 4993687 2002 ARD
Not_in_DB 4993687 2002 ADA
Not_in_DB 4993687 2003 SP
Not_in_DB 4993687 2003 CDS
Not_in_DB 4993687 2003 APD
Not_in_DB 4993687 2003 ARD
Not_in_DB 4993687 2003 ADA
Not_in_DB 4993687 2004 SP
Not_in_DB 4993687 2004 CDS
Not_in_DB 4993687 2004 APD
Not_in_DB 4993687 2004 ARD
Not_in_DB 4993687 2004 ADA
Not_in_DB 4993687 2005 SP
Not_in_DB 4993687 2005 CDS
Not_in_DB 4993687 2005 APD
Not_in_DB 4993687 2005 ARD
Not_in_DB 4993687 2005 ADA
Not_in_DB 4993687 2006 SP
Not_in_DB 4993687 2006 CDS
Not_in_DB

  if not(info_db_sp_value):
  if not(info_data['SP'].values):
  if not(info_db_cds_value):
  if not(info_data['CDS'].values):
  if not(info_db_apd_value):
  if not(info_data['APD'].values):
  if not(info_db_ard_value):
  if not(info_data['ARD'].values):
  if not(info_db_ada_value):
  if not(info_data['ADA'].values):
  if not(info_db_sp_value):
  if not(info_data['SP'].values):
  if not(info_db_cds_value):
  if not(info_data['CDS'].values):
  if not(info_db_apd_value):
  if not(info_data['APD'].values):
  if not(info_db_ard_value):
  if not(info_data['ARD'].values):
  if not(info_db_ada_value):
  if not(info_data['ADA'].values):
  if not(info_db_sp_value):
  if not(info_data['SP'].values):
  if not(info_db_cds_value):
  if not(info_data['CDS'].values):
  if not(info_db_apd_value):
  if not(info_data['APD'].values):
  if not(info_db_ard_value):
  if not(info_data['ARD'].values):
  if not(info_db_ada_value):
  if not(info_data['ADA'].values):
  if not(info_db_sp_value):
  if not(info_data

Not_in_DB 4994275 2002 SP
Not_in_DB 4994275 2002 CDS
Not_in_DB 4994275 2002 APD
Not_in_DB 4994275 2002 ARD
Not_in_DB 4994275 2002 ADA
Not_in_DB 4994275 2003 SP
Not_in_DB 4994275 2003 CDS
Not_in_DB 4994275 2003 APD
Not_in_DB 4994275 2003 ARD
Not_in_DB 4994275 2003 ADA
Not_in_DB 4994275 2004 SP
Not_in_DB 4994275 2004 CDS
Not_in_DB 4994275 2004 APD
Not_in_DB 4994275 2004 ARD
Not_in_DB 4994275 2004 ADA
Not_in_DB 4994275 2005 SP
Not_in_DB 4994275 2005 CDS
Not_in_DB 4994275 2005 APD
Not_in_DB 4994275 2005 ARD
Not_in_DB 4994275 2005 ADA
Not_in_DB 4994275 2006 SP
Not_in_DB 4994275 2006 CDS
Not_in_DB 4994275 2006 APD
Not_in_DB 4994275 2006 ARD
Not_in_DB 4994275 2006 ADA
Not_in_DB 4994275 2007 SP
Not_in_DB 4994275 2007 CDS
Not_in_DB 4994275 2007 APD
Not_in_DB 4994275 2007 ARD
Not_in_DB 4994275 2007 ADA
Not_in_DB 4994275 2008 SP
Not_in_DB 4994275 2008 CDS
Not_in_DB 4994275 2008 APD
Not_in_DB 4994275 2008 ARD
Not_in_DB 4994275 2008 ADA
Not_in_DB 4994275 2009 SP
Not_in_DB 4994275 2009 CDS
Not_in_DB

  if not(info_db_sp_value):
  if not(info_data['SP'].values):
  if not(info_db_cds_value):
  if not(info_data['CDS'].values):
  if not(info_db_apd_value):
  if not(info_data['APD'].values):
  if not(info_db_ard_value):
  if not(info_data['ARD'].values):
  if not(info_db_ada_value):
  if not(info_data['ADA'].values):
  if not(info_db_sp_value):
  if not(info_data['SP'].values):
  if not(info_db_cds_value):
  if not(info_data['CDS'].values):
  if not(info_db_apd_value):
  if not(info_data['APD'].values):
  if not(info_db_ard_value):
  if not(info_data['ARD'].values):
  if not(info_db_ada_value):
  if not(info_data['ADA'].values):
  if not(info_db_sp_value):
  if not(info_data['SP'].values):
  if not(info_db_cds_value):
  if not(info_data['CDS'].values):
  if not(info_db_apd_value):
  if not(info_data['APD'].values):
  if not(info_db_ard_value):
  if not(info_data['ARD'].values):
  if not(info_db_ada_value):
  if not(info_data['ADA'].values):
  if not(info_db_sp_value):
  if not(info_data

Not_in_DB 499609 1999 SP
Not_in_DB 499609 1999 CDS
Not_in_DB 499609 1999 APD
Not_in_DB 499609 1999 ARD
Not_in_DB 499609 1999 ADA
Not_in_DB 499609 2000 SP
Not_in_DB 499609 2000 CDS
Not_in_DB 499609 2000 APD
Not_in_DB 499609 2000 ARD
Not_in_DB 499609 2000 ADA
Not_in_DB 499609 2001 SP
Not_in_DB 499609 2001 CDS
Not_in_DB 499609 2001 APD
Not_in_DB 499609 2001 ARD
Not_in_DB 499609 2001 ADA
Not_in_DB 499609 2002 SP
Not_in_DB 499609 2002 CDS
Not_in_DB 499609 2002 APD
Not_in_DB 499609 2002 ARD
Not_in_DB 499609 2002 ADA
Not_in_DB 499609 2003 SP
Not_in_DB 499609 2003 CDS
Not_in_DB 499609 2003 APD
Not_in_DB 499609 2003 ARD
Not_in_DB 499609 2003 ADA
Not_in_DB 499609 2004 SP
Not_in_DB 499609 2004 CDS
Not_in_DB 499609 2004 APD
Not_in_DB 499609 2004 ARD
Not_in_DB 499609 2004 ADA
Not_in_DB 499609 2005 SP
Not_in_DB 499609 2005 CDS
Not_in_DB 499609 2005 APD
Not_in_DB 499609 2005 ARD
Not_in_DB 499609 2005 ADA
Not_in_DB 499609 2006 SP
Not_in_DB 499609 2006 CDS
Not_in_DB 499609 2006 APD
Not_in_DB 499609 200

  if not(info_data['SP'].values):
  if not(info_db_cds_value):
  if not(info_data['CDS'].values):
  if not(info_db_apd_value):
  if not(info_data['APD'].values):
  if not(info_db_ard_value):
  if not(info_data['ARD'].values):
  if not(info_db_ada_value):
  if not(info_data['ADA'].values):
  if not(info_db_sp_value):
  if not(info_data['SP'].values):
  if not(info_db_cds_value):
  if not(info_data['CDS'].values):
  if not(info_db_apd_value):
  if not(info_data['APD'].values):
  if not(info_db_ard_value):
  if not(info_data['ARD'].values):
  if not(info_db_ada_value):
  if not(info_data['ADA'].values):
  if not(info_db_sp_value):
  if not(info_data['SP'].values):
  if not(info_db_cds_value):
  if not(info_data['CDS'].values):
  if not(info_db_apd_value):
  if not(info_data['APD'].values):
  if not(info_db_ard_value):
  if not(info_data['ARD'].values):
  if not(info_db_ada_value):
  if not(info_data['ADA'].values):
  if not(info_db_sp_value):
  if not(info_data['SP'].values):
  if not(inf

Not_in_DB 4977221 1999 SP
Not_in_DB 4977221 1999 CDS
Not_in_DB 4977221 1999 APD
Not_in_DB 4977221 1999 ARD
Not_in_DB 4977221 1999 ADA
Not_in_DB 4977221 2000 SP
Not_in_DB 4977221 2000 CDS
Not_in_DB 4977221 2000 APD
Not_in_DB 4977221 2000 ARD
Not_in_DB 4977221 2000 ADA
Not_in_DB 4977221 2001 SP
Not_in_DB 4977221 2001 CDS
Not_in_DB 4977221 2001 APD
Not_in_DB 4977221 2001 ARD
Not_in_DB 4977221 2001 ADA
Not_in_DB 4977221 2002 SP
Not_in_DB 4977221 2002 CDS
Not_in_DB 4977221 2002 APD
Not_in_DB 4977221 2002 ARD
Not_in_DB 4977221 2002 ADA
Not_in_DB 4977221 2003 SP
Not_in_DB 4977221 2003 CDS
Not_in_DB 4977221 2003 APD
Not_in_DB 4977221 2003 ARD
Not_in_DB 4977221 2003 ADA
Not_in_DB 4977221 2004 SP
Not_in_DB 4977221 2004 CDS
Not_in_DB 4977221 2004 APD
Not_in_DB 4977221 2004 ARD
Not_in_DB 4977221 2004 ADA
Not_in_DB 4977221 2005 SP
Not_in_DB 4977221 2005 CDS
Not_in_DB 4977221 2005 APD
Not_in_DB 4977221 2005 ARD
Not_in_DB 4977221 2005 ADA
Not_in_DB 4977221 2006 SP
Not_in_DB 4977221 2006 CDS
Not_in_DB

  if not(info_data['SP'].values):
  if not(info_db_cds_value):
  if not(info_data['CDS'].values):
  if not(info_db_apd_value):
  if not(info_data['APD'].values):
  if not(info_db_ard_value):
  if not(info_data['ARD'].values):
  if not(info_db_ada_value):
  if not(info_data['ADA'].values):
  if not(info_db_sp_value):
  if not(info_data['SP'].values):
  if not(info_db_cds_value):
  if not(info_data['CDS'].values):
  if not(info_db_apd_value):
  if not(info_data['APD'].values):
  if not(info_db_ard_value):
  if not(info_data['ARD'].values):
  if not(info_db_ada_value):
  if not(info_data['ADA'].values):
  if not(info_db_sp_value):
  if not(info_data['SP'].values):
  if not(info_db_cds_value):
  if not(info_data['CDS'].values):
  if not(info_db_apd_value):
  if not(info_data['APD'].values):
  if not(info_db_ard_value):
  if not(info_data['ARD'].values):
  if not(info_db_ada_value):
  if not(info_data['ADA'].values):
  if not(info_db_sp_value):
  if not(info_data['SP'].values):
  if not(inf

 2000 CDS
Not_in_DB 4996197 2000 APD
Not_in_DB 4996197 2000 ARD
Not_in_DB 4996197 2000 ADA
Not_in_DB 4996197 2001 SP
Not_in_DB 4996197 2001 CDS
Not_in_DB 4996197 2001 APD
Not_in_DB 4996197 2001 ARD
Not_in_DB 4996197 2001 ADA
Not_in_DB 4996197 2002 SP
Not_in_DB 4996197 2002 CDS
Not_in_DB 4996197 2002 APD
Not_in_DB 4996197 2002 ARD
Not_in_DB 4996197 2002 ADA
Not_in_DB 4996197 2003 SP
Not_in_DB 4996197 2003 CDS
Not_in_DB 4996197 2003 APD
Not_in_DB 4996197 2003 ARD
Not_in_DB 4996197 2003 ADA
Not_in_DB 4996197 2004 SP
Not_in_DB 4996197 2004 CDS
Not_in_DB 4996197 2004 APD
Not_in_DB 4996197 2004 ARD
Not_in_DB 4996197 2004 ADA
Not_in_DB 4996197 2005 SP
Not_in_DB 4996197 2005 CDS
Not_in_DB 4996197 2005 APD
Not_in_DB 4996197 2005 ARD
Not_in_DB 4996197 2005 ADA
Not_in_DB 4996197 2006 SP
Not_in_DB 4996197 2006 CDS
Not_in_DB 4996197 2006 APD
Not_in_DB 4996197 2006 ARD
Not_in_DB 4996197 2006 ADA
Not_in_DB 4996197 2007 SP
Not_in_DB 4996197 2007 CDS
Not_in_DB 4996197 2007 APD
Not_in_DB 4996197 2007 AR

  if not(info_db_apd_value):
  if not(info_data['APD'].values):
  if not(info_db_ard_value):
  if not(info_data['ARD'].values):
  if not(info_db_ada_value):
  if not(info_data['ADA'].values):
  if not(info_db_sp_value):
  if not(info_data['SP'].values):
  if not(info_db_cds_value):
  if not(info_data['CDS'].values):
  if not(info_db_apd_value):
  if not(info_data['APD'].values):
  if not(info_db_ard_value):
  if not(info_data['ARD'].values):
  if not(info_db_ada_value):
  if not(info_data['ADA'].values):
  if not(info_db_sp_value):
  if not(info_data['SP'].values):
  if not(info_db_cds_value):
  if not(info_data['CDS'].values):
  if not(info_db_apd_value):
  if not(info_data['APD'].values):
  if not(info_db_ard_value):
  if not(info_data['ARD'].values):
  if not(info_db_ada_value):
  if not(info_data['ADA'].values):
  if not(info_db_sp_value):
  if not(info_data['SP'].values):
  if not(info_db_cds_value):
  if not(info_data['CDS'].values):
  if not(info_db_apd_value):
  if not(info_dat

Not_in_DB 4986826 2021 CDS
Not_in_DB 4986826 2021 APD
Not_in_DB 4986826 2021 ARD
Not_in_DB 4986826 2021 ADA
Not_in_DB 5081187 1999 SP
Not_in_DB 5081187 1999 CDS
Not_in_DB 5081187 1999 APD
Not_in_DB 5081187 1999 ARD
Not_in_DB 5081187 1999 ADA
Not_in_DB 5081187 2000 SP
Not_in_DB 5081187 2000 CDS
Not_in_DB 5081187 2000 APD
Not_in_DB 5081187 2000 ARD
Not_in_DB 5081187 2000 ADA
Not_in_DB 5081187 2001 SP
Not_in_DB 5081187 2001 CDS
Not_in_DB 5081187 2001 APD
Not_in_DB 5081187 2001 ARD
Not_in_DB 5081187 2001 ADA
Not_in_DB 5081187 2002 SP
Not_in_DB 5081187 2002 CDS
Not_in_DB 5081187 2002 APD
Not_in_DB 5081187 2002 ARD
Not_in_DB 5081187 2002 ADA
Not_in_DB 5081187 2003 SP
Not_in_DB 5081187 2003 CDS
Not_in_DB 5081187 2003 APD
Not_in_DB 5081187 2003 ARD
Not_in_DB 5081187 2003 ADA
Not_in_DB 5081187 2004 SP
Not_in_DB 5081187 2004 CDS
Not_in_DB 5081187 2004 APD
Not_in_DB 5081187 2004 ARD
Not_in_DB 5081187 2004 ADA
Not_in_DB 5081187 2005 SP
Not_in_DB 5081187 2005 CDS
Not_in_DB 5081187 2005 APD
Not_in_D

  if not(info_db_cds_value):
  if not(info_data['CDS'].values):
  if not(info_db_apd_value):
  if not(info_data['APD'].values):
  if not(info_db_ard_value):
  if not(info_data['ARD'].values):
  if not(info_db_ada_value):
  if not(info_data['ADA'].values):
  if not(info_db_sp_value):
  if not(info_data['SP'].values):
  if not(info_db_cds_value):
  if not(info_data['CDS'].values):
  if not(info_db_apd_value):
  if not(info_data['APD'].values):
  if not(info_db_ard_value):
  if not(info_data['ARD'].values):
  if not(info_db_ada_value):
  if not(info_data['ADA'].values):
  if not(info_db_sp_value):
  if not(info_data['SP'].values):
  if not(info_db_cds_value):
  if not(info_data['CDS'].values):
  if not(info_db_apd_value):
  if not(info_data['APD'].values):
  if not(info_db_ard_value):
  if not(info_data['ARD'].values):
  if not(info_db_ada_value):
  if not(info_data['ADA'].values):
  if not(info_db_sp_value):
  if not(info_data['SP'].values):
  if not(info_db_cds_value):
  if not(info_dat

Unnamed: 0,Company_ID,Company_Name,Fiscal_Year,Industry,SIC_Code,Trading_Currency,Metric_Name,Data_in_DB,Data_in_File,ERROR_Type
