## SimFin Test All Datasets

This Notebook performs automated testing of all the bulk datasets from SimFin. The datasets are first downloaded from the SimFin server and then various tests are performed on the data. An exception is raised if any problems are found.

This Notebook can be run as usual if you have `simfin` installed, by running the following command from the directory where this Notebook is located:

    jupyter notebook

This Notebook can also be run using `pytest` which makes automated testing easier. You need to have the Python packages `simfin` and `nbval` installed. Then execute the following command from the directory where this Notebook is located:

    pytest --nbval-lax test_bulk_data.ipynb
    
This runs the entire Notebook and outputs error messages for all the cells that raised an exception.

## Imports

In [1]:
import pandas as pd
import numpy as np
import sys

In [2]:
import simfin as sf
from simfin.names import *
from simfin.datasets import *

## Configure SimFin

In [3]:
sf.set_data_dir('~/simfin_data/')

In [4]:
sf.load_api_key(path='~/simfin_api_key.txt',
                default_key='free')

## Load All Datasets

In [5]:
%%time
# Use refresh_days=0 to always download new data.
data = AllDatasets(refresh_days=0)

Loading "income-annual" from disk (0 days old).
Loading "income-quarterly" from disk (0 days old).
Loading "income-ttm" from disk (0 days old).
Loading "income-banks-annual" from disk (0 days old).
Loading "income-banks-quarterly" from disk (0 days old).
Loading "income-banks-ttm" from disk (0 days old).
Loading "income-insurance-annual" from disk (0 days old).
Loading "income-insurance-quarterly" from disk (0 days old).
Loading "income-insurance-ttm" from disk (0 days old).
Loading "balance-annual" from disk (0 days old).
Loading "balance-quarterly" from disk (0 days old).
Loading "balance-ttm" from disk (0 days old).
Loading "balance-banks-annual" from disk (0 days old).
Loading "balance-banks-quarterly" from disk (0 days old).
Loading "balance-banks-ttm" from disk (0 days old).
Loading "balance-insurance-annual" from disk (0 days old).
Loading "balance-insurance-quarterly" from disk (0 days old).
Loading "balance-insurance-ttm" from disk (0 days old).
Loading "cashflow-annual" from 

In [6]:
# Example for annual Income Statements.
data.get(dataset='income', variant='annual').head()

Unnamed: 0,Ticker,SimFinId,Currency,Fiscal Year,Fiscal Period,Report Date,Publish Date,Shares (Basic),Shares (Diluted),Revenue,...,Non-Operating Income (Loss),"Interest Expense, net","Pretax Income (Loss), Adjusted",Abnormal Gains (Losses),Pretax Income (Loss),"Income Tax (Expense) Benefit, net",Income (Loss) from Continuing Operations,Net Extraordinary Gains (Losses),Net Income,Net Income Available to Common Shareholders
0,DAI.DE,825113,USD,1996,FY,1996-12-31,2019-01-15,,,5548000000.0,...,,,607000000.0,371000000.0,978000000,,978000000,,976000000,976000000
1,DAI.DE,825113,EUR,1997,FY,1997-12-31,2019-01-15,,,117572000000.0,...,633000000.0,,6180000000.0,6547000000.0,12727000000,482000000.0,13209000000,,13094000000,13094000000
2,DAI.DE,825113,USD,1998,FY,1998-12-31,2019-01-15,,,154615000000.0,...,896000000.0,,10370000000.0,5004000000.0,15374000000,,15374000000,,15221000000,15221000000
3,DAI.DE,825113,USD,1999,FY,1999-12-31,2019-01-15,,,151035000000.0,...,335000000.0,,9724000000.0,646000000.0,10370000000,-3747000000.0,6623000000,,6623000000,6623000000
4,DAI.DE,825113,USD,2000,FY,2000-12-31,2019-01-15,,,152446000000.0,...,,,4056000000.0,4923300000.0,8979300000,-1630000000.0,7349300000,,7349300000,7349300000


## Function for Testing Datasets

In [7]:
def test_datasets(test_name, datasets=None, variants=None,
                  test_func=None, test_func_rows=None,
                  raise_exception=True):
    """
    Helper-function for running tests on many Pandas DataFrames.
    
    :param test_name:
        String with the name of the test.
        
    :param datasets:
        By default (datasets=None) all possible datasets
        will be tested. Otherwise datasets is a list of
        strings with dataset names to be tested.
        
    :param variants:
        By default (variants=None) all possible variants
        for each dataset will be tested, as defined in
        simfin.datasets.valid_variants. Otherwise variants
        is a list of strings and only those variants
        will be tested.
        
    :param test_func:
        Function to be called on the Pandas DataFrame for
        each dataset. If there are problems with the DataFrame
        then return True, otherwise return False.
        
        This is generally used for testing problems with the
        entire DataFrame. For example, if the dataset is empty:

        test_func = lambda df: len(df) == 0
        
        If this returns True then there is a problem with df.
                
    :param test_func_rows:
        Similar to test_func but for testing individual rows
        of a DataFrame. For example, test if SHARES_BASIC is
        None, zero or negative:
        
        test_func_rows = lambda df: (df[SHARES_BASIC] is None or
                                     df[SHARES_BASIC] <= 0)

    :param raise_exception:
        Boolean. If True then raise an exception if there were
        any problems, but wait until all datasets have been
        tested, so we can print the list of datasets with problems.
        
    :return:
        None
    """
    
    # Convert test_func_rows to test_func.
    if test_func_rows is not None:
        test_func = lambda df: test_func_rows(df).any()

    # Empty list of dataset/variant names that have problems.
    problems = []

    # For all datasets and variants.
    for dataset, variant, df in data.iter(datasets=datasets,
                                          variants=variants):
        try:
            # Perform the user-supplied test.
            problem_found = test_func(df)
        except:
            # An exception occurred so we consider
            # that to be a problem.
            problem_found = True

        if problem_found:
            # If the test found a problem on a
            # dataset and variant, add the names to
            # a list so we can print them later.
            pair = (dataset, variant)
            problems.append(pair)

    # If any problems were found.
    if len(problems) > 0:
        # Print the test's name.
        print("{}".format(test_name), file=sys.stderr)

        # Print all dataset-variant names that had problems.
        for dataset, variant in problems:
            msg = "dataset='{}', variant='{}'"
            msg = msg.format(dataset, variant)
            print(msg, file=sys.stderr)

        # Raise exception?
        if raise_exception:
            raise Exception(test_name)

## Function for Getting Rows with Problems

When a test has found problems in a dataset, it does not show which specific rows have the problem. You can get all the problematic rows using this function:

In [8]:
def get_problem_rows(df, test_func_rows):
    """
    Perform the given test on all rows of the given DataFrame
    and return a DataFrame with only the problematic rows.
    
    :param df:
        Pandas DataFrame.

    :param test_func_rows:
        Function used for testing each row. This takes
        a Pandas DataFrame as an argument and returns
        a Pandas Series of booleans whether each row
        in the original DataFrame has the error.
        
        For example:
        
        test_func_rows = lambda df: (df[SHARES_BASIC] is None or
                                     df[SHARES_BASIC] <= 0)

    :return:
        Pandas DataFrame with only the problematic rows.
    """

    # Index of the rows with problems.
    idx = test_func_rows(df)
    
    # Extract the rows with problems.
    df2 = df[idx]
    
    return df2

## Function for Testing Equality with Tolerance

This function is useful when comparing floating point numbers, or when comparing accounting numbers that are supposed to have a strict relationship (e.g. Assets = Liabilities + Equity) but we might tolerate a small degree of error in the data e.g. 1%.

In [9]:
def isclose(x, y, tolerance=0.01):
    """
    Compare whether x and y are approximately equal within
    the given tolerance, which is a ratio so tolerance=0.01
    means that we tolerate max 1% difference between x and y.
    
    This is similar to numpy.isclose() but is a more efficient
    implementation for Pandas which apparently does not have
    this built-in already (v. 0.25.1)
    
    :param x:
        Pandas DataFrame or Series.

    :param y:
        Pandas DataFrame or Series.

    :param tolerance:
        Max allowed difference as a ratio e.g. 0.01 = 1%.

    :return:
        Pandas DataFrame or Series with booleans whether
        x and y are approx. equal.
    """
    return (x-y).abs() <= tolerance * y.abs()

## Dataset is empty

In [10]:
test_name = "Dataset is empty"
test_func = lambda df: len(df) == 0
test_datasets(datasets=datasets_all,
              test_name=test_name, test_func=test_func)

Dataset is empty
dataset='balance', variant='annual'
dataset='balance-banks', variant='annual'
dataset='balance-insurance', variant='annual'


Exception: Dataset is empty

In [11]:
data.get(dataset='balance', variant='annual')

Unnamed: 0,Ticker,SimFinId,Currency,Fiscal Year,Fiscal Period,Report Date,Publish Date,Shares (Basic),Shares (Diluted),"Cash, Cash Equivalents & Short Term Investments",...,Short Term Debt,Total Current Liabilities,Long Term Debt,Total Noncurrent Liabilities,Total Liabilities,Share Capital & Additional Paid-In Capital,Treasury Stock,Retained Earnings,Total Equity,Total Liabilities & Equity


## Shares Basic is None or <= 0

In [12]:
test_name = "SHARES_BASIC is None or <= 0"
test_func_rows = lambda df: (df[SHARES_BASIC] is None or
                             df[SHARES_BASIC] <= 0)
test_datasets(datasets=datasets_all,
              test_name=test_name, test_func_rows=test_func_rows)

SHARES_BASIC is None or <= 0
dataset='income', variant='annual'
dataset='income', variant='quarterly'
dataset='income', variant='ttm'
dataset='balance', variant='quarterly'
dataset='balance', variant='ttm'
dataset='cashflow', variant='annual'
dataset='cashflow', variant='quarterly'
dataset='cashflow', variant='ttm'
dataset='shareprices', variant='daily'
dataset='companies', variant='None'
dataset='industries', variant='None'


Exception: SHARES_BASIC is None or <= 0

In [13]:
# Show the problematic rows for a dataset.
df = data.get(dataset='income', variant='annual')
get_problem_rows(df=df, test_func_rows=test_func_rows)

Unnamed: 0,Ticker,SimFinId,Currency,Fiscal Year,Fiscal Period,Report Date,Publish Date,Shares (Basic),Shares (Diluted),Revenue,...,Non-Operating Income (Loss),"Interest Expense, net","Pretax Income (Loss), Adjusted",Abnormal Gains (Losses),Pretax Income (Loss),"Income Tax (Expense) Benefit, net",Income (Loss) from Continuing Operations,Net Extraordinary Gains (Losses),Net Income,Net Income Available to Common Shareholders
938,UPL,700786,USD,2008,FY,2008-12-31,2009-08-04,0.0,0.0,1084400000.0,...,-20443000.0,-21276000.0,621563000.0,33216000.0,654779000,-240504000.0,414275000,,414275000,414275000
1100,AAL,68568,USD,2009,FY,2009-12-31,2010-07-21,0.0,0.0,19917000000.0,...,-748000000.0,-710000000.0,-1752000000.0,,-1752000000,284000000.0,-1468000000,,-1468000000,-1468000000
2082,UPL,700786,USD,2009,FY,2009-12-31,2010-11-04,0.0,0.0,666762000.0,...,-40055000.0,-37167000.0,194294000.0,-890483000.0,-696189000,245136000.0,-451053000,,-451053000,-451053000
2427,AAL,68568,USD,2010,FY,2010-12-31,2011-02-16,0.0,0.0,22170000000.0,...,-814000000.0,-797000000.0,-506000000.0,0.0,-506000000,35000000.0,-471000000,,-471000000,-471000000
3034,SPB,357600,USD,2010,FY,2010-09-30,2011-08-11,0.0,0.0,2567011000.0,...,-289315000.0,-277015000.0,-82085000.0,-42098000.0,-124183000,-63189000.0,-187372000,-2735000.0,-190107000,-190107000
3088,CHTR,378110,USD,2010,FY,2010-12-31,2011-05-03,0.0,0.0,7059000000.0,...,-881000000.0,-877000000.0,143000000.0,-85000000.0,58000000,-295000000.0,-237000000,,-237000000,-237000000
3359,SSNT,511627,USD,2010,FY,2010-12-31,2011-08-04,0.0,0.0,7486703.0,...,-114469.0,-114469.0,-1051586.0,483081.0,-568505,0.0,-568505,,-468921,-468921
3563,CYCA,660121,USD,2010,FY,2010-09-30,2011-09-28,0.0,0.0,,...,,,-458106.0,57018.0,-401088,,-401088,,-401088,-401088
3577,FONU,663685,USD,2010,FY,2010-09-30,2011-09-14,0.0,,230599.0,...,-71105.0,-71105.0,-431536.0,-360431.0,-791967,,-791967,,-791967,-791967
3662,UPL,700786,USD,2010,FY,2010-12-31,2011-02-24,0.0,0.0,979386000.0,...,-48772000.0,-49032000.0,407524000.0,315550000.0,723074000,-258615000.0,464459000,,464459000,464459000


## Shares Diluted is None or <= 0

In [14]:
test_name = "SHARES_DILUTED is None or <= 0"
test_func_rows = lambda df: (df[SHARES_DILUTED] is None or
                             df[SHARES_DILUTED] <= 0)
test_datasets(datasets=datasets_fundamental,
              test_name=test_name, test_func_rows=test_func_rows)

SHARES_DILUTED is None or <= 0
dataset='income', variant='annual'
dataset='income', variant='quarterly'
dataset='income', variant='ttm'
dataset='balance', variant='quarterly'
dataset='balance', variant='ttm'
dataset='cashflow', variant='annual'
dataset='cashflow', variant='quarterly'
dataset='cashflow', variant='ttm'


Exception: SHARES_DILUTED is None or <= 0

In [15]:
# Show the problematic rows for a dataset.
df = data.get(dataset='income', variant='quarterly')
get_problem_rows(df=df, test_func_rows=test_func_rows)

Unnamed: 0,Ticker,SimFinId,Currency,Fiscal Year,Fiscal Period,Report Date,Publish Date,Shares (Basic),Shares (Diluted),Revenue,...,Non-Operating Income (Loss),"Interest Expense, net","Pretax Income (Loss), Adjusted",Abnormal Gains (Losses),Pretax Income (Loss),"Income Tax (Expense) Benefit, net",Income (Loss) from Continuing Operations,Net Extraordinary Gains (Losses),Net Income,Net Income Available to Common Shareholders
613,ENS,35886,USD,2010,Q4,2011-03-31,2011-02-09,-1.248976e+09,-1.266011e+09,4.505370e+08,...,-6217000.0,-5991000.0,30983000.0,-6163000.0,24820000,-6991000.0,17829000,,17829000,17829000
614,ENS,35886,USD,2011,Q4,2012-03-31,2011-05-31,-3.893758e+08,-3.943303e+08,5.480540e+08,...,-5907000.0,-4361000.0,49197000.0,-9741000.0,39456000,-9365000.0,30091000,,30091000,30091000
851,JOY,39382,USD,2009,Q4,2009-10-31,2010-06-08,-3.068902e+08,-3.085706e+08,9.635280e+08,...,-5322000.0,-5322000.0,178780000.0,5585000.0,184365000,-60340000.0,124025000,,124025000,124025000
1037,AXON,41996,USD,2018,Q4,2019-03-31,2019-06-11,-2.930225e+08,-2.930225e+08,,...,-1447000.0,-1722000.0,-14753000.0,,-9137000,91000.0,-9046000,,-9046000,-9046000
1410,A,45846,USD,2013,Q4,2013-10-31,2013-12-19,-7.940000e+07,-8.160000e+07,-1.170000e+09,...,-32000000.0,-28000000.0,-312000000.0,,-312000000,24000000.0,-288000000,,221000000,221000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
71131,BTU,918412,USD,2013,Q4,2013-12-31,2014-02-21,-7.298000e+08,-7.309000e+08,1.742800e+09,...,-85300000.0,-80700000.0,-70500000.0,-72500000.0,-143000000,248700000.0,105700000,-160100000.0,-58900000,-58900000
71132,BTU,918412,USD,2014,Q4,2014-12-31,2015-02-25,1.720000e+07,-4.822000e+08,1.684500e+09,...,-165100000.0,-101500000.0,-109100000.0,-45000000.0,-154100000,-116500000.0,-270600000,-10400000.0,-282600000,-282600000
71502,TRNX,920932,USD,2011,Q4,2011-12-31,2012-05-14,-7.438220e+05,-7.438220e+05,6.904200e+07,...,383000.0,430000.0,-367000.0,-704000.0,-1071000,692000.0,-379000,,-379000,-379000
71503,TRNX,920932,USD,2012,Q4,2012-12-31,2012-11-09,-1.184547e+06,-1.184547e+06,7.903300e+07,...,1929000.0,2269000.0,-2205000.0,-9238000.0,-11443000,-12240000.0,-23683000,,-23683000,-23683000


## Revenue is negative

In [16]:
test_name = "REVENUE < 0"
test_func_rows = lambda df: (df[REVENUE] < 0)
test_datasets(datasets=datasets_income,
              test_name=test_name, test_func_rows=test_func_rows)

REVENUE < 0
dataset='income', variant='annual'
dataset='income', variant='quarterly'
dataset='income', variant='ttm'
dataset='income-banks', variant='annual'
dataset='income-banks', variant='quarterly'
dataset='income-banks', variant='ttm'
dataset='income-insurance', variant='annual'
dataset='income-insurance', variant='quarterly'
dataset='income-insurance', variant='ttm'


Exception: REVENUE < 0

In [17]:
# Show the problematic rows for a dataset.
df = data.get(dataset='income', variant='quarterly')
get_problem_rows(df=df, test_func_rows=test_func_rows)

Unnamed: 0,Ticker,SimFinId,Currency,Fiscal Year,Fiscal Period,Report Date,Publish Date,Shares (Basic),Shares (Diluted),Revenue,...,Non-Operating Income (Loss),"Interest Expense, net","Pretax Income (Loss), Adjusted",Abnormal Gains (Losses),Pretax Income (Loss),"Income Tax (Expense) Benefit, net",Income (Loss) from Continuing Operations,Net Extraordinary Gains (Losses),Net Income,Net Income Available to Common Shareholders
59,FOX,242,USD,2011,Q4,2011-06-30,2011-08-15,2.626000e+09,2.647000e+09,-2.110000e+08,...,-175000000.0,-266000000.0,278000000.0,-3000000.0,275000000,-16000000.0,259000000,,683000000,683000000
60,FOX,242,USD,2012,Q4,2012-06-30,2012-08-14,2.413000e+09,2.414000e+09,-2.850000e+08,...,-101000000.0,-273000000.0,663000000.0,-47000000.0,616000000,-163000000.0,453000000,,-1553000000,-1553000000
275,PPL,26853,USD,2013,Q4,2013-12-31,2014-02-24,5.061656e+08,5.272955e+08,-1.718000e+09,...,-98000000.0,-25000000.0,166000000.0,,166000000,-19000000.0,147000000,,-98000000,-98000000
1410,A,45846,USD,2013,Q4,2013-10-31,2013-12-19,-7.940000e+07,-8.160000e+07,-1.170000e+09,...,-32000000.0,-28000000.0,-312000000.0,,-312000000,24000000.0,-288000000,,221000000,221000000
1537,HPE,47023,USD,2015,Q4,2015-10-31,2016-09-08,-2.069368e+09,3.146594e+09,-7.582000e+09,...,33000000.0,35000000.0,401000000.0,174000000.0,575000000,989000000.0,1564000000,,1385000000,1385000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
72316,AI,939571,USD,2018,Q4,2018-12-31,2019-02-19,3.041900e+07,3.041900e+07,-3.691800e+07,...,-37156000.0,-37174000.0,-77390000.0,137802000.0,60412000,-33639000.0,26773000,,26773000,26926000
72317,AI,939571,USD,2010,Q1,2010-03-31,2011-07-29,7.733000e+06,7.846000e+06,-8.974000e+06,...,-9202000.0,-9202000.0,-22571000.0,-539000.0,-23110000,112000.0,-22998000,,-22998000,-22998000
72318,AI,939571,USD,2010,Q4,2010-12-31,2012-02-23,7.632000e+06,7.780000e+06,-9.839000e+06,...,-10166000.0,-10166000.0,-65335000.0,-2863000.0,-68198000,705000.0,-67493000,,-67493000,-67493000
72319,AI,939571,USD,2011,Q4,2011-12-31,2012-04-20,7.748000e+06,7.799000e+06,-1.290000e+07,...,-13727000.0,-13727000.0,-64758000.0,529000.0,-64229000,419000.0,-63810000,,-63810000,-63810000


## Assets != Liabilities + Equity (Exact Comparison)

In [18]:
test_name = "Assets != Liabilities + Equity (Exact Comparison)"
test_func_rows = lambda df: (df[TOTAL_ASSETS] != df[TOTAL_LIABILITIES] + df[TOTAL_EQUITY])
test_datasets(datasets=datasets_balance,
              test_name=test_name, test_func_rows=test_func_rows)

Assets != Liabilities + Equity (Exact Comparison)
dataset='balance', variant='quarterly'
dataset='balance', variant='ttm'
dataset='balance-banks', variant='quarterly'
dataset='balance-banks', variant='ttm'
dataset='balance-insurance', variant='quarterly'
dataset='balance-insurance', variant='ttm'


Exception: Assets != Liabilities + Equity (Exact Comparison)

In [19]:
# Get the problematic rows for a dataset.
df = data.get(dataset='balance', variant='quarterly')
df2 = get_problem_rows(df=df, test_func_rows=test_func_rows)

# Only show the relevant columns.
df2[[TICKER, SIMFIN_ID, REPORT_DATE, TOTAL_ASSETS, TOTAL_LIABILITIES, TOTAL_EQUITY]]

Unnamed: 0,Ticker,SimFinId,Report Date,Total Assets,Total Liabilities,Total Equity
120,BLK,914,2011-02-28,1.784590e+11,1.521250e+11,2.633300e+10
123,BLK,914,2010-02-28,1.781240e+11,1.535220e+11,2.460100e+10
134,BLK,914,2019-08-31,1.607500e+11,1.306870e+11,3.266000e+10
135,BLK,914,2019-05-31,1.628120e+11,1.326340e+11,3.237100e+10
136,BLK,914,2019-02-28,1.558310e+11,1.260330e+11,3.354000e+10
...,...,...,...,...,...,...
65089,TBIO,941303,2019-03-31,1.288570e+08,1.826280e+08,9.510800e+07
65090,TBIO,941303,2018-12-31,1.496020e+08,1.623560e+08,1.252950e+08
65091,TBIO,941303,2018-09-30,1.663190e+08,1.751710e+08,1.294170e+08
65092,TBIO,941303,2018-06-30,2.428700e+07,1.112060e+08,5.512600e+07


## Assets != Liabilities + Equity (1% Tolerance)

The above test used exact comparison. We now allow for 1% error.

In [20]:
def test_func_rows(df):
    x = df[TOTAL_ASSETS]
    y = df[TOTAL_LIABILITIES] + df[TOTAL_EQUITY]
    
    # Compare x and y within 1% tolerance. Note the resulting
    # boolean array is negated because we want to indicate
    # which rows are problematic so x and y are not close.
    return ~isclose(x=x, y=y, tolerance=0.01)

In [21]:
test_name = "Assets != Liabilities + Equity (1% Tolerance)"
test_datasets(datasets=datasets_balance,
              test_name=test_name, test_func_rows=test_func_rows)

Assets != Liabilities + Equity (1% Tolerance)
dataset='balance', variant='quarterly'
dataset='balance', variant='ttm'
dataset='balance-banks', variant='quarterly'
dataset='balance-banks', variant='ttm'
dataset='balance-insurance', variant='quarterly'
dataset='balance-insurance', variant='ttm'


Exception: Assets != Liabilities + Equity (1% Tolerance)

In [22]:
# Get the problematic rows for a dataset.
df = data.get(dataset='balance', variant='quarterly')
df2 = get_problem_rows(df=df, test_func_rows=test_func_rows)

# Only show the relevant columns.
df2[[TICKER, SIMFIN_ID, REPORT_DATE, TOTAL_ASSETS, TOTAL_LIABILITIES, TOTAL_EQUITY]]

Unnamed: 0,Ticker,SimFinId,Report Date,Total Assets,Total Liabilities,Total Equity
134,BLK,914,2019-08-31,1.607500e+11,1.306870e+11,3.266000e+10
135,BLK,914,2019-05-31,1.628120e+11,1.326340e+11,3.237100e+10
136,BLK,914,2019-02-28,1.558310e+11,1.260330e+11,3.354000e+10
2614,TPB,58275,2019-03-31,3.690460e+08,2.371970e+08,8.884100e+07
2615,TPB,58275,2018-12-31,3.473770e+08,2.487540e+08,8.262300e+07
...,...,...,...,...,...,...
65089,TBIO,941303,2019-03-31,1.288570e+08,1.826280e+08,9.510800e+07
65090,TBIO,941303,2018-12-31,1.496020e+08,1.623560e+08,1.252950e+08
65091,TBIO,941303,2018-09-30,1.663190e+08,1.751710e+08,1.294170e+08
65092,TBIO,941303,2018-06-30,2.428700e+07,1.112060e+08,5.512600e+07


## Dates are invalid (Fundamentals)

In [23]:
# Lambda function for converting strings to dates. Format: YYYY-MM-DD
# This will raise an exception if invalid dates are encountered.
date_parser = lambda column: pd.to_datetime(column, yearfirst=True, dayfirst=False)

In [24]:
# Test function for the entire DataFrame.
# This cannot show which individual rows have problems.
def test_func(df):
    result1 = date_parser(df[REPORT_DATE])
    result2 = date_parser(df[PUBLISH_DATE])
    
    # We only get to this point if date_parser() does not
    # raise any exceptions, in which case we assume the
    # data did not have any problems.
    return False

In [25]:
test_name = "REPORT_DATE or PUBLISH_DATE is invalid"
test_datasets(datasets=datasets_fundamental,
              test_name=test_name, test_func=test_func)

REPORT_DATE or PUBLISH_DATE is invalid
dataset='income', variant='annual'
dataset='income', variant='quarterly'


Exception: REPORT_DATE or PUBLISH_DATE is invalid

## Dates are invalid (Share-Prices)

In [26]:
# Test function for the entire DataFrame.
# This cannot show which individual rows have problems.
def test_func(df):
    result1 = date_parser(df[DATE])
    
    # We only get to this point if date_parser() does not
    # raise any exceptions, in which case we assume the
    # data did not have any problems.
    return False

In [27]:
test_name = "DATE is invalid"
test_datasets(datasets=datasets_shareprices,
              test_name=test_name, test_func=test_func)

## Duplicate Tickers

In [28]:
def get_duplicate_tickers(df):
    """
    Return the rows of `df` where multiple SIMFIN_ID
    have the same TICKER.
    
    :param df: Pandas DataFrame with TICKER column.
    :return: Pandas DataFrame.
    """

    # Remove duplicate rows of [TICKER, SIMFIN_ID] pairs.
    # For the 'companies' dataset this is not necessary,
    # but for e.g. the 'income' dataset we have many rows
    # for each [TICKER, SIMFIN_ID] pair because there are
    # many financial reports for each of these ID pairs.
    idx = df[[TICKER, SIMFIN_ID]].duplicated()
    df2 = df[~idx]

    # Now the DataFrame df2 only contains unique rows of
    # [TICKER, SIMFIN_ID] so we need to check if there are
    # any duplicate TICKER.

    # Index for rows where TICKER is a duplicate.
    idx1 = df2[TICKER].duplicated()

    # Index for rows where TICKER is not NaN.
    # These would otherwise show up as duplicates.
    idx2 = df2[TICKER].notna()

    # Index for rows where TICKER is a duplicate but not NaN.
    idx = idx1 & idx2

    # Get those rows from the DataFrame.
    df2 = df2[idx]

    return df2

In [29]:
# Test-function whether a DataFrame has duplicate tickers.
test_func = lambda df: (len(get_duplicate_tickers(df=df)) > 0)

In [30]:
# Test whether these datasets have duplicate tickers.
# It might only be necessary for 'companies' but we
# might as well test all datasets that use tickers.
_datasets = ['companies'] + datasets_fundamental + datasets_shareprices

In [31]:
test_name = "Duplicate Tickers"
test_datasets(datasets=_datasets,
              test_name=test_name, test_func=test_func)

Duplicate Tickers
dataset='companies', variant='None'
dataset='income', variant='annual'
dataset='income', variant='quarterly'
dataset='income', variant='ttm'
dataset='balance', variant='quarterly'
dataset='balance', variant='ttm'
dataset='cashflow', variant='annual'
dataset='cashflow', variant='quarterly'
dataset='cashflow', variant='ttm'
dataset='shareprices', variant='daily'


Exception: Duplicate Tickers

In [None]:
# Show duplicate tickers in the 'companies' dataset.
df = data.get(dataset='companies')
get_duplicate_tickers(df=df)

In [32]:
# Show duplicate tickers in the 'income-annual' dataset.
df = data.get(dataset='income', variant='annual')
get_duplicate_tickers(df=df)

Unnamed: 0,Ticker,SimFinId,Currency,Fiscal Year,Fiscal Period,Report Date,Publish Date,Shares (Basic),Shares (Diluted),Revenue,...,Non-Operating Income (Loss),"Interest Expense, net","Pretax Income (Loss), Adjusted",Abnormal Gains (Losses),Pretax Income (Loss),"Income Tax (Expense) Benefit, net",Income (Loss) from Continuing Operations,Net Extraordinary Gains (Losses),Net Income,Net Income Available to Common Shareholders
274,FE,447804,USD,2007,FY,2007-12-31,2009-08-03,306000000.0,310000000.0,12802000000.0,...,-655000000.0,-775000000.0,1639000000.0,556000000.0,2195000000,-883000000.0,1312000000,,1309000000,1309000000
290,DUK,709706,USD,2007,FY,2007-12-31,2009-11-06,84810000.0,,9153000000.0,...,-1234000000.0,-1227000000.0,312000000.0,-34000000.0,278000000,-334000000.0,-56000000,-206000000.0,-270000000,-270000000
1008,LSI,901127,USD,2008,FY,2008-12-31,2010-08-06,,,196286000.0,...,-37671000.0,-37775000.0,35994000.0,,35994000,,35994000,3689000.0,37399000,37399000
1815,ES,446632,USD,2009,FY,2009-12-31,2010-08-05,176636000.0,176872800.0,1623893000.0,...,-23791000.0,-30403000.0,66551000.0,,66551000,-14588000.0,51963000,,50832000,50832000
2072,EXC,692600,USD,2009,FY,2009-12-31,2011-08-03,221000000.0,221000000.0,5019000000.0,...,-359000000.0,-338000000.0,-4513000000.0,-4000000.0,-4517000000,-12000000.0,-4529000000,-12000000.0,-4541000000,-4541000000
3979,SHO,39593,USD,2011,FY,2012-02-29,2012-10-09,23100000.0,23100000.0,2347387000.0,...,-214000.0,-421000.0,82248000.0,,82248000,-32492000.0,49756000,,49756000,49756000
4043,AGN,61474,USD,2011,FY,2011-12-31,2014-02-25,124500000.0,126500000.0,4584400000.0,...,-67400000.0,-66900000.0,456000000.0,,456000000,-196900000.0,259100000,,260900000,260900000
4956,GGP,442374,USD,2011,FY,2011-12-31,2012-02-29,943669000.0,981136000.0,2396960000.0,...,-861494000.0,-861494000.0,-242192000.0,54126000.0,-188066000,8723000.0,-179343000,-112913000.0,-298624000,-298624000
5896,MDT,73420,USD,2012,FY,2012-04-30,2015-06-23,1019300000.0,1027500000.0,16590000000.0,...,-259000000.0,-151000000.0,4619000000.0,-368000000.0,4251000000,-784000000.0,3467000000,,3467000000,3467000000
6052,WFT,117829,USD,2012,FY,2012-12-31,2014-07-25,765000000.0,765000000.0,15215000000.0,...,-586000000.0,-486000000.0,577000000.0,-865000000.0,-288000000,-462000000.0,-750000000,,-778000000,-778000000


## Missing Annual Reports

In [33]:
def missing_annual_reports(df):
    """
    Return a list of the SIMFIN_ID's from the given DataFrame
    that have missing annual reports.
    
    :param df:
        Pandas DataFrame with a dataset e.g. 'income-annual'.
        It must have columns SIMFIN_ID and FISCAL_YEAR.

    :return:
        List of integers with SIMFIN_ID's that have missing reports.
    """
    
    # The idea is to test for each SIMFIN_ID individually,
    # whether the DataFrame has all the expected reports for
    # consecutive Fiscal Years between the min/max years.
    
    # Helper-function for processing a DataFrame for one SIMFIN_ID.
    def _missing(df):
        # Get the Fiscal Years from the DataFrame.
        fiscal_years = df[FISCAL_YEAR]

        # How many years between min and max fiscal years.
        num_years = fiscal_years.max() - fiscal_years.min() + 1

        # We expect the Series to have the same length, otherwise
        # some reports must be missing between min and max years.
        missing = (num_years != len(fiscal_years))

        return missing
    
    # Process all companies individually and get a Pandas
    # DataFrame with a boolean for each SIMFIN_ID whether
    # it has some missing Fiscal Years.
    idx = df.groupby(SIMFIN_ID).apply(_missing)

    # List of the SIMFIN_ID's that have missing reports.
    simfin_ids = list(idx[idx].index.values)

    return simfin_ids

In [34]:
test_name = "Missing annual reports"
test_func = lambda df: len(missing_annual_reports(df=df)) > 0
test_datasets(datasets=datasets_fundamental,
              variants=['annual'],
              test_name=test_name, test_func=test_func)

Missing annual reports
dataset='income', variant='annual'
dataset='income-insurance', variant='annual'
dataset='cashflow', variant='annual'


Exception: Missing annual reports

In [None]:
# Get list of SIMFIN_ID's that have missing reports for a dataset.
df = data.get(dataset='income', variant='annual')
missing_annual_reports(df=df)

In [35]:
def sort_annual_reports(df, simfin_id):
    """
    Get the data for a given SIMFIN_ID and set the index to be
    the sorted Fiscal Year so it is easier to see which are missing.
    """
    return df.set_index([SIMFIN_ID, FISCAL_YEAR]).sort_index().loc[simfin_id]

In [36]:
# Show all the reports for a given SIMFIN_ID sorted by
# Fiscal Year so it is easier to see which are missing.
sort_annual_reports(df=df, simfin_id=37910)

Unnamed: 0_level_0,Ticker,Currency,Fiscal Period,Report Date,Publish Date,Shares (Basic),Shares (Diluted),Revenue,Cost of Revenue,Gross Profit,...,Non-Operating Income (Loss),"Interest Expense, net","Pretax Income (Loss), Adjusted",Abnormal Gains (Losses),Pretax Income (Loss),"Income Tax (Expense) Benefit, net",Income (Loss) from Continuing Operations,Net Extraordinary Gains (Losses),Net Income,Net Income Available to Common Shareholders
Fiscal Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2012,CK00015847,USD,FY,2012-12-31,2015-03-26,,,2365339000.0,-1700756000.0,664583000.0,...,-58893000.0,-58893000.0,330289000.0,-351831000.0,-21542000,-1408000.0,-22950000,,-22950000,-22950000
2014,CK00015847,USD,FY,2014-12-31,2015-03-26,,,2508315000.0,-1801798000.0,706517000.0,...,-83379000.0,-83379000.0,323574000.0,-345764000.0,-22190000,7523000.0,-14667000,,-14667000,-14667000
2015,CK00015847,USD,FY,2015-12-31,2016-03-23,,,2471949000.0,-1743660000.0,728289000.0,...,-84053000.0,-84053000.0,326540000.0,-386805000.0,-60265000,-1978000.0,-62243000,,-62243000,-62243000
2016,CK00015847,USD,FY,2016-12-31,2017-03-30,,,2456767000.0,-1730774000.0,725993000.0,...,-80889000.0,-80889000.0,324562000.0,-366913000.0,-42351000,-2212000.0,-44563000,,-44563000,-44563000


## Missing Quarterly Reports

In [37]:
def missing_quarterly_reports(df):
    """
    Return a list of the SIMFIN_ID's from the given DataFrame
    that have missing quarterly or ttm reports.
    
    :param df:
        Pandas DataFrame with a dataset e.g. 'income-annual'.
        It must have columns SIMFIN_ID, FISCAL_YEAR, FISCAL_PERIOD.

    :return:
        List of integers with SIMFIN_ID's that have missing reports.
    """
    
    # The idea is to test for each SIMFIN_ID individually,
    # whether the DataFrame has all the expected reports for
    # consecutive Fiscal Years and Periods between the min/max.
    
    # Helper-function for processing a DataFrame for one SIMFIN_ID.
    def _missing(df):
        # Get the Fiscal Years and Periods from the DataFrame.
        fiscal_years_periods = df[[FISCAL_YEAR, FISCAL_PERIOD]]

        # The first Fiscal Year and Period.
        min_year = fiscal_years_periods[FISCAL_YEAR].min()
        min_idx = (fiscal_years_periods[FISCAL_YEAR] == min_year)
        min_period = fiscal_years_periods[min_idx][FISCAL_PERIOD].min()

        # The last Fiscal Year and Period.
        max_year = fiscal_years_periods[FISCAL_YEAR].max()
        max_idx = (fiscal_years_periods[FISCAL_YEAR] == max_year)
        max_period = fiscal_years_periods[max_idx][FISCAL_PERIOD].max()

        # How many years between min and max fiscal years.
        num_years = max_year - min_year + 1

        # Total number of Fiscal Periods between first and
        # last Fiscal Years - if all Fiscal Periods were included.
        num_periods = num_years * 4

        # Used to map from Fiscal Period strings to ints.
        # This is safer and easier to understand than
        # e.g. def map_period(x): int(x[1])
        map_period = \
        {
            'Q1': 1,
            'Q2': 2,
            'Q3': 3,
            'Q4': 4
        }

        # Number of Fiscal Periods missing in the first year.
        adj_min_period = map_period[min_period] - 1

        # Number of Fiscal Periods missing in the last year.
        adj_max_period = 4 - map_period[max_period]

        # Adjust the number of Fiscal Periods between the min/max
        # Fiscal Years and Periods by subtracting those periods
        # missing in the first and last years.
        expected_periods = num_periods - adj_min_period - adj_max_period

        # If the expected number of Fiscal Periods between the
        # min and max dates, is different from the actual number
        # of Fiscal Periods in the DataFrame, then some are missing.
        missing = (expected_periods != len(fiscal_years_periods))

        return missing

    # Process all companies individually and get a Pandas
    # DataFrame with a boolean for each SIMFIN_ID whether
    # it has some missing Fiscal Years.
    idx = df.groupby(SIMFIN_ID).apply(_missing)

    # List of the SIMFIN_ID's that have missing reports.
    simfin_ids = list(idx[idx].index.values)

    return simfin_ids

In [38]:
test_name = "Missing quarterly reports"
test_func = lambda df: len(missing_quarterly_reports(df=df)) > 0
test_datasets(datasets=datasets_fundamental,
              variants=['quarterly'],
              test_name=test_name, test_func=test_func)

Missing quarterly reports
dataset='income', variant='quarterly'
dataset='income-banks', variant='quarterly'
dataset='income-insurance', variant='quarterly'
dataset='balance', variant='quarterly'
dataset='balance-banks', variant='quarterly'
dataset='balance-insurance', variant='quarterly'
dataset='cashflow', variant='quarterly'
dataset='cashflow-banks', variant='quarterly'
dataset='cashflow-insurance', variant='quarterly'


Exception: Missing quarterly reports

In [39]:
# Get list of SIMFIN_ID's that have missing reports for a dataset.
df = data.get(dataset='income', variant='quarterly')
missing_quarterly_reports(df=df)

[36205,
 60906,
 82671,
 82753,
 86808,
 89750,
 98663,
 122208,
 122759,
 123623,
 124551,
 133865,
 139560,
 144111,
 164920,
 181659,
 189248,
 210709,
 217619,
 230667,
 231124,
 233893,
 243720,
 258154,
 258455,
 259098,
 286194,
 286372,
 333440,
 334921,
 337852,
 340222,
 340525,
 350160,
 353395,
 353811,
 358146,
 359467,
 360213,
 362372,
 368990,
 378121,
 378130,
 378142,
 378179,
 378185,
 378197,
 378218,
 378225,
 419436,
 436223,
 495816,
 511749,
 515845,
 546690,
 548204,
 549638,
 569369,
 592569,
 609619,
 617429,
 626019,
 627775,
 637325,
 639819,
 640898,
 641822,
 646016,
 652264,
 652800,
 653045,
 653156,
 653863,
 654226,
 654363,
 658466,
 658467,
 658986,
 659172,
 659371,
 660712,
 662405,
 662407,
 662568,
 662984,
 663635,
 664342,
 664525,
 665991,
 666027,
 666100,
 666570,
 666925,
 667156,
 667670,
 667728,
 668565,
 670859,
 671043,
 678892,
 684097,
 687589,
 687925,
 689588,
 689895,
 692917,
 700411,
 703911,
 704560,
 705165,
 705845,
 706380,

In [40]:
def sort_quarterly_reports(df, simfin_id):
    """
    Get the data for a given SIMFIN_ID and set the index to be
    the sorted Fiscal Year and Period so it is easier to see
    which ones are missing.
    """
    return df.set_index([SIMFIN_ID, FISCAL_YEAR, FISCAL_PERIOD]).sort_index().loc[simfin_id]

In [41]:
# Show all the reports for a given SIMFIN_ID sorted by
# Fiscal Year and Period so it is easier to see which are missing.
sort_quarterly_reports(df=df, simfin_id=139560)

Unnamed: 0_level_0,Unnamed: 1_level_0,Ticker,Currency,Report Date,Publish Date,Shares (Basic),Shares (Diluted),Revenue,Cost of Revenue,Gross Profit,Operating Expenses,...,Non-Operating Income (Loss),"Interest Expense, net","Pretax Income (Loss), Adjusted",Abnormal Gains (Losses),Pretax Income (Loss),"Income Tax (Expense) Benefit, net",Income (Loss) from Continuing Operations,Net Extraordinary Gains (Losses),Net Income,Net Income Available to Common Shareholders
Fiscal Year,Fiscal Period,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
2012,Q1,REXR,USD,2012-03-31,2013-10-03,,,7968000.0,-1986000.0,5982000.0,-4607000.0,...,-4101000.0,-4158000.0,-2726000.0,544000.0,-2182000,,-2182000,77000.0,-172000,-172000
2012,Q2,REXR,USD,2012-06-30,2013-10-03,,,8234000.0,-2184000.0,6050000.0,-4382000.0,...,-4436000.0,-4346000.0,-2768000.0,445000.0,-2323000,,-2323000,-145000.0,-1459000,-1459000
2013,Q1,REXR,USD,2013-03-31,2013-10-03,,,9317000.0,-2120000.0,7197000.0,-4651000.0,...,-4070000.0,-3857000.0,-1524000.0,1284000.0,-240000,,-240000,2294000.0,328000,328000
2013,Q2,REXR,USD,2013-06-30,2013-10-03,,,10822000.0,-2442000.0,8380000.0,-5404000.0,...,-5179000.0,-4467000.0,-2203000.0,-624000.0,-2827000,,-2827000,2359000.0,-2286000,-2286000
2013,Q3,REXR,USD,2013-09-30,2013-11-13,24574432.0,24574432.0,8980000.0,-2527000.0,6453000.0,-5525000.0,...,-634000.0,-717000.0,294000.0,-119000.0,175000,,175000,120000.0,256000,256000
2014,Q1,REXR,USD,2014-03-31,2014-05-14,25419418.0,25419418.0,13691000.0,-4134000.0,9557000.0,-8735000.0,...,-1206000.0,-1251000.0,-384000.0,-333000.0,-717000,,-717000,2146000.0,1277000,1261000
2014,Q2,REXR,USD,2014-06-30,2014-08-11,25419757.0,25419757.0,14996000.0,-3892000.0,11104000.0,-8783000.0,...,-1588000.0,-1537000.0,733000.0,-652000.0,81000,,81000,,73000,49000
2014,Q3,REXR,USD,2014-09-30,2014-11-06,33527183.0,33527183.0,18036000.0,-4879000.0,13157000.0,-11305000.0,...,-1955000.0,-1957000.0,-103000.0,-576000.0,-679000,,-679000,,-599000,-623000
2014,Q4,REXR,USD,2014-12-31,2015-03-09,43447666.0,43447666.0,19858000.0,-5477000.0,14381000.0,-11929000.0,...,-1680000.0,-1655000.0,772000.0,-627000.0,145000,,145000,,145000,107000
2015,Q1,REXR,USD,2015-03-31,2015-05-11,50683528.0,50683528.0,21340000.0,-5771000.0,15569000.0,-13430000.0,...,-1825000.0,-1826000.0,314000.0,-233000.0,81000,,81000,,77000,27000


## Missing TTM Reports

Trailing-Twelve-Months (TTM) data is also quarterly so we can use the same helper-functions from above.

In [42]:
test_name = "Missing ttm reports"
test_func = lambda df: len(missing_quarterly_reports(df=df)) > 0
test_datasets(datasets=datasets_fundamental,
              variants=['ttm'],
              test_name=test_name, test_func=test_func)

Missing ttm reports
dataset='income', variant='ttm'
dataset='income-banks', variant='ttm'
dataset='income-insurance', variant='ttm'
dataset='balance', variant='ttm'
dataset='balance-banks', variant='ttm'
dataset='balance-insurance', variant='ttm'
dataset='cashflow', variant='ttm'


Exception: Missing ttm reports

In [43]:
# Get list of SIMFIN_ID's that have missing reports for a dataset.
df = data.get(dataset='income', variant='ttm')
missing_quarterly_reports(df=df)

[89750,
 98663,
 133865,
 181659,
 189248,
 233893,
 258154,
 337852,
 340222,
 353395,
 378179,
 419436,
 436223,
 548204,
 569369,
 592569,
 609619,
 626019,
 637325,
 640898,
 646016,
 653156,
 654363,
 659172,
 659371,
 662568,
 664525,
 689588,
 689895,
 703911,
 705165,
 728722,
 733746,
 748002,
 748226,
 748227,
 778892,
 815913,
 825113,
 903526,
 910562,
 928017]

In [44]:
# Show all the reports for a given SIMFIN_ID sorted by
# Fiscal Year and Period so it is easier to see which are missing.
sort_quarterly_reports(df=df, simfin_id=89750)

Unnamed: 0_level_0,Unnamed: 1_level_0,Ticker,Currency,Report Date,Publish Date,Shares (Basic),Shares (Diluted),Revenue,Cost of Revenue,Gross Profit,Operating Expenses,...,Non-Operating Income (Loss),"Interest Expense, net","Pretax Income (Loss), Adjusted",Abnormal Gains (Losses),Pretax Income (Loss),"Income Tax (Expense) Benefit, net",Income (Loss) from Continuing Operations,Net Extraordinary Gains (Losses),Net Income,Net Income Available to Common Shareholders
Fiscal Year,Fiscal Period,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
2008,Q4,DHR,USD,2008-12-31,2009-10-22,843114360.0,886680300.0,12697460000.0,-6757262000.0,5940194000.0,-4070717000.0,...,-120170000.0,-120170000.0,1749307000.0,,1749307000,-431676000.0,1317631000,,1317631000,1317631000
2009,Q1,DHR,USD,2009-03-31,2010-04-22,843466140.0,885034260.0,12296330000.0,-6515239000.0,5781087000.0,-3984613000.0,...,-106415000.0,-106415000.0,1690059000.0,,1690059000,-411221000.0,1278838000,,1278838000,1278838000
2009,Q2,DHR,USD,2009-06-30,2009-07-23,843916920.0,883707000.0,11686040000.0,-6202983000.0,5483057000.0,-3853101000.0,...,-104176000.0,-104176000.0,1525780000.0,,1525780000,-314696000.0,1211084000,,1211084000,1211084000
2009,Q3,DHR,USD,2009-09-30,2009-10-22,844712880.0,883003440.0,11228550000.0,-5935108000.0,5293444000.0,-3806149000.0,...,-20158000.0,-105276000.0,1467137000.0,,1467137000,-276682000.0,1190455000,,1190455000,1190455000
2010,Q4,DHR,USD,2010-12-31,2011-02-24,862224000.0,901956000.0,12550000000.0,-6145500000.0,6404500000.0,-4377700000.0,...,-88200000.0,-111000000.0,1938600000.0,,2229600000,-511400000.0,1718200000,74800000.0,1793000000,1793000000
2011,Q1,DHR,USD,2011-03-31,2011-04-21,866392230.0,904663980.0,12834370000.0,-6149731000.0,6684638000.0,-4509005000.0,...,-73558000.0,-110833000.0,2102075000.0,,2393075000,-550847000.0,1842228000,79897000.0,1922125000,1922125000
2011,Q2,DHR,USD,2011-06-30,2011-07-27,871252800.0,908710110.0,13252810000.0,-6256175000.0,6996637000.0,-4743273000.0,...,-60325000.0,-112060000.0,2193039000.0,,2484039000,-564384000.0,1919655000,278758000.0,2198413000,2198413000
2011,Q3,DHR,USD,2011-09-30,2011-10-20,882030600.0,917610870.0,14605760000.0,-7037980000.0,7567783000.0,-5229818000.0,...,-67383000.0,-124559000.0,2270582000.0,,2237658000,-436399000.0,1801259000,274147000.0,2075406000,2075406000
2011,Q4,DHR,USD,2011-12-31,2012-02-24,892584000.0,925584000.0,16090500000.0,-7913900000.0,8176600000.0,-5626200000.0,...,-69700000.0,-136500000.0,2480700000.0,,2447800000,-512500000.0,1935300000,237000000.0,2172300000,2172300000
2012,Q1,DHR,USD,2012-03-31,2012-04-19,902451330.0,934055760.0,17114500000.0,-8451252000.0,8663250000.0,-5962728000.0,...,-80151000.0,-146876000.0,2620371000.0,,2587471000,-548946000.0,2038525000,317317000.0,2355842000,2355842000
