In [1]:
# Import libraries and functions
import pandas as pd
import numpy as np
import re
import pickle

In [2]:
# Load financials from WRDS.
# The path uses a forward slash: the previous "Data\Financials.csv" only worked
# because "\F" is not a recognised escape sequence (newer Python versions warn
# about it) and it resolved on Windows only. The other data files read and
# written by this notebook already use "/".
financials = (
    pd.read_csv(
        filepath_or_buffer="Data/Financials.csv",
        decimal=".",
        thousands=",",
        parse_dates=["datadate"],
    )
    .drop_duplicates()  # discard rows that are exact duplicates
    .sort_values(["cik", "datadate"])
)

# Wherever `seq` is missing, fall back to the value in `teq` (aligned on index).
financials["seq"] = financials["seq"].fillna(financials["teq"])

# Rows without a `cik` cannot be matched in the merge on cik/ryear/rmonth.
financials = financials.dropna(subset=["cik"])

In [3]:
# Read the raw EIKON financials export; cleaning happens in the next cell.
financials2 = pd.read_csv(filepath_or_buffer="Data/Financials2.csv")

In [4]:
# Normalise the EIKON extract so it can be merged with the WRDS frame.

# Columns are renamed by position: two key columns followed by the value columns.
financials2.columns = [
    'cik', 'datadate', 'TOTALASSETS', 'TOTALDEBT', 'NETINCOME', 'TOTALREVENUE', 'TOTALEQUITY',
    'INTANGIBLESNET', 'RnD', 'TOTALOPERATINGEXPENSE', 'TOTALCURRENTASSETS', 'TOTALCURRLIABILITIES'
    ]

# Unparseable dates become NaT (errors='coerce'); any timezone info is stripped.
financials2['datadate'] = pd.to_datetime(financials2['datadate'], errors='coerce').dt.tz_localize(None)

# Drop incomplete rows only AFTER the dates are parsed. Filtering first let rows
# with an unparseable date through: they turned into NaT afterwards and would
# reach the merge with NaN ryear/rmonth keys.
financials2 = financials2.dropna().sort_values(by=['cik', 'datadate'])

# scale all values to 10^6 similar to WRDS
EIKON_SCALE = 1_000_000
value_columns = financials2.columns[2:]
financials2[value_columns] = financials2[value_columns] / EIKON_SCALE

In [5]:
# Derive the calendar year and month of `datadate` on both frames; these two
# columns, together with `cik`, are the keys of the merge that follows.
for frame in (financials, financials2):
    report_dates = frame['datadate'].dt
    frame['ryear'] = report_dates.year
    frame['rmonth'] = report_dates.month

In [6]:
# Outer-join the two sources on company and reporting year/month so that rows
# present in only one of them are kept. The overlapping `datadate` column gets
# the default `_x` (left) / `_y` (right) suffixes.
merge_keys = ['cik', 'ryear', 'rmonth']
merged_fin = financials.merge(financials2, how='outer', on=merge_keys)

In [7]:
# Column on the left side of the merge -> column from the right side that is
# used to fill its missing values.
fallback_columns = {
    'datadate_x': 'datadate_y',
    'at': 'TOTALASSETS',
    'dt': 'TOTALDEBT',
    'ni': 'NETINCOME',
    'revt': 'TOTALREVENUE',
    'seq': 'TOTALEQUITY',
    'intan': 'INTANGIBLESNET',
    'xrd': 'RnD',
    'xopr': 'TOTALOPERATINGEXPENSE',
    'act': 'TOTALCURRENTASSETS',
    'lct': 'TOTALCURRLIABILITIES',
}

# Only missing entries are filled; values already present are left untouched.
merged_fin.fillna(
    {target: merged_fin[source] for target, source in fallback_columns.items()},
    inplace=True,
)

In [9]:
# Order rows chronologically within each company; the grouped forward fill in
# the next cell relies on this ordering.
merged_fin = merged_fin.sort_values(by=['cik', 'ryear', 'rmonth'])

In [10]:
# Columns that may be carried forward from the preceding row of the same company.
cols = [
    'act', 'at', 'dt', 'ebit', 'ebitda', 'intan', 'lct', 'lt', 'ni', 'revt',
    'seq', 'teq', 'xopr', 'xrd', 'xt', 'naicsh', 'sich', 'mkvalt', 'naics', 'sic',
]

# Forward-fill within each `cik`, bridging at most one consecutive missing row.
carried_forward = merged_fin.groupby('cik')[cols].ffill(limit=1)
merged_fin[cols] = carried_forward

In [11]:
# Discard rows in which every one of these core columns is missing, then give
# the left-hand date column its plain name back.
core_columns = ['act', 'at', 'dt', 'lct', 'ni', 'revt', 'seq']
merged_fin = (
    merged_fin
    .dropna(subset=core_columns, how='all')
    .rename(columns={'datadate_x': 'datadate'})
)

In [12]:
# Write the combined data set to disk, keeping only the key columns and the
# selected value columns, without the DataFrame index.
export_columns = [
    'cik', 'datadate',
    'act', 'at', 'dt', 'ebit', 'ebitda', 'intan', 'lct', 'lt', 'ni', 'revt',
    'seq', 'teq', 'xopr', 'xrd', 'xt', 'naicsh', 'sich', 'mkvalt', 'naics', 'sic',
]
merged_fin[export_columns].to_csv("Data/Financials3.csv", index=False)