# Setup

In [246]:
!pip install eikon
!pip install more_itertools

Collecting more_itertools
  Downloading more_itertools-8.13.0-py3-none-any.whl (51 kB)
Installing collected packages: more-itertools
Successfully installed more-itertools-8.13.0


In [310]:
import eikon as ek
import pandas as pd
import numpy as np
import time
import os
import glob
from tqdm.auto import tqdm
from more_itertools import chunked
from math import ceil, floor

In [51]:
#"api_key_lk"
ek.set_app_key("716a396b553b441680427f612d2f89735c88bf9e")

## Screen companies

In [52]:
oil_osebx_screen = 'SCREEN(U(IN(Equity(active,public,primary))), TR.CompanyMarketCap>=500000, IN(TR.ExchangeMarketIdCode,"XOSL"), IN(TR.TRBCBusinessSectorCode,"5010","5020","5030"), CURN=USD)'
company_names = ["TR.CommonName"]#["TR.CommonName","TR.CompanyMarketCap","TR.ExchangeName","TR.TRBCBusinessSector","TR.TotalReturn3Mo"]

osbx_companies, e = ek.get_data(oil_osebx_screen, company_names)
osbx_companies = osbx_companies.set_index("Instrument")

In [53]:
oil_global_screen = 'SCREEN(U(IN(Equity(active,public,primary))), TR.CompanyMarketCap>=500000, IN(TR.TRBCBusinessSectorCode,"5010","5020","5030"), CURN=USD)'
fields_oil_global_screen = ["TR.CommonName"]

global_oil, e = ek.get_data(oil_global_screen, fields_oil_global_screen)
global_oil = global_oil.set_index("Instrument")


In [55]:
financial_global_screen = 'SCREEN(U(IN(Equity(active,public,primary))), TR.CompanyMarketCap>=500000, IN(TR.TRBCBusinessSectorCode,"5510"), CURN=USD)'
fields_financial_global_screen = ["TR.CommonName"]

global_financial, e = ek.get_data(financial_global_screen, fields_financial_global_screen)
global_financial = global_financial.set_index("Instrument")

Now we have dataframe of all noted oil companies (with mcap > USD 5m) in eikon refinitives entire database

## Below are some interesting fields regarding public companies


In [None]:
#Market capitalization fields 
fields = ['TR.CompanyMarketCap.Date','TR.CompanyMarketCap', 'TR.PriceClose', 'TR.CompanyMarketCap.Currency'] 

#Fundamental stock data fields
profits = ['TR.TotalRevenue', 'TR.GrossProfit','TR.EBITDA','TR.EBIT', 'TR.F.NetIncAfterTax']#, 'TR.EV','MKT_CAP']
balance = ['TR.F.TotAssets','TR.F.TotCurrAssets','TR.F.TotLiab','TR.F.TotCurrLiab','TR.F.LTDebtPctofTotAssets','TR.F.STDebtPctofTotAssets',"TR.InvtrPriceToBook"]#TR.F.TotLiab(Period=FY0)
cash_flow = ['TR.F.LeveredFOCF']

fundamental_data = profits + balance + cash_flow 
reported_dates = ['TR.TotalRevenue.periodenddate']

#Company meta data fields
geography = ['TR.ExchangeMarketIdCode', 'TR.HeadquartersRegionAlt', 'TR.HeadquartersCountry', 'TR.HQStateProvince']
sectors = ['TR.TRBCEconomicSector', 'TR.TRBCBusinessSector', 'TR.TRBCIndustryGroup', 'TR.TRBCIndustry', 'TR.TRBCActivity']
founded = ['TR.OrgFoundedYear']

meta_data = geography + founded + sectors

#Broker estimates
#params_new["Period"] = "FY1"    
    
fields = ["TR.EPSMean","TR.EPSMean.periodenddate","TR.EBITMean",'TR.RevenueMean',
              "TR.ROAMean","TR.ROEMean","TR.FCFMean","TR.TotalAssets","TR.MeanPctChg(Period=FY1,WP=60d)"]

# Collect data from eikon refinitiv

In [158]:
def _decide_get_function(fields, params):
    
    timeseries_fields = ["TIMESTAMP", "VALUE", "VOLUME", "HIGH", "LOW", "OPEN", "CLOSE", "COUNT"]
    
    if isinstance(fields, str):
        fields = [fields]
    
    if all([field in timeseries_fields for field in fields]) or fields is None: 
         
        return ek.get_timeseries
    
    if params["Frq"] in ["tick", "minute", "hour"]:
        raise ValueError("You requested data of ek.get_data() requesting timeperiods it does not support")
    
    return ek.get_data

In [379]:
def correct_params(chosen_function, params):
    
    if chosen_function == ek.get_timeseries:
        params["start_date"] = params["SDate"]
        params["end_date"] = params["EDate"]
        params["interval"] =  params["Frq"]

In [396]:
def _get_many_params(params, date_range, chosen_function):
    if date_range is not None:
        params_copies = []       
        for i in range(len(date_range)-1):
            new_params = params.copy()
            new_params["SDate"] = date_range[i].strftime('%Y-%m-%d')
            new_params["EDate"] = date_range[i+1].strftime('%Y-%m-%d')
            correct_params(new_params, chosen_function)
            params_copies.append(new_params)
        return params_copies
    return [params]

In [397]:
def _decide_tickers_and_params(chosen_function, fields, lst_of_tickers, params):
    
    data_limit = {ek.get_timeseries:2_500, ek.get_data:9_000}
    date_range = None
    tickers_params_pairs = []
    
    #If there this is not a timeseries problem
    if "SDate" not in params.keys():
        return lst_of_tickers
    
    number_of_timepoints = chosen_function(lst_of_tickers[0], fields[0], params)[0].shape[0]
    print(f"number_of_timepoints {number_of_timepoints}")
    
    number_of_tickers_at_once = floor(data_limit[chosen_function] / (len(fields) * number_of_timepoints))
    print(f"number_of_tickers_at_once {number_of_tickers_at_once}")
    
    if number_of_tickers_at_once < 1: 
        number_of_timepoints_at_once = floor(data_limit[chosen_function] / len(fields))
        intervals_needed = ceil(number_of_timepoints / number_of_timepoints_at_once)
        
        time_delta = (pd.to_datetime(params["EDate"]) - pd.to_datetime(params["SDate"])) / intervals_needed
        date_range = [pd.to_datetime(params["SDate"]) + i*time_delta for i in range(intervals_needed-1)] + [pd.to_datetime(params["EDate"])]
        number_of_tickers_at_once = 1
    
    sub_ticker_lst = list(chunked(lst_of_tickers, number_of_tickers_at_once))
    params_copies = _get_many_params(params, date_range, chosen_function)
    
    for sub_tickers in sub_ticker_lst:
        for params_copy in params_copies:
            tickers_params_pairs.append((sub_tickers, params_copy))
            
    
    return tickers_params_pairs

In [398]:
#Function to maximize data per http request. 
def _divide_pull_request(lst_of_tickers, fields, params):
    #batch, deduce get_data or get_timeseries size given
    #lst_of_tickers x fields x (params[start]-params[end])*freq < 10,000 or 3,000
    #also adjust params so that they fit selected function 
    
    chosen_function = _decide_get_function(fields, params)
    params_adjusted = params
    
    tickers_params_pairs = _decide_tickers_and_params(chosen_function, fields, lst_of_tickers, params)#TODO
    
    return tickers_params_pairs, chosen_function

In [399]:
def wait_time(server_side_problems):
    return 3**(server_side_problems-1)

In [404]:
def handle_and_update_server_side_problem(server_side_problems, err):
    server_side_problems += 1
    print(err.message)
    seconds_to_wait = wait_time(server_side_problems)
    time.sleep(wait_time(seconds_to_wait))
    print(f"waiting {seconds_to_wait} second(s) before new pull request...")

In [408]:
def get_data_and_handle_errors(getting_function, tickers, fields, params):
    ticker_and_error = {}
    
    server_side_problems = 0
    
    while server_side_problems < 5: 
        
        try:
            #if there is only one ticker causing the error should get_and_log perhaps split call into separate tickers?
            #fields not avalible should (and I think are) set to NA 
            new_df, err = getting_function(tickers, fields, params)
            return new_df, ticker_and_error
        
        except ek.EikonError as err:
            if err.code ==-1:
                #Just general error not specified                        
                handle_and_update_server_side_problem(server_side_problems, err)

            if err.code == 408: 
                #HTTP Timeout exception, this just happens frequently... think it can be server side problem
                handle_and_update_server_side_problem(server_side_problems, err)
            
            if err.code == 400 or err.code == 2504: #backend error, eikon suggest waiting this one out 
                handle_and_update_server_side_problem(server_side_problems, err)
            
            if err.code == 429:
                print(err.message)
                print("If this error message keeps happening, your subscription has probably collected to much data today")
                #either daily limit is reached or call based limit reached latter can be a problem from get_datas en
        
    for ticker in tickers:
        ticker_and_error[ticker] = err 
    
    return new_df, ticker_and_error

In [429]:
def save_if_criteria_met(df, filename, count):
    if not count % 10 or count == -1:
        df.to_csv(filename)

In [431]:
def format_filename(filename):
    if filename[-4:] == ".csv":
        return filename
    return filename + ".csv"

In [434]:
def get_data(lst_of_tickers, fields, params, filename=None):
    #(return or save) requested data and (return or save) non-retreived-data 
    saved = False
    saved_df = None
    filename = format_filename(filename)
    tickers_params_pairs, chosen_function = _divide_pull_request(lst_of_tickers, fields, params)
    
    for i, (tickers, param ) in enumerate(tqdm(tickers_params_pairs)):  

        new_df, ticker_and_error = get_data_and_handle_errors(chosen_function, tickers, fields, param)
        
      
        if saved_df is None:
            saved_df = new_df 

        else:
            saved_df = pd.concat([saved_df, new_df])
        
        save_if_criteria_met(saved_df, filename, i)
    
    save_if_criteria_met(saved_df, filename,-1)
    
    return saved_df

In [435]:
lst_of_tickers = global_financial.index.to_list()[:5]
fields = ["TR.TotalRevenue.Date",  "TR.TotalRevenue"]
#Eikon parameters
start_date = '2000-01-01'
end_date = '2022-04-21'
ek_params = {'SDate': start_date, 'EDate': end_date,'Frq': 'FQ', "Curn":"USD", 'period':"FQ0"}

df = get_data(lst_of_tickers, fields, ek_params, "test1")
df

number_of_timepoints 1
number_of_tickers_at_once 4500


  0%|          | 0/1 [00:00<?, ?it/s]

Unnamed: 0,Instrument,Date,Total Revenue
0,KAER.VI,,
1,TIRO.VI,,
2,000416.SZ,2003-03-31T00:00:00Z,
3,000416.SZ,2003-06-30T00:00:00Z,11633826.638478
4,000416.SZ,2003-09-30T00:00:00Z,15210341.911321
...,...,...,...
74,000416.SZ,2021-03-31T00:00:00Z,1494615.220245
75,000416.SZ,2021-06-30T00:00:00Z,1389533.190843
76,000416.SZ,2021-09-30T00:00:00Z,1170244.523056
77,NEDJ.J,,


In [375]:
lst_of_tickers = global_financial.index.to_list()[:10]
fields = ['TR.CompanyMarketCap.Date','TR.CompanyMarketCap']
#Eikon parameters
start_date = pd.to_datetime('2000-01-01')
end_date = pd.to_datetime('2022-04-21')
params = {'SDate': start_date, 'EDate': end_date,'Frq': 'D', "Curn":"USD"}

ek.get_data("VAR.OL", fields, params)

(     None  TR.COMPANYMARKETCAP.DATE  TR.COMPANYMARKETCAP
 0  VAR.OL                      <NA>                 <NA>,
 [{'code': 234,
   'col': 1,
   'message': "The '00' is unexpected in formula. A delimiter is probably missing before the lexeme.",
   'row': 0},
  {'code': 234,
   'col': 2,
   'message': "The '00' is unexpected in formula. A delimiter is probably missing before the lexeme.",
   'row': 0}])

In [395]:
ek.get_data(['KAER.VI'], fields,{'SDate': '2000-01-01 00:00:00', 'EDate': '2022-04-21 00:00:00', 'Frq': 'D', 'Curn': 'USD'})

(     Instrument                  Date  Company Market Cap
 0       KAER.VI  2000-01-03T00:00:00Z    404170472.154094
 1       KAER.VI  2000-01-04T00:00:00Z    405939471.405599
 2       KAER.VI  2000-01-05T00:00:00Z    406173934.568257
 3       KAER.VI  2000-01-06T00:00:00Z    406370925.301055
 4       KAER.VI  2000-01-07T00:00:00Z    405191838.750501
 ...         ...                   ...                 ...
 5541    KAER.VI  2022-04-13T00:00:00Z    677775171.437901
 5542    KAER.VI  2022-04-14T00:00:00Z    669515341.807237
 5543    KAER.VI  2022-04-19T00:00:00Z    671612449.171098
 5544    KAER.VI  2022-04-20T00:00:00Z    689576329.666037
 5545    KAER.VI  2022-04-21T00:00:00Z     693340423.68749
 
 [5546 rows x 3 columns],
 None)

## Legacy code, perhaps something relevant

In [None]:
def save_data(file_name, save_per_n_http_request, lst_of_tickers, params):
    
    non_collected_tickers = []

    name_to_index = {}
    dfs = {}
    for i, possible_key in enumerate(["stock_data", "meta_data", "fundamental_data", "broker_data"]):
        if params[possible_key]:
            name_to_index[possible_key] = i
            dfs[possible_key] = []
    
    partioned_lst_of_tickers = _sub_lists(lst_of_tickers, params["limit"])        
    for i, sub_ticker_lst in enumerate(tqdm(partioned_lst_of_tickers, "saving loop")):    
        
        try:
            raw_data_dfs = get_data(sub_ticker_lst, params)
            
            for key in name_to_index:
           
                dfs[key] = dfs[key] + [raw_data_dfs[name_to_index[key]]]
                
                
                
            if not (i % save_per_n_http_request):
                for key in name_to_index:
                    df = pd.concat(dfs[key], axis=0)
                    df = df.reset_index()
                    
                    df.to_feather(f"{file_name}_save={i}_type={key}.feather")
                    
                    
                    dfs[key] = []

        except ek.EikonError as err:
            for key in name_to_index:
                dfs[key] = []
                
            non_collected_tickers += sub_ticker_lst
        except Exception as e:
            print(e)
            for key in name_to_index:
                dfs[key] = []
                
            non_collected_tickers += sub_ticker_lst
            
    #Write crashes to file       
    with open(f"{file_name}.txt", "w") as f:
        f.write("\n".join(non_collected_tickers))
    
    #Save last data if there are rests
    for key in name_to_index:
            break
            if dfs[key] != []:
                df = pd.concat(dfs[key], axis=0)

                df = df.reset_index()

                #wtfffff
                df.to_feather(f"{file_name}_save={len(partioned_lst_of_tickers)}_type={key}.feather")

            

In [21]:
def _time_interval(start_date, end_date):
    y0 = int(start_date.split("-")[0])
    yn = int(end_date.split("-")[0])
    in_between_dates = [f"{str(year)}-01-01" for year in range(y0+1,yn,7)]
    return [start_date] + in_between_dates  + [end_date]
    
        
#This is code for getting macro timeseries data, as Refinitivs get_data method was hard to work with. 
def macro_data(lst_of_tickers, ek_get_timeseries_fields, params):
        start_and_ends = _time_interval(params["SDate"],params["EDate"])
        
        tickers_to_serie = {}
        for ticker in lst_of_tickers:
            tickers_to_serie[ticker] = []
            for i in range(len(start_and_ends)-1):
                try: 
                    time_series = ek.get_timeseries(ticker, fields=ek_get_timeseries_fields,
                                                    start_date=start_and_ends[i], end_date=start_and_ends[i+1], interval=params["interval"])
                                 
                except ek.EikonError as err:
                    if err.code ==-1:
                        time_series = ek.get_timeseries("BRT-", fields=ek_get_timeseries_fields, start_date=start_and_ends[i], end_date=start_and_ends[i+1],interval=params["interval"])
                        time_series[ek_get_timeseries_fields] = np.nan
                        
                    if err.code ==  2504:
                        print("backend error")
                        time.sleep(2)
                        time_series = ek.get_timeseries(ticker, fields=ek_get_timeseries_fields,
                                                    start_date=start_and_ends[i], end_date=start_and_ends[i+1], interval=params["interval"])
                        
                tickers_to_serie[ticker] = tickers_to_serie[ticker] + [time_series]
            
            tickers_to_serie[ticker] = pd.concat(tickers_to_serie[ticker], axis=0)
            
        return tickers_to_serie