In [None]:
import json
import os
import pandas as pd
import requests
import time

from datetime import datetime
from pathlib import Path
from QuantConnect import Globals
from QuantConnect.Configuration import Config
from QuantConnect.Securities import SecurityDefinitionSymbolResolver

### Required environment variables:
 * __`QC_DATAFLEET_DEPLOYMENT_DATE`__ (date formatted as "%Y%m%d")
 * __`KAVOUT_API_KEY`__
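 * __`KAVOUT_API_HOST`__ (host and optional port only, without a scheme, e.g. `google.com:443`; requests are sent to `https://<KAVOUT_API_HOST>/...`)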

In [None]:
# Deployment date: parsed from the environment, then rendered back to %Y%m%d so the
# string used as the CSV row key always comes from the parsed datetime.
deployment_date = datetime.strptime(os.environ['QC_DATAFLEET_DEPLOYMENT_DATE'], '%Y%m%d')
deployment_date_str = f'{deployment_date:%Y%m%d}'

# API credentials and host; a missing variable raises KeyError here, before any work is done.
api_key, api_host = (os.environ[variable] for variable in ('KAVOUT_API_KEY', 'KAVOUT_API_HOST'))

# Root directories come from the LEAN configuration.
# NOTE(review): the second argument to Config.Get is presumably the fallback value - confirm.
output_directory = Path(Config.Get('temp-output-directory', '/temp-output-directory'))
existing_data_directory = Path(Config.Get('processed-data-directory', Globals.DataFolder))

# The same dataset sub-path is used under both roots.
dataset_subpath = ('alternative', 'kavout', 'composite_factor_bundles')
output_file_directory = output_directory.joinpath(*dataset_subpath)
existing_data_file_directory = existing_data_directory.joinpath(*dataset_subpath)

# Only the output directory is created; the existing-data directory is read-only input.
output_file_directory.mkdir(parents=True, exist_ok=True)

symbol_resolver = SecurityDefinitionSymbolResolver()

In [None]:
# Download all factor records for the deployment date in a single call.
#
# NOTE(review): verify=False disables TLS certificate validation while the API token is
# sent as a query parameter. It is kept so the existing deployment keeps working
# (presumably the host's certificate cannot be validated - confirm), but it should be
# removed as soon as the certificate chain can be verified.
response = requests.get(
    f'https://{api_host}/v2/usa/stock/kavout_quantconnect_factors',
    verify=False,
    # (connect, read) timeouts in seconds. requests has no default timeout, so without
    # this a stalled connection would block the job forever instead of failing.
    timeout=(30, 300),
    params={
        'token': api_key,
        'trade_date__eq': deployment_date.strftime('%Y-%m-%d'),
        'size': 10000 # Maximum number of results that can be returned per call, which is more than the size of the universe.
    }
)

# Anything other than 200 is treated as a hard failure; the body is included to aid debugging.
if response.status_code != 200:
    raise Exception(f'Failed to download data from host: {api_host} - received {response.status_code} error. Contents: {response.text}')

In [None]:
# Decode the response body; an empty payload is treated as a failure for this date.
json_response = response.json()
if not len(json_response):
    raise Exception(f'No results returned from API for date: {deployment_date.date()}')

In [None]:
# Remember how many entries the API returned so the number dropped can be reported later.
previous_count = len(json_response)
output_json = []

# Keep only the entries whose ISIN resolves to a symbol as of the deployment date,
# tagging each kept entry (in place) with the resolved ticker.
for entry in json_response:
    lean_symbol = symbol_resolver.ISIN(entry['isin_code'], deployment_date)
    if lean_symbol is not None:
        entry['ticker'] = lean_symbol.Value
        output_json.append(entry)

In [None]:
# Abort when not a single entry could be mapped to a symbol.
if not len(output_json):
    raise Exception('No Symbols found with ISIN lookup')

# Report how many entries were dropped because their ISIN had no match.
filtered_count = previous_count - len(output_json)
if filtered_count:
    print(f'Filtering {filtered_count}/{previous_count} symbols from output data, since no matching ISIN entries were found in the security definitions database')

In [None]:
# Merge today's factor scores into each ticker's CSV history and write the result to the
# output directory. Rows are keyed by their first column (the date), so re-running for the
# same date replaces that day's row instead of duplicating it.
for entry in output_json:
    filename = f'{entry["ticker"].lower()}.csv'

    output_file_path = output_file_directory / filename
    existing_data_file_path = existing_data_file_directory / filename

    # If there is no processed history for this ticker, fall back to whatever this
    # output directory already holds (e.g. from an earlier run).
    if not existing_data_file_path.exists():
        existing_data_file_path = output_file_path

    output_data = {}

    if existing_data_file_path.exists():
        with open(existing_data_file_path, 'r') as existing_data_file:
            existing_lines = existing_data_file.read().splitlines()
            # Skip blank lines: an empty or padded file would otherwise create a '' key,
            # which sorts first and writes an empty row into the output.
            output_data = {line.split(',')[0]: line for line in existing_lines if line.strip()}

    # Row layout: date, growth, value, quality, momentum, low_volatility.
    output_data[deployment_date_str] = ','.join([
        deployment_date_str,
        str(entry['growth']),
        str(entry['value']),
        str(entry['quality']),
        str(entry['momentum']),
        str(entry['low_volatility'])
    ])

    # Sort the lines we have and output them all to disk, making sure all data is de-duplicated by date.
    # The joined content has no trailing newline.
    content = '\n'.join([i[1] for i in sorted(output_data.items())])

    with open(output_file_path, 'w') as output_file:
        print(f'Writing data for {entry["ticker"]} to: {output_file_path}')
        output_file.write(content)