In [None]:
import json
import os
import pandas as pd
import requests
import time

from datetime import datetime
from pathlib import Path
from QuantConnect import Globals
from QuantConnect.Configuration import Config
from QuantConnect.Securities import SecurityDefinitionSymbolResolver

### Required environment variables:
 * __`QC_DATAFLEET_DEPLOYMENT_DATE`__ (date formatted as "%Y%m%d"; defaults to the current UTC date when unset)
 * __`KAVOUT_API_KEY`__
 * __`KAVOUT_API_HOST`__ (e.g. `google.com:443`)

In [None]:
# Date whose data is processed. Falls back to today's UTC date when the
# environment variable is not set.
deployment_date = datetime.strptime(os.environ.get('QC_DATAFLEET_DEPLOYMENT_DATE', f'{datetime.utcnow():%Y%m%d}'), '%Y%m%d')

# Environment variables are always strings, so a plain truthiness check would
# treat "false" or "0" as enabled. Only explicit opt-out values (or an unset /
# blank variable) disable the raw dump; any other value enables it.
save_raw_output = os.environ.get('KAVOUT_SAVE_RAW_OUTPUT', '').strip().lower() not in ('', '0', 'false', 'no', 'off')

# Credentials and endpoint for the Kavout API (host may include a port).
api_key = os.environ.get('KAVOUT_API_KEY', '')
api_host = os.environ.get('KAVOUT_API_HOST', '')

# Root folders are read from Config, with fallbacks when the keys are absent.
output_directory = Path(Config.Get('temp-output-directory', '/temp-output-directory'))
existing_data_directory = Path(Config.Get('processed-data-directory', Globals.DataFolder))

# Per-dataset folders: new files are written under the output root, previously
# processed files are looked up under the existing-data root.
output_file_directory = output_directory / 'alternative' / 'kavout' / 'composite_factor_bundles'
existing_data_file_directory = existing_data_directory / 'alternative' / 'kavout' / 'composite_factor_bundles'

output_file_directory.mkdir(parents=True, exist_ok=True)

# Used to map ISIN codes to symbols as of the deployment date.
symbol_resolver = SecurityDefinitionSymbolResolver()

In [None]:
def GetData(deployment_date, timeout=(30, 300)):
    """Download the Kavout factor records for a single trade date.

    Parameters
    ----------
    deployment_date : datetime
        Trade date to request; sent as ``trade_date__eq`` in ``%Y-%m-%d`` format.
    timeout : float or tuple, optional
        Forwarded to ``requests.get``. A ``(connect, read)`` tuple in seconds.
        Without a timeout a stalled connection would block the run forever.

    Returns
    -------
    The decoded JSON body of the response.

    Raises
    ------
    Exception
        If the server answers with a status code other than 200.
    requests.exceptions.RequestException
        If the request cannot be completed (including a timeout).
    """
    # NOTE(review): `verify=False` disables TLS certificate verification while
    # the API token is sent as a query parameter. Kept as-is to avoid breaking
    # the existing deployment - confirm whether it is still required.
    response = requests.get(
        f'https://{api_host}/v2/usa/stock/kavout_quantconnect_factors',
        verify=False,
        timeout=timeout,
        params={
            'token': api_key,
            'trade_date__eq': f'{deployment_date:%Y-%m-%d}',
            'size': 10000 # Maximum number of results that can be returned per call, which is more than the size of the universe.
        })

    if response.status_code != 200:
        raise Exception(f'Failed to download data from host: {api_host} - received {response.status_code} error. Contents: {response.text}')

    json_response = response.json()

    if save_raw_output:
        # Serialize with json.dump so the file is valid JSON; str() would write
        # a Python repr (single quotes, None/True/False) into a .json file.
        with open(f"{deployment_date:%Y-%m-%d}_response.json", 'w') as output_file:
            json.dump(json_response, output_file)

    return json_response

In [None]:
def SaveData(deployment_date, json_response):
    """Merge one day of factor records into the per-ticker CSV files.

    For every record, the ticker is resolved from its ISIN (falling back to the
    record's own ``symbol``), any previously stored lines for that ticker are
    loaded, the line for ``deployment_date`` is added or replaced, and the
    result is written date-sorted to ``output_file_directory / '<ticker>.csv'``.

    Each CSV line is: date (``%Y%m%d``), growth, value, quality, momentum,
    low_volatility.

    Parameters
    ----------
    deployment_date : datetime
        Date the records belong to; used for ISIN resolution and as line key.
    json_response : sequence of dict
        Records carrying ``isin_code``, ``symbol`` and the five factor fields.
        An empty or ``None`` response is a no-op. The records are not modified.
    """
    if not json_response:
        return

    # Resolve every ticker before touching the disk, so a resolution failure
    # aborts the run before any file has been rewritten.
    resolved_entries = []
    for entry in json_response:
        lean_symbol = symbol_resolver.ISIN(entry['isin_code'], deployment_date)
        # If we can match ISIN we use the ticker since this dataset is not point-in-time
        ticker = lean_symbol.Value if lean_symbol else entry['symbol']
        resolved_entries.append((ticker, entry))

    deployment_date_str = f'{deployment_date:%Y%m%d}'

    for ticker, entry in resolved_entries:
        filename = f'{ticker.lower()}.csv'

        output_file_path = output_file_directory / filename
        # Previously processed data lives in the dataset sub-folder, not in the
        # root of the data folder.
        existing_data_file_path = existing_data_file_directory / filename

        # Fall back to a file already produced in the output folder, if any.
        if not existing_data_file_path.exists():
            existing_data_file_path = output_file_path

        output_data = {}

        if existing_data_file_path.exists():
            with open(existing_data_file_path, 'r') as existing_data_file:
                for line in existing_data_file.read().split('\n'):
                    # Skip blank lines so an empty or padded file does not
                    # produce an empty-key entry in the output.
                    if line.strip():
                        output_data[line.split(',')[0]] = line

        output_data[deployment_date_str] = ','.join([
            deployment_date_str,
            str(entry['growth']),
            str(entry['value']),
            str(entry['quality']),
            str(entry['momentum']),
            str(entry['low_volatility'])
        ])

        # Sort the lines we have and output them all to disk, making sure all data is de-duplicated by date.
        content = '\n'.join(output_data[date_key] for date_key in sorted(output_data))

        with open(output_file_path, 'w') as output_file:
            print(f'Writing data for {ticker} to: {output_file_path}')
            output_file.write(content)

In [None]:
# Fetch the records for the deployment date, then merge them into the per-ticker CSV files.
json_response = GetData(deployment_date=deployment_date)
SaveData(deployment_date=deployment_date, json_response=json_response)
