In [None]:
import clr
clr.AddReference("qc-synchronizer")

from QuantConnect.DataFleet.Sync import ProducerSynchronizer

In [None]:
import requests
import io
import json
import os
import sys
import pathlib

from zipfile import ZipFile
from datetime import datetime, timedelta

In [None]:
# EIA bulk-download archives to process. Only the Petroleum (PET) archive is
# enabled; the remaining archives are kept as commented-out entries so that
# they can be switched on by uncommenting the corresponding line.
URLS = [
    'https://api.eia.gov/bulk/PET.zip',
    # --- disabled archives ---
    #'http://api.eia.gov/bulk/INTL.zip',
    #'http://api.eia.gov/bulk/NUC_STATUS.zip',
    #'http://api.eia.gov/bulk/NG.zip',
    #'http://api.eia.gov/bulk/TOTAL.zip',
    #'http://api.eia.gov/bulk/SEDS.zip',
    #'http://api.eia.gov/bulk/ELEC.zip',
    #'http://api.eia.gov/bulk/EBA.zip',
    #'http://api.eia.gov/bulk/COAL.zip',
    #'http://api.eia.gov/bulk/STEO.zip',
    #'http://api.eia.gov/bulk/PET_IMPORTS.zip',
    #'http://api.eia.gov/bulk/EMISS.zip',
    #'http://api.eia.gov/bulk/IEO.zip',
]

# Local staging directory that receives the generated CSV files. It is created
# up front (including missing parents) so later writes cannot fail on a
# missing directory; an already existing directory is left untouched.
destination_dir = pathlib.Path('/temp-output-directory/alternative/usenergy')
os.makedirs(destination_dir, exist_ok=True)

# Human-readable category title for each EIA bulk archive, keyed by the
# archive's base name (the file name used in URLS without the ".zip" suffix).
name_to_cat = dict([
    ('INTL', 'International Energy'),
    ('NUC_STATUS', 'U.S. Nuclear Outages'),
    ('PET', 'Petroleum'),
    ('NG', 'Natural Gas'),
    ('TOTAL', 'Total Energy'),
    ('SEDS', 'State Energy Data System'),
    ('ELEC', 'Electricity'),
    ('EBA', 'U.S. Electric System Operating Data'),
    ('COAL', 'Coal'),
    ('STEO', 'Short-Term Energy Outlook'),
    ('PET_IMPORTS', 'Crude Oil Imports'),
    ('EMISS', 'CO2 Emissions'),
    ('IEO', 'International Energy Outlook'),
])

# Country display names keyed by three-letter (ISO 3166-1 alpha-3 style) code.
# filter_and_write() looks up the first component of a series' 'iso3166' field
# in this table.
# NOTE: the variable name is a historical misspelling of "ISO3166"; it is kept
# unchanged because other cells reference it by this name.
ISO3186 = {
    'AFG': 'Afghanistan',
    'ALB': 'Albania',
    'DZA': 'Algeria',
    'ASM': 'American Samoa',
    'AND': 'Andorra',
    'AGO': 'Angola',
    'AIA': 'Anguilla',
    'ATA': 'Antarctica',
    'ATG': 'Antigua and Barbuda',
    'ARG': 'Argentina',
    'ARM': 'Armenia',
    'ABW': 'Aruba',
    'AUS': 'Australia',
    'AUT': 'Austria',
    'AZE': 'Azerbaijan',
    'BHS': 'Bahamas',
    'BHR': 'Bahrain',
    'BGD': 'Bangladesh',
    'BRB': 'Barbados',
    'BLR': 'Belarus',
    'BEL': 'Belgium',
    'BLZ': 'Belize',
    'BEN': 'Benin',
    'BMU': 'Bermuda',
    'BTN': 'Bhutan',
    'BOL': 'Bolivia',
    'BIH': 'Bosnia and Herzegovina',
    'BWA': 'Botswana',
    'BVT': 'Bouvet Island',
    'BRA': 'Brazil',
    'IOT': 'British Indian Ocean Territory',
    'BRN': 'Brunei',
    'BGR': 'Bulgaria',
    'BFA': 'Burkina Faso',
    'BDI': 'Burundi',
    'KHM': 'Cambodia',
    'CMR': 'Cameroon',
    'CAN': 'Canada',
    'CPV': 'Cape Verde',
    'CYM': 'Cayman Islands',
    'CAF': 'Central African Republic',
    'TCD': 'Chad',
    'CHL': 'Chile',
    'CHN': 'China',
    'CXR': 'Christmas Island',
    'CCK': 'Cocos (Keeling) Islands',
    'COL': 'Colombia',
    'COM': 'Comoros',
    'COG': 'Congo',
    'COD': 'Congo, the Democratic Republic of the',
    'COK': 'Cook Islands',
    'CRI': 'Costa Rica',
    'CIV': 'Ivory Coast',
    'HRV': 'Croatia',
    'CUB': 'Cuba',
    'CYP': 'Cyprus',
    'CZE': 'Czech Republic',
    'DNK': 'Denmark',
    'DJI': 'Djibouti',
    'DMA': 'Dominica',
    'DOM': 'Dominican Republic',
    'ECU': 'Ecuador',
    'EGY': 'Egypt',
    'SLV': 'El Salvador',
    'GNQ': 'Equatorial Guinea',
    'ERI': 'Eritrea',
    'EST': 'Estonia',
    'ETH': 'Ethiopia',
    'FLK': 'Falkland Islands (Malvinas)',
    'FRO': 'Faroe Islands',
    'FJI': 'Fiji',
    'FIN': 'Finland',
    'FRA': 'France',
    'GUF': 'French Guiana',
    'PYF': 'French Polynesia',
    'ATF': 'French Southern Territories',
    'GAB': 'Gabon',
    'GMB': 'Gambia',
    'GEO': 'Georgia',
    'DEU': 'Germany',
    'GHA': 'Ghana',
    'GIB': 'Gibraltar',
    'GRC': 'Greece',
    'GRL': 'Greenland',
    'GRD': 'Grenada',
    'GLP': 'Guadeloupe',
    'GUM': 'Guam',
    'GTM': 'Guatemala',
    'GGY': 'Guernsey',
    'GIN': 'Guinea',
    'GNB': 'Guinea-Bissau',
    'GUY': 'Guyana',
    'HTI': 'Haiti',
    'HMD': 'Heard Island and McDonald Islands',
    'VAT': 'Holy See (Vatican City State)',
    'HND': 'Honduras',
    'HKG': 'Hong Kong',
    'HUN': 'Hungary',
    'ISL': 'Iceland',
    'IND': 'India',
    'IDN': 'Indonesia',
    'IRN': 'Iran, Islamic Republic of',
    'IRQ': 'Iraq',
    'IRL': 'Ireland',
    'IMN': 'Isle of Man',
    'ISR': 'Israel',
    'ITA': 'Italy',
    'JAM': 'Jamaica',
    'JPN': 'Japan',
    'JEY': 'Jersey',
    'JOR': 'Jordan',
    'KAZ': 'Kazakhstan',
    'KEN': 'Kenya',
    'KIR': 'Kiribati',
    'PRK': "Korea, Democratic People's Republic of",
    'KOR': 'South Korea',
    'KWT': 'Kuwait',
    'KGZ': 'Kyrgyzstan',
    'LAO': "Lao People's Democratic Republic",
    'LVA': 'Latvia',
    'LBN': 'Lebanon',
    'LSO': 'Lesotho',
    'LBR': 'Liberia',
    'LBY': 'Libya',
    'LIE': 'Liechtenstein',
    'LTU': 'Lithuania',
    'LUX': 'Luxembourg',
    'MAC': 'Macao',
    'MKD': 'Macedonia, the former Yugoslav Republic of',
    'MDG': 'Madagascar',
    'MWI': 'Malawi',
    'MYS': 'Malaysia',
    'MDV': 'Maldives',
    'MLI': 'Mali',
    'MLT': 'Malta',
    'MHL': 'Marshall Islands',
    'MTQ': 'Martinique',
    'MRT': 'Mauritania',
    'MUS': 'Mauritius',
    'MYT': 'Mayotte',
    'MEX': 'Mexico',
    'FSM': 'Micronesia, Federated States of',
    'MDA': 'Moldova, Republic of',
    'MCO': 'Monaco',
    'MNG': 'Mongolia',
    'MNE': 'Montenegro',
    'MSR': 'Montserrat',
    'MAR': 'Morocco',
    'MOZ': 'Mozambique',
    'MMR': 'Burma',
    'NAM': 'Namibia',
    'NRU': 'Nauru',
    'NPL': 'Nepal',
    'NLD': 'Netherlands',
    'ANT': 'Netherlands Antilles',
    'NCL': 'New Caledonia',
    'NZL': 'New Zealand',
    'NIC': 'Nicaragua',
    'NER': 'Niger',
    'NGA': 'Nigeria',
    'NIU': 'Niue',
    'NFK': 'Norfolk Island',
    'MNP': 'Northern Mariana Islands',
    'NOR': 'Norway',
    'OMN': 'Oman',
    'PAK': 'Pakistan',
    'PLW': 'Palau',
    'PSE': 'Palestinian Territory, Occupied',
    'PAN': 'Panama',
    'PNG': 'Papua New Guinea',
    'PRY': 'Paraguay',
    'PER': 'Peru',
    'PHL': 'Philippines',
    'PCN': 'Pitcairn',
    'POL': 'Poland',
    'PRT': 'Portugal',
    'PRI': 'Puerto Rico',
    'QAT': 'Qatar',
    # Previously stored as mojibake (UTF-8 bytes decoded as Latin-1).
    'REU': 'Réunion',
    'ROU': 'Romania',
    'RUS': 'Russia',
    'RWA': 'Rwanda',
    'SHN': 'Saint Helena, Ascension and Tristan da Cunha',
    'KNA': 'Saint Kitts and Nevis',
    'LCA': 'Saint Lucia',
    'SPM': 'Saint Pierre and Miquelon',
    'VCT': 'St. Vincent and the Grenadines',
    'WSM': 'Samoa',
    'SMR': 'San Marino',
    'STP': 'Sao Tome and Principe',
    'SAU': 'Saudi Arabia',
    'SEN': 'Senegal',
    'SRB': 'Serbia',
    'SCG': 'Serbia and Montenegro',
    'SYC': 'Seychelles',
    'SLE': 'Sierra Leone',
    'SGP': 'Singapore',
    'SVK': 'Slovakia',
    'SVN': 'Slovenia',
    'SLB': 'Solomon Islands',
    'SOM': 'Somalia',
    'ZAF': 'South Africa',
    'SGS': 'South Georgia and the South Sandwich Islands',
    'ESP': 'Spain',
    'LKA': 'Sri Lanka',
    'SDN': 'Sudan',
    'SUR': 'Suriname',
    'SJM': 'Svalbard and Jan Mayen',
    'SWZ': 'Swaziland',
    'SWE': 'Sweden',
    'CHE': 'Switzerland',
    'SYR': 'Syrian Arab Republic',
    'TWN': 'Taiwan',
    'TJK': 'Tajikistan',
    'TZA': 'Tanzania, United Republic of',
    'THA': 'Thailand',
    'TLS': 'Timor-Leste',
    'TGO': 'Togo',
    'TKL': 'Tokelau',
    'TON': 'Tonga',
    'TTO': 'Trinidad & Tobago',
    'TUN': 'Tunisia',
    'TUR': 'Turkey',
    'TKM': 'Turkmenistan',
    'TCA': 'Turks and Caicos Islands',
    'TUV': 'Tuvalu',
    'UGA': 'Uganda',
    'UKR': 'Ukraine',
    'ARE': 'United Arab Emirates',
    'GBR': 'United Kingdom',
    'USA': 'United States',
    'UMI': 'United States Minor Outlying Islands',
    'URY': 'Uruguay',
    'UZB': 'Uzbekistan',
    'VUT': 'Vanuatu',
    'VEN': 'Venezuela',
    'VNM': 'Vietnam',
    'VGB': 'Virgin Islands, British',
    'VIR': 'Virgin Islands, U.S.',
    'WLF': 'Wallis and Futuna',
    'ESH': 'Western Sahara',
    'YEM': 'Yemen',
    'YUG': 'Yugoslavia',
    'ZMB': 'Zambia',
    'ZWE': 'Zimbabwe',
}

# U.S. state / territory names paired with their two-letter postal
# abbreviations. The forward mapping is keyed by name; the reverse mapping
# below is derived from it and is keyed by abbreviation.
us_state_to_abbrev = dict([
    ('Alabama', 'AL'),
    ('Alaska', 'AK'),
    ('Arizona', 'AZ'),
    ('Arkansas', 'AR'),
    ('California', 'CA'),
    ('Colorado', 'CO'),
    ('Connecticut', 'CT'),
    ('Delaware', 'DE'),
    ('District of Columbia', 'DC'),
    ('Florida', 'FL'),
    ('Georgia', 'GA'),
    ('Hawaii', 'HI'),
    ('Idaho', 'ID'),
    ('Illinois', 'IL'),
    ('Indiana', 'IN'),
    ('Iowa', 'IA'),
    ('Kansas', 'KS'),
    ('Kentucky', 'KY'),
    ('Louisiana', 'LA'),
    ('Maine', 'ME'),
    ('Maryland', 'MD'),
    ('Massachusetts', 'MA'),
    ('Michigan', 'MI'),
    ('Minnesota', 'MN'),
    ('Mississippi', 'MS'),
    ('Missouri', 'MO'),
    ('Montana', 'MT'),
    ('Nebraska', 'NE'),
    ('Nevada', 'NV'),
    ('New Hampshire', 'NH'),
    ('New Jersey', 'NJ'),
    ('New Mexico', 'NM'),
    ('New York', 'NY'),
    ('North Carolina', 'NC'),
    ('North Dakota', 'ND'),
    ('Northern Mariana Islands', 'MP'),
    ('Ohio', 'OH'),
    ('Oklahoma', 'OK'),
    ('Oregon', 'OR'),
    ('Palau', 'PW'),
    ('Pennsylvania', 'PA'),
    ('Puerto Rico', 'PR'),
    ('Rhode Island', 'RI'),
    ('South Carolina', 'SC'),
    ('South Dakota', 'SD'),
    ('Tennessee', 'TN'),
    ('Texas', 'TX'),
    ('Utah', 'UT'),
    ('Vermont', 'VT'),
    ('Virgin Islands', 'VI'),
    ('Virginia', 'VA'),
    ('Washington', 'WA'),
    ('West Virginia', 'WV'),
    ('Wisconsin', 'WI'),
    ('Wyoming', 'WY'),
])

# Reverse lookup: postal abbreviation -> state / territory name.
us_abbrev_to_state = dict(zip(us_state_to_abbrev.values(), us_state_to_abbrev.keys()))

In [None]:
def download(url):
    """Download an EIA bulk archive and return the series it contains.

    The ZIP archive at ``url`` is fetched into memory and its text member is
    parsed as newline-delimited JSON (one JSON document per line).

    Args:
        url: URL of the ZIP archive, e.g. ``https://api.eia.gov/bulk/PET.zip``.

    Returns:
        list: one decoded JSON value per non-blank line of the archive's
        ``.txt`` member, in file order.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        zipfile.BadZipFile: if the response body is not a ZIP archive.
        KeyError: if the archive has no member with the expected name.
        json.JSONDecodeError: if a line is not valid JSON.
    """
    # Takes the name of the ZIP file that we download from the web.
    # EIA says that the file contained within the ZIP has the same
    # name as the ZIP file itself, but with a .txt extension
    name = url.split('/')[-1].split('.zip')[0] + '.txt'

    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36 Edg/123.0.0.0'}
    # (connect, read) timeouts so a stalled connection cannot hang the run forever
    response = requests.get(url, headers=headers, timeout=(10, 300))
    # Fail loudly on HTTP errors instead of trying to unzip an error page
    response.raise_for_status()

    # Reads zip file to memory; both the archive and the member are closed on exit
    with ZipFile(io.BytesIO(response.content)) as zip_file, zip_file.open(name) as member:
        # Parse each line on its own rather than gluing the lines into one big
        # JSON array: this does not depend on the file ending with exactly one
        # newline, tolerates blank lines, and avoids building a second
        # full-size copy of the text.
        lines = io.TextIOWrapper(member, encoding='utf-8')
        return [json.loads(line) for line in lines if line.strip()]

In [None]:
def groupby_country(data):
    """Group series by the country part of their 'iso3166' field.

    A series is dropped (and a "Skipping ..." line is printed) when it has no
    'iso3166' key, when its 'copyright' value is not the string 'None', when
    its ISO code is empty, or when it is a subdivision of the USA (a code such
    as 'USA-XX').

    Args:
        data: iterable of series dicts; each must provide 'description', and
            'copyright' whenever 'iso3166' is present.

    Returns:
        dict: country code (the text before the first '-') -> list of the
        series kept for that country, in input order.
    """
    grouped = {}
    for entry in data:
        if 'iso3166' in entry and entry['copyright'] == 'None':
            parts = entry['iso3166'].split('-')
            is_blank = not any(parts)
            is_us_subdivision = len(parts) > 1 and parts[0].startswith('USA')
            if not (is_blank or is_us_subdivision):
                grouped.setdefault(parts[0], []).append(entry)
                continue

        # Every rejected series ends up here
        print(f"Skipping {entry['description']}")

    return grouped

In [None]:
def should_skip(country):
    """Tell whether a country key must be excluded from the export.

    Returns True when ``country`` is the code 'MMU' or is not exactly three
    characters long, False otherwise.
    """
    has_three_chars = len(country) == 3
    return not (has_three_chars and country != 'MMU')

def filter_and_write(grouped_countries):
    """Write one CSV file per series for every exportable country group.

    Groups whose key is rejected by ``should_skip`` are ignored. Within a
    group, a series is skipped when its 'iso3166' field is empty or when its
    country code is not present in the ``ISO3186`` table (previously such a
    code raised ``KeyError`` and aborted the whole export). Every remaining
    series is written to ``<destination_dir>/<series_id lower-cased>.csv``.

    Args:
        grouped_countries: mapping of country code -> list of series dicts.
            Each series must provide 'series_id', 'iso3166', 'description'
            and 'data', where 'data' is a list of rows whose first item is
            the sort key and whose second item is the value.

    Returns:
        None. The result is the set of files written to ``destination_dir``.
    """
    for country_code, series_list in grouped_countries.items():
        if should_skip(country_code):
            print(f"Skipping country {country_code}")
            continue

        for series in series_list:
            ticker = series['series_id']
            iso = series['iso3166'].split('-')

            if not any(iso):
                print(f"Skipping {series['description']} because no ISO code was found")
                continue

            # Only export series whose country is in the lookup table
            if iso[0] not in ISO3186:
                print(f"Skipping {series['description']} because ISO code {iso[0]} is unknown")
                continue

            # Drop rows without a value, then order the rest by their first column
            rows = sorted(
                (row for row in series['data'] if row[1] is not None),
                key=lambda row: row[0],
            )

            # Turns [[20190101, 50.01], [20190102, 50.02]] into the CSV text
            # 20190101,50.01
            # 20190102,50.02
            csv = '\n'.join(','.join(str(value) for value in row) for row in rows)

            output_path = f'{destination_dir}/{ticker.lower()}.csv'
            print(f'Writing {output_path}')
            with open(output_path, 'w') as output_file:
                output_file.write(csv)

In [None]:
# Download every configured archive, keep the usable series and export them.
for url in URLS:
    content = download(url)
    # Keep only the entries that carry all four fields we rely on
    # (frequency 'f', 'data', 'description', 'copyright') and whose frequency
    # is "Weekly (W)", "Daily (D)" or "Hourly (H)"
    data = [
        i for i in content
        if all(field in i for field in ('f', 'data', 'description', 'copyright'))
        and i['f'] in ('W', 'D', 'H')
    ]
    # Similar to the groupby linq operator: key the series by country, then
    # write the CSV files for each country group
    grouped = groupby_country(data)
    filter_and_write(grouped)

    # Try to save some memory usage by reducing the refcount per loop
    del grouped
    del content

In [None]:
# Shell escape: sync everything staged under /temp-output-directory/ (which
# contains the CSV files written above) to the cache.quantconnect.com S3 bucket.
# Requires the AWS CLI to be available in the kernel's environment.
!aws s3 sync /temp-output-directory/ s3://cache.quantconnect.com/