# FRED Import work

In [1]:
import pandas as pd
import pandas_datareader as pdr
from fredapi import Fred

In [2]:
fred_api_key = 'd19d462c8ce42c8e0a58db98eb416e0c'

In [5]:
def get_fred_data(param_list, start_date, end_date):
    df = pdr.DataReader(param_list, 'fred', start_date, end_date)
    return df#.reset_index()

def multi_fred(param_list, fred_api_key): # , start_date, end_date
    fred = Fred(api_key = fred_api_key)
    df = pd.DataFrame()
    for item in param_list:
        tdf = pd.DataFrame(fred.get_series(item)).rename(columns={0:param_list[item]})
        df = pd.concat([df, tdf], axis=1)
    return df

In [None]:
series = 'DHHNGSP' # henry hub

df = get_fred_data([series], '2015-01-01', '2022-05-13')

In [4]:
series = { 
    'DHHNGSP':'Henry Hub Natural Gas Spot Price' ,
    'DCOILBRENTEU': 'Crude Oil Prices Brent  Europe',
    'DCOILWTICO': 'Crude Oil Prices West Texas Intermediate (WTI) Cushing Oklahoma', 
    'PNGASJPUSDM': 'Global price of LNG Asia', 
    'DJFUELUSGULF': 'Kerosene Type Jet Fuel Prices US Gulf Coast', 
    'DDFUELUSGULF' : 'Ultra Low Sulfur No 2 Diesel Fuel Prices US Gulf Coast' , 
    'DGASUSGULF':'Conventional Gasoline Prices US Gulf Coast Regular'}

In [6]:
df = multi_fred(series, fred_api_key )

In [7]:
df

Unnamed: 0,Henry Hub Natural Gas Spot Price,Crude Oil Prices Brent Europe,Crude Oil Prices West Texas Intermediate (WTI) Cushing Oklahoma,Global price of LNG Asia,Kerosene Type Jet Fuel Prices US Gulf Coast,Ultra Low Sulfur No 2 Diesel Fuel Prices US Gulf Coast,Conventional Gasoline Prices US Gulf Coast Regular
1986-01-02,,,25.56,,,,
1986-01-03,,,26.00,,,,
1986-01-06,,,26.53,,,,
1986-01-07,,,25.85,,,,
1986-01-08,,,25.87,,,,
...,...,...,...,...,...,...,...
2022-05-04,8.30,110.53,107.84,,4.326,4.344,3.617
2022-05-05,8.42,112.11,108.17,,4.180,4.355,3.596
2022-05-06,8.35,113.86,109.72,,4.155,4.120,3.748
2022-05-09,8.06,106.67,103.08,,3.961,3.901,3.546


# ICIS Direct API

In [289]:
import pandas as pd
import requests
from requests.auth import HTTPBasicAuth
import xml.etree.cElementTree as et

def icis_api(series, uname, passwd, constraints="", options="", structure_export=False, verbose=False):
    """" 
    icis_api utilizes the ICIS RESTful API to query commodity data and return it into a dataframe.
    The data structure of the API returned XML file is faily complexe and can be returned for additional use if needed (off by default)

    INPUT
        series = full URL for the series to be returned
        uname = ICIS configured username to authenticate
        passwd = ICIS configured password. This is passed in plaintext to the API
        constraints = additional constraints to be passed into the API. Should be multi-line text surrounted with tripe quotes (single or double)
        options = additonal options to pass into the API request. max-results is hardcoded to 99999 to ensure all data is returned.

    OUTPUT
        df = data frame with columns [date, low, high]
        struct = (optional) fully parsed xml.etree.cElementTree (et) structure of the API return

    EXAMPLE
    df = icis_api("http://iddn.icis.com/series/petchem/6002007", 
                'your_user_name', 
                'you_password', 
                constraints='''
                    <compare field="c:series-order" op="ge" value="2016-01-01"/>
                    <compare field="c:series-order" op="le" value="2016-05-01"/>
                    '''
            )

    """
    #Define API URL to handle authentication and request handoff
    API_URL = 'https://api.icis.com/v1/search'

    # Assemble scope using standard and user defined components
    scope = """
    <request xmlns="http://iddn.icis.com/ns/search">
        <scope>
            <series>""" + series + """</series>
        </scope>
        <constraints>
            """ + constraints + """
        </constraints>
        <options>
            <max-results>99999</max-results>
            """ + options + """ 
        </options>
    </request>"""

    if verbose: print(scope)

    # Execute API post request
    try:
        response = requests.post(API_URL, # + icis_url_2, 
            auth=HTTPBasicAuth(uname, passwd)
            ,headers = {"Content-Type": "application/xml"}
            ,data = scope
        )
    except requests.exceptions.RequestException as e:  
        raise SystemExit(e)

    # extract content from the reponse using its own encoding
    resp_data = response.content.decode(response.encoding)

    # Generate a tree from the decoded XML contents above
    root = et.fromstring(resp_data)

    ## Parse File
    # Iteratively extract date, low, and high data from the XML tree above

    # Pre define the temporary list which will house the extracted data
    temp_list = []

    for child in root: # Step through each entry (data point) in the tree
        if child.tag == "{http://www.w3.org/2005/Atom}entry":
            for schild in child: # Within each entry access the content (data)
                if schild.tag == '{http://www.w3.org/2005/Atom}content':
                    for tchild in schild: # Within the current Entry Conents append the date, low, and high values to the previous entryes
                        temp_list.append([tchild[8].text, tchild[9].text, tchild[10].text])
    
    # Convert the list into a DataFrame with the correct column names and assign date as the index
    df = pd.DataFrame(temp_list, columns={'low','date','high'}).set_index('date')

    if structure_export:
        return df, root
    else:
        return df

In [293]:
data, struct = icis_api("http://iddn.icis.com/series/petchem/6002007", 
                'ben.anderson@lyondellbasell.com', 
                'MyICISPassword1234', 
                constraints="""
                    <compare field="c:series-order" op="ge" value="2015-01-01"/>
                    """,
                structure_export=True
            )
data.shape

(1912, 2)

In [294]:
struct

<Element '{http://www.w3.org/2005/Atom}feed' at 0x0000015A7538CD18>

In [1]:

# ICIS API Login Credentials
icis_uname = 'ben.anderson@lyondellbasell.com'
icis_passw = 'MyICISPassword1234'

In [123]:
icis_url_1 = 'https://api.icis.com/v1/search'
#icis_url_1 = 'https://api.icis.com/v1/entities/ref-data/currency'
#icis_url_1 = 'https://api.icis.com/v1/entities/ref-data?max-results=20&first-result=11' 
#icis_url_1 = 'https://api.icis.com/v1/entities/ref-data?series=4021285'#&max-results=20'

#icis_url_2 = 'series/petchem/4021285' #'entities/ref-data/currency'

scope = """<request xmlns="http://iddn.icis.com/ns/search">
  <scope>
    <series>http://iddn.icis.com/series/petchem/6002007</series>
  </scope>
  <constraints>
    <compare field="c:series-order" op="ge" value="2016-01-01"/>
    <compare field="c:series-order" op="le" value="2016-05-01"/>
  </constraints>
</request>"""

#scope = """<request xmlns='http://iddn.icis.com/ns/search'>
#  <scope>
#    <type>series</type>
#  </scope>
#</request>"""

In [163]:
response = requests.post(icis_url_1, # + icis_url_2, 
  auth=HTTPBasicAuth(icis_uname, icis_passw)
  ,headers = {"Content-Type": "application/xml"}
  ,data = scope
  )
response.text

txtfile = open("C:/Users/baanders/Desktop/6002007.xml", "w")
n=txtfile.write(response.text)
txtfile.close()

resp_data = response.content.decode(response.encoding)

root = et.fromstring(resp_data)
#pd.read_xml(resp_data)


In [278]:
#Create empty dataframe to iteratively populate with values from xml 
df = pd.DataFrame(columns={'date','low','high'})
temp_list = []

for child in root:
    if child.tag == "{http://www.w3.org/2005/Atom}entry":
        #print(child.tag)
        for schild in child:
            if schild.tag == '{http://www.w3.org/2005/Atom}content':
                for tchild in schild:
                    temp_list.append([tchild[8].text, tchild[9].text, tchild[10].text])

df = pd.DataFrame(temp_list, columns={'low','date','high'}).set_index('date')
df

Unnamed: 0_level_0,low,high
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2016-01-04T00:00:00Z,43.97,44.37
2016-01-05T00:00:00Z,44.91,45.31
2016-01-06T00:00:00Z,43.22,43.62
2016-01-07T00:00:00Z,39.74,40.14
2016-01-08T00:00:00Z,40.47,40.87
...,...,...
2016-04-25T00:00:00Z,42.83,43.23
2016-04-26T00:00:00Z,42.63,43.03
2016-04-27T00:00:00Z,44.41,44.81
2016-04-28T00:00:00Z,43.44,43.84


In [258]:
for child in root:
    if child.tag == "{http://www.w3.org/2005/Atom}entry":
        #print(child.tag)
        for schild in child:
            if schild.tag == '{http://www.w3.org/2005/Atom}content':
                for tchild in schild:
                    #print(tchild[9].text)
                    for fchild in tchild:
                        print(fchild.tag)
                        #if fchild.tag == '{http://iddn.icis.com/ns/fields}assessment-high':
                        #    print('\t', fchild.text)
                        

{http://iddn.icis.com/ns/core}id
{http://iddn.icis.com/ns/core}version
{http://iddn.icis.com/ns/core}type
{http://iddn.icis.com/ns/core}created-on
{http://iddn.icis.com/ns/core}descriptor
{http://iddn.icis.com/ns/core}domain
{http://iddn.icis.com/ns/core}released-on
{http://iddn.icis.com/ns/core}series
{http://iddn.icis.com/ns/core}series-order
{http://iddn.icis.com/ns/fields}assessment-low
{http://iddn.icis.com/ns/fields}assessment-high
{http://iddn.icis.com/ns/fields}mid
{http://iddn.icis.com/ns/fields}assessment-low-delta
{http://iddn.icis.com/ns/fields}assessment-high-delta
{http://iddn.icis.com/ns/core}id
{http://iddn.icis.com/ns/core}version
{http://iddn.icis.com/ns/core}type
{http://iddn.icis.com/ns/core}created-on
{http://iddn.icis.com/ns/core}descriptor
{http://iddn.icis.com/ns/core}domain
{http://iddn.icis.com/ns/core}released-on
{http://iddn.icis.com/ns/core}series
{http://iddn.icis.com/ns/core}series-order
{http://iddn.icis.com/ns/fields}assessment-low
{http://iddn.icis.com

# ICIS Excel

In [2]:
import pandas as pd
import os

In [14]:
icis_path = '../../Data/ICIS Excel/'
icis_file = 'ICIS Naphtha Pricing with Plugin.xlsx'
os.path.isdir(icis_path)


True

In [30]:
pd.read_excel(icis_path+icis_file, usecols='B:Z', sheet_name='Sheet1', skiprows=12, skipfooter=11).melt(id_vars='Date').rename(columns={'variable':'description', 'Date':'date'})

  **kwds,


Unnamed: 0,date,description,value
0,2015-01-02,Naphtha FOB Singapore Assessment Spot Closing ...,49.22
1,2015-01-05,Naphtha FOB Singapore Assessment Spot Closing ...,46.67
2,2015-01-06,Naphtha FOB Singapore Assessment Spot Closing ...,44.78
3,2015-01-07,Naphtha FOB Singapore Assessment Spot Closing ...,42.61
4,2015-01-08,Naphtha FOB Singapore Assessment Spot Closing ...,42.94
...,...,...,...
3761,2022-05-18,Naphtha Reforming FOB US Assessment Barges Spo...,206.25
3762,2022-05-19,Naphtha Reforming FOB US Assessment Barges Spo...,218.50
3763,2022-05-20,Naphtha Reforming FOB US Assessment Barges Spo...,233.25
3764,2022-05-23,Naphtha Reforming FOB US Assessment Barges Spo...,229.00


In [37]:
def icis_excel_import(file_loc, sheet, keep_cols='B:Z', header=12, footer=11):
    # Import file_location and skip defined hearder and footer rows
    df_wide = pd.read_excel(file_loc, usecols=keep_cols, sheet_name=sheet, skiprows=header, skipfooter=footer)#.rename(columns={'Date':'date'})
    # Melt data 
    df_long = df_wide.melt(id_vars='Date').rename(columns={'variable':'description', 'Date':'date'})

    return df_wide, df_long

In [39]:
naphtha_w, naphtha_l = icis_excel_import(icis_path+icis_file, sheet='Sheet1')
naphtha_w.rename(columns={'Date':'date'})

  **kwds,


Unnamed: 0,date,Naphtha FOB Singapore Assessment Spot Closing Value Daily (Mid) : USD/bbl,Naphtha Reforming FOB US Assessment Barges Spot 4 Weeks Closing Value Daily (Mid) : US CTS/US gal
0,2015-01-02,49.22,120.75
1,2015-01-05,46.67,117.00
2,2015-01-06,44.78,116.75
3,2015-01-07,42.61,113.25
4,2015-01-08,42.94,115.50
...,...,...,...
1878,2022-05-18,99.89,206.25
1879,2022-05-19,98.00,218.50
1880,2022-05-20,98.22,233.25
1881,2022-05-23,98.72,229.00


# Weekly parquet to appended single

In [41]:
import pandas as pd
import datetime as dt
import os

In [42]:
comb = pd.DataFrame()

In [49]:
# Define default storage location for files
dataroot = '../../Data/Parquet/SKLearn Data/'
#ifilename = '20220426_weekly_for_modeling'; index_date = '2022-04-26'
#ifilename = '20220513_weekly_for_modeling'; index_date = '2022-05-13'
ifilename = '20220527_weekly_for_modeling'; index_date = '2022-05-27'

# Check if data location above exists. If it does import dataset.
# All datasets imported with name df so that we can generically 

if os.path.isdir(dataroot):
    df = pd.read_parquet(dataroot+ifilename+'.parquet')
    print(ifilename + ' dataset loaded with shape', df.shape, 'and', df.isna().sum().sum(), 'NaN values')
    
else:
    print('Storage location does not exist. Please update directory and try again.')

20220527_weekly_for_modeling dataset loaded with shape (373, 5432) and 0 NaN values


In [50]:
df = pd.concat([df], keys=[index_date], names=['run_date'], axis=1)
df

run_date,2022-05-27,2022-05-27,2022-05-27,2022-05-27,2022-05-27,2022-05-27,2022-05-27,2022-05-27,2022-05-27,2022-05-27,2022-05-27,2022-05-27,2022-05-27,2022-05-27,2022-05-27,2022-05-27,2022-05-27,2022-05-27,2022-05-27,2022-05-27,2022-05-27
Unnamed: 0_level_1,AUD/USD,Benzene CFR Taiwan MAvg (USD/MT)-AVERAGE,Benzene CFR Taiwan MAvg (USD/MT)-CLOSE,Benzene CFR Taiwan MAvg (USD/MT)-HIGH,Benzene CFR Taiwan MAvg (USD/MT)-HIGHLOW2,Benzene CFR Taiwan MAvg (USD/MT)-LOW,Benzene CFR Taiwan Weekly (USD/MT)-AVERAGE,Benzene CFR Taiwan Weekly (USD/MT)-CLOSE,Benzene CFR Taiwan Weekly (USD/MT)-HIGH,Benzene CFR Taiwan Weekly (USD/MT)-HIGHLOW2,...,"Xylenes (mixed)-Spot, Next Month, Low-N/A-Cents per Gallon-Houston, TX-North America_lag_4","Xylenes (mixed)-Spot, Next Month, Low-N/A-Cents per Gallon-Houston, TX-North America_lag_5","Xylenes (mixed)-Spot, Next Month, Low-N/A-Cents per Gallon-Houston, TX-North America_lag_6","Xylenes (mixed)-Spot, Next Month, Low-N/A-Cents per Gallon-Houston, TX-North America_lag_7","Xylenes (mixed)-Spot, Next Month, Low-N/A-Cents per Gallon-Houston, TX-North America_lag_8","Xylenes (mixed)-Spot, Next Month, Low-N/A-Cents per Gallon-Houston, TX-North America_lag_9","Xylenes (mixed)-Spot, Next Month, Low-N/A-Cents per Gallon-Houston, TX-North America_lag_10","Xylenes (mixed)-Spot, Next Month, Low-N/A-Cents per Gallon-Houston, TX-North America_lag_11","Xylenes (mixed)-Spot, Next Month, Low-N/A-Cents per Gallon-Houston, TX-North America_lag_12","Xylenes (mixed)-Spot, Next Month, Low-N/A-Cents per Gallon-Houston, TX-North America_lag_13"
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2015-04-12 00:00:00+00:00,0.76796,833.30,0.00,834.20,833.30,832.40,780.000000,780.000000,781.000000,780.000000,...,1953.280,1919.76,1880.80,1815.20,1702.08,1631.44,1571.60,1511.84,1549.44,1580.800
2015-04-19 00:00:00+00:00,0.76798,833.30,0.00,834.20,833.30,832.40,816.600000,816.600000,817.600000,816.600000,...,1796.280,1953.28,1919.76,1880.80,1815.20,1702.08,1631.44,1571.60,1511.84,1549.440
2015-04-26 00:00:00+00:00,0.77692,833.30,0.00,834.20,833.30,832.40,870.100000,870.100000,871.000000,870.100000,...,1687.700,1796.28,1953.28,1919.76,1880.80,1815.20,1702.08,1631.44,1571.60,1511.840
2015-05-03 00:00:00+00:00,0.79242,823.79,0.00,824.66,823.79,822.92,865.100000,865.100000,865.800000,865.100000,...,1815.120,1687.70,1796.28,1953.28,1919.76,1880.80,1815.20,1702.08,1631.44,1571.600
2015-05-10 00:00:00+00:00,0.79226,785.75,0.00,786.50,785.75,785.00,841.600000,841.600000,842.600000,841.600000,...,1795.136,1815.12,1687.70,1796.28,1953.28,1919.76,1880.80,1815.20,1702.08,1631.440
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-05-01 00:00:00+00:00,0.73600,1002.57,1002.57,1003.57,1002.57,1001.57,1170.443333,1170.443333,1171.443333,1170.443333,...,2571.760,2638.74,2840.04,2708.02,2507.47,2192.50,1784.40,1978.02,2167.76,1395.804
2022-05-08 00:00:00+00:00,0.73600,1002.57,1002.57,1003.57,1002.57,1001.57,1195.330000,1195.330000,1196.330000,1195.330000,...,2623.800,2571.76,2638.74,2840.04,2708.02,2507.47,2192.50,1784.40,1978.02,2167.760
2022-05-15 00:00:00+00:00,0.73600,1002.57,1002.57,1003.57,1002.57,1001.57,1195.330000,1195.330000,1196.330000,1195.330000,...,2024.080,2623.80,2571.76,2638.74,2840.04,2708.02,2507.47,2192.50,1784.40,1978.020
2022-05-22 00:00:00+00:00,0.73600,1002.57,1002.57,1003.57,1002.57,1001.57,1210.730000,1210.730000,1211.730000,1210.730000,...,1864.440,2024.08,2623.80,2571.76,2638.74,2840.04,2708.02,2507.47,2192.50,1784.400


In [51]:
comb = pd.concat([comb, df], axis=1)
comb

run_date,2022-04-26,2022-04-26,2022-04-26,2022-04-26,2022-04-26,2022-04-26,2022-04-26,2022-04-26,2022-04-26,2022-04-26,...,2022-05-27,2022-05-27,2022-05-27,2022-05-27,2022-05-27,2022-05-27,2022-05-27,2022-05-27,2022-05-27,2022-05-27
Unnamed: 0_level_1,AUD/USD,Benzene CFR Taiwan MAvg (USD/MT)-AVERAGE,Benzene CFR Taiwan MAvg (USD/MT)-CLOSE,Benzene CFR Taiwan MAvg (USD/MT)-HIGH,Benzene CFR Taiwan MAvg (USD/MT)-HIGHLOW2,Benzene CFR Taiwan MAvg (USD/MT)-LOW,Benzene CFR Taiwan Weekly (USD/MT)-AVERAGE,Benzene CFR Taiwan Weekly (USD/MT)-CLOSE,Benzene CFR Taiwan Weekly (USD/MT)-HIGH,Benzene CFR Taiwan Weekly (USD/MT)-HIGHLOW2,...,"Xylenes (mixed)-Spot, Next Month, Low-N/A-Cents per Gallon-Houston, TX-North America_lag_4","Xylenes (mixed)-Spot, Next Month, Low-N/A-Cents per Gallon-Houston, TX-North America_lag_5","Xylenes (mixed)-Spot, Next Month, Low-N/A-Cents per Gallon-Houston, TX-North America_lag_6","Xylenes (mixed)-Spot, Next Month, Low-N/A-Cents per Gallon-Houston, TX-North America_lag_7","Xylenes (mixed)-Spot, Next Month, Low-N/A-Cents per Gallon-Houston, TX-North America_lag_8","Xylenes (mixed)-Spot, Next Month, Low-N/A-Cents per Gallon-Houston, TX-North America_lag_9","Xylenes (mixed)-Spot, Next Month, Low-N/A-Cents per Gallon-Houston, TX-North America_lag_10","Xylenes (mixed)-Spot, Next Month, Low-N/A-Cents per Gallon-Houston, TX-North America_lag_11","Xylenes (mixed)-Spot, Next Month, Low-N/A-Cents per Gallon-Houston, TX-North America_lag_12","Xylenes (mixed)-Spot, Next Month, Low-N/A-Cents per Gallon-Houston, TX-North America_lag_13"
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2015-04-12 00:00:00+00:00,0.76796,833.30,0.00,834.20,833.30,832.40,780.0,780.0,781.0,780.0,...,1953.280,1919.76,1880.80,1815.20,1702.08,1631.44,1571.60,1511.84,1549.44,1580.800
2015-04-19 00:00:00+00:00,0.76798,833.30,0.00,834.20,833.30,832.40,816.6,816.6,817.6,816.6,...,1796.280,1953.28,1919.76,1880.80,1815.20,1702.08,1631.44,1571.60,1511.84,1549.440
2015-04-26 00:00:00+00:00,0.77692,833.30,0.00,834.20,833.30,832.40,870.1,870.1,871.0,870.1,...,1687.700,1796.28,1953.28,1919.76,1880.80,1815.20,1702.08,1631.44,1571.60,1511.840
2015-05-03 00:00:00+00:00,0.79242,823.79,0.00,824.66,823.79,822.92,865.1,865.1,865.8,865.1,...,1815.120,1687.70,1796.28,1953.28,1919.76,1880.80,1815.20,1702.08,1631.44,1571.600
2015-05-10 00:00:00+00:00,0.79226,785.75,0.00,786.50,785.75,785.00,841.6,841.6,842.6,841.6,...,1795.136,1815.12,1687.70,1796.28,1953.28,1919.76,1880.80,1815.20,1702.08,1631.440
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-05-01 00:00:00+00:00,0.73600,1002.57,1002.57,1003.57,1002.57,1001.57,1158.0,1158.0,1159.0,1158.0,...,2571.760,2638.74,2840.04,2708.02,2507.47,2192.50,1784.40,1978.02,2167.76,1395.804
2022-05-08 00:00:00+00:00,,,,,,,,,,,...,2623.800,2571.76,2638.74,2840.04,2708.02,2507.47,2192.50,1784.40,1978.02,2167.760
2022-05-15 00:00:00+00:00,,,,,,,,,,,...,2024.080,2623.80,2571.76,2638.74,2840.04,2708.02,2507.47,2192.50,1784.40,1978.020
2022-05-22 00:00:00+00:00,,,,,,,,,,,...,1864.440,2024.08,2623.80,2571.76,2638.74,2840.04,2708.02,2507.47,2192.50,1784.400


In [40]:
comb.to_parquet(path=dataroot+'weekly_for_modeling.parquet', engine='pyarrow', compression=None, index=True)

# Datetime format work

In [10]:
from datetime import datetime
datetime.today().strftime('%Y%m%d %H:%M')

'20220609_17'

In [8]:
from datetime import datetime
datetime.now()

datetime.datetime(2022, 6, 9, 17, 55, 58, 665109)

# Correcting Values in RF_xpred_output and RF_param parquet files

In [1]:
import pandas as pd



## '../../Data/Models/RF_lpred_output.parquet'

In [47]:
lpred_loc = '../../Data/Models/RF_lpred_output.parquet'
df = pd.read_parquet(lpred_loc)

In [48]:
df.head(3)

Unnamed: 0,pred_date,value,lag,run_date,model_date
0,20190106,1626.14528,1,20181230,20220426
1,20190106,1617.258805,2,20181230,20220426
2,20190113,1617.258805,2,20181230,20220426


In [49]:
df['model_date'].unique()

array(['20220426', '20220515', '20220527', '20220604', '20220609 20:00',
       '20220609 21:00', '20220614 01:26', '20220614 18:01',
       '20220614 19:26'], dtype=object)

In [22]:
df['model_date'].replace('20220609_20', '20220609 20:00', inplace=True)
df['model_date'].unique()

array(['20220426', '20220515', '20220527', '20220604', '20220609 20:00',
       '20220609 21:00', '20220614 01:26', '20220614 18:01',
       '20220614 19:26'], dtype=object)

In [23]:
df.to_parquet(path=lpred_loc, engine='pyarrow', compression=None, index=True)

## '../../Data/Models/RF_pred_output.parquet'

In [45]:
pred_loc = '../../Data/Models/RF_pred_output.parquet'
df = pd.read_parquet(pred_loc)
df.head(3)

Unnamed: 0,run_date,lag,20190106,20190113,20190120,20190127,20190203,20190210,20190217,20190224,...,20220703,20220710,20220717,20220724,model_date,20220731,20220807,20220814,20220821,20220828
0,20181230,1,1626.14528,,,,,,,,...,,,,,20220426,,,,,
1,20181230,2,1617.258805,1617.258805,,,,,,,...,,,,,20220426,,,,,
2,20181230,3,1495.927518,1511.926887,1513.967181,,,,,,...,,,,,20220426,,,,,


In [46]:
df['model_date'].unique()

array(['20220426', '20220515', '20220527', '20220604', '20220609 20:00',
       '20220609 21:00', '20220614 01:26', '20220614 18:01',
       '20220614 19:26'], dtype=object)

In [26]:
df['model_date'].replace('20220609_20', '20220609 20:00', inplace=True)
df['model_date'].replace('20220609_21', '20220609 21:00', inplace=True)
df['model_date'].unique()

array(['20220426', '20220515', '20220527', '20220604', '20220609 20:00',
       '20220609 21:00', '20220614 01:26', '20220614 18:01',
       '20220614 19:26'], dtype=object)

In [27]:
df.to_parquet(path=pred_loc, engine='pyarrow', compression=None, index=True)

## '../../Data/Models/RF_lpred_output.parquet'

In [50]:
lpred_loc = '../../Data/Models/RF_lpred_output.parquet'
df = pd.read_parquet(lpred_loc)
df.head(3)

Unnamed: 0,pred_date,value,lag,run_date,model_date
0,20190106,1626.14528,1,20181230,20220426
1,20190106,1617.258805,2,20181230,20220426
2,20190113,1617.258805,2,20181230,20220426


In [51]:
df['model_date'].unique()

array(['20220426', '20220515', '20220527', '20220604', '20220609 20:00',
       '20220609 21:00', '20220614 01:26', '20220614 18:01',
       '20220614 19:26'], dtype=object)

## '../../Data/Models/Random_Forest_Models_df.parquet'

In [52]:
pred_loc = '../../Data/Models/Random_Forest_Models_df.parquet'
df = pd.read_parquet(pred_loc)
df.head(3)

run_date,20220427,20220427,20220427,20220427,20220427,20220427,20220427,20220427,20220427,20220427,...,20220614 02:00,20220614 02:00,20220614 02:00,20220614 02:00,20220614 02:00,20220614 02:00,20220614 02:00,20220614 02:00,20220614 02:00,20220614 02:00
Unnamed: 0_level_1,Benzene CFR Taiwan MAvg (USD/MT)-AVERAGE,Benzene CFR Taiwan Weekly (USD/MT)-AVERAGE,Benzene ENEOS Corporation CP Nomination (USD/MT)-AVERAGE,Benzene ENEOS Corporation CP Settlement (USD/MT)-AVERAGE,Benzene FOB Brazil Weekly (USD/MT)-AVERAGE,Benzene FOB Korea Marker (USD/MT)-AVERAGE,Benzene FOB Korea Marker MAvg (USD/MT)-AVERAGE,Benzene FOB Korea Marker WAvg (USD/MT)-AVERAGE,Benzene FOB Korea Paper BalMo (USD/MT)-AVERAGE,Benzene FOB Korea Paper BalMo MAvg (USD/MT)-AVERAGE,...,Naphtha Reforming FOB US Assessment Barges Spot 4 Weeks Closing Value Daily (Mid) : US CTS/US gal_lag_4,Naphtha Reforming FOB US Assessment Barges Spot 4 Weeks Closing Value Daily (Mid) : US CTS/US gal_lag_5,Naphtha Reforming FOB US Assessment Barges Spot 4 Weeks Closing Value Daily (Mid) : US CTS/US gal_lag_6,Naphtha Reforming FOB US Assessment Barges Spot 4 Weeks Closing Value Daily (Mid) : US CTS/US gal_lag_7,Naphtha Reforming FOB US Assessment Barges Spot 4 Weeks Closing Value Daily (Mid) : US CTS/US gal_lag_8,Naphtha Reforming FOB US Assessment Barges Spot 4 Weeks Closing Value Daily (Mid) : US CTS/US gal_lag_9,Naphtha Reforming FOB US Assessment Barges Spot 4 Weeks Closing Value Daily (Mid) : US CTS/US gal_lag_10,Naphtha Reforming FOB US Assessment Barges Spot 4 Weeks Closing Value Daily (Mid) : US CTS/US gal_lag_11,Naphtha Reforming FOB US Assessment Barges Spot 4 Weeks Closing Value Daily (Mid) : US CTS/US gal_lag_12,Naphtha Reforming FOB US Assessment Barges Spot 4 Weeks Closing Value Daily (Mid) : US CTS/US gal_lag_13
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2015-04-12 00:00:00+00:00,833.3,780.0,820.0,770.0,731.8,789.7,826.61,762.74,796.6,800.9,...,152.85,157.55,146.083333,155.3,155.45,155.65,141.875,132.1,128.9,120.25
2015-04-19 00:00:00+00:00,833.3,816.6,820.0,770.0,761.6,836.6,826.61,799.08,835.4,800.9,...,151.9,152.85,157.55,146.083333,155.3,155.45,155.65,141.875,132.1,128.9
2015-04-26 00:00:00+00:00,833.3,870.1,820.0,770.0,795.2,864.9,826.61,842.26,843.0,800.9,...,158.35,151.9,152.85,157.55,146.083333,155.3,155.45,155.65,141.875,132.1


In [53]:
df.columns.levels[0]

Index(['20220427', '20220513', '20220603', '20220609 18:00', '20220610 16:00',
       '20220613 16:00', '20220614 02:00'],
      dtype='object', name='run_date')

In [42]:
#df.index.set_levels(df.index.levels[0].str.replace('20220609_18', '20220609 18:00'), level=0)

df.columns.set_levels(df.columns.levels[0].str.replace('20220610_16', '20220610 16:00'), level=0, inplace=True)
df.columns.set_levels(df.columns.levels[0].str.replace('20220613_16', '20220613 16:00'), level=0, inplace=True)
df.columns.set_levels(df.columns.levels[0].str.replace('20220614_02', '20220614 02:00'), level=0, inplace=True)

  This is separate from the ipykernel package so we can avoid doing imports until
  after removing the cwd from sys.path.
  """


In [43]:
df.columns.levels[0]

Index(['20220427', '20220513', '20220603', '20220609 18:00', '20220610 16:00',
       '20220613 16:00', '20220614 02:00'],
      dtype='object', name='run_date')

In [44]:
df.to_parquet(path=pred_loc, engine='pyarrow', compression=None, index=True)

## ../../Data/Models/RF_fit_params_output.parquet

In [54]:
pred_loc = '../../Data/Models/RF_fit_params_output.parquet'
df = pd.read_parquet(pred_loc)
df.head(3)

Unnamed: 0,Feature,1,2,3,4,5,6,7,8,9,10,11,12,13,run_date,model_date
0,Benzene CFR Taiwan MAvg (USD/MT)-AVERAGE,0.0,0.012535,0.02405,0.020991,0.64662,0.638081,0.016718,0.013663,0.011388,0.012154,0.012819,0.002101,0.003372,20181230,20220426
1,Benzene CFR Taiwan Weekly (USD/MT)-AVERAGE,0.003305,0.012177,0.011374,0.009259,0.0,0.0,0.007802,0.005767,0.001858,0.002508,0.000147,0.000183,0.000453,20181230,20220426
2,Benzene ENEOS Corporation CP Nomination (USD/M...,0.0,0.0,9.3e-05,3.8e-05,2.3e-05,0.0,0.000396,0.0,0.000217,0.00049,0.00033,0.000492,0.000141,20181230,20220426


In [56]:
df['model_date'].unique()

array(['20220426', '20220515', '20220527', '20220604', '20220609_20',
       '20220609_21', '20220614 01:26', '20220614 18:01',
       '20220614 19:26'], dtype=object)

In [57]:
df['model_date'].replace('20220609_20', '20220609 20:00', inplace=True)
df['model_date'].replace('20220609_21', '20220609 21:00', inplace=True)
df['model_date'].unique()

array(['20220426', '20220515', '20220527', '20220604', '20220609 20:00',
       '20220609 21:00', '20220614 01:26', '20220614 18:01',
       '20220614 19:26'], dtype=object)

In [None]:
df.to_parquet(path=pred_loc, engine='pyarrow', compression=None, index=True)

# New iteration model calls

In [10]:
from sklearn.ensemble import RandomForestRegressor
#params = {'n_estimators': 100}
#alg = RandomForestRegressor
#alg.set_params(**params)
#alg.get_params(deep=True)
#RandomForestRegressor.get_params(alg)

RandomForestRegressor.get_params(deep=False).items()

TypeError: get_params() missing 1 required positional argument: 'self'

In [17]:
alg=RandomForestRegressor
alg.get_params()

TypeError: get_params() missing 1 required positional argument: 'self'

# KERAS

In [1]:
import pandas as pd

In [7]:
rf_params = {0: 'First string'}
rf_params[1] = 'second string'
rf_params[2] = 'Thid String'

In [9]:
max(rf_params)

2

In [10]:
rf_params[max(rf_params)]

'Thid String'

# Next