# Market Data
> Updated NEM data
- toc: true 
- badges: false
- comments: true
- categories: [jupyter]
- image: images/chart-preview.png

In [1]:
#hide_input
from datetime import datetime, date, timedelta
import requests
import re
from tqdm import tqdm
from collections import namedtuple
from io import BytesIO
from zipfile import ZipFile

import pandas as  pd
import numpy as np

import plotly
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go

from IPython.display import display_html, HTML

import ipywidgets as widgets

In [2]:
#hide_input

# Global variables #

# Today's calendar date (local time) and the day before it.
current_date = date.today()
yesterday = current_date + timedelta(days=-1)

# State/territory codes paired with their capital city; the two public lists
# below are unzipped from these pairs, so they stay aligned by position.
_STATE_CAPITALS = [
    ("QLD", "Brisbane"),
    ("NSW", "Sydney"),
    ("VIC", "Melbourne"),
    ("SA", "Adelaide"),
    ("TAS", "Hobart"),
    ("ACT", "Canberra"),
    ("WA", "Perth"),
    ("NT", "Darwin"),
]
states = [code for code, _ in _STATE_CAPITALS]
capital_cities = [city for _, city in _STATE_CAPITALS]

In [3]:
#hide_input
# Stamp the rendered notebook with the local wall-clock time of this run.
print('Last ran: ', f"{datetime.now():%Y-%m-%d %H:%M:%S}")

Last ran:  2021-02-04 12:29:18


# NEM Price Data

In [4]:
#hide_input
# Site root and the report section that the dataset folders are requested under.
base_url = "http://www.nemweb.com.au"
section = "Reports/CURRENT"

# Window that a file's timestamp is later compared against:
# midnight at the start of today up to a fixed far-future date.
start_date = datetime.combine(datetime.today().date(), datetime.min.time())
end_date = datetime(3000, 12, 25)

# Record describing one dataset: its folder name, the regex for its file names
# (group 1 captures the timestamp), how to parse that timestamp, the name of
# its datetime column and the tables of interest.
CurrentDataset = namedtuple(
    "NemwebCurrentFile",
    "dataset_name nemfile_pattern datetime_format datetime_column tables",
)

DATASETS = dict(
    pd7day_gpg=CurrentDataset(
        dataset_name="PD7DAY",
        nemfile_pattern="PUBLIC_PD7DAY_GPG_([0-9]{14})_[0-9]{16}.zip",
        datetime_format="%Y%m%d%H%M%S",
        datetime_column="INTERVAL_DATETIME",
        tables=['GPG_PRICESOLUTION'],
    ),
)

# The dataset this notebook works with.
dataset = DATASETS['pd7day_gpg']

In [5]:
#hide_input
class ZipFileStreamer(ZipFile):
    """A ZipFile that can hand back an archive member as an in-memory byte stream."""

    def __init__(self, filename):
        """Open the archive and record its number of members as ``member_count``."""
        super().__init__(filename)
        self.member_count = len(self.infolist())

    def extract_stream(self, member):
        """Read ``member`` fully into memory and return it wrapped in a BytesIO.

        ``member`` may be the member's full name or a ZipInfo object.
        """
        payload = self.read(member)
        return BytesIO(payload)






# Seconds to wait on the server before giving up, so a stalled connection
# fails the run instead of hanging it.
REQUEST_TIMEOUT_SECONDS = 60

# Fetch the listing page for the dataset folder; fail loudly on an HTTP error
# rather than scanning an error page for links.
page = requests.get("{0}/{1}/{2}/".format(base_url,
                                          section,
                                          dataset.dataset_name),
                    timeout=REQUEST_TIMEOUT_SECONDS)
page.raise_for_status()

# Matches the path of each zip file in the listing; group(1) is the 14-digit
# timestamp embedded in the file name.
regex = re.compile("/{0}/{1}/{2}".format(section,
                                         dataset.dataset_name,
                                         dataset.nemfile_pattern))

results_df = []     # empty list to store results
results_table = []

# Only the most recent file is wanted (for the PD7DAY file only).
# The paths differ only from the timestamp onwards, so the greatest path is the
# latest file regardless of the order in which the listing presents them.
matches = list(tqdm(regex.finditer(page.text)))
if not matches:
    raise RuntimeError(
        "No files matching {0!r} were found in the listing at {1}".format(
            dataset.nemfile_pattern, page.url))
final_match = max(matches, key=lambda found: found.group(0))
file_datetime = datetime.strptime(final_match.group(1), dataset.datetime_format)

# add function for timing code
import pprofile
profiler = pprofile.Profile()

# TODO: re-attach the loop so that the daily reports show how the PD7DAY
# has changed throughout the day.
with profiler:
    # The latest file must fall strictly inside (start_date, end_date). Without
    # this explicit failure the names below would simply be left undefined.
    if not (end_date > file_datetime > start_date):
        raise RuntimeError(
            "Latest file is dated {0}, outside the window {1} .. {2}".format(
                file_datetime, start_date, end_date))

    # Download the archive and keep it in memory.
    response = requests.get("{0}{1}".format(base_url, final_match.group(0)),
                            timeout=REQUEST_TIMEOUT_SECONDS)
    response.raise_for_status()
    zip_bytes = BytesIO(response.content)

    # Pull the single CSV member out of the archive as a byte stream.
    with ZipFileStreamer(zip_bytes) as archive:
        if archive.member_count != 1:
            raise RuntimeError(
                "Expected exactly one file in the archive, found {0}".format(
                    archive.member_count))
        filename = archive.namelist()[0]    # name of the csv file we want
        nemfile_object = archive.extract_stream(filename)      # io.BytesIO object

    # NOTE: despite its name this is a list of raw byte lines; the name is kept
    # because the parsing step that follows reads it.
    table_dict = nemfile_object.readlines()
                        
    

# Keep only the raw lines that mention PRICESOLUTION.
zz = [x for x in table_dict if b'PRICESOLUTION' in x]


# Group those byte lines into one CSV blob per table, keyed "<field 1>_<field 2>".
# A separate name is used (instead of rebinding table_dict) so this cell can be
# re-run without first re-running the download.
tables_raw = {}
for line in zz:
    fields = line.decode().split(',', 3)   # only the first three fields are needed
    table = "{0}_{1}".format(fields[1], fields[2])

    # an "I" row starts a new table and serves as its CSV header
    if fields[0] == "I":
        tables_raw[table] = line

    # a "D" row is data appended to its table
    elif fields[0] == "D":
        tables_raw[table] += line

# Parse every collected table, not just whichever one the loop happened to end on.
price_dict = {name: pd.read_csv(BytesIO(raw)) for name, raw in tables_raw.items()}
if 'GPG_PRICESOLUTION' not in price_dict:
    raise KeyError(
        "GPG_PRICESOLUTION table not found in file; tables present: {0}".format(
            sorted(price_dict)))
price_frame = price_dict['GPG_PRICESOLUTION']
price_frame['INTERVAL_DATETIME'] = pd.to_datetime(price_frame['INTERVAL_DATETIME'], format='%Y/%m/%d %H:%M:%S')
price_frame['INTERVAL_DATE'] = price_frame['INTERVAL_DATETIME'].dt.date     # extract date only for use in pivot

180it [00:00, 13291.81it/s]


## Averaged Daily Prices
Note that the current day and the final (eighth) day of the forecast are partial days; do not use these figures for pricing or modelling.  
Click on individual states in the chart to add or remove them for closer inspection.

In [6]:
#hide_input
# Create a pivot table: mean RRP per calendar day, with one column per region.
# IMPORTANT: should probably remove first and last data aggregations because they are incomplete days
price_pivot = (
    pd.pivot_table(
        price_frame,
        values='RRP',
        index=['INTERVAL_DATE', 'REGIONID', 'RUN_DATETIME'],
        aggfunc='mean',   # string form: same result as np.mean, no pandas FutureWarning
    )
    .reset_index()
    .pivot(index='INTERVAL_DATE', columns='REGIONID', values='RRP')
    .round(2)
)

# Label every row with the run timestamp taken from the first row of the source
# frame; .iloc is positional, so this does not depend on the frame's index labels.
price_pivot['RUN_DATETIME'] = price_frame['RUN_DATETIME'].iloc[0]
print(price_pivot)

REGIONID        NSW1   QLD1      SA1    TAS1    VIC1         RUN_DATETIME
INTERVAL_DATE                                                            
2021-02-04     39.61  49.92    38.16   35.82   36.55  2021/02/04 07:30:00
2021-02-05     56.36  56.59    -7.37   30.82   29.72  2021/02/04 07:30:00
2021-02-06     43.24  54.70  -297.64  213.48 -318.70  2021/02/04 07:30:00
2021-02-07     30.07  82.02   222.64   40.65    9.39  2021/02/04 07:30:00
2021-02-08     45.02  62.31  -117.84  308.23   10.38  2021/02/04 07:30:00
2021-02-09     42.43  44.08   558.90  114.46   26.62  2021/02/04 07:30:00
2021-02-10     35.17  35.03  2273.05   73.42   30.55  2021/02/04 07:30:00
2021-02-11     30.41  29.83    31.87   34.83   27.23  2021/02/04 07:30:00
