In [1]:
#| echo: false

import os
from pathlib import Path
from zipfile import ZipFile

import pandas as pd
import plotly.express as px
from dotenv import load_dotenv

In [2]:
#| echo: false

# Load settings via python-dotenv's load_dotenv (imported above) and keep its return value.
# NOTE(review): presumably this is what makes OPEN_WEATHER_KEY available in os.environ — confirm.
load_env = load_dotenv()

In [3]:
#  os.environ["OPEN_WEATHER_KEY"]

In [4]:
#| echo: false

# from itables import init_notebook_mode  # displays dataframes in friendly manner

# init_notebook_mode(all_interactive=True)

In [5]:
#| echo: false

# !mv ~/Downloads/MyUsageData_30-04-2024.zip ../data/electricity

In [6]:
#| echo: false

# Electricity data lives in <parent of the working directory>/data/electricity
DATA_DIR = Path.cwd().parent.joinpath("data", "electricity")

In [7]:
#| echo: false

# Usage export archive, expected inside DATA_DIR
DATA_ZIP = DATA_DIR.joinpath("MyUsageData_30-04-2024.zip")

In [8]:
#| echo: false

# !ls ../data/electricity/

In [9]:
#| echo: false

# assert DATA_ZIP.exists()

In [10]:
#| echo: false

class ZipFileManager:
    """Small convenience wrapper around a single zip archive on disk.

    Parameters:
    - file_path: Location of the zip archive (string or path-like).
    - extract_to_dir: Directory that receives the extracted members;
                      defaults to the current directory.
    """

    def __init__(self, file_path, extract_to_dir="."):
        self.file_path, self.extract_to_dir = Path(file_path), Path(extract_to_dir)

    def unzip_archive(self):
        """Extract every member into ``extract_to_dir``, creating the directory first if needed."""
        target_dir = self.extract_to_dir
        target_dir.mkdir(parents=True, exist_ok=True)
        with ZipFile(self.file_path, "r") as archive:
            archive.extractall(target_dir)

    def list_contents(self):
        """Return the names of the members stored in the archive as a list of strings."""
        with ZipFile(self.file_path, "r") as archive:
            member_names = archive.namelist()
        return member_names

In [11]:
#| echo: false

# Named `usage_archive` rather than `zip` so the built-in zip() stays usable in later cells.
usage_archive = ZipFileManager(DATA_ZIP, DATA_DIR)

# Extract the archive's members into DATA_DIR (the directory is created if missing).
usage_archive.unzip_archive()

# list(usage_archive.list_contents())

In [12]:
#| echo: false

# Same location and stem as the archive, with a .csv extension. with_suffix() swaps only the
# final extension, whereas str.replace() would also rewrite any ".zip" occurring earlier in the path.
# NOTE(review): assumes the archive holds a CSV named after the archive itself — confirm.
DATA_CSV = DATA_ZIP.with_suffix(".csv")

In [13]:
#| echo: false

# DATA_CSV.exists()

In [14]:
#| echo: false

# strptime-style pattern: day/month/year, then a 12-hour clock time with an AM/PM marker.
# Used below as the date_format when reading the StartDate and EndDate columns.
timestampformat = "%d/%m/%Y %I:%M:%S %p"

In [15]:
#| echo: false

# Read the usage CSV, parsing both timestamp columns with the explicit format defined above
df_pd = pd.read_csv(
    DATA_CSV,
    parse_dates=["StartDate", "EndDate"],
    date_format=timestampformat,
)

In [16]:
#| echo: false

def aggregate_profile_read(df, frequency):
    """
    Aggregates the ProfileReadValue column by sum based on the specified frequency.

    The input frame is left untouched: indexing and datetime conversion are
    applied to a new object, so the same frame can be aggregated repeatedly.

    Parameters:
    - df: DataFrame containing the data. Needs a ProfileReadValue column and
          StartDate either as a column or as the (named) index.
    - frequency: pandas offset alias handed to ``DataFrame.resample``, e.g.
                 'h' for hourly, 'D' for daily, 'W' for weekly and 'ME' for
                 month-end (the older 'M' alias is deprecated in recent pandas).

    Returns:
    - DataFrame with the StartDate as the index and the aggregated sums of ProfileReadValue.
    """
    # Use StartDate as the index; set_index returns a new frame when it is needed
    indexed = df if df.index.name == 'StartDate' else df.set_index('StartDate')

    # set_axis returns a new object, so the datetime conversion never leaks back into
    # the caller's frame (assigning to df.index would when df is already indexed)
    indexed = indexed.set_axis(pd.to_datetime(indexed.index), axis=0)

    # Resample and aggregate
    return indexed.resample(frequency).agg({'ProfileReadValue': 'sum'})

### Half-hourly usage

In [17]:
#| echo: false

# Line chart of ProfileReadValue against StartDate for the unaggregated readings
fig = px.line(
    df_pd,
    x="StartDate",
    y="ProfileReadValue",
    title="Electricity usage: half-hourly",
)
fig.show()

### Hourly usage

In [18]:
#| echo: false

# Sum ProfileReadValue into hourly buckets
hourly_aggregated = aggregate_profile_read(df_pd, frequency="h")

In [19]:
#| echo: false

# The aggregated frame is indexed by StartDate, so the index supplies the x axis
fig = px.line(
    hourly_aggregated,
    x=hourly_aggregated.index,
    y="ProfileReadValue",
    title="Hourly electricity usage",
)
fig.show()

### Daily usage

In [20]:
#| echo: false

# Sum ProfileReadValue into daily buckets
daily_aggregated = aggregate_profile_read(df_pd, frequency="D")


In [21]:
#| echo: false

# The aggregated frame is indexed by StartDate, so the index supplies the x axis
fig = px.line(
    daily_aggregated,
    x=daily_aggregated.index,
    y="ProfileReadValue",
    title="Daily electricity usage",
)
fig.show()

### Weekly usage

In [22]:
#| echo: false

# Sum ProfileReadValue into weekly buckets
weekly_aggregated = aggregate_profile_read(df_pd, frequency="W")

In [23]:
#| echo: false

# The aggregated frame is indexed by StartDate, so the index supplies the x axis
fig = px.line(
    weekly_aggregated,
    x=weekly_aggregated.index,
    y="ProfileReadValue",
    title="Weekly electricity usage",
)
fig.show()

### Monthly usage

In [24]:
#| echo: false

from pprint import pprint

# 'ME' buckets the readings by month, labelled with each month's last day
monthly_aggregated = aggregate_profile_read(df_pd, frequency="ME")

# Show the first few monthly totals as plain text
monthly_preview = monthly_aggregated.head()
pprint(monthly_preview)

            ProfileReadValue
StartDate                   
2023-12-31           218.084
2024-01-31          1198.359
2024-02-29          1024.242
2024-03-31           840.665
2024-04-30           357.591


### Looking at overlaying weather data - couldn't get free data so far.

Maybe scrape it from the Bureau of Meteorology (BoM) instead?

In [27]:
import requests

def get_daily_max_temperature(api_key, lat, lon, timeout=10):
    """
    Queries the OpenWeatherMap One Call API 3.0 for a location and returns the decoded JSON response.

    Parameters:
    - api_key: Your OpenWeatherMap API key as a string.
    - lat: Latitude of the location as a float.
    - lon: Longitude of the location as a float.
    - timeout: Seconds to wait for the server before requests gives up (default 10).
               Without it a stalled connection would block the notebook indefinitely.

    Returns:
    - The decoded JSON body exactly as sent by the API. No per-day extraction is
      done here. Error responses are returned the same way rather than raised
      (e.g. a dict with 'cod' and 'message' keys for a 401), so callers should
      inspect the payload before relying on it.
    """
    url = "https://api.openweathermap.org/data/3.0/onecall"
    params = {
        "lat": lat,
        "lon": lon,
        # Only the daily section is requested; every other section is excluded
        "exclude": "current,minutely,hourly,alerts",
        "units": "metric",  # or 'imperial' for Fahrenheit
        "appid": api_key
    }

    response = requests.get(url, params=params, timeout=timeout)

    # NOTE(review): extracting per-day values (presumably day['dt'] and
    # day['temp']['max'] from a 'daily' list) is left to the caller until a
    # successful response can be inspected — confirm the payload shape first.
    return response.json()

In [28]:

# Coordinates for Drummoyne, NSW
lat, lon = -33.8556, 151.1535

# The API key is read from the environment rather than being written into the notebook
daily_max_temps = get_daily_max_temperature(
    api_key=os.environ["OPEN_WEATHER_KEY"],
    lat=lat,
    lon=lon,
)

print(daily_max_temps)

#for forecast in daily_max_temps:
#    print(f"Date: {forecast['date']}, Max Temp: {forecast['max_temp']}°C")

{'cod': 401, 'message': 'Please note that using One Call 3.0 requires a separate subscription to the One Call by Call plan. Learn more here https://openweathermap.org/price. If you have a valid subscription to the One Call by Call plan, but still receive this error, then please see https://openweathermap.org/faq#error401 for more info.'}
