# Assignment 3

## AI usage

## Log



## Links 

- Github: https://github.com/Satheris/IND320_SMAA
- Streamlit app: https://ind320smaa-2eg32uba6uhmrknkwtxzar.streamlit.app/

## Coding 

### Imports 

In [79]:
import pandas as pd
import numpy as np
import plotly.express as px
import pymongo
import streamlit as st
import matplotlib
matplotlib.use('TkAgg')
import matplotlib.pyplot as plt

import openmeteo_requests
import requests_cache
from retry_requests import retry

from scipy.fft import dct, idct
from statsmodels.tsa.seasonal import STL

### Download from openmeteo API

In [19]:
osloLat = 59.9127
osloLong = 10.7461

bergenLat = 60.393
bergenLong = 5.3242

trdLat = 63.4305
trdLong = 10.3951

tromsLat = 69.6489
tromsLong = 18.9551

kristLat = 58.1467
kristLong = 7.9956

citydict = {'priceArea': ['NO1', 'NO2', 'NO3', 'NO4', 'NO5'],
            'city': ['Oslo', 'Kristiansand', 'Bergen', 'Tromsø', 'Trondheim'],
            'longitude': [osloLong, kristLong, bergenLong, tromsLong, trdLong],
            'latitude': [osloLat, kristLat, bergenLat, tromsLat, trdLat]}


df = pd.DataFrame(citydict)

df

Unnamed: 0,priceArea,city,longitude,latitude
0,NO1,Oslo,10.7461,59.9127
1,NO2,Kristiansand,7.9956,58.1467
2,NO3,Bergen,5.3242,60.393
3,NO4,Tromsø,18.9551,69.6489
4,NO5,Trondheim,10.3951,63.4305


In [20]:
# Function for reading openmeteo data

def openmeteo_download(longitude, latitude, year):
    # Setup the Open-Meteo API client with cache and retry on error
    cache_session = requests_cache.CachedSession('.cache', expire_after = 3600)
    retry_session = retry(cache_session, retries = 5, backoff_factor = 0.2)
    openmeteo = openmeteo_requests.Client(session = retry_session)

    # Make sure all required weather variables are listed here
    # The order of variables in hourly or daily is important to assign them correctly below
    url = "https://archive-api.open-meteo.com/v1/archive"
    params = {
        "latitude": latitude,
        "longitude": longitude,
        "start_date": f"{year}-01-01",
        "end_date": f"{year}-12-31",
        "hourly": ["temperature_2m", "precipitation", "wind_speed_10m", "wind_direction_10m", "wind_gusts_10m"],
        "models": "era5",
        "timezone": "Europe/Berlin",
        "wind_speed_unit": "ms",
    }
    responses = openmeteo.weather_api(url, params=params)

    return responses

In [21]:
responses = openmeteo_download(bergenLong, bergenLat, 2019)

# Process first location. Add a for-loop for multiple locations or weather models
response = responses[0]
print(f"Coordinates: {response.Latitude()}°N {response.Longitude()}°E")
print(f"Elevation: {response.Elevation()} m asl")
print(f"Timezone: {response.Timezone()}{response.TimezoneAbbreviation()}")
print(f"Timezone difference to GMT+0: {response.UtcOffsetSeconds()}s")

# Process hourly data. The order of variables needs to be the same as requested.
hourly = response.Hourly()
hourly_temperature_2m = hourly.Variables(0).ValuesAsNumpy()
hourly_wind_direction_10m = hourly.Variables(1).ValuesAsNumpy()
hourly_wind_speed_10m = hourly.Variables(2).ValuesAsNumpy()
hourly_wind_gusts_10m = hourly.Variables(3).ValuesAsNumpy()
hourly_precipitation = hourly.Variables(4).ValuesAsNumpy()

hourly_data = {"date": pd.date_range(
	start = pd.to_datetime(hourly.Time(), unit = "s", utc = True).tz_convert('Europe/Oslo'),
	end =  pd.to_datetime(hourly.TimeEnd(), unit = "s", utc = True).tz_convert('Europe/Oslo'),
	freq = pd.Timedelta(seconds = hourly.Interval()),
	inclusive = "left",
)}

hourly_data["temperature_2m"] = hourly_temperature_2m
hourly_data["wind_direction_10m"] = hourly_wind_direction_10m
hourly_data["wind_speed_10m"] = hourly_wind_speed_10m
hourly_data["wind_gusts_10m"] = hourly_wind_gusts_10m
hourly_data["precipitation"] = hourly_precipitation

df = pd.DataFrame(data = hourly_data)
df.head()

Coordinates: 60.5°N 5.25°E
Elevation: 17.0 m asl
Timezone: b'Europe/Berlin'b'GMT+1'
Timezone difference to GMT+0: 3600s


Unnamed: 0,date,temperature_2m,wind_direction_10m,wind_speed_10m,wind_gusts_10m,precipitation
0,2019-01-01 00:00:00+01:00,6.7,0.4,11.85327,260.776154,22.700001
1,2019-01-01 01:00:00+01:00,6.55,0.5,13.322162,277.765076,24.4
2,2019-01-01 02:00:00+01:00,6.8,0.9,13.505925,296.375275,22.299999
3,2019-01-01 03:00:00+01:00,6.85,0.7,14.621901,310.006195,23.700001
4,2019-01-01 04:00:00+01:00,6.55,0.6,15.487092,314.215271,27.4


### Outliers and anomalies 

In [31]:
dct_trend = dct(df['temperature_2m'])

fig = px.line(dct_trend)

# fig = px.line(df, x='date', y='temperature_2m')

cos_total = pd.Series()
for i, k in enumerate(dct_trend[:40]):
    cos_i = pd.Series(k*np.cos(np.pi*i*df['date']))
    cos_i.combine()

fig.show()

TypeError: cannot perform __rmul__ with this index type: DatetimeArray

In [None]:
# starttime til datetime 

# dt.tz_convert('Europe/Oslo')

# merge(on='startTime')

# response.Hourly

### Seasonal-Trend decomposition using LOESS (STL)

- Perform LOESS on the production data from elhub (downloaded in part 2 of the project) and plot its decomposition.
- Let the electricity price area, production group, period length, seasonal smoother, trend smoother and robust (true/false) be parameters, and give each of them sensible defaults.
- Wrap this in a function that returns the plot, and test the function.

In [36]:
def init_connection():
    return pymongo.MongoClient(st.secrets["mongo"]["uri"])

client = init_connection()

# Send a ping to confirm a successful connection
try:
    client.admin.command('ping')
    print("Pinged your deployment. You successfully connected to MongoDB!")
except Exception as e:
    print(e)

Pinged your deployment. You successfully connected to MongoDB!


In [39]:
# Selecting a database and a collection
database = client['project']
collection = database['data']

try: 
    documents = collection.find({})
    documents = pd.DataFrame(list(documents))
except Exception as e:
    print(e)

In [61]:
df_elhub = documents.sort_values(by=['priceArea', 'productionGroup', 'startTime'])\
                .reset_index().set_index('_id').drop('index', axis=1)
df_elhub.head()

Unnamed: 0_level_0,priceArea,productionGroup,startTime,quantityKwh
_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
68ff61ddf09eaf22c14d6fae,NO1,hydro,2021-01-01T00:00:00+01:00,2507716.75
68ff61def09eaf22c14f86ab,NO1,hydro,2021-01-01T01:00:00+01:00,2494728.0
68ff61ddf09eaf22c14d7ac9,NO1,hydro,2021-01-01T02:00:00+01:00,2486777.5
68ff61def09eaf22c14ed531,NO1,hydro,2021-01-01T03:00:00+01:00,2461176.0
68ff61ddf09eaf22c14ddaec,NO1,hydro,2021-01-01T04:00:00+01:00,2466969.25


In [64]:
area = 'NO1'
prodGroup = 'hydro'

sub_df_elhub = df_elhub[(df_elhub['priceArea'] == area) & (df_elhub['productionGroup'] == prodGroup)]

sub_df_elhub = pd.DataFrame(sub_df_elhub['quantityKwh'], index=sub_df_elhub['startTime'])

In [73]:
periodLength = 7
seasonalSmoother = 7
trendSmoother = None
robust = False

stl = STL(sub_df_elhub, period=periodLength, seasonal=seasonalSmoother, trend=trendSmoother, robust=robust)

res = stl.fit()
fig = res.plot()

In [80]:
fig.show()


FigureCanvasAgg is non-interactive, and thus cannot be shown



### Spectrogram

### Testing for Streamlit app