In [1]:
import os
import sys
# Put the parent directory on the import path so that modules living one
# level above this notebook can be imported. The membership guard keeps the
# entry from being appended again when the cell is re-run.
module_path = os.path.abspath(os.pardir)
if module_path not in sys.path:
    sys.path.append(module_path)

In [2]:
from io import BytesIO

import warnings
# Installs a global "ignore" filter: no warning of any category is shown for
# the rest of the kernel session (this includes deprecation warnings).
warnings.filterwarnings(action='ignore')

import pandas as pd
import matplotlib.pyplot as plt

from joblib import Parallel, delayed

from google.cloud import bigquery

from data_collector import BQManager, GCP, Omie, MarginalPriceParams, OfferCurvesParams, OfferCurvesUnitsParams

## Upload tables to BigQuery

In [None]:
# Load-job settings used for every yearly upload below: the schema is
# auto-detected, the destination table is created if it does not exist, and
# incoming rows are appended to whatever the table already holds.
# NOTE(review): with WRITE_APPEND, re-running this cell presumably loads the
# same rows again unless upload_bq_year_file guards against it - confirm.
job_config = bigquery.job.LoadJobConfig(
    autodetect=True,
    create_disposition=bigquery.job.CreateDisposition.CREATE_IF_NEEDED,
    write_disposition=bigquery.job.WriteDisposition.WRITE_APPEND,
)

# range() excludes its stop value, so this iterates 2016 through 2021.
years = range(2016, 2022)

rule = "=" * 15
for year in years:
    # Banner line to separate each year's output in the cell log.
    print(rule + f" {year} " + rule)
    Omie.upload_bq_year_file(
        omie_parameter=OfferCurvesUnitsParams,
        year=year,
        job_config=job_config,
    )

In [None]:
# Load-job settings for the daily uploads: auto-detected schema, table
# created on demand, rows appended to existing data.
job_config = bigquery.job.LoadJobConfig(
    autodetect=True,
    create_disposition=bigquery.job.CreateDisposition.CREATE_IF_NEEDED,
    write_disposition=bigquery.job.WriteDisposition.WRITE_APPEND,
)

# One timestamp per calendar day, both endpoints included.
dates = pd.date_range(start="2021-01-01", end="2022-02-19", freq="D")

# Lazily describe one upload call per day; nothing runs until Parallel
# consumes the generator.
upload_tasks = (
    delayed(Omie.upload_bq_date_file)(
        omie_parameter=OfferCurvesParams,
        date=date,
        job_config=job_config,
    )
    for date in dates
)

# n_jobs=-1 asks joblib to use all available CPUs. The list of return values
# is bound to `_` so the cell does not echo it.
_ = Parallel(n_jobs=-1)(upload_tasks)

## Generate hourly tables

In [3]:
# BigQuery client bound to the project configured in GCP.PROJECT_ID, wrapped
# by the project's BQManager helper that the following cells call.
client = bigquery.Client(project=GCP.PROJECT_ID)
bq_manager = BQManager(client=client)

In [None]:
# Build the hourly tables for the offer-curve-units parameter, passing the
# `offered` member of its OfferStatus enum as the status to use.
bq_manager.generate_hourly_offer_curve_tables(
    curve_parameter=OfferCurvesUnitsParams,
    offer_status=OfferCurvesUnitsParams.OfferStatus.offered,
)

## Download table

In [4]:
# Numeric suffix of the table to download.
# NOTE(review): presumably the hour index of one of the hourly tables
# generated above - confirm against generate_hourly_offer_curve_tables.
table_suffix = 2

# Table reference in `<dataset>.<table>_<suffix>` form.
table_ref = (
    f"{GCP.BigQuery.Omie.DATASET_ID}"
    f".{GCP.BigQuery.Omie.OFFER_CURVES_UNITS_TABLE_ID}"
    f"_{table_suffix}"
)

# Pull the whole table into a DataFrame.
df = bq_manager.download_query_to_df(query=f"SELECT * FROM {table_ref}")

Downloading:   0%|          | 0/3475532 [00:00<?, ?rows/s]

In [16]:
# Summarise the downloaded frame: row count, column names and dtypes, and
# memory footprint.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3475532 entries, 0 to 3475531
Data columns (total 8 columns):
 #   Column      Dtype              
---  ------      -----              
 0   hour        int64              
 1   date        datetime64[ns, UTC]
 2   country     object             
 3   unit        object             
 4   offer_type  object             
 5   energy      float64            
 6   price       float64            
 7   status      object             
dtypes: datetime64[ns, UTC](1), float64(2), int64(1), object(4)
memory usage: 212.1+ MB
