In [1]:
import os
import sys
# Resolve the parent directory to an absolute path and add it to sys.path
# once, so re-running this cell never appends a duplicate entry.
module_path = os.path.abspath(os.pardir)
already_importable = module_path in sys.path
if not already_importable:
    sys.path.append(module_path)

In [2]:
from io import BytesIO

import warnings
warnings.filterwarnings(action='ignore')

import pandas as pd
import matplotlib.pyplot as plt

from google.cloud import bigquery

from data_collector import Omie, MarginalPriceParams, OfferCurvesParams

In [None]:
# File family and day to inspect.
filename = "curva_pbc"
date = "20211001"

# Round-trip through pandas so `date` always ends up formatted as YYYYMMDD.
date = pd.to_datetime(date).strftime("%Y%m%d")
# Daily file name: <family>_<YYYYMMDD> followed by the hard-coded ".1" suffix.
filename_date = "_".join((filename, date)) + ".1"

# Column names handed to read_csv(names=...) when the curve files are parsed.
cols = [
    "hour",
    "date",
    "country",
    "unit",
    "offer_type",
    "energy",
    "price",
    "status",
]

In [None]:
# Fetch the payload for the selected family/day through the project's Omie
# helper; the next cell wraps the result in BytesIO for parsing.
content = Omie._download_content(
    family_file=filename,
    filename=filename_date,
)

In [None]:
# Wrap the downloaded payload in an in-memory binary buffer so it can be passed
# to pd.read_csv like a file. The buffer is consumed by the read, so re-run this
# cell before re-running the parsing cell.
filebytes = BytesIO(content)

In [None]:
# Parse the ';'-separated curve file: the first 3 lines are skipped and the
# column names are supplied from `cols`. index_col=False stops pandas from
# promoting the first column to the index.
df = pd.read_csv(filebytes, delimiter=";", index_col=False, skiprows=3, names=cols, encoding="latin-1")
# Drop the `unit` column, then discard every row that has a missing value.
df = df.drop(columns=["unit"]).dropna()

# The numeric columns arrive as text with '.' as thousands separator and ','
# as decimal mark. regex=False pins both patterns to literal matches, so the
# '.' can never be read as the regex "any character" wildcard, whatever the
# installed pandas version uses as its default for `regex`.
for num_col in ("price", "energy"):
    df[num_col] = (
        df[num_col]
        .str.replace(".", "", regex=False)
        .str.replace(",", ".", regex=False)
        .astype(float)
    )

In [None]:
# Display only: shows `df` without rows that are entirely NaN. The result is
# not assigned, so `df` itself is left unchanged by this cell.
df.dropna(axis=0, how="all")

In [None]:
# Keep only the rows whose `hour` value is 1.
df_1 = df.loc[df["hour"].eq(1)]

In [None]:
# Rows with offer_type "V" and status "O" (plotted below as the "ask" curve),
# ordered from cheapest to most expensive; `agg_energy` is the running total
# of energy along that price order.
df_offer = (
    df_1.loc[df_1["offer_type"].eq("V") & df_1["status"].eq("O")]
    .sort_values(by="price")
    .assign(agg_energy=lambda curve: curve["energy"].cumsum())
)

In [None]:
# Rows with offer_type "C" and status "O" (plotted below as the "bid" curve),
# ordered from most to least expensive; `agg_energy` is the running total of
# energy along that price order.
df_demand = (
    df_1.loc[df_1["offer_type"].eq("C") & df_1["status"].eq("O")]
    .sort_values(by="price", ascending=False)
    .assign(agg_energy=lambda curve: curve["energy"].cumsum())
)

In [None]:
# Draw both cumulative curves on one set of axes.
fig, ax = plt.subplots(figsize=(10, 7))
for curve, label in ((df_offer, "ask"), (df_demand, "bid")):
    curve.plot(x="agg_energy", y="price", ax=ax, label=label)
# Hard-coded viewing window; presumably chosen to zoom on the region where the
# two curves meet for this particular day -- confirm before reusing elsewhere.
ax.set_xlim(11000, 24000)
ax.set_ylim(-10, 300)

In [None]:
# Parse the per-unit curve file from a path relative to the notebook's working
# directory. NOTE(review): no cell in this notebook writes this file, so it must
# already exist on disk.
df_units = pd.read_csv(
    "curva_pbc_uof_20211001.1",
    delimiter=";",
    index_col=False,
    skiprows=3,
    encoding="latin-1",
    names=cols,
)
# Unlike the aggregate-curve cell, rows with missing values are kept here
# (the dropna call was disabled).

# '.' is the thousands separator and ',' the decimal mark in the raw text.
# regex=False pins both patterns to literal matches so the '.' is never read as
# the regex wildcard, regardless of the installed pandas version's default.
for num_col in ("price", "energy"):
    df_units[num_col] = (
        df_units[num_col]
        .str.replace(".", "", regex=False)
        .str.replace(",", ".", regex=False)
        .astype(float)
    )

In [None]:
# Display the parsed per-unit frame (pandas truncates the rendering of long frames).
df_units

In [None]:
# Per-unit rows for hour 1 with offer_type "V" and status "O", cheapest first --
# the same selection used for `df_offer`, applied to the per-unit file.
df_units_offer = df_units.loc[
    df_units["hour"].eq(1) & df_units["offer_type"].eq("V") & df_units["status"].eq("O")
].sort_values(by="price")

In [None]:
# Display the aggregate offer curve for comparison with the per-unit rows below.
df_offer

In [None]:
# Show the 50 lowest-priced per-unit offers (the frame is sorted by price).
df_units_offer.head(50)

In [None]:
# Switch to the per-unit file family and choose the month to fetch.
# Note: this rebinds `filename`, which earlier cells set to "curva_pbc".
filename = "curva_pbc_uof"
year = 2016
month = 1
# Year followed by the zero-padded month, e.g. 2016 and 1 -> "201601".
year_month = "{}{:02d}".format(year, month)

# Monthly archive name: <family>_<YYYYMM>.zip
date_file_pattern = "_".join((filename, year_month))
filename_zip = f"{date_file_pattern}.zip"

In [None]:
# Fetch the monthly archive through the project's Omie helper. This rebinds
# `content`, which earlier held the single-day payload.
content = Omie._download_content(
    family_file=filename,
    filename=filename_zip,
)

In [None]:
# Exploratory check of what the download helper returned.
type(content)

In [None]:
# Open the downloaded archive via the project helper. Later cells call
# .namelist() and .open() on the result, so it is presumably a
# zipfile.ZipFile-like object -- confirm against the helper.
unzip_file = Omie._decompress_zip(zip_content=content)

In [None]:
# One timestamp per calendar day of the selected month.
month_start = pd.to_datetime(f'{year}-{month}')
dates = pd.date_range(start=month_start.strftime("%Y-%m-%d"), periods=month_start.daysinmonth, freq="D")

# Parse each day's file from the archive and collect the frames for concatenation.
df_units_list = []
for day in dates:
    file_pattern = Omie.date_file_pattern.format(filename=filename, date_str=day.strftime("%Y%m%d"))
    file_list = [s for s in unzip_file.namelist() if file_pattern in s]

    # A day with no matching archive member used to crash on file_list[-1];
    # report it and move on so the remaining days are still loaded.
    if not file_list:
        print(f"No file found for {file_pattern}; skipping")
        continue

    if len(file_list) > 1:
        print(f"There are multiple files for {file_pattern}: {len(file_list)}")

    # When several members match, the last one in archive order is used.
    file = file_list[-1]

    # Close the archive member as soon as it has been parsed.
    # Assumes unzip_file.open() returns a context manager, as zipfile.ZipFile.open does.
    with unzip_file.open(name=file, mode="r") as filebytes:
        df = pd.read_csv(filepath_or_buffer=filebytes,
                         delimiter=";", index_col=False, skiprows=3, encoding="latin-1",
                         names=cols)
    df = Omie._parse_floats(df=df, col_name="price")
    df = Omie._parse_floats(df=df, col_name="energy")
    # Discard rows that have any missing value.
    df = df.dropna()

    df_units_list.append(df)

In [None]:
# Stack the per-day frames into one frame with a fresh 0..n-1 index.
# pd.concat raises ValueError if the list is empty (no day was loaded).
df_curves_month = pd.concat(df_units_list, ignore_index=True)

In [None]:
# Display the combined month of curves (pandas truncates the rendering of long frames).
df_curves_month

In [None]:
# Display only: checks that the `date` column parses as day/month/year. The
# result is not stored, so `df_curves_month["date"]` keeps its original values.
pd.to_datetime(df_curves_month["date"], format="%d/%m/%Y")

In [None]:
# All status-"O" rows of unit "SBO3", ordered by date and then hour.
# NOTE(review): `date` is not converted to datetime in this notebook; if it holds
# dd/mm/YYYY strings the ordering is lexical, which only matches chronological
# order while all rows share one month -- confirm.
cc_unit_offers = df_curves_month.loc[
    df_curves_month["unit"].eq("SBO3") & df_curves_month["status"].eq("O")
].sort_values(by=["date", "hour"])

In [None]:
# Total energy offered by the unit in each (date, hour), repeated on every row
# of that group. The column was previously misspelled "total_enegy".
cc_unit_offers["total_energy"] = cc_unit_offers.groupby(["date", "hour"])["energy"].transform("sum")
# Each row's share of the energy-weighted price; summing these per (date, hour)
# gives the energy-weighted average price. A group whose total energy is 0
# yields NaN/inf here.
cc_unit_offers["weighted_price"] = (
    cc_unit_offers["price"] * cc_unit_offers["energy"] / cc_unit_offers["total_energy"]
)

In [None]:
# Display the unit's rows together with the derived columns.
cc_unit_offers

In [None]:
# Sum the per-row contributions within each (date, hour) to obtain the
# energy-weighted average price, and plot the resulting series.
(
    cc_unit_offers
    .groupby(["date", "hour"])["weighted_price"]
    .sum()
    .plot(figsize=(20, 7))
)

In [None]:
# First 50 values of the energy-weighted average price per (date, hour).
(
    cc_unit_offers
    .groupby(["date", "hour"])["weighted_price"]
    .sum()
    .head(50)
)

In [None]:
# First 50 rows of the unit's offers, for a row-level look at the inputs.
cc_unit_offers.head(50)

In [None]:
# Fetch the 2020 data for the marginal-price parameter set through the project
# helper. This rebinds `df`, which earlier cells used for curve data.
df = Omie.download_year_file(
    omie_parameter=MarginalPriceParams,
    year=2020,
)

In [None]:
# Fetch the 2020 offer-curve data and keep it as `df_2`. The following cells
# read df_2["status"], but nothing in the notebook defined `df_2`, so they
# failed on a fresh kernel.
# NOTE(review): assumes download_year_file returns the DataFrame those cells
# expect -- confirm against the helper.
df_2 = Omie.download_year_file(omie_parameter=OfferCurvesParams, year=2020)
# Keep the result as the cell's displayed output, as before.
df_2

In [None]:
# Count how many rows carry each `status` value.
df_2["status"].value_counts()

In [None]:
# Keep only the rows whose status equals OfferCurvesParams.OfferStatus.offered.
# `df_2` is overwritten, so the unfiltered frame is gone after this cell.
df_2 = df_2.loc[df_2["status"].eq(OfferCurvesParams.OfferStatus.offered)]

In [None]:
# Read the first 200 data rows of a CSV from a path relative to the working
# directory. NOTE(review): no cell in this notebook writes this file, so it must
# already exist on disk. This also rebinds `df`.
df = pd.read_csv("test_cves_2020.csv", nrows=200)

In [None]:
# Preview the first rows of the sample that was just loaded.
df.head()

In [8]:
# Load-job settings shared by every upload below: let BigQuery infer the schema,
# create the destination table if it does not exist, and append to existing rows.
job_config = bigquery.job.LoadJobConfig()
job_config.autodetect = True
job_config.create_disposition = bigquery.job.CreateDisposition.CREATE_IF_NEEDED
job_config.write_disposition = bigquery.job.WriteDisposition.WRITE_APPEND

# Upload one year per iteration. The loop variable has to be forwarded: the call
# previously passed the fixed year=2020, so with WRITE_APPEND the 2020 rows were
# appended once per iteration and 2016-2019 were never uploaded.
# NOTE(review): range(2016, 2020) stops at 2019. If 2020 should also be loaded by
# this cell, widen it to range(2016, 2021). Rows already appended by earlier
# runs are not removed by this fix.
for year in range(2016, 2020):
    print("="*15 +  f" {year} " + "="*15)
    Omie.upload_year_file_gcp(omie_parameter=OfferCurvesParams, year=year, job_config=job_config)



2022-02-17 21:18:07,548|root|INFO|Uploading batch 30 to BigQuery ...
2022-02-17 21:18:21,888|root|INFO|Uploading batch 60 to BigQuery ...
2022-02-17 21:18:45,150|root|INFO|Uploading batch 90 to BigQuery ...
2022-02-17 21:19:09,331|root|INFO|Uploading batch 120 to BigQuery ...
2022-02-17 21:19:27,685|root|INFO|Uploading batch 150 to BigQuery ...
2022-02-17 21:19:50,227|root|INFO|Uploading batch 180 to BigQuery ...
2022-02-17 21:20:05,767|root|INFO|Uploading batch 210 to BigQuery ...
2022-02-17 21:20:19,303|root|INFO|Uploading batch 240 to BigQuery ...
2022-02-17 21:20:33,433|root|INFO|Uploading batch 270 to BigQuery ...
2022-02-17 21:20:46,790|root|INFO|Uploading batch 300 to BigQuery ...
2022-02-17 21:21:03,217|root|INFO|Uploading batch 330 to BigQuery ...
2022-02-17 21:21:17,477|root|INFO|Uploading batch 360 to BigQuery ...
2022-02-17 21:21:24,400|root|INFO|Uploading batch 365 to BigQuery ...




2022-02-17 21:23:28,309|root|INFO|Uploading batch 30 to BigQuery ...
2022-02-17 21:23:41,253|root|INFO|Uploading batch 60 to BigQuery ...
2022-02-17 21:23:54,305|root|INFO|Uploading batch 90 to BigQuery ...
2022-02-17 21:24:08,496|root|INFO|Uploading batch 120 to BigQuery ...
2022-02-17 21:24:20,195|root|INFO|Uploading batch 150 to BigQuery ...
2022-02-17 21:24:32,614|root|INFO|Uploading batch 180 to BigQuery ...
2022-02-17 21:24:45,573|root|INFO|Uploading batch 210 to BigQuery ...
2022-02-17 21:24:58,536|root|INFO|Uploading batch 240 to BigQuery ...
2022-02-17 21:25:12,030|root|INFO|Uploading batch 270 to BigQuery ...
2022-02-17 21:25:25,859|root|INFO|Uploading batch 300 to BigQuery ...
2022-02-17 21:25:39,828|root|INFO|Uploading batch 330 to BigQuery ...
2022-02-17 21:25:53,945|root|INFO|Uploading batch 360 to BigQuery ...
2022-02-17 21:26:00,142|root|INFO|Uploading batch 365 to BigQuery ...




2022-02-17 21:29:49,442|root|INFO|Uploading batch 30 to BigQuery ...
2022-02-17 21:30:00,705|root|INFO|Uploading batch 60 to BigQuery ...
2022-02-17 21:30:12,540|root|INFO|Uploading batch 90 to BigQuery ...
2022-02-17 21:30:24,400|root|INFO|Uploading batch 120 to BigQuery ...
2022-02-17 21:30:37,034|root|INFO|Uploading batch 150 to BigQuery ...
2022-02-17 21:30:49,451|root|INFO|Uploading batch 180 to BigQuery ...
2022-02-17 21:31:04,009|root|INFO|Uploading batch 210 to BigQuery ...
2022-02-17 21:31:20,660|root|INFO|Uploading batch 240 to BigQuery ...
2022-02-17 21:31:35,494|root|INFO|Uploading batch 270 to BigQuery ...
2022-02-17 21:31:50,484|root|INFO|Uploading batch 300 to BigQuery ...
2022-02-17 21:32:05,502|root|INFO|Uploading batch 330 to BigQuery ...
2022-02-17 21:32:36,121|root|INFO|Uploading batch 360 to BigQuery ...
2022-02-17 21:32:47,146|root|INFO|Uploading batch 365 to BigQuery ...




2022-02-17 21:34:24,433|root|INFO|Uploading batch 30 to BigQuery ...
2022-02-17 21:34:40,411|root|INFO|Uploading batch 60 to BigQuery ...
2022-02-17 21:34:54,326|root|INFO|Uploading batch 90 to BigQuery ...
2022-02-17 21:35:09,852|root|INFO|Uploading batch 120 to BigQuery ...
2022-02-17 21:35:28,629|root|INFO|Uploading batch 150 to BigQuery ...
2022-02-17 21:35:43,927|root|INFO|Uploading batch 180 to BigQuery ...
2022-02-17 21:35:58,314|root|INFO|Uploading batch 210 to BigQuery ...
2022-02-17 21:36:11,892|root|INFO|Uploading batch 240 to BigQuery ...
2022-02-17 21:36:26,077|root|INFO|Uploading batch 270 to BigQuery ...
2022-02-17 21:36:41,449|root|INFO|Uploading batch 300 to BigQuery ...
2022-02-17 21:36:56,185|root|INFO|Uploading batch 330 to BigQuery ...
2022-02-17 21:37:09,636|root|INFO|Uploading batch 360 to BigQuery ...
2022-02-17 21:37:16,751|root|INFO|Uploading batch 365 to BigQuery ...


In [15]:
# Fresh load-job configuration: append to the destination table, creating it
# when missing, and let BigQuery infer the schema.
job_config = bigquery.job.LoadJobConfig()
job_config.write_disposition = bigquery.job.WriteDisposition.WRITE_APPEND
job_config.create_disposition = bigquery.job.CreateDisposition.CREATE_IF_NEEDED
job_config.autodetect = True

# Every calendar day from 2021-01-04 through 2022-02-18, both inclusive.
dates = pd.date_range(start="2021-01-04", end="2022-02-18", freq="D")

# Upload the files one day at a time, printing each timestamp as a progress
# marker. `date` here rebinds the string of the same name set near the top of
# the notebook.
for date in dates:
    print(date)
    day_str = date.strftime("%Y-%m-%d")
    Omie.upload_date_file_gcp(
        omie_parameter=OfferCurvesParams,
        date=day_str,
        job_config=job_config,
    )

2021-01-04 00:00:00
2021-01-05 00:00:00
2021-01-06 00:00:00
2021-01-07 00:00:00
2021-01-08 00:00:00
2021-01-09 00:00:00
2021-01-10 00:00:00
2021-01-11 00:00:00
2021-01-12 00:00:00
2021-01-13 00:00:00
2021-01-14 00:00:00
2021-01-15 00:00:00
2021-01-16 00:00:00
2021-01-17 00:00:00
2021-01-18 00:00:00
2021-01-19 00:00:00
2021-01-20 00:00:00
2021-01-21 00:00:00
2021-01-22 00:00:00
2021-01-23 00:00:00
2021-01-24 00:00:00
2021-01-25 00:00:00
2021-01-26 00:00:00
2021-01-27 00:00:00
2021-01-28 00:00:00
2021-01-29 00:00:00
2021-01-30 00:00:00
2021-01-31 00:00:00
2021-02-01 00:00:00
2021-02-02 00:00:00
2021-02-03 00:00:00
2021-02-04 00:00:00
2021-02-05 00:00:00
2021-02-06 00:00:00
2021-02-07 00:00:00
2021-02-08 00:00:00
2021-02-09 00:00:00
2021-02-10 00:00:00
2021-02-11 00:00:00
2021-02-12 00:00:00
2021-02-13 00:00:00
2021-02-14 00:00:00
2021-02-15 00:00:00
2021-02-16 00:00:00
2021-02-17 00:00:00
2021-02-18 00:00:00
2021-02-19 00:00:00
2021-02-20 00:00:00
2021-02-21 00:00:00
2021-02-22 00:00:00


2022-02-18 00:00:00
