In [1]:
import os
import sys
# Put the parent directory on the import path (once) so that modules living
# next to this notebook's folder can be imported.
module_path = os.path.abspath('..')
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

In [2]:
from io import BytesIO

import warnings
# Install a filter that ignores every warning (no category or module restriction is given).
warnings.filterwarnings(action='ignore')

import pandas as pd
import matplotlib.pyplot as plt

from google.cloud import bigquery

from data_collector import Omie, MarginalPriceParams, OfferCurvesParams

In [None]:
# File family to fetch and the day of interest.
filename = "curva_pbc"
# Round-trip through pandas so the day always ends up as a compact YYYYMMDD string.
date = pd.to_datetime("20211001").strftime("%Y%m%d")
# Name of the daily file: <family>_<YYYYMMDD>.1
filename_date = f"{filename}_{date}.1"

# Column names applied to the header-less CSV payload.
cols = [
    "hour",
    "date",
    "country",
    "unit",
    "offer_type",
    "energy",
    "price",
    "status",
]

In [None]:
# Fetch the raw payload of the daily file through the project's Omie helper.
content = Omie._download_content(
    filename=filename_date,
    family_file=filename,
)

In [None]:
# Wrap the downloaded payload in an in-memory binary buffer so it can be read like a file.
filebytes = BytesIO(content)

In [None]:
# Rewind the buffer first so this cell can be re-run: a previous read leaves
# the BytesIO cursor at the end and read_csv would then see no data.
filebytes.seek(0)

# Parse the ';'-separated payload: the first three lines are skipped and the
# column names come from `cols` because the file carries no usable header.
df = pd.read_csv(filebytes, delimiter=";", index_col=False, skiprows=3, names=cols, encoding="latin-1")
# The "unit" column is discarded, then every row with a missing value is removed.
df = df.drop(columns=["unit"]).dropna()

# Numbers use '.' as thousands separator and ',' as decimal mark.
# regex=False makes both replacements literal on every pandas version
# (with regex semantics '.' would match any character).
df["price"] = (
    df["price"].str.replace(".", "", regex=False).str.replace(",", ".", regex=False).astype(float)
)
df["energy"] = (
    df["energy"].str.replace(".", "", regex=False).str.replace(",", ".", regex=False).astype(float)
)

In [None]:
# Preview of df without the rows whose values are all missing.
# The result is only displayed, not assigned, so df itself is unchanged.
df.dropna(axis=0, how="all")

In [None]:
# Keep only the rows belonging to hour 1.
df_1 = df.loc[df["hour"].eq(1)]

In [None]:
# Rows of type "V" with status "O", cheapest first, plus the running total of
# energy along that price ordering.
df_offer = (
    df_1
    .loc[df_1["offer_type"].eq("V") & df_1["status"].eq("O")]
    .sort_values("price")
    .assign(agg_energy=lambda frame: frame["energy"].cumsum())
)

In [None]:
# Rows of type "C" with status "O", most expensive first, plus the running
# total of energy along that price ordering.
df_demand = (
    df_1
    .loc[df_1["offer_type"].eq("C") & df_1["status"].eq("O")]
    .sort_values("price", ascending=False)
    .assign(agg_energy=lambda frame: frame["energy"].cumsum())
)

In [None]:
# Draw both cumulative curves (price against aggregated energy) on one axes.
fig, ax = plt.subplots(figsize=(10, 7))
for curve, curve_label in ((df_offer, "ask"), (df_demand, "bid")):
    curve.plot(x="agg_energy", y="price", ax=ax, label=curve_label)

# Restrict the view to a fixed window on both axes.
ax.set_xlim(11000, 24000)
ax.set_ylim(-10, 300)

In [None]:
# Read the per-unit curve file from the working directory: ';'-separated,
# first three lines skipped, column names taken from `cols`.
df_units = pd.read_csv(
    "curva_pbc_uof_20211001.1",
    delimiter=";",
    index_col=False,
    skiprows=3,
    encoding="latin-1",
    names=cols,
)
# Rows with missing values are kept here (no dropna is applied).

# Numbers use '.' as thousands separator and ',' as decimal mark.
# regex=False makes both replacements literal on every pandas version
# (with regex semantics '.' would match any character).
df_units["price"] = (
    df_units["price"].str.replace(".", "", regex=False).str.replace(",", ".", regex=False).astype(float)
)
df_units["energy"] = (
    df_units["energy"].str.replace(".", "", regex=False).str.replace(",", ".", regex=False).astype(float)
)

In [None]:
# Display the parsed per-unit frame.
df_units

In [None]:
# Per-unit rows for hour 1 with offer type "V" and status "O", cheapest first.
df_units_offer = (
    df_units
    .loc[
        df_units["hour"].eq(1)
        & df_units["offer_type"].eq("V")
        & df_units["status"].eq("O")
    ]
    .sort_values("price")
)

In [None]:
# Display the aggregated "V"/"O" curve built earlier.
df_offer

In [None]:
# Show the 50 lowest-priced rows (the frame is sorted by ascending price).
df_units_offer.head(50)

In [None]:
# File family and the month whose archive will be fetched.
filename = "curva_pbc_uof"
year, month = 2016, 1
# Month key in YYYYMM form (month zero-padded to two digits).
year_month = f"{year}{month:02d}"

# Archive name: <family>_<YYYYMM>.zip
date_file_pattern = f"{filename}_{year_month}"
filename_zip = f"{date_file_pattern}.zip"

In [None]:
# Fetch the raw payload of the monthly archive through the project's Omie helper.
content = Omie._download_content(
    filename=filename_zip,
    family_file=filename,
)

In [None]:
# Inspect the Python type of the downloaded payload.
type(content)

In [None]:
# Pass the downloaded payload to the project's zip helper; the returned object
# is later queried with namelist() and open().
unzip_file = Omie._decompress_zip(zip_content=content)

In [None]:
# One entry per calendar day of the selected month.
month_start = pd.Timestamp(year=year, month=month, day=1)
dates = pd.date_range(start=month_start, periods=month_start.daysinmonth, freq="D")

# Collect one parsed frame per day; they are concatenated in a later cell.
df_units_list = []
for day in dates:
    file_pattern = Omie.date_file_pattern.format(filename=filename, date_str=day.strftime("%Y%m%d"))
    file_list = [s for s in unzip_file.namelist() if file_pattern in s]

    # A day without any matching archive member used to crash with IndexError
    # on file_list[-1]; report it and carry on with the remaining days.
    if not file_list:
        print(f"There are no files for {file_pattern}, skipping")
        continue

    if len(file_list) > 1:
        print(f"There are multiple files for {file_pattern}: {len(file_list)}")

    # When several members match, the last one in archive order is used.
    file = file_list[-1]

    # The context manager closes the archive member once it has been parsed.
    with unzip_file.open(name=file, mode="r") as filebytes:
        df = pd.read_csv(filepath_or_buffer=filebytes,
                         delimiter=";", index_col=False, skiprows=3, encoding="latin-1",
                         names=cols)

    df = Omie._parse_floats(df=df, col_name="price")
    df = Omie._parse_floats(df=df, col_name="energy")
    # Discard every row that still has a missing value after parsing.
    df = df.dropna()

    df_units_list.append(df)

In [None]:
# Stack the daily frames into a single frame with a fresh 0..n-1 index.
df_curves_month = pd.concat(objs=df_units_list, ignore_index=True)

In [None]:
# Display the concatenated monthly frame.
df_curves_month

In [None]:
# Preview the "date" column parsed with a day/month/year format.
# The result is only displayed, not assigned, so the column itself is unchanged.
pd.to_datetime(df_curves_month["date"], format="%d/%m/%Y")

In [None]:
# Rows of unit "SBO3" with status "O", ordered by date and then hour.
cc_unit_offers = (
    df_curves_month
    .loc[df_curves_month["unit"].eq("SBO3") & df_curves_month["status"].eq("O")]
    .sort_values(["date", "hour"])
)

In [None]:
# Total energy of each (date, hour) group, broadcast back onto every row of the group.
# (Column name corrected from the misspelled "total_enegy".)
cc_unit_offers["total_energy"] = cc_unit_offers.groupby(["date", "hour"])["energy"].transform("sum")
# Per-row contribution price * energy / group total; summing these within a
# (date, hour) group yields the energy-weighted average price of that group.
cc_unit_offers["weighted_price"] = (
    cc_unit_offers["price"] * cc_unit_offers["energy"] / cc_unit_offers["total_energy"]
)

In [None]:
# Display the unit's rows together with the derived columns.
cc_unit_offers

In [None]:
# Sum the per-row contributions within each (date, hour) group and plot the series.
weighted_price_by_slot = cc_unit_offers.groupby(["date", "hour"])["weighted_price"].sum()
weighted_price_by_slot.plot(figsize=(20, 7))

In [None]:
# First 50 values of the per-(date, hour) sum of weighted_price.
(
    cc_unit_offers
    .groupby(by=["date", "hour"])["weighted_price"]
    .sum()
    .head(n=50)
)

In [None]:
# Show the first 50 rows of the unit's frame.
cc_unit_offers.head(50)

In [None]:
# Download the 2020 file for the marginal-price parameter set.
df = Omie.download_year_file(
    year=2020,
    omie_parameter=MarginalPriceParams,
)

In [None]:
# Bind the downloaded 2020 offer curves to `df_2`: the cells that follow read
# that name, and it was never assigned, so they failed on a fresh kernel.
# NOTE(review): assumes download_year_file returns a DataFrame — confirm.
df_2 = Omie.download_year_file(omie_parameter=OfferCurvesParams, year=2020)
# Keep the value as the cell's last expression so it is still displayed.
df_2

In [None]:
# Count how many rows carry each distinct "status" value.
df_2["status"].value_counts()

In [None]:
# Narrow df_2 to the rows whose status equals the project's "offered" constant.
df_2 = df_2.loc[
    df_2["status"] == OfferCurvesParams.OfferStatus.offered
]

In [None]:
# Load only the first 200 data rows of the local CSV file.
df = pd.read_csv(
    filepath_or_buffer="test_cves_2020.csv",
    nrows=200,
)

In [None]:
# Show the first rows of the frame (pandas' default of five).
df.head()

In [None]:
# Load-job settings: schema autodetection on, create the destination table if
# it does not exist, and append to it otherwise.
job_config = bigquery.job.LoadJobConfig()
job_config.autodetect = True
job_config.create_disposition = bigquery.job.CreateDisposition.CREATE_IF_NEEDED
job_config.write_disposition = bigquery.job.WriteDisposition.WRITE_APPEND

# Upload one year per iteration (2016 through 2019; range() excludes 2020).
for year in range(2016, 2020):
    print("="*15 +  f" {year} " + "="*15)
    # Use the loop's year: the call previously hard-coded year=2020, which
    # appended the same year four times and never touched 2016-2019.
    Omie.upload_year_file_gcp(omie_parameter=OfferCurvesParams, year=year, job_config=job_config)

In [None]:
# Every calendar day in the inclusive range gets its own upload.
dates = pd.date_range(start="2021-01-04", end="2022-02-18", freq="D")

# Load-job settings: append to the destination, create it when missing,
# and let the schema be autodetected.
job_config = bigquery.job.LoadJobConfig()
job_config.write_disposition = bigquery.job.WriteDisposition.WRITE_APPEND
job_config.create_disposition = bigquery.job.CreateDisposition.CREATE_IF_NEEDED
job_config.autodetect = True

for date in dates:
    # Progress trace: one line per processed day.
    print(date)
    Omie.upload_date_file_gcp(
        omie_parameter=OfferCurvesParams,
        date=date.strftime("%Y-%m-%d"),
        job_config=job_config,
    )

In [None]:
# Download the marginal-price files for the 2016-2018 period.
df = Omie.download_period_file(
    omie_parameter=MarginalPriceParams,
    start_year=2016,
    end_year=2018,
)

In [None]:
# Display the result of the period download.
df

In [4]:
# Load-job settings: append to the destination, create it when missing,
# and let the schema be autodetected.
job_config = bigquery.job.LoadJobConfig()
job_config.write_disposition = bigquery.job.WriteDisposition.WRITE_APPEND
job_config.create_disposition = bigquery.job.CreateDisposition.CREATE_IF_NEEDED
job_config.autodetect = True

# Push the 2016 offer curves to BigQuery with the settings above.
Omie.upload_bq_year_file(
    omie_parameter=OfferCurvesParams,
    year=2016,
    job_config=job_config,
)

2022-02-18 09:35:53,227|root|INFO|Uploading batch until date 2016-04-10 to BigQuery ...
2022-02-18 09:36:32,578|root|INFO|Uploading batch until date 2016-07-19 to BigQuery ...
2022-02-18 09:37:07,675|root|INFO|Uploading batch until date 2016-10-27 to BigQuery ...
2022-02-18 09:37:38,549|root|INFO|Uploading batch until date 2016-12-31 to BigQuery ...
