In [None]:
%%html
<style>
/* Size the <pre> of plain-text stdout output to the intrinsic width of its
   content (width: max-content). */
.output_subarea.output_text.output_stream.output_stdout > pre {
    width:max-content;
}
/* Same rule for a second set of output class names; the jp- prefix is
   presumably JupyterLab's, the selector above the classic Notebook's
   (TODO confirm). */
.p-Widget.jp-RenderedText.jp-OutputArea-output > pre {
   width:max-content;
}
</style>

In [None]:
import pandas as pd
import matplotlib

from pyspark.sql.functions import col, count as spark_count, avg as spark_avg

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Raise the pandas display limits to very large values so wide/long frames
# are printed without column, row or line-width truncation.
pd.set_option('display.max_columns', 10000000)
pd.set_option('display.max_rows', 10000000)
pd.set_option('display.width', 10000000)
# Use the "ggplot" style sheet for every figure drawn afterwards.
matplotlib.style.use('ggplot')

------------------------------------------------------------------------------------------------------------------------------------------------------------------

# Starting a session with Ophelia

In [None]:
from enquire.vendetta import Ophelia

In [None]:
# Shared Ophelia instance for the whole notebook; later cells use its
# .spk, .tr, .df and .arr attributes.
ophelia = Ophelia()

# Loading the daily fund prices file (read as Parquet below).

In [None]:
def read_portfolio_data(path_file, source, date_col, withSchema=True):
    """Read the portfolio data and optionally run Ophelia's schema step on it.

    Relies on the notebook-level ``ophelia`` instance.

    Parameters
    ----------
    path_file
        Location of the input data, forwarded to ``ophelia.tr.read_file``.
    source
        Source format identifier forwarded to ``ophelia.tr.read_file``
        (the notebook passes ``"parquet"``).
    date_col
        Date column name handed to ``ophelia.tr.schema_define``; only used
        when ``withSchema`` is truthy.
    withSchema
        When truthy (the default), return the result of
        ``ophelia.tr.schema_define``; otherwise return the data as read.

    Returns
    -------
    The object returned by ``ophelia.tr.read_file`` or, when ``withSchema``
    is truthy, by ``ophelia.tr.schema_define``.
    """
    spark = ophelia.spk
    portfolio_data = ophelia.tr.read_file(path_file, source, spark)
    # Plain truthiness rather than `is True`, so truthy flags such as 1 also
    # enable the schema step instead of being silently ignored.
    if withSchema:
        return ophelia.tr.schema_define(portfolio_data, date_col)
    return portfolio_data

## Changing the impure schema of the portfolio input data.

In [None]:
# Read the analytical base table as Parquet. withSchema=False returns the
# data as read, skipping the ophelia.tr.schema_define step.
portfolio_df = read_portfolio_data(
    path_file="data/master/ophelia/data/OpheliaData/analytical_base_table/",
    source="parquet",
    date_col="operation_date",
    withSchema=False
)

# Preview the first five rows as a pandas DataFrame.
portfolio_df.limit(5).toPandas()

In [None]:
def join_price_lag(df, on="row_num", how="left"):
    """Join a portfolio frame with its lagged counterpart.

    Both the input frame and the frame produced by
    ``ophelia.df.lag_min_max_data`` are passed through
    ``ophelia.df.row_index`` (on ``"operation_date"`` and
    ``"operation_date_lag"`` respectively) and then joined.
    Presumably ``row_index`` adds the ``row_num`` key used by the default
    join -- TODO confirm against the Ophelia library.

    Parameters
    ----------
    df
        Portfolio frame to index and lag.
    on
        Join key(s) passed to ``join``; defaults to ``"row_num"``.
    how
        Join type passed to ``join``; defaults to ``"left"``.

    Returns
    -------
    The result of joining the indexed frame with the indexed lagged frame.
    """
    helpers = ophelia.df
    current = helpers.row_index(df, "operation_date")
    lagged = helpers.row_index(helpers.lag_min_max_data(df), "operation_date_lag")
    return current.join(lagged, on=on, how=how)

In [None]:
join_price_lag = join_price_lag(portfolio_df)
join_price_lag.limit(5).toPandas()

In [None]:
def price_yield(df, fix_cols):
    float_cols = ophelia.arr.feature_picking(portfolio_df)["float"]
    formula_yield = [((col(c) / col("{0}_lag".format(c)) - 1)).alias("{0}_yield".format(c)) for c in float_cols]
    price_yield = df.select(*fix_cols, *formula_yield).na.fill(0)
    return price_yield

In [None]:
fix_cols = [
    "row_num", 
    "operation_id", 
    "operation_date"
]
portfolio_yield_df = price_yield(df=join_price_lag, fix_cols=fix_cols).orderBy(col("row_num"))
portfolio_yield_df.limit(5).toPandas()

In [None]:
# Collect every column from position 6 onward into pandas and plot them
# (DataFrame.plot draws a line chart by default).
# NOTE(review): the offset 6 is a magic number. fix_cols contributes only the
# three leading columns, so this also skips the first three yield columns --
# confirm that is intended.
fixed_df = portfolio_yield_df.select(portfolio_yield_df.columns[6:]).toPandas()
fixed_df.plot(figsize=(15, 10))

# Writing Portfolio's Yield dataframe.

In [None]:
# Write the yield frame through ophelia.tr.write_parquet under the directory
# name "yield_portfolio_price", partitioned by operation_date.
# The return value is kept as price_path and later passed to read_yield as the
# path to read back -- presumably the output location; TODO confirm.
price_path = ophelia.tr.write_parquet(
    dataframe=portfolio_yield_df, 
    name_directory="yield_portfolio_price", 
    partition_field="operation_date"
)

In [None]:
def read_yield(parquet_path, source="parquet"):
    """Read a previously written yield dataset.

    Relies on the notebook-level ``ophelia`` instance.

    Parameters
    ----------
    parquet_path
        Location to read, forwarded as ``path_source``.
    source
        Source format identifier; defaults to ``"parquet"``.

    Returns
    -------
    Whatever ``ophelia.tr.read_file`` returns for the given path and source.
    """
    session = ophelia.spk
    return ophelia.tr.read_file(
        path_source=parquet_path,
        source=source,
        spark_session=session,
    )

In [None]:
# Read the yield data back from price_path and preview the five rows with the
# most recent operation_date.
# NOTE(review): "porfolio" in the variable name looks like a typo for
# "portfolio"; left unchanged because other cells may reference this name.
yield_porfolio_df = read_yield(parquet_path=price_path)
yield_porfolio_df.orderBy(col("operation_date").desc()).limit(5).toPandas()