In [None]:
from functools import reduce
from pyspark.sql import SparkSession, DataFrame
from pyspark.sql.functions import col, lit, avg as spark_avg, stddev as spark_stddev
import pandas as pd

In [None]:
# Lift pandas' display limits so that wide or long frames are rendered without truncation.
pd.set_option(
    'display.max_columns', 10000000,
    'display.max_rows', 10000000,
    'display.width', 10000000,
)

------------------------------------------------------------------------------------------------------------------------------------------------------------------

# Defining Spark Session for pseudo-distributed computing:

In [None]:
# Obtain the Spark session for this notebook and keep a handle on its SparkContext.
spark = (
    SparkSession.builder
    .appName('Sharpe&Sortino_ratio')
    .getOrCreate()
)
sc = spark.sparkContext
sc  # last expression of the cell: renders the SparkContext

# Reading persisted Portfolio Yields dataframe:

In [None]:
# Location of the persisted portfolio-yield parquet dataset.
portfolio_yield_window_path = '/data/core/fince/data/portfolioOptimization/portfolio_yield_window/'

# Load the dataset, then preview its first five rows as a pandas frame.
portfolio_yield_df = spark.read.parquet(portfolio_yield_window_path)
(
    portfolio_yield_df
    .limit(5)
    .toPandas()
)

In [None]:
# Wide -> long: build one two-column frame (fund_name, fund_yield) per column of the wide frame.
# The last column is skipped — presumably it is not a fund (e.g. a date/window key); TODO confirm.
dataframes = [
    portfolio_yield_df.select(
        lit(fund_column).alias('fund_name'),
        col(fund_column).alias('fund_yield'),
    )
    for fund_column in portfolio_yield_df.columns[:-1]
]

In [None]:
def unionAll_df(*dfs):
    """Stack any number of DataFrames into one by chaining ``unionAll``.

    Args:
        *dfs: One or more DataFrames; ``unionAll`` is applied left to right.

    Returns:
        The DataFrame obtained by folding ``unionAll`` over ``dfs``. When a
        single frame is passed it is returned as-is.

    Raises:
        TypeError: If called without any DataFrame.
    """
    if not dfs:
        # reduce() on an empty sequence raises an opaque "empty sequence" TypeError;
        # fail early with a message that names the actual problem instead.
        raise TypeError("unionAll_df() requires at least one DataFrame")
    return reduce(lambda stacked, following: stacked.unionAll(following), dfs)

In [None]:
# Stack the per-fund frames into a single long (fund_name, fund_yield) frame.
# It is cached because it is consumed more than once (preview here, parquet write later).
portfolio_yield_T = (
    unionAll_df(*dataframes)
    .cache()
)
portfolio_yield_T.show(n=5)

# Writing Portfolio's Yield Transpose dataframe.

In [None]:
writing_path_mod3 = '/data/core/fince/data/portfolioOptimization/portfolio_yield_transpose/'

print('\nWriting parquets ...')
portfolio_yield_T.repartition(1).write.mode('overwrite').parquet(writing_path_mod3)

%time
print('\nSUCCESS \nPARQUET DATA SAVED!')
print('\nNew root path tabla data:', writing_path_mod3)

# Reading persisted Portfolio Yields Transpose.

In [None]:
# Reload the long-format (transposed) yields from disk.
portfolio_yield_T_path = '/data/core/fince/data/portfolioOptimization/portfolio_yield_transpose/'
portfolio_yield_T_df = spark.read.parquet(portfolio_yield_T_path)

# NOTE(review): this counts the columns of the wide `portfolio_yield_df`, not of the
# frame that was just loaded — confirm that this is the intended check.
len(portfolio_yield_df.columns)

In [None]:
# Yields strictly below this value are treated as downside observations.
TRESHOLD = 0.0
# Fund used across the notebook for spot checks.
CASE = "SCOTIA1"

# Keep only the downside observations (all funds).
negative_fund_yield = portfolio_yield_T_df.where(col("fund_yield") < TRESHOLD)

# Spot check for the selected fund: a few rows, then min / max / stddev of its negative yields.
negative_fund_yield.where(col("fund_name") == CASE).show(5)
(
    negative_fund_yield
    .where(col("fund_name") == CASE)
    .describe("fund_yield")
    .where(col("summary").isin("min", "max", "stddev"))
    .show()
)

# NOTE(review): the figures printed below are hard-coded text, not computed from the
# data above — verify them against the describe() output.
print("after filtering negative yields we've got following parameters:")
print("{stddev:8.547E-5 , min: -1.872, max: -4.646}")

In [None]:
# Average yield per fund, computed over all observations.
mean_yield_df = (
    portfolio_yield_T_df
    .groupBy("fund_name")
    .agg(spark_avg('fund_yield').alias("mean_yield"))
)
print("mean yield df:")
mean_yield_df.where(col("fund_name") == CASE).show()

# Standard deviation per fund, computed over the negative yields only.
stddev_negative_yield_df = (
    negative_fund_yield
    .groupBy("fund_name")
    .agg(spark_stddev('fund_yield').alias("stddev_negative_yield"))
)
print("stddev negative yield df:")
stddev_negative_yield_df.where(col("fund_name") == CASE).show()

# Sortino ratio:

## **The formula for the Sortino ratio is:**
## Sortino Ratio = $\frac{R_p - r_f}{\sigma_d}$
## **Where:**
### $R_p$ = actual or expected portfolio return
### $r_f$ = risk-free rate (the computation below takes $r_f = 0$)
### $\sigma_d$ = standard deviation of the downside (here: of the negative yields)

In [None]:
# Sortino ratio per fund: mean yield divided by the stddev of its negative yields
# (no risk-free rate is subtracted).
# The left join keeps funds that have no row in stddev_negative_yield_df; any missing
# ratio is then replaced by 0.0.
# NOTE(review): a fund without downside observations therefore ends up with ratio 0.0 —
# confirm that this is the desired convention.
sortino_df = (
    mean_yield_df
    .join(stddev_negative_yield_df, on="fund_name", how="left")
    .select(
        "fund_name",
        (col("mean_yield") / col("stddev_negative_yield")).alias("sortino_ratio"),
    )
    .fillna(0.0)
)

# Five lowest ratios (ascending order), then the spot-check fund.
sortino_df.orderBy(col("sortino_ratio").asc()).show(5)
sortino_df.where(col("fund_name") == CASE).show(100)

# Sharpe ratio:

In [None]:
# Sharpe ratio per fund: mean yield divided by the stddev of all its yields
# (no risk-free rate is subtracted).
sharpe_df = (
    portfolio_yield_T_df
    .groupBy("fund_name")
    .agg(
        spark_avg('fund_yield').alias("mean_yield"),
        spark_stddev('fund_yield').alias("stddev_yield"),
    )
    .select(
        "fund_name",
        (col("mean_yield") / col("stddev_yield")).alias("sharpe_ratio"),
    )
)

# Five highest ratios (descending order), then the spot-check fund.
sharpe_df.orderBy(col("sharpe_ratio").desc()).show(5)
sharpe_df.where(col("fund_name") == CASE).show(100)

# Joined Both Ratios:

In [None]:
# Put both ratios side by side: every fund of sortino_df, with its Sharpe ratio attached.
joined_ratios_df = sortino_df.join(
    sharpe_df,
    on="fund_name",
    how="left",
)
(
    joined_ratios_df
    .where(col("fund_name") == CASE)
    .show()
)

-------------------------------------------------------------------------------------------------------------------------------------------------------------------