In [12]:
from functools import reduce
from pyspark.sql import SparkSession, DataFrame
from pyspark.sql.functions import col, lit, avg as spark_avg, stddev as spark_stddev

In [None]:
pd.set_option('display.max_columns', 10000000)
pd.set_option('display.max_rows', 10000000)
pd.set_option('display.width', 10000000)

------------------------------------------------------------------------------------------------------------------------------------------------------------------

# Defining Spark Session for pseudo-distributed computing:

In [2]:
spark = SparkSession.builder.appName('Sharpe_Ratio').getOrCreate()
sc = spark.sparkContext
sc

# Reading persisted Portfolio Yields dataframe:

In [3]:
portfolio_yield_window_path = '/data/core/fince/data/portfolioOptimization/portfolio_yield_window/'
portfolio_yield_df = spark.read.parquet(portfolio_yield_window_path)

In [4]:
portfolio_yield_df.limit(10).toPandas()

Unnamed: 0,SCOTIAG,AXESCP,BMERGOB,BMRGOB25,VALUEF4,BLKDIA7,BLKGUB1,GBMGUBL,INVEXGU,NTEGUB13,...,FINDE1400,ACTIGOB401,AXESMP402,HSBCMP405,INVEXMP407,NAFFP28,PRINFMP410,PRINMAS411,VECTMD414,operation_date
0,-0.000404,-0.000149,-0.000422,-0.000405,-0.000347,-0.000412,-0.000407,-0.000404,-0.000348,-0.0002,...,-0.00014,-0.000203,-0.000154,-0.000168,-0.000172,-0.000159,-0.000132,-5.6e-05,-0.00017,2019-08-07
1,-0.000108,-4.3e-05,-0.000114,-0.000118,-6.1e-05,-0.000106,-9.3e-05,-7.6e-05,-7.7e-05,-0.000102,...,-0.000259,-0.000111,-0.000243,-0.000196,-9.4e-05,-6.6e-05,-0.000128,-0.000209,-0.000109,2016-09-29
2,-0.000104,-6.4e-05,-0.000103,-0.000303,-6e-05,-9e-05,-0.000177,-6e-05,-6.2e-05,-8.8e-05,...,0.000103,-9.4e-05,-0.000109,-4.6e-05,-6.3e-05,-5.2e-05,-9.4e-05,2.6e-05,-5.3e-05,2016-04-08
3,-0.00019,-0.000146,-0.000213,-0.000147,-0.000171,-0.000205,-0.000205,-0.000202,-0.000175,-0.000201,...,-0.000365,-0.000201,-0.000163,-0.0002,-0.000174,-0.000213,-0.000584,-0.000454,-0.000257,2019-07-03
4,-0.000194,-0.000153,-0.000205,-0.000223,-0.000173,-0.000205,-0.000205,-0.000203,-0.000178,-0.000202,...,-0.000198,-0.000176,-0.000166,-0.000174,-0.00018,-0.000151,-0.000198,-0.000205,-0.000173,2018-09-20
5,-0.000199,-0.000142,-0.000208,-0.000202,-0.000168,-0.000204,-0.000197,-0.000201,-0.000174,-0.000198,...,-0.000164,-0.000192,-0.000155,-0.000162,-0.000171,-0.000159,-0.000138,-0.000125,-0.000354,2019-01-29
6,-0.000195,-0.000146,-0.000213,-0.000163,-0.000168,-0.000207,-0.000204,-0.000201,-0.000175,-0.000198,...,-0.000255,-0.000195,-0.000161,-0.000191,-0.000169,-0.00016,-0.000143,-0.000221,-0.000169,2019-06-27
7,-0.000205,-0.000155,-0.000203,-0.000189,-0.000174,-0.000202,-0.000203,-0.000203,-0.000178,-0.000201,...,-0.00028,-0.0002,-0.000154,-0.000292,-0.000175,-0.00015,-0.000181,-0.000139,-0.000207,2018-09-19
8,-0.000153,-0.000142,-0.000191,-0.000153,-0.000167,-0.0002,-0.000198,-0.000198,-0.00017,-0.000193,...,-0.000159,-0.000146,-0.000144,-0.00016,-0.000166,-0.000148,-0.000119,7.6e-05,-0.000173,2018-06-13
9,-0.000199,-0.000151,-0.000214,-0.000207,-0.000169,-0.00021,-0.000207,-0.000202,-0.000175,-0.000201,...,-0.000358,-0.000205,-0.000186,-0.000177,-0.000179,-0.000179,-0.000309,-0.000487,-0.000276,2019-05-16


In [5]:
dataframes = [portfolio_yield_df.select(lit(fund).alias('fund_name'), col(fund).alias('fund_yield')) for fund in portfolio_yield_df.columns[:-1]]

In [6]:
def unionAll_df(*dfs):
    return reduce(DataFrame.unionAll, dfs)

In [11]:
portfolio_yield_T = unionAll_df(*dataframes).cache()

# Writing Portfolio's Yield Transpose dataframe.

In [None]:
writing_path_mod3 = '/data/core/fince/data/portfolioOptimization/portfolio_yield_transpose/'

print('\nWriting parquets ...')
portfolio_yield_T.repartition(1).write.mode('overwrite').parquet(writing_path_mod3)

%time
print('\nSUCCESS \nPARQUET DATA SAVED!')
print('\nNew root path tabla data:', writing_path_mod3)

# Reading persisted Portfolio Yields Transpose.

In [13]:
portfolio_yield_T_path = '/data/core/fince/data/portfolioOptimization/portfolio_yield_transpose/'
portfolio_yield_T_df = spark.read.parquet(portfolio_yield_T_path)

In [14]:
sharpe_ratio_df = portfolio_yield_T_df.groupBy("fund_name")\
                                      .agg(spark_avg('fund_yield'), spark_stddev('fund_yield'))\
                                      .select("*", (col("avg(fund_yield)") / col("stddev_samp(fund_yield)")).alias("sharpe_ratio"))\
                                      .orderBy(col("sharpe_ratio").desc())\
                                      .drop("avg(fund_yield)", "stddev_samp(fund_yield)")

# Sharpe ratio:

In [15]:
sharpe_ratio_df.show(100)

+----------+--------------------+
| fund_name|        sharpe_ratio|
+----------+--------------------+
|    GBMMOD| 0.03137959880780523|
|    GBMCRE| 0.03124715775689063|
|   NAFINDX| 0.01844368305409419|
|   HSBCBOL|0.015666999130266483|
|   INVEXMX|0.013004038169870356|
|   ACTIPAT|0.012462306905213617|
|   ACTIVAR|0.011726284859371962|
|   ACTINMO|0.010918150288106438|
|   BMERIND|0.010006322375368616|
|    VECTPA|0.009086193748315314|
|    BLKIPC|0.008840245406882136|
|    BLKPAT|0.008052367375532735|
|    SURIPC|0.008036704146507328|
|   SCOTIPC| 0.00799137535845908|
|   VECTIND|0.007295953729364737|
|    SURPAT|0.006253600896206242|
|   PRINRVA|0.003672604815147...|
|    NTESEL|6.952067806518164E-4|
|      MAYA|1.502042229259964E-4|
|STERDOW270|-0.00157575205813...|
|STERDOW281|-0.00157575205813...|
|SVIVE20338|-0.00188773265509...|
|SVIVE20390|-0.00188773265509...|
|   ST&ERUS|-0.00229508790117...|
|HSBCDOL201|-0.00551912186143...|
|HSBCDOL189|-0.00551912186143...|
|SVIVE35322|-0

-------------------------------------------------------------------------------------------------------------------------------------------------------------------