In [None]:
import time
import sys
import math
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from functools import reduce
from itertools import chain
from scipy.stats import stats
from scipy.stats import rankdata
from scipy.optimize import minimize

from mpl_toolkits.mplot3d import Axes3D

from pyspark.mllib.linalg.distributed import IndexedRowMatrix, IndexedRow
from pyspark.ml.feature import StandardScaler
from pyspark.ml.linalg import Vectors, VectorUDT
from pyspark.sql.functions import create_map, col, to_date, date_format, year, month, dayofmonth, when, lit, lag, array, explode, struct, udf, first
from pyspark.sql.functions import sum as spark_sum, avg as spark_avg, count, stddev as spark_stddev
from pyspark.sql.types import FloatType, StructField, StructType, DateType, IntegerType, ArrayType
from pyspark.sql import SparkSession, Window, DataFrame
from pylab import *
from matplotlib.ticker import LinearLocator, FormatStrFormatter

In [None]:
# Raise pandas' display limits so wide / long frames are rendered without truncation.
pd.options.display.max_columns = 10000000
pd.options.display.max_rows = 10000000
pd.options.display.width = 10000000

------------------------------------------------------------------------------------------------------------------------------------------------------------------

# Defining Spark Session for pseudo-distributed computing:

In [None]:
# Create the Spark session for this notebook (or reuse one that is already running)
# and keep a handle on its SparkContext.
spark = (
    SparkSession.builder
    .appName('Portfolio_Optimization')
    .getOrCreate()
)
sc = spark.sparkContext
sc

# Reading persisted Portfolio Yields dataframe:

In [None]:
# Location of the persisted portfolio-yield window (parquet).
portfolio_yield_window_path = '/data/core/fince/data/portfolioOptimization/portfolio_yield_window/'
# Load it as a Spark DataFrame.
portfolio_yield_df = spark.read.format('parquet').load(portfolio_yield_window_path)

In [None]:
# Preview the first 10 rows, converted to a pandas DataFrame for rich display.
portfolio_yield_df.limit(10).toPandas()

# From now on we construct yield portfolio matrix for Risk Parity process.

In [None]:
# Every column except the last one is used as a fund series.
# NOTE(review): the excluded last column is presumably a date/key column — confirm against the schema.
# NOTE(review): no explicit ordering is applied before collect(); verify the rows arrive in
# chronological order, since the later cells treat the row index as a month index.
field_array = portfolio_yield_df.columns[:-1]
print(f'test with {len(field_array)} funds\n')

# Collect the selected columns to the driver and stack the collected rows into a numpy array.
monthly_return = np.array(portfolio_yield_df.select(*field_array).collect())
print("numpy matrix generated correctly!\n")
print(f'monthly_return matrix:\n {monthly_return}')

### Get model parameters Class:
#### This class defines utility functions that help us create specific matrix objects.

In [None]:
class ModelParameters(object):
    """
    Namespace of stateless helpers that build the matrices and time-window
    parameters used by the portfolio optimization cells.

    Every helper is a static method, so it can be called on the class
    (``ModelParameters.get_shape_matrix(m)``) or on an instance. Following the
    convention used throughout this class, invalid arguments are reported with
    ``print`` and signalled by returning ``None`` instead of raising.
    """

    @staticmethod
    def _is_valid_int(value, label):
        """Return True when ``value`` is an int; otherwise print the standard message and return False."""
        if isinstance(value, int):
            return True
        print(label + " params not valid, must be integer.")
        return False

    @staticmethod
    def _is_valid_matrix(numpy_matrix):
        """Return True when ``numpy_matrix`` is a numpy.ndarray; otherwise print the standard message."""
        if isinstance(numpy_matrix, np.ndarray):
            return True
        print("matrix not valid, must be numpy.array object with float terms.")
        return False

    @staticmethod
    def get_shape_matrix(numpy_matrix):
        """
        Gets numpy.array matrix shape.
        :param numpy_matrix: yield numpy.array matrix (at least 2-D).
        :return: tuple with matrix shape (rows, cols), or None when the input is not a
                 numpy.ndarray or has fewer than two dimensions.
        """
        if not ModelParameters._is_valid_matrix(numpy_matrix):
            return None
        if numpy_matrix.ndim < 2:
            # A 1-D (or 0-D) array has no column count; report it like any other invalid matrix
            # instead of failing with an IndexError on shape[1].
            print("matrix not valid, must be numpy.array object with float terms.")
            return None

        return numpy_matrix.shape[0], numpy_matrix.shape[1]

    @staticmethod
    def set_time_prediction(numpy_matrix, time_predict=None, n_month=None):
        """
        Derives the time-window parameters of the analysis from the matrix row count.
        :param numpy_matrix: yield portfolio numpy.array matrix.
        :param time_predict: time to predict, int; 11 when None.
        :param n_month: number of months, int; 1 when None.
        :return: tuple (start_month, end_month, time_predict, time_observed) where
                 start_month = time_predict + n_month, end_month = number of rows and
                 time_observed = number of rows - start_month; None on invalid input.
        """
        shape = ModelParameters.get_shape_matrix(numpy_matrix)
        if shape is None:
            return None
        n_rows = shape[0]

        if time_predict is None:
            time_predict = 11
        if n_month is None:
            n_month = 1

        if not ModelParameters._is_valid_int(time_predict, "time predict"):
            return None
        if not ModelParameters._is_valid_int(n_month, "number of months"):
            return None

        start_month = time_predict + n_month
        time_observed = n_rows - start_month
        end_month = n_rows

        return start_month, end_month, time_predict, time_observed

    @staticmethod
    def create_zero_matrix(time_observed, n_cols):
        """
        Initializes the zero-filled matrices used by the optimization.
        :param time_observed: time to analyze, int.
        :param n_cols: int number columns N (fund target).
        :return: tuple (cov_matrix, weight_riskparity_matrix) of zero-filled numpy.array
                 objects with shapes (n_cols, n_cols) and (time_observed, n_cols);
                 None on invalid input.
        """
        if not ModelParameters._is_valid_int(n_cols, "number of columns"):
            return None
        if not ModelParameters._is_valid_int(time_observed, "time observed"):
            return None

        cov_matrix = np.zeros((n_cols, n_cols))
        weight_riskparity_matrix = np.zeros((time_observed, n_cols))

        return cov_matrix, weight_riskparity_matrix

    @staticmethod
    def matrix_transpose(numpy_matrix):
        """
        Returns the transpose of a numpy.array matrix.
        :param numpy_matrix: yield portfolio numpy.array matrix.
        :return: numpy.array object transposed, or None when the input is not a numpy.ndarray.
        """
        if not ModelParameters._is_valid_matrix(numpy_matrix):
            return None

        return numpy_matrix.T

    @staticmethod
    def wr_equally_weighted_matrix(time_observed, n_cols):
        """
        Creates the two zero-filled matrices later filled with the equally weighted
        weights (w) and returns (r).
        :param time_observed: int time window observations.
        :param n_cols: int number columns N (fund target).
        :return: tuple (w_ew, r_ew) of zero-filled numpy.array objects, each with shape
                 (time_observed, n_cols); None on invalid input.
        """
        if not ModelParameters._is_valid_int(time_observed, "time observed"):
            return None
        if not ModelParameters._is_valid_int(n_cols, "number of columns"):
            return None

        w_ew = np.zeros((time_observed, n_cols))
        r_ew = np.zeros((time_observed, n_cols))

        return w_ew, r_ew

    @staticmethod
    def one_n_matrix(n_cols):
        """
        Creates the equally weighted (1/N) weight row.
        :param n_cols: int number columns N (fund target).
        :return: numpy.array object with shape (1, n_cols) where every entry is 1/n_cols;
                 None on invalid input.
        """
        if not ModelParameters._is_valid_int(n_cols, "number of columns"):
            return None

        return np.full((1, n_cols), 1/n_cols)

    @staticmethod
    def zero_filled_vector(list_vector, time_observed):
        """
        Creates one zero-filled column vector per requested name.
        :param list_vector: iterable with the names of the vectors to create.
        :param time_observed: int time window observations.
        :return: dict mapping each name to its own zero-filled numpy.array of shape
                 (time_observed, 1); None on invalid input.
        """
        if not ModelParameters._is_valid_int(time_observed, "time observed"):
            return None

        return {name: np.zeros((time_observed, 1)) for name in list_vector}

    @staticmethod
    def return_weights_matrices(numpy_matrix, start_month, end_month, time_observed, n_cols):
        """
        Computes the equally weighted (1/N) returns for every month in the window.
        :param numpy_matrix: yield portfolio numpy.array matrix (months as rows, funds as columns).
        :param start_month: int, first row (inclusive) of the window.
        :param end_month: int, last row (exclusive) of the window.
        :param time_observed: int, number of rows of the result matrices.
        :param n_cols: int number columns N (fund target).
        :return: tuple (return_equal_weight, total_return):
                 return_equal_weight has shape (time_observed, n_cols) and row
                 ``y - start_month`` holds ``numpy_matrix[y, :] * (1 / n_cols)``;
                 total_return has shape (time_observed, 1) and holds the sum of each of
                 those rows.
        """
        return_equal_weight = np.zeros((time_observed, n_cols))
        total_return = np.zeros((time_observed, 1))

        # Only the arguments are used here: the function no longer reads or mutates
        # notebook-level globals, so its result does not depend on which cells ran before.
        for y in range(start_month, end_month):
            row = y - start_month
            return_equal_weight[row] = np.dot(numpy_matrix[y, :], 1/n_cols)
            total_return[row] = return_equal_weight[row].sum()

        return return_equal_weight, total_return

### Lets define **size** and $N$ parameters for optimization model, that will be the same as total rows and total columns; respectively.

In [None]:
# size = number of rows and N = number of columns of the monthly_return matrix.
size, N = ModelParameters.get_shape_matrix(numpy_matrix=monthly_return)

### Now lets define:
#### *Start month window: as the start of historical analysis for prediction.*
#### *End month window: as the end of historical analysis for prediction.*
#### *$T_1$: as the Timing 1 for prediction window (11 months).*
#### *$T_2$: as the Timing 2 for window observations (931 unique dates).*

**Note 1: Keep track of the *size* variable: it is the total count of historical portfolio observations (943), while *$T_2$* is the total time to analyze (931); the difference between the two is the time to predict (12 months).**

In [None]:
# With the defaults (time_predict=11, n_month=1): start_month = 12, end_month = number of rows,
# T1 = time_predict and T2 = number of rows - start_month.
start_month, end_month, T1, T2 = ModelParameters.set_time_prediction(numpy_matrix=monthly_return)

In [None]:
# Note 1 ref.: size - T2 equals start_month (time_predict + n_month), i.e. 12 with the defaults.
size-T2

### Then, lets fill with zeros *(initialize)* **covariance matrix** with dimensions $(N x N)$, and **weight Risk Parity** matrix with $(T_2 x N)$.
#### *$N$ = as Total columns from portfolio funds data (211).*
#### *$T_2$ = as Historical yields from portfolio funds data (931).*

In [None]:
# Zero-initialised (N x N) covariance matrix and (T2 x N) risk-parity weight matrix.
covmatr, w_RP = ModelParameters.create_zero_matrix(time_observed=T2, n_cols=N)

In [None]:
# Display the (still zero-filled) risk-parity weight matrix.
w_RP

### Lets transpose **monthly returns** price difference matrix (yield to date) as *ret*. 

In [None]:
# ret is the transpose of monthly_return, so each row of ret is one column of monthly_return.
ret = ModelParameters.matrix_transpose(numpy_matrix=monthly_return)

### Now let's initialize with zeros two matrices:
#### 1-. **$W$** matrix for equally weighted inputs with $(T_2 \times N)$ dimensions.
#### 2-. **$R$** matrix for equally weighted inputs with $(T_2 \times N)$ dimensions.

**Note 2: *w_EW* will be the equally weighted matrix, with dimensions $(T_2 \times N)$.**

In [None]:
# Manual initialisation of the working arrays used by the cells that follow.
ret = np.transpose(monthly_return)
# Flagged by the author as unused — verify nothing reads these two as globals before deleting them.
w_EW = np.zeros(shape=(T2, N))
onen = np.full(shape=(1, N), fill_value=1/N)
# Per-fund equally weighted returns, one row per observed month.
r_ew = np.zeros(shape=(T2, N))
# (T2 x 1) column vectors, each one a separate array.
r_rp, retEW, retRP = (np.zeros((T2, 1)) for _ in range(3))

In [None]:
# Rebind w_EW and r_ew to fresh zero-filled (T2 x N) matrices.
w_EW, r_ew = ModelParameters.wr_equally_weighted_matrix(time_observed=T2, n_cols=N)

In [None]:
# Display the zero-filled equally weighted weight matrix.
w_EW

#### Lets define One/n matrix.

In [None]:
# onen = ModelParameters.one_n_matrix(n_cols=N)

#### Lets define vectors shape zero filled investment return by term as r_rp, total sum investment Return Equal Weighted as retEW, and investment Return Risk Parity.

In [None]:
# Names of the zero-filled (T2 x 1) vectors to create.
lst = ["r_rp", "retRP"]
# One helper call per name, picking the matching entry out of each returned mapping.
r_rp, retRP = (
    ModelParameters.zero_filled_vector(lst, T2)[vector_name]
    for vector_name in lst
)

In [None]:
print('Generating optimization parameters...\n')
# `%time` on a line of its own timed an empty statement; measure the call explicitly instead.
_ew_t0 = time.perf_counter()
# NOTE(review): the return value is discarded here; capture it if the equally weighted
# returns are needed by later cells.
ModelParameters.return_weights_matrices(numpy_matrix=monthly_return,
                                        start_month=start_month,
                                        end_month=end_month,
                                        time_observed=T2,
                                        n_cols=N)
print('Wall time: {:.3f} s'.format(time.perf_counter() - _ew_t0))
print('\nDONE!')

In [None]:
print('Generating Marginal Risk Contribution variables...\n')
# `%time` on a line of its own timed an empty statement; measure the work explicitly instead.
_cov_t0 = time.perf_counter()
# The previous loop recomputed np.cov for every month but overwrote covmatr each time, so only
# the last rolling window (the T1 columns of ret before index end_month - 1) was ever kept.
# Compute just that window; covmatr is left untouched when the month range is empty.
if end_month > start_month:
    _last_month = end_month - 1
    covmatr = np.cov(ret[:, _last_month - T1:_last_month])
print('Wall time: {:.3f} s'.format(time.perf_counter() - _cov_t0))
print('\nDONE!')
print('\nvariance & covariance matrix:')
print(covmatr)

# Generator function for Risk Contribution variables
- mrc aka: marginal risk contribution
- rc aka: risk contribution

In [None]:
def RC(weight, covmatr):
    """
    Normalised risk contribution of each position.

    :param weight: sequence of portfolio weights.
    :param covmatr: covariance matrix matching the length of ``weight``.
    :return: numpy array with each position's risk contribution
             (weight times marginal risk contribution), scaled to sum to 1.
    """
    w = np.array(weight)
    # Portfolio volatility: square root of w' * Cov * w.
    portfolio_sigma = (w.T @ covmatr @ w) ** .5
    # Marginal risk contribution of each position.
    marginal = 1/portfolio_sigma * (covmatr @ w)
    contribution = w * marginal
    return contribution / contribution.sum()

# Generator function for RiskParity objective variables

In [None]:
def RiskParity_objective(x, cov=None):
    """
    Risk-parity objective: sum of squared pairwise differences between the
    risk contributions of all positions (zero when every position contributes equally).

    :param x: 1-D array of portfolio weights.
    :param cov: covariance matrix to evaluate against. When omitted, the notebook-level
                ``covmatr`` is used, which keeps existing single-argument callers working;
                pass it explicitly (e.g. through ``minimize(..., args=(cov,))``) to avoid
                depending on global state.
    :return: the scalar objective value.
    """
    if cov is None:
        cov = covmatr  # backward-compatible fallback to the notebook-level matrix
    x = np.asarray(x)
    variance = x.T @ cov @ x
    sigma = variance ** .5
    mrc = 1/sigma * (cov @ x)
    rc = x * mrc
    # Column minus row broadcasts to the full matrix of pairwise differences.
    risk_diffs = rc[:, None] - rc[None, :]
    return np.sum(np.square(risk_diffs))

# Generator function for Maximum diversification ratio objective variables

In [None]:
def Max_div_objective(x, cov=None):
    """
    Negative diversification ratio, suitable for a minimizer.

    The ratio is the weighted sum of the individual volatilities (square roots of the
    covariance diagonal) divided by the portfolio volatility.

    :param x: 1-D array of portfolio weights.
    :param cov: covariance matrix to evaluate against. When omitted, the notebook-level
                ``covmatr`` is used, which keeps existing single-argument callers working.
    :return: minus the diversification ratio.
    """
    if cov is None:
        cov = covmatr  # backward-compatible fallback to the notebook-level matrix
    x = np.asarray(x)
    variance = x.T @ cov @ x
    sigma = variance ** .5
    w_vol = np.dot(np.sqrt(np.diag(cov)), x.T)
    diver_ratio = w_vol/sigma
    return -diver_ratio

In [None]:
def weight_sum_constraint(x):
    """Equality-constraint residual: zero exactly when the entries of ``x`` sum to one."""
    total = np.sum(x)
    return total - 1.0


def weight_longonly(x):
    """Inequality-constraint values: the weights themselves, returned unchanged."""
    weights = x
    return weights

# Function object for instance on Minimization scipy function

In [None]:
def RiskParity(covmatr):
    """
    Solve for the risk-parity weights of the given covariance matrix.

    The objective is built from the ``covmatr`` argument itself. Previously the argument
    only sized the starting point while the objective silently read the notebook-level
    ``covmatr``, so passing any other matrix produced weights for the wrong covariance.

    :param covmatr: square covariance matrix (N x N).
    :return: 1-D numpy array with the N weights found by the optimizer. The optimizer's
             success flag is not inspected; the last iterate is returned as is.
    """
    cov = np.asarray(covmatr)
    n_assets = cov.shape[1]
    # Start from the equally weighted portfolio.
    x0 = np.repeat(1/n_assets, n_assets)

    def _objective(x):
        # Sum of squared pairwise differences between the positions' risk contributions.
        sigma = (x.T @ cov @ x) ** .5
        rc = x * (1/sigma * (cov @ x))
        return np.sum(np.square(rc[:, None] - rc[None, :]))

    constraints = ({'type': 'eq', 'fun': weight_sum_constraint},   # weights sum to one
                   {'type': 'ineq', 'fun': weight_longonly})       # every weight >= 0
    # NOTE(review): ftol=1e-20 is below double precision, so the run presumably ends on
    # maxiter or a line-search stop rather than on the tolerance — confirm this is intended.
    options = {'ftol': 1e-20, 'maxiter': 999}
    # SLSQP is what minimize() selects by default for a constrained problem; named explicitly.
    result = minimize(fun=_objective,
                      x0=x0,
                      method='SLSQP',
                      constraints=constraints,
                      options=options)
    return result.x

In [None]:
print('Generating optimized return matrices...')
# `%time` on a line of its own timed an empty statement; measure the loop explicitly instead.
_opt_t0 = time.perf_counter()
for w in range(start_month, end_month):
    row = w - start_month
    # Rolling window: covariance of the T1 months that precede month w. Before this change
    # every month was optimised against the same final-window matrix, which made all rows of
    # w_RP identical and used data from after the month being weighted.
    # The notebook-level name is rebound so an objective that reads `covmatr` sees the same matrix.
    covmatr = np.cov(ret[:, w - T1:w])
    w_RP[row] = RiskParity(covmatr)
    # Portfolio return of month w under that month's risk-parity weights.
    r_rp[row] = np.dot(monthly_return[w, :], w_RP[row, :])
    retRP[row] = r_rp[row].sum()
print('Wall time: {:.3f} s'.format(time.perf_counter() - _opt_t0))
print('\nDONE!')
print('\nw_RP matrix:')
print(w_RP)
print('\nretRP matrix:')
print(retRP)

### The x axis will be the month, the y axis will be the asset (fund), and z will be the active weight of the portfolio.

In [None]:
# Weight range, used to pad the z-axis limits.
mx = np.amax(w_RP)
mn = np.amin(w_RP)

fig = plt.figure()
# Figure.gca(projection=...) is deprecated/removed in recent Matplotlib releases;
# add_subplot is the supported way to create a 3-D axes.
ax = fig.add_subplot(111, projection='3d')

# Grid of (month index, fund index) pairs; Z holds the weight at each pair.
X = np.arange(0, T2, 1)
Y = np.arange(0, N, 1)
X, Y = np.meshgrid(X, Y)
Z = np.transpose(w_RP)

# plt.cm is used so the colormap does not depend on the `from pylab import *` star import.
surf = ax.plot_surface(X, Y, Z, cmap=plt.cm.Reds_r, linewidth=0)

ax.set_xlabel('month')
ax.set_ylabel('asset (fund)')
ax.set_zlabel('portfolio weight')
ax.set_zlim(mn-.02, mx+.05)
plt.show()