In [11]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.stats as sstats
from functools import cmp_to_key
from tqdm import trange
import zipfile

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Default figure size as [width, height]; wide and short suits the plots in this notebook.
plt.rcParams['figure.figsize'] = [20, 5]

# Reading data

In [2]:
def load_stock_quotations(stock_names, filename):
    """Read the quotations of several stocks from a zip archive of ``.mst`` files.

    Parameters
    ----------
    stock_names : iterable of str
        Stock names; each one is read from the archive member ``<name>.mst``.
    filename : path or file-like object
        Zip archive, handed directly to ``zipfile.ZipFile``.

    Returns
    -------
    pandas.DataFrame
        The per-stock frames placed side by side (``axis=1``). Columns form a
        two-level index (stock name, field) with the fields ``open``, ``high``,
        ``low``, ``close`` and ``volume``; the row index comes from the
        ``<DTYYYYMMDD>`` column and is named ``time``.
    """
    # Source column -> output column; the insertion order is also the output column order.
    column_names = {
        '<OPEN>': 'open',
        '<HIGH>': 'high',
        '<LOW>': 'low',
        '<CLOSE>': 'close',
        '<VOL>': 'volume',
    }

    quotations = {}
    with zipfile.ZipFile(filename) as archive:
        for name in stock_names:
            with archive.open(name + '.mst') as member:
                raw = pd.read_csv(member, index_col='<DTYYYYMMDD>', parse_dates=True)
            quotations[name] = (
                raw[list(column_names)]
                .rename(columns=column_names)
                .rename_axis('time')
            )
    return pd.concat(quotations.values(), keys=quotations.keys(), axis=1)

In [3]:
# Zip archive passed to load_stock_quotations; it must contain a '<name>.mst' member for every stock name.
STOCK_QUOTATIONS_ARCHIVE_FILE_NAME = 'mstall.zip'
# Text file read line by line to obtain the stock names (presumably the WIG20 constituents -- TODO confirm).
STOCK_NAMES_FILE_NAME = 'WIG20.txt'

In [4]:
# Read the stock names, one per line. Blank lines are skipped so that an empty
# name never reaches load_stock_quotations (it would look up a member called '.mst').
with open(STOCK_NAMES_FILE_NAME) as f:
    stock_names = [name for name in (line.strip() for line in f) if name]

number_of_stocks = len(stock_names)

# Forward-fill missing quotations with the last known value of each column.
# DataFrame.ffill() replaces fillna(method='ffill', inplace=True): the `method`
# keyword of fillna is deprecated in recent pandas releases.
stock_quotations = load_stock_quotations(stock_names, STOCK_QUOTATIONS_ARCHIVE_FILE_NAME).ffill()

# Objective function

In [5]:
def semi_covariance(X, Y):
    """Downside semi-covariance of X and Y.

    Both inputs are centred on their own mean, every positive deviation is
    replaced by zero, and the products of the remaining (non-positive)
    deviations are summed and divided by ``len(X)``. The inputs are not modified.
    """
    below_mean_x = np.minimum(X - X.mean(), 0)
    below_mean_y = np.minimum(Y - Y.mean(), 0)
    return np.sum(below_mean_x * below_mean_y) / len(X)


def semi_variance(X):
    """Semi-variance of X, defined as the semi-covariance of X with itself."""
    return semi_covariance(X=X, Y=X)


def semi_covariance_matrix(X):
    """Matrix of pairwise downside semi-covariances between the columns of X.

    Parameters
    ----------
    X : array-like of shape (N, M)
        N observations (rows) of M variables (columns).

    Returns
    -------
    numpy.ndarray of shape (M, M)
        Entry ``[i, j]`` is the sum over rows of the products of the
        below-mean deviations of columns ``i`` and ``j``, divided by N.
        The matrix is symmetric.
    """
    X = np.asarray(X, dtype=float)
    N, _ = X.shape  # unpacking also rejects inputs that are not 2-D
    # Centre every column on its own mean and keep only the non-positive deviations.
    downside = np.minimum(X - X.mean(axis=0), 0)
    # One matrix product yields all M*M sums of products at once, instead of
    # M*M separate Python-level calls that each recompute the column means.
    return downside.T @ downside / N

In [6]:
delta_t = 90  # number of return observations (rows) used for the estimates below
stock_returns = stock_quotations.xs('close', level=1, axis=1).pct_change()
# Positional window: the delta_t rows that precede the last row (the last row itself is left out).
last_returns = stock_returns.iloc[-delta_t-1:-1]

stock_returns_mean = last_returns.mean() # predicted return for next day

# variance as risk
stock_returns_var = last_returns.var() # risk
stock_returns_cov = last_returns.cov() # covariance matrix

# semi-variance as risk
# Evaluated column by column: passing the whole DataFrame to semi_variance depends on
# np.sum(DataFrame) reducing over axis 0 only, a behavior pandas has deprecated in favor
# of reducing over both axes. Per-column evaluation gives one value per stock either way.
stock_returns_semi_var = last_returns.apply(semi_variance)
stock_returns_semi_cov = semi_covariance_matrix(np.array(last_returns))

In [7]:
def portfolio_return_value(X):
    """Return the matrix product of X with the module-level ``stock_returns_mean``.

    For a 2-D X this gives one value per row of X.
    """
    mean_returns = stock_returns_mean
    return X @ mean_returns


def portfolio_risk_var(X):
    """Variance-based risk of every portfolio (row) in X.

    Parameters
    ----------
    X : array-like of shape (P, M)
        One portfolio per row, one column per stock.

    Returns
    -------
    numpy.ndarray of shape (P,)
        ``x @ C @ x`` for every row ``x`` of X, where ``C`` is the module-level
        ``stock_returns_cov``.
    """
    portfolios = np.asarray(X)
    covariance = np.asarray(stock_returns_cov)
    # Row-wise quadratic form. Equivalent to the diagonal of X @ C @ X.T, but
    # without building the full (P, P) matrix only to discard its off-diagonal part.
    return np.sum((portfolios @ covariance) * portfolios, axis=1)


def portfolio_risk_semi_var(X):
    """Semi-variance-based risk of every portfolio (row) in X.

    Parameters
    ----------
    X : array-like of shape (P, M)
        One portfolio per row, one column per stock.

    Returns
    -------
    numpy.ndarray of shape (P,)
        ``x @ S @ x`` for every row ``x`` of X, where ``S`` is the module-level
        ``stock_returns_semi_cov``.
    """
    portfolios = np.asarray(X)
    semi_covariance_mat = np.asarray(stock_returns_semi_cov)
    # Row-wise quadratic form. Equivalent to the diagonal of X @ S @ X.T, but
    # without building the full (P, P) matrix only to discard its off-diagonal part.
    return np.sum((portfolios @ semi_covariance_mat) * portfolios, axis=1)


def portfolio_objective_function(risk_function=portfolio_risk_var,
                                 return_function=portfolio_return_value):
    """Build a two-column objective from a risk function and a return function.

    The returned callable takes X and yields ``np.c_[risk, -ret]``: the first
    column is ``risk_function(X)`` and the second is the negated
    ``return_function(X)``.
    """
    def objective(X):
        risk = risk_function(X)
        negated_return = -return_function(X)
        return np.c_[risk, negated_return]

    return objective

# MOEA/D algorithm

In [None]:
def default_weight_vectors(N):
    """Return N two-component weight vectors as an (N, 2) array.

    The first column runs evenly from 0 to 1; the second column is one minus
    the first, so each row sums to 1.
    """
    first_weight = np.linspace(0, 1, N)
    second_weight = 1 - first_weight
    return np.column_stack((first_weight, second_weight))


def moead(objective_function,
          chromosome_length=20,
          number_of_iterations=1000,
          number_of_subproblems=100,
          weight_vectors=default_weight_vectors(100),
          neighborhood_size=20):
    np.random.seed(7896)

    pareto_population = []

    weight_vectors_2 = np.sum(weight_vectors**2, axis=1)
    distance_matrix = np.sqrt(weight_vectors_2.reshape(-1,1) - 2*(weight_vectors @ weight_vectors.T) + weight_vectors_2)

    neigborhoods = np.empty((number_of_subproblems, neighborhood_size))
    for i in range(number_of_subproblems):
         dupa