In [4]:
import pandas as pd
from pypfopt.efficient_frontier import EfficientFrontier
from pypfopt import risk_models
from pypfopt import expected_returns
from pypfopt import discrete_allocation

In [3]:
%%time
# Read in price data
df = pd.read_csv("tests/stock_prices.csv", parse_dates=True, index_col="date")

# Calculate expected returns and sample covariance
mu = expected_returns.mean_historical_return(df)
S = risk_models.sample_cov(df)

# Optimise for maximal Sharpe ratio
ef = EfficientFrontier(mu, S)
raw_weights = ef.max_sharpe()
cleaned_weights = ef.clean_weights()
print(cleaned_weights)
ef.portfolio_performance(verbose=True)

{'UAA': 0.0, 'AAPL': 0.09202, 'AMZN': 0.07158, 'BBY': 0.06129, 'FB': 0.19856, 'T': 0.0, 'BABA': 0.09642, 'MA': 0.24562, 'SHLD': 0.0, 'RRC': 0.0, 'JPM': 0.0, 'SBUX': 0.03769, 'PFE': 0.18413, 'BAC': 0.0, 'GOOG': 0.01269, 'XOM': 0.0, 'GM': 0.0, 'AMD': 0.0, 'GE': 0.0, 'WMT': 0.0}
Expected annual return: 33.0%
Annual volatility: 21.7%
Sharpe Ratio: 1.43
Wall time: 316 ms


In [5]:
latest_prices = discrete_allocation.get_latest_prices(df)
allocation, leftover = discrete_allocation.portfolio(
    raw_weights, latest_prices, total_portfolio_value=10000
)
print(allocation)
print("Funds remaining: ${:.2f}".format(leftover))

11 out of 20 tickers were removed
Funds remaining: 12.15
{'AMZN': 0, 'BBY': 9, 'FB': 12, 'MA': 14, 'SBUX': 6, 'BABA': 5, 'AAPL': 5, 'GOOG': 1, 'PFE': 51}
Funds remaining: $12.15


In [194]:
df.shape

(6215, 500)

In [197]:
df.isna().sum().values[df.isna().sum().values>100].shape

(113,)

In [202]:
df.isna().sum().values[df.isna().sum().values>0].shape

(0,)

In [207]:
%%time
# Read in price data
df = pd.read_csv("data/10traders_1year.txt", parse_dates=True, index_col="TIME").dropna()

# Calculate expected returns and sample covariance
mu = expected_returns.mean_historical_return(df)
S = risk_models.sample_cov(df)

# Optimise for maximal Sharpe ratio
ef = EfficientFrontier(mu, S)
raw_weights = ef.max_sharpe()
cleaned_weights = ef.clean_weights()
print(cleaned_weights)
ef.portfolio_performance(verbose=True)

{'374843': 0.0, '367902': 0.46642, '372549': 0.0, '376335': 0.0, '349145': 0.0, '332613': 0.0, '378361': 0.0, '361057': 0.53358, '379751': 0.0, '208007': 0.0}
Expected annual return: 3.2%
Annual volatility: 10.4%
Sharpe Ratio: 0.11
Wall time: 153 ms


In [208]:
raw_weights

{'208007': 8.413408858487514e-17,
 '332613': 4.85722573273506e-17,
 '349145': 0.0,
 '361057': 0.5335772129198624,
 '367902': 0.46642278708013774,
 '372549': 0.0,
 '374843': 0.0,
 '376335': 0.0,
 '378361': 0.0,
 '379751': 1.8214596497756474e-17}

In [None]:
test_evaluator = Evaluator(df, 0.8, 1.0)
print("Test result:", test_evaluator.evaluate_asset(asset))

In [186]:
latest_prices = discrete_allocation.get_latest_prices(df)
allocation, leftover = discrete_allocation.portfolio(
    raw_weights, latest_prices, total_portfolio_value=10000
)
print(allocation)
print("Funds remaining: ${:.2f}".format(leftover))

8 out of 10 tickers were removed
Funds remaining: 196.40
{'367902': 14, '361057': 4}
Funds remaining: $196.40


In [8]:
import numpy as np
from scipy.optimize import minimize

In [43]:
df.values

array([[ 768.68,  153.37,  234.35, ...,  233.16,  144.72,  571.33],
       [ 768.68,  153.37,  234.35, ...,  234.95,  144.72,  571.33],
       [ 768.68,  153.37,  234.35, ...,  238.73,  144.72,  571.33],
       ...,
       [1310.88,  151.27,  294.25, ...,  336.41,  145.67,  587.83],
       [1310.88,  151.27,  294.43, ...,  334.39,  145.85,  587.83],
       [1310.88,  151.27,  294.67, ...,  335.45,  145.76,  587.83]])

In [187]:
df.values.shape

(6215, 500)

In [200]:
class Evaluator():
    def __init__(self, df, from_percent, to_percent):
        self.returns=np.array([])
        df = df.dropna()
        for tr in range(df.values.shape[1]):
            t = df.values[:,tr]
            t = t[~np.isnan(t)]
            n = len(t)
            from_index = int(n * from_percent)
            to_index = min(n - 1, int(n * to_percent))
            self.returns=np.append(self.returns, t[to_index]-t[from_index])

    def evaluate_asset(self, normalized_asset, cut_threshold=0.04):
        normalized_asset[normalized_asset < cut_threshold] = 0
        #print("normalized_asset", normalized_asset)
        #print("returns", self.returns)
        #print("res", normalized_asset * self.returns)
        #print("sum", (normalized_asset * self.returns).sum())
        return (normalized_asset * self.returns).sum()

In [177]:
def stupid_normalize(x):
    #print("x.sum", x.sum())
    return x / x.sum()

def normalize(x):
    #print("x to be normed", x)
    x[x<0]=np.exp(x[x<0])
    x[x>=0]=x[x>=0]+1
    #print("x normed", x)
    return stupid_normalize(x)

def normalized_asset(b, X):
    return normalize(X.dot(b[1:])+b[0])

def regularization(b):
    return (b ** 2).sum()

def obj_function(b, X, k, evaluate):
    assert X.shape[1] == b.shape[0] - 1, "{} != {}".format(X.shape[1], b.shape[0])
    #print("b", b)
    ev = -evaluate(normalized_asset(b, X))
    reg = k * regularization(b)
    #print("evaluation", ev)
    #print("regularization", reg)
    return ev + reg

In [178]:
def prepare_features(file_name):
    fdf = pd.read_csv(file_name)
    return fdf.drop('accountId', axis=1).dropna(axis='columns')

In [182]:
# k is regularization
def cool_algo(series_path, static_path, k=0.01, train_test_fragmentation=0.8):
    df = pd.read_csv(series_path, parse_dates=True, index_col="TIME")
    X = prepare_features(static_path).values
    print("{} features extracted".format(X.shape[0]))

    b_len = X.shape[1] + 1
    xinit = np.array([1.0 / b_len] * b_len)

    #bnds = [(0, None)] * b_len
    
    # the greater value is for the better res
    train_evaluator = Evaluator(df, 0.0, train_test_fragmentation)
    test_evaluator = Evaluator(df, train_test_fragmentation, 1.0)
    def evaluate(normalized_asset):
        return train_evaluator.evaluate_asset(normalized_asset)

    res = minimize(obj_function, args=(X, k, evaluate), x0=xinit, method='SLSQP')

    asset = normalized_asset(res.x, X)
    print("Train result:", evaluate(asset))
    print("Test result:", test_evaluator.evaluate_asset(asset))
    print("Asset:", asset)
    return res.x

In [184]:
TODO: че я сломал когда переписал Evaluator??
b = cool_algo('data/10traders_1month.txt', 'data/10traders.csv')

10 features extracted
Train result: 8.358719944799496
Test result: -3.9793905957299156
Asset: [0.         0.         0.         0.         0.99984688 0.
 0.         0.         0.         0.        ]


In [185]:
b = cool_algo('data/500traders_1year.txt', 'data/500traders.csv')

500 features extracted
Train result: 40573.304134020786
Test result: 1971.1893498736406
Asset: [0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.99995401 0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.      

In [143]:
# print features weights
from IPython.core.display import HTML
features=prepare_features('data/10traders.csv')
f = features.iloc[0:0]
f.loc[0]=b[1:]
display(HTML(f.T.to_html()))

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """


Unnamed: 0,0
ageInDays,0.004559
aggressiveness,0.004622
averageDayLoss,0.004623
averageDayProfit,0.004622
averageLeverage,0.004624
averageReturnDay,0.004622
averageReturnHalfYear,0.004629
averageReturnMonth,0.004623
averageReturnQuarterYear,0.004625
averageReturnWeek,0.004621
