In [1]:
import pickle
import datetime as dt
import pandas as pd
import math
import numpy as np
import copy
import random

In [2]:
def load_adj_price_dict():
    """Load the pickled price data stored in ``adj_price_dict.p``.

    The path is relative, so the file is looked up in the current working
    directory.

    Returns:
        The unpickled object. The rest of this notebook treats it as a dict
        keyed by symbol whose values are per-symbol price DataFrames.

    Raises:
        FileNotFoundError: if ``adj_price_dict.p`` does not exist.
    """
    # NOTE: unpickling can execute arbitrary code - only load files you trust.
    # The ``with`` block closes the handle even if unpickling fails.
    with open("adj_price_dict.p", "rb") as handle:
        adj_price_dict = pickle.load(handle)

    return adj_price_dict

# Defining the Portfolio Object:

In [3]:
class Portfolio(object):
    """A basket of symbols sized from a capital amount and per-symbol weights.

    Prices are read from the module-level ``price_df`` frame: for a symbol
    ``S`` the column ``"S_adjusted_close"`` is looked up at index label ``t``.
    """

    def __init__(self,symbols,weights,capital,init_time):
        """Store the inputs and size the initial positions at ``init_time``.

        ``symbols`` and ``weights`` are paired by position; ``weights`` are
        used as fractions of ``capital``.
        """
        self.symbols = symbols
        self.weights = weights
        self.capital = capital
        # symbol -> weight, paired by position in the two sequences
        self.mapping_dict = {sym:self.weights[pos] for pos,sym in enumerate(self.symbols)}
        self.init_time = init_time
        self.init_positions = self.compute_position_size(init_time)

    def compute_position_size(self,t):
        """Return ``{symbol: share count}`` bought at time ``t``.

        Each symbol receives ``capital * weight`` and the share count is
        floored to a whole number.
        """
        shares = {}
        for sym in self.symbols:
            unit_price = price_df[sym+"_adjusted_close"].loc[t]
            fraction = self.mapping_dict[sym]
            shares[sym] = math.floor(self.capital*fraction/unit_price)
        return shares

    def price_dict(self,t):
        """Return ``{symbol: adjusted close at t}`` for every held symbol."""
        quotes = {}
        for sym in self.symbols:
            quotes[sym] = price_df[sym+"_adjusted_close"].loc[t]
        return quotes

    def position_value(self,t,positions):
        """Return the total value at ``t`` of the given ``{symbol: shares}``."""
        quotes = self.price_dict(t)
        total = 0
        for sym in self.symbols:
            total = total + positions[sym]*quotes[sym]
        return total

    def value(self,t):
        """Return the value at ``t`` of the positions opened at ``init_time``."""
        return self.position_value(t,self.init_positions)

    def compute_timeframe_performance(self,t_0,t_1):
        """Return ``(absolute change, relative change)`` between two times.

        Positions are re-sized at ``t_0`` and then held until ``t_1``.
        """
        holdings = self.compute_position_size(t_0)
        start_value = self.position_value(t_0,holdings)
        end_value = self.position_value(t_1,holdings)
        change = end_value - start_value
        return change, change/start_value

    def performance(self,t):
        """Return the performance from ``init_time`` up to ``t``."""
        return self.compute_timeframe_performance(self.init_time,t)
            

In [4]:
def construct_adj_close_df(adj_price_dict):
    """Build one wide frame of adjusted closes, one column per symbol.

    For every key ``K`` of ``adj_price_dict`` the column
    ``"5. adjusted close"`` of its frame is exposed as ``"K_adjusted_close"``.
    A frame that already carries a ``"K_adjusted_close"`` column is accepted
    as-is. The input frames are not modified.

    Args:
        adj_price_dict: mapping of symbol -> DataFrame holding that symbol's
            prices.

    Returns:
        A DataFrame whose columns are the per-symbol adjusted closes converted
        with ``pd.to_numeric``; an empty DataFrame for an empty mapping.
    """
    source_column = "5. adjusted close"
    closes = []
    for key, frame in adj_price_dict.items():
        target_column = key + "_adjusted_close"
        if target_column in frame.columns:
            closes.append(frame[target_column])
        else:
            # Series.rename returns a renamed copy, so the caller's frame
            # keeps its original column names.
            closes.append(frame[source_column].rename(target_column))

    if not closes:
        # pd.concat refuses an empty list; keep the empty-in/empty-out contract.
        return pd.DataFrame()

    # One concat over all columns instead of growing a frame inside the loop.
    adj_close_df = pd.concat(closes, axis=1)
    return adj_close_df.apply(pd.to_numeric)


In [5]:
# Load the pickled price data once; the following cells derive symbol_list
# (from its keys) and price_df (via construct_adj_close_df) from it.
price_dict = load_adj_price_dict()


In [6]:
# Every key of the loaded price data is treated as a tradable symbol.
symbol_list = list(price_dict.keys())

In [16]:
# Wide frame with one "<symbol>_adjusted_close" column per symbol.
# Portfolio reads this module-level name directly, so it must exist before
# any Portfolio is created.
price_df = construct_adj_close_df(price_dict)

In [19]:
# Start time = first index label of price_df. pickle_query_df also reads t_0
# as a module-level name.
t_0 = price_df.index[0]
# Example portfolio: weights 0.4 AAPL / 0.6 XOM with 10000 of starting capital.
p = Portfolio(("AAPL","XOM"),(0.4,0.6),10000,t_0)

# Tracking Portfolio performance:

In [20]:
def get_portfolio_df(p,prices):
    """Build a per-row tracking frame for portfolio ``p``.

    Args:
        p: object exposing ``symbols`` and ``init_positions``
            (``{symbol: share count}``).
        prices: frame with one ``"<symbol>_adjusted_close"`` column per symbol.

    Returns:
        A DataFrame indexed by ``Date`` (taken from ``prices.index``) holding,
        per symbol, the held share count (``Weight_<symbol>``), the price, the
        row-over-row and cumulative return, the position size and the position
        gain, plus the portfolio-level price, return and cumulative return.
    """
    symbols = p.symbols
    # Share counts stay fixed at the initial positions; this is the place to
    # change if the portfolio should be adjusted over time.
    held_shares = p.init_positions

    P_df = pd.DataFrame()
    P_df["Date"] = prices.index
    P_df = P_df.set_index(["Date"])
    n_rows = P_df.shape[0]
    P_df["Portfolio_Price"] = np.zeros(n_rows)

    for sym in symbols:
        weight_col = "Weight_"+sym
        return_col = "Return_"+sym
        size_col = sym+"_Position_size"

        P_df[weight_col] = held_shares[sym]
        P_df[sym] = prices[sym+"_adjusted_close"]
        P_df[return_col] = P_df[sym].pct_change()
        P_df["Total_Return_"+sym] = (1.0 + P_df[return_col]).cumprod()
        P_df[size_col] = P_df[weight_col]*P_df[sym]
        P_df[sym+"_Position_gain"] = P_df[return_col]*P_df[weight_col]*P_df[sym]

        # running sum of position sizes across symbols
        P_df["Portfolio_Price"] = P_df["Portfolio_Price"] + P_df[size_col]

    P_df["Portfolio_Return"] = P_df["Portfolio_Price"].pct_change()
    P_df["Portfolio_Total_Return"] = (1.0 + P_df["Portfolio_Return"]).cumprod()

    return P_df
       

In [21]:
# Tracking frame for the example portfolio p over the full price history.
p_df = get_portfolio_df(p,price_df)

In [22]:
def get_drawdown_periods(P_df):
    """Return the lengths of consecutive-day streaks below the final price.

    A row counts as "below" when its ``Portfolio_Price`` is lower than the
    last ``Portfolio_Price`` in ``P_df``. Walking over those rows in order, a
    streak grows by one for every pair of neighbouring rows exactly one
    calendar day apart; a larger gap ends the streak. The recorded length is
    the number of such one-day steps, so an isolated day is not reported.

    Args:
        P_df: frame with a ``Portfolio_Price`` column. The index labels must
            support subtraction yielding a ``pd.Timedelta`` (e.g. a
            DatetimeIndex).

    Returns:
        List of streak lengths in chronological order; empty when there are
        none or when ``P_df`` has no rows.
    """
    if len(P_df) == 0:
        return []

    # Use the frame that was passed in, not a notebook-level global.
    prices = P_df["Portfolio_Price"]
    below_final = P_df.loc[(prices - prices.iloc[-1]) < 0].index

    one_day = pd.Timedelta(days=1)
    drawdowns = []
    streak = 0
    for previous_day, current_day in zip(below_final[:-1], below_final[1:]):
        if current_day - previous_day == one_day:
            streak += 1
        elif streak:
            drawdowns.append(streak)
            streak = 0
    # A streak still open at the end of the data is a finished streak too.
    if streak:
        drawdowns.append(streak)
    return drawdowns

In [23]:
def summary(P_df):
    """Condense a portfolio tracking frame into a dict of summary metrics.

    Args:
        P_df: frame with ``Portfolio_Price``, ``Portfolio_Return`` and
            ``Portfolio_Total_Return`` columns (as built by
            ``get_portfolio_df``).

    Returns:
        Dict with, in this order:
        ``start_investment`` / ``end_pos_size`` - first / last portfolio price;
        ``total_return`` - last cumulative return minus 1;
        ``std_return`` / ``mean_return`` - of the row-over-row returns;
        ``drawdown`` - largest row return minus the last row return, floored at 0;
        ``max_drawdown`` - the largest such gap over the rows from the peak of
        ``Portfolio_Total_Return`` to the end;
        ``longest_drawdown_period`` / ``second_longest_drawdown_period`` and
        their counts - from ``get_drawdown_periods`` (0 when unavailable);
        ``risk_return_ratio`` - ``total_return / max_drawdown``;
        ``signal_to_noise`` - ``mean_return / std_return``.
    """
    returns = P_df["Portfolio_Return"]
    total_returns = P_df["Portfolio_Total_Return"]

    start_investment = P_df["Portfolio_Price"].iloc[0]
    end_position_size = P_df["Portfolio_Price"].iloc[-1]

    total_return = total_returns.iloc[-1] - 1
    std_return = returns.std()
    mean_return = returns.mean()

    # Hoisted: max(returns - r_i) equals max(returns) - r_i, so the Series
    # maximum only has to be computed once instead of once per row.
    best_return = returns.max()

    def drawdown_(i):
        # max([0, nan]) yields 0, which covers rows without a return.
        return max([0, best_return - returns.iloc[i]])

    drawdown = drawdown_(-1)
    # Positional row of the cumulative-return peak of *this* frame.
    argmax = total_returns.reset_index(drop=True).idxmax()
    max_drawdown = max([0] + [drawdown_(i) for i in range(argmax, P_df.shape[0])])

    drawdown_periods = get_drawdown_periods(P_df)
    # default=0 keeps the summary usable when there are no (or no second) streaks.
    longest_drawdown_period = max(drawdown_periods, default=0)
    second_longest_drawdown_period = max(
        [x for x in drawdown_periods if x != longest_drawdown_period], default=0
    )
    num_o_longest_drawdown = drawdown_periods.count(longest_drawdown_period)
    num_o_sec_longest_drawdown = drawdown_periods.count(second_longest_drawdown_period)

    summary_dict = {"start_investment":start_investment,
                    "end_pos_size":end_position_size,
                    "total_return":total_return,
                    "std_return":std_return,
                    "mean_return":mean_return,
                    "drawdown":drawdown,
                    "max_drawdown":max_drawdown,
                    "longest_drawdown_period":longest_drawdown_period,
                    "second_longest_drawdown_period":second_longest_drawdown_period,
                    "num_o_longest_drawdown":num_o_longest_drawdown,
                    "num_o_sec_longest_drawdown":num_o_sec_longest_drawdown,
                    "risk_return_ratio":total_return/max_drawdown,
                    "signal_to_noise":mean_return/std_return
                    }

    return summary_dict

In [24]:
def visualize_porfolio_performance(p_df):
    """Plot ``Portfolio_Price`` and ``Portfolio_Return`` as separate subplots.

    Nothing is returned; the figure is produced as a side effect of ``plot``.
    """
    plotted_columns = ["Portfolio_Price","Portfolio_Return"]
    p_df[plotted_columns].plot(figsize=(15, 5), subplots=True)

{'start_investment': 9938.5094,
 'end_pos_size': 10004.27,
 'total_return': 0.00661674677291102,
 'std_return': 0.020408119888738804,
 'mean_return': 0.0002737614102947065,
 'drawdown': 0.07064555781386173,
 'max_drawdown': 0.10759840308104007,
 'longest_drawdown_period': 2,
 'second_longest_drawdown_period': 1,
 'num_o_longest_drawdown': 1,
 'num_o_sec_longest_drawdown': 2,
 'risk_return_ratio': 0.061494841776856796,
 'signal_to_noise': 0.01341433761596863}

# Querying possible Portfolio Performances:

In [25]:
def weight_gen_helper(v,index,end):
    """Fill ``v`` in place with random integer allocations and return it.

    Slots are taken from the front of ``index`` one at a time. Every slot but
    the last receives a random integer in ``[0, end]`` and ``end`` shrinks by
    that amount; the last remaining slot receives whatever is left. Filling
    stops early as soon as ``v`` sums to 10.

    Note: ``index`` is consumed - each filled slot (except the last) is
    removed from the caller's list.
    """
    while v.sum() != 10:
        if len(index) == 1:
            # last slot takes the remainder
            v[index[0]] = end
            return v
        share = random.sample(range(end+1),1)[0]
        v[index[0]] = share
        del index[0]
        end = end - share
    return v

def rand_weight_generator(num_of_stocks):
    """Return one random allocation of 10 units across ``num_of_stocks`` slots.

    Starts from an all-zero vector and lets ``weight_gen_helper`` fill every
    slot position.
    """
    allocation = np.zeros(num_of_stocks)
    open_slots = list(range(len(allocation)))
    return weight_gen_helper(allocation, open_slots, 10)
        
def sample_weights(num_of_stocks,N):
    """Draw ``N`` random weight tuples and return the distinct ones as a list.

    Each draw allocates 10 units via ``weight_gen_helper`` and divides by 10,
    so every component of a weight tuple is a multiple of 0.1.
    """
    def draw_once():
        allocation = np.zeros(num_of_stocks)
        open_slots = list(range(len(allocation)))
        return tuple(weight_gen_helper(allocation, open_slots, 10)/10)

    draws = [draw_once() for _ in range(N)]
    # de-duplicate; the resulting order is whatever the set yields
    return list(set(draws))

In [26]:

def pickle_query_df(symbol_list,price_df,n_samples=100000,capital=100000):
    """Score randomly weighted portfolios and pickle the results to ``query_df.p``.

    For every distinct weight tuple produced by ``sample_weights`` a
    ``Portfolio`` starting at the module-level ``t_0`` is built, tracked with
    ``get_portfolio_df`` and condensed with ``summary``. The stored frame has
    one row per weight tuple: the ``"<symbol>_Weight"`` columns followed by the
    summary metrics.

    Args:
        symbol_list: symbols to allocate capital across.
        price_df: price frame handed to ``get_portfolio_df``.
            NOTE(review): ``Portfolio`` itself reads prices from the
            module-level ``price_df`` - presumably the same frame; confirm.
        n_samples: number of random draws before de-duplication. The default
            of 100000 takes a really long time; pass a smaller value to iterate.
        capital: starting capital of every candidate portfolio.

    Returns:
        None. The result is written to ``query_df.p`` in the working directory.
    """
    weights = sample_weights(len(symbol_list),n_samples)

    summaries = []
    for weight in weights:
        p = Portfolio(symbol_list,weight,capital,t_0)
        summaries.append(summary(get_portfolio_df(p,price_df)))

    # All summaries share the same keys; take the column names from the first.
    col_vals = list(summaries[0].keys()) if summaries else []
    data = [weight+tuple(s.values()) for weight,s in zip(weights,summaries)]
    query_df = pd.DataFrame(data ,columns = [str(symbol)+"_Weight" for symbol in symbol_list] + col_vals)

    # ``with`` guarantees the file is flushed and closed once the dump finishes.
    with open("query_df.p", "wb") as handle:
        pickle.dump(query_df, handle)
    

In [27]:
#pickle_query_df(symbol_list,price_df)

In [28]:
def load_query_df():
    """Load the pickled query frame from ``query_df.p``.

    The path is relative, so the file is looked up in the current working
    directory; it is the file ``pickle_query_df`` writes.

    Returns:
        The unpickled object.

    Raises:
        FileNotFoundError: if ``query_df.p`` does not exist.
    """
    # NOTE: unpickling can execute arbitrary code - only load files you trust.
    # The ``with`` block closes the handle even if unpickling fails.
    with open("query_df.p", "rb") as handle:
        query_df = pickle.load(handle)
    return query_df

In [29]:
# Requires query_df.p to exist already (the pickle_query_df call that creates
# it is commented out above). get_potential_portfolios and
# get_low_risk_portfolios read this module-level frame.
q_df = load_query_df()

In [30]:
def get_potential_portfolios(risk_quantile,reward_quantile):
    """Select rows of the module-level ``q_df`` with low risk and high reward.

    A row is kept when ``std_return`` and ``drawdown`` are strictly below their
    ``risk_quantile`` quantile and ``mean_return``, ``signal_to_noise`` and
    ``risk_return_ratio`` are strictly above their ``reward_quantile`` quantile.

    Returns:
        The matching rows, restricted to the ``"<symbol>_Weight"`` columns of
        the module-level ``symbol_list`` followed by the risk/reward metrics.
    """
    def below(column, q):
        return q_df[column] < q_df[column].quantile(q)

    def above(column, q):
        return q_df[column] > q_df[column].quantile(q)

    keep = (
        below("std_return", risk_quantile)
        & below("drawdown", risk_quantile)
        & above("mean_return", reward_quantile)
        & above("signal_to_noise", reward_quantile)
        & above("risk_return_ratio", reward_quantile)
    )
    query = q_df.loc[keep]

    weight_columns = [symbol + "_Weight" for symbol in symbol_list]
    metric_columns = ['std_return', 'mean_return', 'drawdown','max_drawdown', 'risk_return_ratio', 'signal_to_noise']
    return query[weight_columns + metric_columns]
        

In [31]:
def get_low_risk_portfolios(risk_quantile,worst_case_quantile):
    """Select rows of the module-level ``q_df`` with low risk metrics.

    A row is kept when ``std_return`` and ``drawdown`` are strictly below their
    ``risk_quantile`` quantile and ``max_drawdown`` is strictly below its
    ``worst_case_quantile`` quantile.

    Returns:
        The matching rows, restricted to the ``"<symbol>_Weight"`` columns of
        the module-level ``symbol_list`` followed by the risk/reward metrics.
    """
    def below(column, q):
        return q_df[column] < q_df[column].quantile(q)

    keep = (
        below("std_return", risk_quantile)
        & below("drawdown", risk_quantile)
        & below("max_drawdown", worst_case_quantile)
    )
    query = q_df.loc[keep]

    weight_columns = [symbol + "_Weight" for symbol in symbol_list]
    metric_columns = ['std_return', 'mean_return', 'drawdown','max_drawdown', 'risk_return_ratio', 'signal_to_noise']
    return query[weight_columns + metric_columns]

In [34]:
def copy_portfolio(pot_df,i):
    """Rebuild the tracking frame for the ``i``-th row of ``pot_df``.

    The first ``len(symbol_list)`` values of that row are taken as the weights
    for the module-level ``symbol_list``. A ``Portfolio`` with 100000 of
    capital starting at the first index label of the module-level ``price_df``
    is created and passed to ``get_portfolio_df``.
    """
    chosen_row = pot_df.iloc[i]
    n_symbols = len(symbol_list)
    weights = tuple(chosen_row.iloc[j] for j in range(n_symbols))
    start_time = price_df.index[0]
    p = Portfolio(tuple(symbol_list),weights,100000,start_time)
    return get_portfolio_df(p,price_df)