In [1]:
import pickle
import datetime as dt
import pandas as pd
import mplfinance as mpf
import numpy as np
import seaborn as sns
import copy


### Load in the Price Dictionary

In [2]:
def load_adj_price_dict(path="adj_price_dict.p"):
    """Load the pickled price dictionary from disk.

    Parameters
    ----------
    path : str or path-like, optional
        Location of the pickle file. Defaults to ``"adj_price_dict.p"``,
        resolved relative to the current working directory.

    Returns
    -------
    object
        The object stored in the pickle file. The rest of this notebook
        treats it as a dict mapping a symbol to a price DataFrame.
    """
    # NOTE(security): unpickling can execute arbitrary code, so only load
    # files that were produced by a trusted source.
    # The context manager closes the file handle even if unpickling raises.
    with open(path, "rb") as price_file:
        adj_price_dict = pickle.load(price_file)
    return adj_price_dict

In [3]:
# Load the pickled prices once; later cells read them through the global `price_dict`.
price_dict = load_adj_price_dict()


In [4]:
# Every key of the price dictionary is treated as one ticker symbol.
symbol_list = list(price_dict)
#symbol_list = ["ADDDF","BAYZF","DB","TSLA","VLKAF"]

In [5]:
def get_ohlc_data(price_dict,key):
    """Build an Open/High/Low/Close/Volume DataFrame for one symbol.

    Parameters
    ----------
    price_dict : mapping
        Maps a symbol to a DataFrame containing the columns ``"1. open"``,
        ``"2. high"``, ``"3. low"``, ``"4. close"`` and ``"6. volume"``.
    key : hashable
        Symbol to look up in ``price_dict``.

    Returns
    -------
    pandas.DataFrame
        Columns ``Open, High, Low, Close, Volume`` (in that order) with the
        index named ``"Date"``. The frame stored in ``price_dict`` is left
        unchanged.
    """
    column_map = {
        "1. open": "Open",
        "2. high": "High",
        "3. low": "Low",
        "4. close": "Close",
        "6. volume": "Volume",
    }
    # Work on a deep copy so that naming the index does not leak into price_dict.
    ohlc = copy.deepcopy(price_dict[key])
    ohlc.index.name = 'Date'
    ohlc = ohlc.rename(columns=column_map)
    # dicts preserve insertion order, so this yields Open, High, Low, Close, Volume.
    return ohlc[list(column_map.values())]

### Construction of the Adjusted Close Dataframe
To compare prices (and, later on, returns) of different stocks we limit ourselves to the adjusted closing price of each day.
We choose the adjusted closing price because it already takes stock splits into account, which otherwise could throw our analysis off.


In [6]:
def get_adj_close_df(adj_price_dict):
    """Collect the adjusted close of every symbol into one wide DataFrame.

    Parameters
    ----------
    adj_price_dict : mapping
        Maps a symbol (str) to a DataFrame that has a ``"5. adjusted close"``
        column. Frames whose column is already named
        ``"<symbol>_adjusted_close"`` are accepted as well.

    Returns
    -------
    pandas.DataFrame
        One numeric column ``"<symbol>_adjusted_close"`` per symbol, aligned
        on the frames' indexes. An empty DataFrame if the mapping is empty.

    Notes
    -----
    The frames inside ``adj_price_dict`` are not modified.
    """
    adj_close_columns = []
    for key, frame in adj_price_dict.items():
        target_name = key + "_adjusted_close"
        # Accept frames whose column was already renamed (for example by an
        # earlier in-place version of this function) so repeated calls work.
        source_name = target_name if target_name in frame.columns else "5. adjusted close"
        # Series.rename returns a new Series, so the caller's frame keeps its columns.
        adj_close_columns.append(frame[source_name].rename(target_name))

    if not adj_close_columns:
        # pd.concat raises on an empty list; keep the empty-frame result instead.
        return pd.DataFrame()

    # Concatenate once instead of growing the frame inside the loop.
    adj_close_df = pd.concat(adj_close_columns, axis=1)
    return adj_close_df.apply(pd.to_numeric)


In [7]:
# Wide frame with one "<symbol>_adjusted_close" column per symbol in price_dict.
price_df = get_adj_close_df(price_dict)

### Calculate Return for Symbols
Prices are statistically rather uninteresting: they usually trend over time, so their mean and standard deviation keep changing. Returns, on the other hand, are often stationary, which makes summary statistics such as the mean and the standard deviation far more descriptive than they are for a non-stationary time series.
This is why we care far more about returns than about the actual closing prices.

In [8]:
def get_return_df(adj_close_df,symbol_list):
    """Turn adjusted closing prices into period-over-period returns.

    Parameters
    ----------
    adj_close_df : pandas.DataFrame
        Price frame with ``"<symbol>_adjusted_close"`` columns.
    symbol_list : iterable of str
        Symbols whose columns are renamed to ``"<symbol>_return"``; columns
        of symbols not listed keep their original name.

    Returns
    -------
    pandas.DataFrame
        ``pct_change()`` of a copy of ``adj_close_df`` with the renamed columns.
    """
    # pct_change runs on a copy, so the price frame passed in is not modified.
    returns = copy.deepcopy(adj_close_df).pct_change()
    column_map = {}
    for symbol in symbol_list:
        column_map[symbol+"_adjusted_close"] = symbol+"_return"
    return returns.rename(columns=column_map)


### Helping with Diversification:
- get_diversification_candidates(corr_df, epsilon)
If we trade a portfolio of stocks and want it to be diversified, we want stocks whose returns have a correlation close to 0.
This method returns a dataframe that keeps only the correlations lying inside an "epsilon interval" (-epsilon, epsilon) around 0; stocks without any such pair are dropped.

- get_neg_corr(corr_df)
If we want to hedge a position, we want stocks that are negatively correlated with it (in this notebook `corr_df` holds the correlations of the returns).
This method returns a dataframe that keeps only the negative correlations; stocks without any negatively correlated partner are dropped.



In [9]:
# Returns for every symbol, derived from the adjusted closing prices.
return_df = get_return_df(price_df,symbol_list)
# Pairwise correlation between the return columns; input for the helpers below.
corr_df = return_df.corr()

In [10]:
def get_diversification_candidates(corr_df,epsilon):
    """Keep only the correlations that lie strictly between -epsilon and epsilon.

    Parameters
    ----------
    corr_df : pandas.DataFrame
        Correlation matrix.
    epsilon : float
        Half-width of the open interval around 0 that counts as uncorrelated.

    Returns
    -------
    pandas.DataFrame
        ``corr_df`` with every value outside the interval replaced by NaN,
        and with rows and then columns that are entirely NaN removed.
    """
    near_zero = (corr_df > -epsilon) & (corr_df < epsilon)
    candidates = corr_df.where(near_zero)
    candidates = candidates.dropna(axis=0, how="all")
    return candidates.dropna(axis=1, how="all")

def get_neg_corr(corr_df):
    """Keep only the strictly negative entries of a correlation matrix.

    Parameters
    ----------
    corr_df : pandas.DataFrame
        Correlation matrix.

    Returns
    -------
    pandas.DataFrame
        ``corr_df`` with every non-negative value replaced by NaN, and with
        rows and then columns that are entirely NaN removed.
    """
    is_negative = corr_df < 0
    negatives = corr_df.where(is_negative)
    negatives = negatives.dropna(axis=0, how="all")
    return negatives.dropna(axis=1, how="all")


### Plotting Capabilities

In [11]:
def plot_heatmap(corr_df):
    """Draw ``corr_df`` as a seaborn heatmap.

    The colour scale is pinned to [-1, 1] so that heatmaps of different
    correlation matrices are directly comparable.
    """
    heatmap_options = {"cmap": 'viridis', "vmin": -1, "vmax": 1}
    sns.heatmap(corr_df, **heatmap_options)

In [12]:
def plot_return_distributions(symbols):
    """Plot a kernel density estimate of the returns of each given symbol.

    Reads the prices from the notebook-level ``price_df``.

    Parameters
    ----------
    symbols : iterable of str
        Symbols whose ``"<symbol>_return"`` columns are plotted.
    """
    return_columns = []
    for symbol in symbols:
        return_columns.append(symbol+"_return")
    all_returns = get_return_df(price_df,symbols)
    selected_returns = all_returns[return_columns]
    selected_returns.plot.kde()
    
    

In [13]:
 def candlestick_chart(symbol):
     """Plot a candlestick chart with volume for ``symbol``.

     The OHLC data is built from the notebook-level ``price_dict``.
     """
     mpf.plot(get_ohlc_data(price_dict,symbol), type="candle", volume=True)

In [14]:
#price_dict = load_adj_price_dict()
#return_df = get_return_df(get_adj_close_df(price_dict),symbol_list)
#plot_heatmap(return_df.corr())


### Future references:

- Look into Chapter 8 of Python for Finance
- Look into Chapter 5 of Mastering Pandas for Finance
- Mastering Python for Finance seems a lil over my head for now
- Python for Finance (O'Reilly): Chapters 15 and onward seem very interesting
