In [None]:
#imports
from tqdm import tqdm

import numpy as np
import pandas as pd

import yfinance as yf

import matplotlib.pyplot as plt
%matplotlib inline

import plotly.express as px
from plotly.offline import init_notebook_mode
init_notebook_mode(connected=True)

In [2]:
# Notebook-wide configuration.

# Stocks to analyse and the historical window requested from the data provider.
tickers = ['NKE', 'MSFT', 'DIS', 'COST']
start, end = '2012-1-1', '2022-1-1'

# Rate subtracted from a portfolio's return when computing its Sharpe ratio.
risk_free_rate = 0.03

# Number of random portfolios generated by the simulation.
trial_n = 100_000

In [None]:
# Fetch monthly price history for every ticker, then keep only the 'Close' field.
monthly_prices = yf.download(tickers, start=start, end=end, interval='1mo')
# output [*********************100%***********************]  4 of 4 completed
df_close = monthly_prices['Close']

# Keep a copy of the downloaded closes on disk under ./data.
df_close.to_csv('./data/df_close.csv')

In [None]:
# Sanity-check the download: (rows, columns) of the close-price table.
df_close.shape
# output (120, 4)

In [None]:
# First rows of the close-price table.
df_close.head()

In [None]:
# Last rows of the close-price table.
df_close.tail()

In [None]:
# Count of missing values in each column.
df_close.isnull().sum()

In [None]:
# Fill gaps in the price history.
# `fillna(method=...)` is deprecated in pandas 2.x, and `inplace=True` mutated
# the frame that earlier cells already displayed. `ffill()` / `bfill()` return a
# new frame and leave the data unchanged when nothing is missing, so no guard
# is needed and the cell is safe to re-run.
# NOTE: after the forward fill, the back fill only touches leading gaps (rows
# before a column's first observation) by copying a later price backwards,
# i.e. it looks ahead in time.
df_close = df_close.ffill().bfill()

df_close.isnull().sum()

In [9]:
def plot_line(df, xlabel, ylabel, title):
    """Draw `df` as a line chart twice: once with matplotlib, once with plotly.

    Parameters
    ----------
    df : object with a pandas-style ``.plot`` method that plotly express can
        also consume (the notebook passes DataFrames).
    xlabel, ylabel : captions applied to the x and y axes of both charts.
    title : heading applied to both charts.

    Returns
    -------
    None. Both figures are displayed as a side effect.
    """
    # Static chart through the pandas plotting API.
    axes = df.plot(title=title, figsize=(16, 8))
    axes.set(xlabel=xlabel, ylabel=ylabel)
    axes.axhline(y=0, color='black')  # horizontal reference line at zero
    axes.legend(loc='upper left')
    plt.show()

    # Interactive chart through plotly express.
    interactive = px.line(df, title=title)
    interactive.update_layout(
        template='seaborn',
        showlegend=True,
        xaxis_title=xlabel,
        yaxis_title=ylabel,
    )
    interactive.show()


In [None]:
# Plot the monthly close prices.
plot_line(df_close, 'Date', 'Stock Price', 'Close Price')

In [None]:
# Fractional change of each close relative to the previous row.
df_pct_change = df_close.pct_change().iloc[1:] 
# .iloc[1:] drops the first row, which is NaN in every column because the
# first observation has no previous price to compare against.
df_pct_change.head()

In [None]:
# Plot the row-to-row changes computed above.
plot_line(df_pct_change, 'Date', 'Percent', 'Percent Change (monthly return)')

In [None]:
mean_return = df_pct_change.mean() * 12
mean_return

In [None]:
var_risk = df_pct_change.var() * 12
var_risk

In [None]:
cov_mat = df_pct_change.cov() * 12
cov_mat

In [None]:
corr_mat = df_pct_change.corr()
corr_mat

In [17]:
def plot_matrix(df, title):
    """Show `df` as a heatmap with matplotlib and again with plotly.

    Parameters
    ----------
    df : table to display; its column names are used as the tick labels on
        *both* axes of the matplotlib figure.
    title : heading applied to both figures.

    Returns
    -------
    None. Both figures are displayed as a side effect.
    """
    labels = list(df.columns.values)
    tick_positions = np.arange(len(labels))

    # Static heatmap with a colour bar.
    _, ax = plt.subplots()
    heatmap = ax.imshow(df)
    plt.title(title)
    plt.colorbar(heatmap)
    plt.xticks(tick_positions, labels)
    plt.yticks(tick_positions, labels)
    plt.show()

    # Interactive heatmap.
    px.imshow(df, title=title).show()

In [None]:
# Visualise the correlation matrix of the tickers' returns as a heatmap.
plot_matrix(corr_mat, 'Correlation Heatmap')

In [None]:
weight = 1/len(tickers)
eq_weights = [weight for i in tickers]
eq_weights

# output [0.25, 0.25, 0.25, 0.25]

In [None]:
eq_return = eq_weights @ (mean_return)
eq_return

# output 0.21426419041415923

In [None]:
eq_risk = ((eq_weights @ cov_mat) @ eq_weights)**(1/2)
eq_risk

# output 0.14462260526809836

In [None]:
eq_sharpe = (eq_return - risk_free_rate)/eq_risk
eq_sharpe

# output = 1.2741036580870198

In [None]:
# Monte Carlo search over random portfolios whose weights sum to 1.
# Changes from the original per-trial loop:
#   * a seeded Generator makes the simulated portfolios (and therefore the
#     max-Sharpe / min-risk / max-return picks derived from them) reproducible
#     under Restart & Run All;
#   * all trials are evaluated with array operations instead of `trial_n`
#     separate pandas matrix products, which removes the long-running loop.
simulation_seed = 42  # any fixed integer works; change it to draw a different sample
rng = np.random.default_rng(simulation_seed)

# One row of uniform [0, 1) draws per trial, rescaled so each row sums to 1.
raw_draws = rng.random((trial_n, len(tickers)))
trial_weights = raw_draws / raw_draws.sum(axis=1, keepdims=True)

# Work on plain arrays; weights are matched to assets by position, i.e. in the
# order of the entries of `mean_return` / the rows and columns of `cov_mat`.
asset_returns = mean_return.to_numpy()
asset_cov = cov_mat.to_numpy()

trial_returns = trial_weights @ asset_returns
# Row-wise quadratic form w' * Cov * w, then sqrt to turn variance into risk.
trial_risks = np.sqrt(((trial_weights @ asset_cov) * trial_weights).sum(axis=1))
trial_sharpes = (trial_returns - risk_free_rate) / trial_risks

efficient_frontier_data = pd.DataFrame({
    'Weights': list(trial_weights),  # one 1-D weight vector per row
    'Risk': trial_risks,
    'Return': trial_returns,
    'Sharpe': trial_sharpes,
})
efficient_frontier_data

In [None]:
# Portfolio with the highest Sharpe ratio.
# idxmax() returns an index *label*, so the row is fetched with label-based
# .loc; positional .iloc only gave the same row while the frame kept its
# default 0..n-1 index.
i_max_sharpe = efficient_frontier_data['Sharpe'].idxmax()
max_sharpe = efficient_frontier_data.loc[i_max_sharpe, :]
max_sharpe

In [None]:
# Portfolio with the lowest risk.
# idxmin() returns an index *label*, so the row is fetched with label-based
# .loc; positional .iloc only gave the same row while the frame kept its
# default 0..n-1 index.
i_min_risk = efficient_frontier_data['Risk'].idxmin()
min_risk = efficient_frontier_data.loc[i_min_risk, :]
min_risk

In [None]:
# Portfolio with the highest return.
# idxmax() returns an index *label*, so the row is fetched with label-based
# .loc; positional .iloc only gave the same row while the frame kept its
# default 0..n-1 index.
i_max_return = efficient_frontier_data['Return'].idxmax()
max_return = efficient_frontier_data.loc[i_max_return, :]
max_return

In [27]:
from plotly import graph_objects as go

def addMarker(fig, x, y, color):
    """Add one star-shaped marker at (x, y) to a plotly figure.

    Parameters
    ----------
    fig : figure exposing ``add_trace``; it is modified in place.
    x, y : coordinates of the marker.
    color : colour passed to plotly for the marker.

    Returns
    -------
    None.
    """
    star = go.Scatter(
        x=[x],
        y=[y],
        mode="markers",
        marker=dict(symbol='star', size=15, color=color),
    )
    fig.add_trace(star)


In [None]:
def plot_efficient_frontier():
    """Plot the simulated portfolios in risk/return space with both libraries.

    Reads the notebook-level `efficient_frontier_data`, `max_sharpe`,
    `min_risk` and `max_return`. Every simulated portfolio is one point
    coloured by its Sharpe ratio, and the three selected portfolios are drawn
    on top of the cloud.

    Returns
    -------
    None. Both figures are displayed as a side effect.
    """
    # matplotlib
    plt.figure(figsize=(16, 8))
    plt.scatter(
        efficient_frontier_data['Risk'],
        efficient_frontier_data['Return'],
        c=efficient_frontier_data['Sharpe'],
        cmap='viridis',
    )
    plt.colorbar(label='Sharpe Ratio')
    plt.title('Efficient Frontier')  # same heading as the plotly figure
    plt.xlabel('Risk')
    plt.ylabel('Return')
    plt.scatter(max_sharpe['Risk'], max_sharpe['Return'], c='green', s=50)
    plt.scatter(min_risk['Risk'], min_risk['Return'], c='blue', s=50)
    plt.scatter(max_return['Risk'], max_return['Return'], c='red', s=50)
    plt.show()

    # plotly
    # Colour by Sharpe ratio so both figures encode the same quantity; colouring
    # by 'Risk' merely repeated the x-axis.
    fig = px.scatter(
        efficient_frontier_data,
        x='Risk',
        y='Return',
        title='Efficient Frontier',
        color='Sharpe',
    )
    addMarker(fig, max_sharpe['Risk'], max_sharpe['Return'], 'Dark Green')
    addMarker(fig, min_risk['Risk'], min_risk['Return'], 'Cyan')
    addMarker(fig, max_return['Risk'], max_return['Return'], 'Red')
    fig.show()

plot_efficient_frontier()

## Download data for the most recent year for the same stocks and SPY

In [None]:
# Evaluation window: the year that starts where the first download window ended.
# NOTE: this rebinds the notebook-level `start` / `end`, so re-running the first
# download cell after this one would fetch this window instead of the original.
start = '2022-1-1'
end = '2023-1-1'

# Daily closes for the same tickers as before. `tickers` is reused as-is, so
# the former `tickers = tickers` no-op assignment is dropped.
df_recent_close = yf.download(tickers, start, end, interval='1d')['Close']
df_recent_close.to_csv('./data/df_recent_close.csv')

# Daily closes for SPY as a one-column frame named 'SPY'.
# Depending on the yfinance version, selecting 'Close' for a single ticker
# yields either a Series or a one-column DataFrame; `.to_frame` exists only on
# a Series, so both shapes are handled explicitly.
spy_close = yf.download('SPY', start, end, interval='1d')['Close']
if isinstance(spy_close, pd.DataFrame):
    df_spy_close = spy_close.copy()
    df_spy_close.columns = ['SPY']
else:
    df_spy_close = spy_close.to_frame('SPY')
df_spy_close.to_csv('./data/df_spy_close.csv')

## Percent change for the most recent year

In [None]:
recent_pct_change = df_recent_close.pct_change().iloc[1:] #get rid of last row
recent_mean_return = recent_pct_change.mean() * 365
recent_risk = recent_pct_change.var() * 365 
recent_cov_mat = recent_pct_change.cov() * 365

recent_risk

In [None]:
plot_line(recent_pct_change, 'Date', 'Percent', 'Percent Change (daily return)')

In [None]:
spy_pct_change = df_spy_close.pct_change().iloc[1:] #get rid of last row
spy_mean_return = spy_pct_change.mean() * 365
spy_risk = spy_pct_change.var() * 365

In [None]:
plot_line(spy_pct_change, 'Date', 'Percent', 'Percent Change (daily return)')

## Cumulative product of returns over the most recent year

In [None]:
recent_cumulative_product = recent_pct_change.add(1).cumprod().sub(1)
recent_cumulative_product.tail()

In [None]:
plot_line(recent_cumulative_product, 'date', 'cumulative product', 'cumulative product' )

In [None]:
spy_cumulative_product = spy_pct_change.add(1).cumprod().sub(1)
spy_cumulative_product.tail()

In [None]:
plot_line(spy_cumulative_product, 'date', 'cumulative product', 'cumulative product')

In [None]:
recent_return = max_sharpe['Weights'] @ (recent_mean_return)
recent_return

In [None]:
spy_recent_return = spy_mean_return['SPY']
spy_recent_return

In [None]:
recent_risk = ((max_sharpe['Weights'] @ recent_cov_mat) @ max_sharpe['Weights'])**(1/2)
recent_risk

In [None]:
# `spy_risk` holds the annualised *variance* of SPY's daily returns, whereas the
# portfolio's `recent_risk` is a standard deviation (square root of w' * Cov * w),
# so the square root is taken here to compare like with like.
# The result is bound to a new name (mirroring `spy_recent_return`) instead of
# overwriting `spy_risk`: the original `spy_risk = spy_risk['SPY']` replaced the
# Series with a scalar and therefore failed when the cell was run a second time.
spy_recent_risk = spy_risk['SPY'] ** (1/2)
spy_recent_risk