# Whale off the Port(folio)
 Evaluate the performance among various algorithmic, hedge, and mutual fund portfolios and compare them against the S&P 500.


In [2]:
import pandas as pd
import numpy as np
import datetime as dt
from pathlib import Path

%matplotlib inline

### Data Cleaning
Cleaning Dataframes then combining them into a single DataFrame.

Files: 

1.whale_returns.csv

2.algo_returns.csv

3.sp500_history.csv

### Whale Returns

Reading the Whale Portfolio daily returns and clean the data. (Like you're cleaning a whale in the 1800s)

In [5]:
# Reading the Whale Returns

whale_returns_csv = Path(Resources/whale_returns.csv)
whale_returns_df = pd.read_csv(whale_returns_csv, index_col="Date", parse_dates=True, infer_datetime_format=True)

whale_returns_df.sort_index(ascending = True, inplace = True)
whale_returns_df.head()

NameError: name 'Resources' is not defined

In [None]:
#nulls 
whale_returns_df.isnull().mean() * 100
whale_returns_df.isnull().sum()

### Algorithmic Daily Returns

Read the algorithmic daily returns/clean data

In [None]:
#Reading Algo Returns 
algo_returns_csv = Path("Resources/algo_returns.csv")
algo_returns_df = pd.read_csv(algo_returns_csv)
algo_returns_df['Date'] = pd.to_datetime(algo_returns_df["Date"], dayfirst = True)
algo_returns_df.set_index(algo_returns_df['Date'], inplace=True)
algo_returns_df.drop(columns=['Date'],inplace=True)
algo_returns_df.head()


In [11]:
#count nulls
algo_returns_df.isnull().mean() *100
algo_returns_df.isnull().sum()

In [None]:
#drop nulls
algo_returns_df = algo_returns_df.dropna()

### S&P 500 Returns
Read S&P Historic Closing Prices and create new daily returns 

In [None]:
#Reading S&P Data
sp500_history_csv = Path("Resources/sp500_history.csv")
sp500_history_df = pd.read_csv(sp500_history_csv)
sp500_history_df['Date'] = pd.to_datetime(sp500_history_df["Date"], dayfirst = True)
sp500_history_df.set_index(sp500_history_df['Date'], inplace=True)
sp500_history_df.drop(columns=['Date'],inplace=True)
sp500_history_df['Close'] = sp500_history_df['Close'].str.replace('$', '')
sp500_history_df['Close']
sp500_history_df = sp500_history_df.sort_values("Date", ascending=True)
sp500_history_df.head()

In [None]:
#Check Data
sp500_history.dtypes

In [12]:
#Fix Data
sp500_history["Close"]= sp500_history["Close"].str.replace('$','').astype("float")
sp500_history.sort_index(inplace = True)
sp500_history

In [None]:
# Calculate Daily Returns
sp500_daily_returns = sp500_history.pct_change()

In [None]:
# Drop nulls
sp500_daily_returns.dropna(inplace = True)
sp500_daily_returns.head()

In [None]:
#Rename Column
sp500_daily_returns.rename(columns = {"Close":"sp500"}, inplace = True)

### Combine Whale, Algorithmic, and S&P 500 Returns 

In [None]:
#Concatenate
combined_df = pd.concat([whale_returns, algo_returns,sp500_daily_returns], axis = "columns",join = "inner")

### Portfolio Analysis 
Calculate/Visualize performance & risk metrics

#### Performance 
Calculate/Plot Daily & Cumulative Returns

In [7]:
#Daily Returns
combined_df.plot( title="Daily Returns" , figsize=(20,10))

NameError: name 'df_daily' is not defined

In [None]:
#Cumulative Returns
combined_df_cumulative_returns = (1 + combined_portfolios_df).cumprod()
combined_df_cumulative_returns.head()
combined_df_cumulative_returns.plot(figsize=(20,10))

### Risk
Determine the risk of each portfolio:

    1.Create a box plot for each portfolio.
    2.Calculate the standard deviation for all portfolios
    3.Determine which portfolios are riskier than the S&P 500
    4.Calculate the Annualized Standard Deviation

In [None]:
#Box Plot
combined_df.plot.box(title= "Risk", figsize=(20,10))

In [None]:
#Standard Deviations. 
# Calculate the standard deviation for each portfolio. Which portfolios are riskier than the S&P 500?
df_daily_std = pd.DataFrame(combined_df.std()).rename(columns = {0:"std"})
df_daily_std

In [None]:
#Determine which porfolios are risker than S&P 500
higher_std = df_daily_std[df_daily_std["std"] > df_daily_std.loc["sp500", "std"]]
higher_std

In [None]:
# Calculate the annualized standard deviation (252 trading days)
df_annualized_std = df_daily_std*np.sqrt(252)
df_annualized_std

### Rolling Statistics
Risk changes over time. Analyze the rolling statistics for Risk and Beta.

  1.Calculate and plot the rolling standard deviation for the S&P 500 using a 21-day window
  2.Calculate the correlation between each stock to determine which portfolios may mimick the S&P 500
  3.Choose one portfolio, then calculate and plot the 60-day rolling beta between it and the S&P 500


In [None]:
# Calculate and plot the rolling standard deviation for the S&PP 500 using a 21 day window
sp500_rolling_21 = combined_df[["sp500"]].rolling(window = 21).std()
sp500_rolling_21.plot(figsize = (20,10))

In [13]:
# Correlation
correlation_sp500 = pd.DataFrame(combined_df.corr()).loc[:,"sp500"]
correlation_sp500[correlation_sp500==correlation_sp500[correlation_sp500<1].max()]

NameError: name 'correlation_sp500' is not defined

In [None]:
combined_df.plot(kind="scatter", y = "Algo 2", x = "sp500", figsize = (20,10))

In [None]:
# Calculate Beta for a single portfolio compared to the total market (S&P 500)
rolling_covariance = combined_df['BERKSHIRE HATHAWAY INC'].rolling(window=60).cov(combined_df['sp500'])
rolling_variance = combined_df['sp500'].rolling(window=60).var()
rolling_beta = rolling_covariance / rolling_variance
rolling_beta.plot(figsize=(20, 10), title='Rolling 60-Day Beta of BH')

### Challenge: Exponentially Weighted Average
An Alternate way to calculate a rolling window to take the exponentially weighted moving average. 

In [None]:
combined_df_ewm = combined_df_ewm.ewm(span = 21,adjust = False).mean()
(1+combined_df_ewm).cumprod().plot(figsize = (20,10))

### Sharpe Ratios
In reality, investment managers and thier institutional investors look at the ratio of return-to-risk, and not just returns alone. (After all, if you could invest in one of two portfolios, each offered the same 10% return, yet one offered lower risk, you'd take that one, right?)

Calculate and plot the annualized Sharpe ratios for all portfolios to determine which portfolio has the best performance

In [None]:
#Annualized Sharpe Ratios
sharpe_ratios = combined_df.mean(*252/(combined_df.std()*no.sqrt(252))
sharpe_ratios

In [None]:
# Visualize the sharpe ratios as a bar plot
sharpe_ratios.plot(kind = "bar", title = "Sharpe Ratios")

### Custom Portfolio

3 (meme) stocks have been chosen

In [10]:
#AMC
AMC_history = pd.read_csv(Path("Resources/AMC2019.csv"), index_col="Date", 
                           parse_dates = True, infer_datetime_format= True)
AMC_history.rename(columns = {"Close":"AMC"}, inplace = True)
AMC_history.head()

FileNotFoundError: [Errno 2] No such file or directory: 'Resources\\AMC2021.csv'

In [None]:
#AMD
AMD_history = pd.read_csv(Path("Resources/AMD2019.csv"), index_col="Date", 
                           parse_dates = True, infer_datetime_format= True)
AMD_history.rename(columns = {"Close":"AMD"}, inplace = True)
AMD_history.head()

In [None]:
#GME
GME_history = pd.read_csv(Path("Resources/GME2019.csv"), index_col="Date", 
                           parse_dates = True, infer_datetime_format= True)
GME_history.rename(columns = {"Close":"GME"}, inplace = True)
GME_history.head()

In [9]:
#Everything together
meme_stocks =pd.concat((AMC_history, AMD_history, GME_history), axis = 1, join= "inner")
meme_stocks.sort_index(ascending = True, inplace = True)

meme_stocks

NameError: name 'AMC_history' is not defined

In [None]:
#Reset
meme_stocks.index = meme_stocks.index.normalize()

In [None]:
#Drop Nulls
meme_stocks.dropna(inplace=True)

### Calculate the weighted returns for the portfolio assuming an equal number of shares for each stock

In [None]:
# Calculate weighted portfolio returns
weights = [1/3, 1/3, 1/3]
meme_porfolio = meme_stocks.dot(weights)
meme_portfolio_returns = meme_portfolio.pct_change()
meme_portfolio_returns.dropna(inplace = True)
meme_portfolio_returns.head()

### Join your portfolio returns to the Dataframe that contains all of the porfolio returns

In [None]:
combined_portfolios_df = pd.concat((meme_portfolio_returns,combined_df), axis = 1, join = "inner")
portfolio_returns.rename(columns = {0:"meme_portfolio"}, inplace = True)
combined_portfolios_df

In [None]:
# Only compare dates where return data exists for all the stocks (drop NaNs)
combined_portfolios_df.dropna(inplace=True)
combined_portfolios_df

### Re-run the performance and risk analysis with your portfolio to see how it compares to others

In [14]:
# Risk
portfolios_std = pd.DataFrame(combined_portfolios_df.std()).rename(columns = (0:"std"))

portfolio_std

In [None]:
# Rolling
meme_portfolio_rolling_15 = portfolio_returns[["meme_portfolio"]].rolling(window = 15).std()
meme_portfolio_rolling_60 = portfolio_returns[["meme_portfolio"]].rolling(window = 60).std()
meme_portfolio_rolling_120 = portfolio_returns[["meme_portfolio"]].rolling(window = 120).std()
ax = meme_portfolio_rolling_15.plot(figsize = (20,10), title = 'meme portfolio std')
meme_portfolio_rolling_60.plot(ax = ax)
meme_portfolio_rolling_120.plot(ax = ax)
ax.legend(['rolling_15','rolling_60','rolling_120'])

In [None]:
meme_portfolio_rolling_15 = portfolio_returns[["meme_portfolio"]].rolling(window = 15).mean()
meme_portfolio_rolling_60 = portfolio_returns[["meme_portfolio"]].rolling(window = 60).mean()
meme_portfolio_rolling_120 = portfolio_returns[["meme_portfolio"]].rolling(window = 120).mean()
ax = meme_portfolio_rolling_15.plot(figsize = (20,10), title = 'meme portfolio std')
meme_portfolio_rolling_60.plot(ax = ax)
meme_portfolio_rolling_120.plot(ax = ax)
ax.legend(['rolling_15','rolling_60','rolling_120'])

In [None]:
#Beta
rolling_covariance = meme_portfolio['meme_portfolio'].rolling(window=60).cov(meme_portfolio['sp500'])
rolling_variance = meme_portfolio['sp500'].rolling(window=60).var()
rolling_beta = rolling_covariance / rolling_variance
rolling_beta.plot(figsize=(20, 10), title='Rolling 60-Day Beta of meme portfolio')

In [None]:
# Calculate Annualzied Sharpe Ratios
combined_portfolios_df_sharpe_ratio = (combined_portfolios_df.mean()*252) / (combined_portfolios_df.std() * np.sqrt(252))

combined_portfolios_df_sharpe_ratio

In [None]:
# Visualize the sharpe ratios as a bar plot
combined_portfolios_df_sharpe_ratio.plot(kind="bar", title="Sharpe Ratios")

###  Include correlation analysis to determine which stocks (if any) are correlated

In [None]:
correlation_meme_portfolio = meme_portfolio.corr().loc[:,"Meme Portfolio"]

correlation_my_portfolio[
    correlation_meme_portfolio == correlation_meme_portfolio[cprrelation_meme_portfolio<1].max()]