In [None]:
%load_ext watermark
# %reload_ext watermark
%watermark -p pandas,numpy,datetime,matplotlib,pandas_datareader,fix_yahoo_finance

In [None]:
import pandas as pd
import numpy as np
import datetime
import matplotlib.pyplot as plt

In [None]:
# import data
from pandas_datareader import data as pdr
import fix_yahoo_finance

# request AAPL quotes from Yahoo! Finance for 2006-10-01 through 2012-01-01
aapl = pdr.get_data_yahoo(
    'AAPL',
    start=datetime.datetime(2006, 10, 1),
    end=datetime.datetime(2012, 1, 1),
)
aapl.head()

In [None]:
# alternately import data
import quandl 
# request the WIKI/AAPL data set from Quandl for the same date range
aapl = quandl.get(
    "WIKI/AAPL",
    start_date="2006-10-01",
    end_date="2012-01-01",
)
aapl.head()

In [None]:
# alternatively, load a data set that was already retrieved from Yahoo! Finance;
# the first CSV column becomes the (date-parsed) index and the header row is
# replaced by the column names given here
aapl = pd.read_csv(
    "https://s3.amazonaws.com/assets.datacamp.com/blog_assets/aapl.csv",
    header=0,
    index_col=0,
    names=['Open', 'High', 'Low', 'Close', 'Volume', 'Adj Close'],
    parse_dates=True,
)
aapl.head()

In [None]:
# first two rows of the November-December 2006 window (label-based slice)
print(aapl.loc[slice(pd.Timestamp('2006-11-01'), pd.Timestamp('2006-12-31'))].head(2))

# first two rows of 2007 (partial-string date selection)
print(aapl.loc['2007'].head(2))

# rows at positions 22 through 42, intended to be November 2006
# NOTE(review): the positions assume the data starts in early October 2006 -- confirm
print(aapl.iloc[22:43])

# columns 0 and 3 at row positions 22 and 43, intended to be the 'Open' and
# 'Close' values at 2006-11-01 and 2006-12-01
# NOTE(review): column positions depend on how `aapl` was loaded -- confirm
print(aapl.iloc[[22, 43], [0, 3]])

In [None]:
# draw 10 random rows and show them
sample = aapl.sample(n=10)
print(sample)

# downsample to month-end frequency, using the mean of every month
monthly_aapl = aapl.resample('M').mean()
print(monthly_aapl)

# re-index to month-end dates instead of aggregating; month ends without an
# observation are back-filled from the next available row
print(aapl.asfreq(freq="M", method="bfill"))

In [None]:
# temporarily store open minus close in a `diff` column
aapl['diff'] = aapl['Open'] - aapl['Close']
print(aapl['diff'].head())

# drop the helper column again so `aapl` keeps its original columns
del aapl['diff']

In [None]:
# line chart of the `Close` column with grid lines switched on
aapl.Close.plot(grid=True)
plt.show()

In [None]:
#################################
#                               #
#   common financial analysis   #
#                               #
#################################

# keep `Adj Close` as a one-column DataFrame
daily_close = aapl[['Adj Close']]

# daily percentage change, with missing values (such as the first row, which
# has no predecessor) replaced by 0
daily_pct_c = daily_close.pct_change().fillna(0)
print(daily_pct_c)

# daily log returns: log(1 + percentage change); missing values are kept here
daily_log_returns = np.log(1 + daily_close.pct_change())
print(daily_log_returns)

In [None]:
# resample `aapl` to business month ends, taking the last observation of each
# month; `.iloc[-1]` is explicit positional access (a plain `x[-1]` on a
# date-indexed Series relies on a deprecated positional fallback)
monthly = aapl.resample('BM').apply(lambda x: x.iloc[-1])

# monthly percentage change
# (not the last expression of the cell, so Jupyter does not display it)
monthly.pct_change()

# resample `aapl` to calendar quarters, taking the mean of each quarter
# ('Q' is the quarter-end alias; a '4M' rule would build 4-month bins instead)
quarter = aapl.resample("Q").mean()

# quarterly percentage change
quarter.pct_change()

In [None]:
# alternative to pct_change(): divide each price by the previous row's price
# (obtained with shift(1)) and subtract 1
daily_pct_c = daily_close.div(daily_close.shift(1)).sub(1)
print(daily_pct_c)

# log returns from the same row-over-row price ratio
daily_log_returns_shift = np.log(daily_close.div(daily_close.shift(1)))
print(daily_log_returns_shift)

In [None]:
# histogram of `daily_pct_c` with 50 bins
daily_pct_c.hist(bins=50)
plt.show()

# descriptive statistics of `daily_pct_c`
print(daily_pct_c.describe())

In [None]:
# cumulative daily return: running product of (1 + daily return)
cum_daily_return = daily_pct_c.add(1).cumprod()
print(cum_daily_return)

In [None]:
# chart the cumulative daily returns on a 12 x 8 figure
cum_daily_return.plot(figsize=(12, 8))
plt.show()

In [None]:
# average the cumulative daily return within every calendar month
cum_monthly_return = cum_daily_return.resample("M").mean()
print(cum_monthly_return)

In [None]:
### fetch data for other stocks for comparison

# ticker = symbol of the stock
def get(tickers, startdate, enddate):
    """Fetch Yahoo! Finance data for several tickers and stack it in one frame.

    Parameters
    ----------
    tickers : str or iterable of str
        Stock symbols to fetch. A single string is treated as one ticker
        rather than being iterated character by character, and any other
        iterable (including a one-shot generator) is materialised into a list
        first, because it is needed twice: once to fetch and once as the
        concat keys.
    startdate, enddate
        Passed through to `pdr.get_data_yahoo` as `start` and `end`.

    Returns
    -------
    pandas.DataFrame
        The per-ticker frames concatenated row-wise. The tickers form the new
        outer index level; the index levels are named 'Ticker' and 'Date'.
    """
    if isinstance(tickers, str):
        tickers = [tickers]
    else:
        tickers = list(tickers)

    def data(ticker):
        return pdr.get_data_yahoo(ticker, start=startdate, end=enddate)

    datas = [data(ticker) for ticker in tickers]
    return pd.concat(datas, keys=tickers, names=['Ticker', 'Date'])

# fetch the four comparison stocks for 2006-10-01 through 2012-01-01
tickers = ['AAPL', 'MSFT', 'IBM', 'GOOG']
all_data = get(
    tickers,
    datetime.datetime(2006, 10, 1),
    datetime.datetime(2012, 1, 1),
)
all_data.head()

In [None]:
# alternatively, load a data set that was already retrieved from Yahoo! Finance;
# the first two CSV columns form the index and column 1 is parsed as dates
all_data = pd.read_csv(
    "https://s3.amazonaws.com/assets.datacamp.com/blog_assets/all_stock_data.csv",
    index_col=[0, 1],
    header=0,
    parse_dates=[1],
)
all_data.head()

In [None]:
# reshape the adjusted closes to wide form: one row per date, one column per
# ticker (keyword arguments are used because newer pandas versions no longer
# accept positional arguments for `pivot`)
daily_close_px = all_data[['Adj Close']].reset_index().pivot(
    index='Date', columns='Ticker', values='Adj Close'
)

# daily percentage change per ticker
daily_pct_change = daily_close_px.pct_change()

# one histogram per ticker, all sharing the same x-axis
daily_pct_change.hist(bins=50, sharex=True, figsize=(12, 8))
plt.show()

In [None]:
# scatterplot matrix of `daily_pct_change`, with kernel density estimates on
# the diagonal and strongly transparent points
pd.plotting.scatter_matrix(
    daily_pct_change,
    diagonal='kde',
    alpha=0.1,
    figsize=(12, 12),
)
plt.show()

In [None]:
### moving windows

# adjusted closing prices as a Series
adj_close_px = aapl['Adj Close']

# rolling mean over a window of 40 observations
moving_avg = adj_close_px.rolling(window=40).mean()

# show the last ten values
moving_avg.tail(10)

In [None]:
# short window: rolling mean over 42 observations, stored as column '42'
aapl['42'] = adj_close_px.rolling(window=42).mean()

# long window: rolling mean over 252 observations, stored as column '252'
aapl['252'] = adj_close_px.rolling(window=252).mean()

# chart the adjusted close together with both rolling means
aapl.loc[:, ['Adj Close', '42', '252']].plot()
plt.show()

In [None]:
### volatility calculation

# number of observations in the rolling window
min_periods = 75

# rolling standard deviation of the daily percentage changes, scaled by the
# square root of the window length
vol = daily_pct_change.rolling(window=min_periods).std().mul(np.sqrt(min_periods))

# chart the volatility of every column
vol.plot(figsize=(10, 8))
plt.show()

In [None]:
### OLS regression - MSFT returns ~ AAPL returns regressed

# import the `api` model of `statsmodels` under alias `sm`
import statsmodels.api as sm
from pandas import tseries
from pandas.core import datetools

# isolate the adjusted closing price (rows are indexed by Ticker and Date)
all_adj_close = all_data[['Adj Close']]

# log returns; the shift is done within each ticker so that the first price of
# one ticker is never divided by the last price of the ticker stacked above it
all_returns = np.log(all_adj_close / all_adj_close.groupby(level='Ticker').shift(1))

# AAPL returns, indexed by date only (`xs` drops the 'Ticker' level)
aapl_returns = all_returns.xs('AAPL', level='Ticker')

# MSFT returns, indexed by date only
msft_returns = all_returns.xs('MSFT', level='Ticker')

# align both return series on date and drop rows where either one is missing
# (at least the first date, which has no previous price)
return_data = pd.concat([aapl_returns, msft_returns], axis=1).dropna()
return_data.columns = ['AAPL', 'MSFT']

# regressors: the AAPL returns plus a constant (intercept) column
X = sm.add_constant(return_data['AAPL'])

# ordinary least squares fit of MSFT returns on AAPL returns
model = sm.OLS(return_data['MSFT'], X).fit()

# print the regression summary
print(model.summary())

In [None]:
# scatter of AAPL returns (x) against MSFT returns (y)
plt.plot(return_data['AAPL'], return_data['MSFT'], 'g.')

# x values spanning the current x-axis limits, extended by 0.01 on the right
ax = plt.axis()
x = np.linspace(ax[0], ax[1] + 0.01)

# fitted regression line; `model.params` is read by position with `.iloc`
# (first the constant, then the AAPL coefficient) because plain integer
# indexing on a label-indexed Series is deprecated
# NOTE(review): assumes `model` was fitted on pandas inputs so that `params`
# is a Series -- confirm if the fitting cell changes
plt.plot(x, model.params.iloc[0] + model.params.iloc[1] * x, 'b', lw=2)

plt.grid(True)
plt.axis('tight')
plt.xlabel('Apple Returns')
plt.ylabel('Microsoft returns')
plt.show()

In [None]:
# correlation between MSFT and AAPL returns over a rolling window of 252
# observations, charted over time
(
    return_data['MSFT']
    .rolling(window=252)
    .corr(return_data['AAPL'])
    .plot()
)
plt.show()

In [None]:
### Building A Trading Strategy With Python

# lengths of the short and long moving-average windows
short_window = 40
long_window = 100

# `signals` shares the index of `aapl`; the signal starts out as 0.0 everywhere
signals = pd.DataFrame(index=aapl.index)
signals['signal'] = 0.0

# simple moving average of the close over the short window
signals['short_mavg'] = aapl['Close'].rolling(window=short_window, min_periods=1, center=False).mean()

# simple moving average of the close over the long window
signals['long_mavg'] = aapl['Close'].rolling(window=long_window, min_periods=1, center=False).mean()

# from row `short_window` onwards the signal is 1.0 while the short average is
# above the long one, else 0.0; the first `short_window` rows stay 0.0.
# A single `.iloc` assignment writes into `signals` itself, whereas the chained
# form `signals['signal'][short_window:] = ...` may write to a temporary copy.
signals.iloc[short_window:, signals.columns.get_loc('signal')] = np.where(
    signals['short_mavg'].iloc[short_window:] > signals['long_mavg'].iloc[short_window:],
    1.0,
    0.0,
)

# trading orders: +1.0 where the signal switches on, -1.0 where it switches off
signals['positions'] = signals['signal'].diff()

In [None]:
# new figure with a single set of axes; the y-axis is labelled as a price
fig = plt.figure()
ax1 = fig.add_subplot(111, ylabel='Price in $')

# closing price in green
aapl['Close'].plot(ax=ax1, color='g', lw=1.25)

# short and long moving averages on the same axes
signals.loc[:, ['short_mavg', 'long_mavg']].plot(ax=ax1, lw=1.25)

# buy markers: magenta up-triangles on the short average where positions == 1.0
ax1.plot(
    signals.index[signals.positions == 1.0],
    signals.loc[signals.positions == 1.0, 'short_mavg'],
    '^', markersize=10, color='m',
)

# sell markers: black down-triangles on the short average where positions == -1.0
ax1.plot(
    signals.index[signals.positions == -1.0],
    signals.loc[signals.positions == -1.0, 'short_mavg'],
    'v', markersize=10, color='k',
)

plt.show()

In [None]:
### Backtesting A Strategy

# starting cash
initial_capital = 100000.0

# empty frame that shares the index of `signals`
positions = pd.DataFrame(index=signals.index).fillna(0.0)

# hold 100 shares while the signal is 1.0 and none while it is 0.0
positions['AAPL'] = signals['signal'] * 100

# value of the shares held: number of shares times the adjusted close
portfolio = positions.multiply(aapl['Adj Close'], axis=0)

# change in the number of shares held from one row to the next
pos_diff = positions.diff()

# `holdings`: row-wise total of the position values computed above
# (at this point `portfolio` only contains the position value columns)
portfolio['holdings'] = portfolio.sum(axis=1)

# `cash`: initial capital minus the running total spent on share purchases
# (sales enter as negative purchases)
portfolio['cash'] = initial_capital - (
    pos_diff.multiply(aapl['Adj Close'], axis=0)
    .sum(axis=1)
    .cumsum()
)

# `total`: holdings plus cash
portfolio['total'] = portfolio['holdings'] + portfolio['cash']

# `returns`: row-over-row percentage change of the total
portfolio['returns'] = portfolio['total'].pct_change()

In [None]:
# new figure with a single set of axes; the y-axis is labelled as portfolio value
fig = plt.figure()
ax1 = fig.add_subplot(111, ylabel='Portfolio value in $')

# equity curve: total portfolio value in dollars
portfolio['total'].plot(ax=ax1, lw=1.25)

# buy markers: magenta up-triangles on the equity curve where positions == 1.0
ax1.plot(
    portfolio.index[signals.positions == 1.0],
    portfolio.loc[signals.positions == 1.0, 'total'],
    '^', markersize=10, color='m',
)

# sell markers: black down-triangles on the equity curve where positions == -1.0
ax1.plot(
    portfolio.index[signals.positions == -1.0],
    portfolio.loc[signals.positions == -1.0, 'total'],
    'v', markersize=10, color='k',
)

plt.show()

In [None]:
# Backtesting with Quantopian
def initialize(context):
    """Set up the state that `handle_data` reads on every call.

    Stores the value returned by `symbol('AAPL')` as `context.sym` and starts
    the call counter `context.i` at zero.
    """
    # NOTE(review): `symbol` is not defined in this notebook; presumably it is
    # supplied by the Quantopian environment -- confirm.
    context.sym = symbol('AAPL')
    context.i = 0

def handle_data(context, data):
    """Moving-average crossover step.

    Increments `context.i` on every call and returns immediately while the
    counter is below 300, so that the 300-bar window can be filled first.
    Afterwards it compares the mean 'price' over the last 100 and the last 300
    daily bars of `context.sym`: a target of 100 shares is ordered when the
    short mean is above the long mean, a target of 0 shares when it is below,
    and nothing is ordered when neither comparison holds. The current price
    and both means are passed to `record` on every call that gets this far.

    NOTE(review): `order_target` and `record` are not defined in this
    notebook; presumably the Quantopian environment supplies them -- confirm.
    """
    context.i += 1
    if context.i < 300:
        return

    def mean_price(bar_count):
        # mean 'price' of `context.sym` over the last `bar_count` daily bars
        return data.history(context.sym, 'price', bar_count, '1d').mean()

    short_mavg = mean_price(100)
    long_mavg = mean_price(300)

    # order_target is asked for the desired share count, not for a difference
    if short_mavg > long_mavg:
        order_target(context.sym, 100)
    elif short_mavg < long_mavg:
        order_target(context.sym, 0)

    # keep the inputs of the decision for later inspection
    record(
        AAPL=data.current(context.sym, "price"),
        short_mavg=short_mavg,
        long_mavg=long_mavg,
    )

In [None]:
### Evaluating Moving Average Crossover Strategy

# Sharpe ratio

# returns of the backtested strategy
returns = portfolio.loc[:, 'returns']

# mean return over its standard deviation, annualised with sqrt(252)
sharpe_ratio = np.sqrt(252) * (returns.mean() / returns.std())

# show the Sharpe ratio
print(sharpe_ratio)

In [None]:
# Maximum Drawdown

# length of the trailing window, in observations
window = 252

# highest adjusted close within the trailing window, and the relative distance
# of each day's adjusted close below that peak
rolling_max = aapl['Adj Close'].rolling(window=window, min_periods=1).max()
daily_drawdown = aapl['Adj Close'].div(rolling_max).sub(1.0)

# most negative daily drawdown within the trailing window
max_daily_drawdown = daily_drawdown.rolling(window=window, min_periods=1).min()

# chart both series
daily_drawdown.plot()
max_daily_drawdown.plot()
plt.show()

In [None]:
# Compound Annual Growth Rate (CAGR)

# calendar days between the first and the last observation in `aapl`
days = (aapl.index[-1] - aapl.index[0]).days

# CAGR = (last price / first price) ** (365 / days) - 1
# The start price is taken from the first row (`.iloc[0]`) so that it matches
# the start of the `days` span; `.iloc` also makes the positional access
# explicit on the date-indexed Series.
cagr = (aapl['Adj Close'].iloc[-1] / aapl['Adj Close'].iloc[0]) ** (365.0 / days) - 1

# print the CAGR
print(cagr)