In [None]:
import sys
sys.path.insert(0, 'C:\\Users\\kelvi\\mustafa\\artificial-intelligence-for-trading')

In [None]:
sys.executable

# Exploratory Data Analysis

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

plt.rcParams['figure.figsize'] = (20,5)

In [None]:
data = pd.read_csv('eod-quotemedia.csv', parse_dates=['date'], index_col=False)
data.head()

Represent data in 3D

In [None]:
data.index

In [None]:
data.info()

In [None]:
# Reshape from long to wide: one row per date, one column per ticker,
# each cell holding that ticker's adj_close for the date.
# reset_index() only moves the existing index into a column before pivoting.
data3d = data.reset_index().pivot(index='date', columns='ticker', values='adj_close')

In [None]:
data3d.index

In [None]:
data3d.iloc[:3, :5]

Resample data to Month End
* Using resample needs the index to be a datetime index.
* The `resample()` method groups the data into monthly buckets.
* We can't use the `ohlc()` aggregation since our data only contains adjusted closing prices. Therefore we'll use the `last()` function to get the last observation of each month.

In [None]:
# Bucket the daily rows by the 'ME' frequency alias and keep the last
# observation in each bucket for every ticker.
# NOTE(review): 'ME' (month end) is only accepted by recent pandas releases — confirm the pinned version.
resampled_data = data3d.resample('ME').last()

In [None]:
resampled_data.iloc[:5, :3]

In [None]:
resampled_data.info()

In [None]:
apple_ticker = 'AAPL'
# resampled_data.loc[:, apple_ticker]
resampled_data[apple_ticker].head(10)

In [None]:
plt.plot(resampled_data.loc[:, apple_ticker])

In [None]:
plt.plot(resampled_data.loc[:, apple_ticker], label='Monthly Close')
plt.plot(data3d.loc[:, apple_ticker], label='Daily Adjusted Close')
plt.legend()

# Creating Interactive charts using `Plotly`

In [None]:
import plotly.express as px
import plotly.graph_objects as go
import plotly.offline as offline_mode

offline_mode.init_notebook_mode(connected=True)

In [None]:
resampled_data.columns

In [None]:
def plot_stock(name, df):
    """Build an interactive Plotly line chart of a series against its index.

    Parameters
    ----------
    name : str
        Text used as the chart title.
    df : pandas.Series-like
        Values for the y-axis; its ``.index`` supplies the x-axis.

    Returns
    -------
    The figure object produced by ``px.line``.
    """
    x_values = df.index
    figure = px.line(x=x_values, y=df, title=name)
    return figure

In [None]:
# px.scatter(x=resampled_data.loc[:, apple_ticker].index, y=resampled_data.loc[:, apple_ticker])
plot_stock('Monthly close', resampled_data.loc[:, apple_ticker])

In [None]:
fig = px.line(title='{} close'.format(apple_ticker), x=resampled_data.loc[:, apple_ticker].index, y=resampled_data.loc[:, apple_ticker])
fig.add_scatter(x=data3d.loc[:, apple_ticker].index, y=data3d.loc[:, apple_ticker])
fig.show()

In [None]:
def _generate_traces(name_df):
    """Create one Plotly line trace per ``(name, series)`` pair.

    Parameters
    ----------
    name_df : iterable of (str, pandas.Series-like)
        Each pair gives the legend label and the data to draw; the series'
        index is used for the x-axis and its values for the y-axis.

    Returns
    -------
    list
        ``go.Scatter`` objects in the same order as the input pairs.
    """
    return [
        go.Scatter(name=label, x=series.index, y=series, mode='lines')
        for label, series in name_df
    ]
    

In [None]:
traces = _generate_traces([('Monthly close',resampled_data.loc[:, apple_ticker]), ('Adjusted Daily close',data3d.loc[:, apple_ticker])])
offline_mode.iplot({"data":traces, 'layout':go.Layout(title="{} close".format(apple_ticker))})

# Calculate Returns

In [None]:
def calc_raw_returns(df, n_shift=1):
    """Simple (arithmetic) returns over ``n_shift`` periods.

    Parameters
    ----------
    df : pandas.DataFrame or pandas.Series
        Prices ordered by time along the index.
    n_shift : int, default 1
        Number of rows between the current price and the reference price.

    Returns
    -------
    Same type and shape as ``df``: ``P_t / P_{t - n_shift} - 1``. The first
    ``n_shift`` rows have no reference price and are therefore NaN.
    """
    reference_prices = df.shift(n_shift)
    return df / reference_prices - 1

In [None]:
raw_returns = calc_raw_returns(resampled_data)

In [None]:
plot_stock('{} raw_returns'.format(apple_ticker), raw_returns.loc[:,apple_ticker])

In [None]:
def calc_log_returns(df, nshift=1):
    """
    Log returns over ``nshift`` periods (``nshift=1`` gives the formula below).

    $$R_t = log_e(P_t) - log_e(P_{t-1})$$

    The first ``nshift`` rows have no earlier price to compare against and
    come back as NaN.
    """
    # log of the current price minus log of the price `nshift` rows earlier
    log_current = np.log(df)
    log_earlier = np.log(df.shift(nshift))
    return log_current - log_earlier

In [None]:
log_returns = calc_log_returns(resampled_data)

In [None]:
plot_stock('{} log_returns'.format(apple_ticker), log_returns.loc[:,apple_ticker])

In [None]:
# Shift down one row: each date now carries the log return of the period before it.
prev_returns = log_returns.shift(1)
# Shift up one row: each date now carries the log return of the period after it
# (the final row becomes NaN because nothing follows it).
lookahead_returns = log_returns.shift(-1)

In [None]:
traces = _generate_traces([('previous log returns',prev_returns.loc[:, apple_ticker]), ('current log returns',log_returns.loc[:, apple_ticker])])
offline_mode.iplot({"data":traces, 'layout':go.Layout(title="{} previous vs current log returns".format(apple_ticker))})

In [None]:
traces = _generate_traces([('current log returns',log_returns.loc[:, apple_ticker]), ('lookahead log returns',lookahead_returns.loc[:, apple_ticker])])
offline_mode.iplot({"data":traces, 'layout':go.Layout(title="{} current vs lookahead log returns".format(apple_ticker))})

### Generate a Trading Signal

Produce long ("buy") and short ("sell") positions.

Select the top performing stocks for the long portfolio, and the bottom performing stocks for the short portfolio.

Implement the `get_top_n` function to get the top performing stocks for each month. Get the top performing stocks from `prev_returns` by assigning them a value of 1. For all other stocks, give them a value of 0.

In [None]:
def get_top_n(df, n):
    """Flag the ``n`` largest values in every row with 1 and the rest with 0.

    Parameters
    ----------
    df : pandas.DataFrame
        Numeric table; values are ranked across the columns of each row.
    n : int
        Number of entries to flag per row.

    Returns
    -------
    pandas.DataFrame
        Same index and columns as ``df`` with dtype int64. NaN inputs are
        never flagged, so a row with fewer than ``n`` non-NaN values contains
        fewer than ``n`` ones (an all-NaN row is all zeros).
    """
    # method='first' breaks ties by column order. With 'average', values tied
    # across the cut-off all receive a rank above n and none of them would be
    # selected, leaving the row with fewer than n picks.
    ranks = df.rank(
        axis=1,
        method='first',
        numeric_only=False,
        na_option='keep',
        ascending=False,
    )
    # Comparisons against NaN ranks are False, which becomes 0 after the cast.
    return (ranks <= n).astype(np.int64)

In [None]:
sig = get_top_n(log_returns, 50)
sig

In [None]:
it = log_returns.iterrows()
next(it)

In [None]:
next(it)

In [None]:
import itertools

In [None]:
for index, series in itertools.islice(sig.iterrows(), 48):
    print(f'index: {index} ====> {series.isnull().sum()}')

view Data

In [None]:
# print top tickers
sig.sum().sort_values()

In [None]:
sig.sum().sort_values(ascending=False).index[:5].values.tolist()

In [None]:
def print_top_N(df, title, top_n):
    """Print the ``top_n`` column labels with the largest column totals.

    Parameters
    ----------
    df : pandas.DataFrame
        Table whose columns are summed; column labels must be strings because
        they are joined into one line of text.
    title : str
        Description inserted into the header line.
    top_n : int
        How many labels to list.

    Prints a header line followed by the labels separated by " , ".
    """
    header = '{} Most {}'.format(top_n, title)
    column_totals = df.sum().sort_values(ascending=False)
    leaders = column_totals.index[:top_n].values.tolist()
    print(header, sep='\n')
    print(" , ".join(leaders), end='\n')

In [None]:
# Long book: flag the 50 highest log returns in each row (month).
long_stocks = get_top_n(log_returns,50)
# Short book: negating the returns makes the 50 lowest ones rank highest.
short_stocks = get_top_n(-1*log_returns,50)
# List the tickers that were selected most often on each side.
print_top_N(long_stocks, 'Longed stocks', 10)
print_top_N(short_stocks, 'Shorted stocks', 10)

# Cross-sectional Momentum Strategy (Trading Strategy)
Also known as equity statistical arbitrage or equity market-neutral investing.

NB: Avoid survivorship bias (when testing your strategy, use dataset that contains companies that were part of the stock universe at that time.)

Momentum-based Portfolio.

Assumptions
* Each stock has the same number of dollars invested, so that the portfolio weights for each stock are the same.
* Both the long and short portfolios have the same dollar amount invested (in terms of absolute magnitude), in which case the combination of the long and short portfolios is also the simple average of the two, where the short position is a negative value:
$$(long + short)/2$$

Portfolio Returns

In [None]:
def portfolio_returns(df_long, df_short, lookahead_returns, n_stocks):
    """Per-stock contribution to an equally weighted long/short portfolio.

    Parameters
    ----------
    df_long : pandas.DataFrame
        1 where a stock is held long, 0 otherwise.
    df_short : pandas.DataFrame
        1 where a stock is held short, 0 otherwise.
    lookahead_returns : pandas.DataFrame
        Return earned by each stock over the following period.
    n_stocks : int
        Total number of positions; every position gets weight 1 / n_stocks.

    Returns
    -------
    pandas.DataFrame
        ``(long - short) * lookahead_returns / n_stocks`` for every cell.
    """
    net_position = df_long - df_short
    position_returns = net_position * lookahead_returns
    return position_returns / n_stocks

In [None]:
# 100 positions in total: 50 long plus 50 short, matching the get_top_n calls above.
expected_portfolio_returns = portfolio_returns(long_stocks, short_stocks, lookahead_returns, 100)

In [None]:
expected_portfolio_returns.T.sum()

In [None]:
plot_stock('Portfolio Returns', expected_portfolio_returns.T.sum())

# Statistical Tests

The annualized rate of return allows you to compare the rate of return from this strategy to other quoted rates of return, which are usually quoted on an annual basis. 

In [None]:
# Collapse the per-ticker contributions into a single portfolio return per date.
expected_portfolio_returns_by_date = expected_portfolio_returns.T.sum()

portfolio_returns_mean = expected_portfolio_returns_by_date.mean()
portfolio_returns_ste = expected_portfolio_returns_by_date.sem()
# The mean is a monthly log return: 12 * mean is the annual log return, and
# exp(.) - 1 converts it to a simple rate, shown here as a percentage.
annualized_rate = (np.exp(portfolio_returns_mean*12) - 1)* 100
print("""
      mean : {:.6f}
      Standard Error: {:.6f}
      Annualized Rate: {:.2f}%
      """.format(portfolio_returns_mean,portfolio_returns_ste, annualized_rate))

## t-Test
Our null hypothesis ($H_0$) is that the actual mean return from the signal is zero. 
We'll perform a one-sample, one-sided t-test on the observed mean return, to see if we can reject $H_0$.
* compute the t-statistic, and then find its corresponding p-value. (p-value will indicate the probability of observing a t-statistic equally or more extreme than the one we observed if the null hypothesis were true)

A small p-value means that the chance of observing the t-statistic we observed under the null hypothesis is small, and thus casts doubt on the null hypothesis. It's good practice to set a desired level of significance or alpha ($\alpha$) _before_ computing the p-value, and then reject the null hypothesis if $p < \alpha$.

For this project, we'll use $\alpha = 0.05$, since it's a common value to use.






In [None]:
%%!pip install scipy

In [None]:
from scipy.stats import ttest_1samp

In [None]:
def analyze_alpha(df):
    """One-sample, one-sided t-test of the mean return against zero.

    Tests H0: mean == 0 against the alternative H1: mean > 0.

    Parameters
    ----------
    df : pandas.Series
        Observed returns, one value per period.

    Returns
    -------
    t_statistic : float
        Test statistic reported by ``ttest_1samp``.
    p_value : float
        Upper-tail (one-sided) p-value.
    """
    null_hypothesis = 0.0
    # ttest_1samp performs a two-sided test. Halving its p-value gives the
    # upper-tail probability only when t is positive; for a negative t the
    # upper tail is the complement of that half.
    t_statistic, two_sided_p = ttest_1samp(df, null_hypothesis)
    half_p = two_sided_p * 0.5
    if t_statistic < 0:
        return t_statistic, 1.0 - half_p
    return t_statistic, half_p

In [None]:
t_statistic, p_value = analyze_alpha(expected_portfolio_returns_by_date)
print("""
Alpha Analysis:
      t_statistic: {:.6f}
      p_value: {:.6f}
""".format(t_statistic, p_value))

### Question: What p-value did you observe? And what does that indicate about your signal?

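Answer : The p_value is greater than $\alpha$=0.05, so the result is not statistically significant. Therefore, we `fail to reject` the null hypothesis: the data do not provide sufficient evidence that the actual mean return from the trading signal is greater than zero. (Failing to reject $H_0$ does not prove that the mean return is exactly zero.)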