# TECHNICAL INDICATOR ANALYSIS
#### Good Indicators:
###### Momentum +
###### Advanced Momentum +
###### Alpha Architect Momentum +
###### Trendline +
###### Volatility +
###### TEM +
###### MaxGap -

#### Borderline Indicators:
###### Direction
###### MessageSum

#### Bad Indicators:
###### MoneyflowVolume21d
###### Mean_rev

In [None]:
#quantopian imports
from quantopian.research import run_pipeline
from quantopian.pipeline import Pipeline
from quantopian.pipeline.data.builtin import USEquityPricing
from quantopian.pipeline.filters import QTradableStocksUS

from quantopian.pipeline.factors import Latest
from quantopian.pipeline.data import morningstar, Fundamentals
from quantopian.pipeline.factors import CustomFactor, SimpleMovingAverage, AverageDollarVolume,SimpleBeta, Returns, RSI
from quantopian.pipeline.classifiers.morningstar import Sector
from quantopian.pipeline.classifiers.fundamentals import Sector
from quantopian.pipeline.data.zacks import EarningsSurprises
from quantopian.pipeline.data import factset
from quantopian.pipeline.data.psychsignal import stocktwits

#Python imports
import math
import talib
import numpy as np
import pandas as pd
import pyfolio as pf
from scipy import stats
import matplotlib.pyplot as plt
from sklearn import linear_model, decomposition, ensemble, preprocessing, isotonic, metrics
from scipy.stats.mstats import winsorize
from zipline.utils.numpy_utils import (
    repeat_first_axis,
    repeat_last_axis,
)
from scipy.stats.mstats import gmean
from sklearn.cluster import SpectralClustering
 
from collections import Counter


In [None]:
# Tail fraction passed to winsorize() on both sides inside preprocess();
# a value of 0 leaves the data unclipped.
WIN_LIMIT = 0

# Trailing window of alpha factors exported to before_trading_start.
N_FACTOR_WINDOW = 5

# NOTE(review): N_CLUSTERS, TAU and ALPHA_SMOOTH are not referenced in the
# visible part of this notebook -- presumably consumed by the trading algorithm.
N_CLUSTERS = 5
TAU = 5

# Smoothing weight derived from TAU: 1 - exp(-1 / TAU).
ALPHA_SMOOTH = 1.0 - np.exp(-1.0 / TAU)

def preprocess(a):
    """Clean and standardise a cross-section of raw factor values.

    Steps, in order:
      1. copy the input as float64 (the caller's array is not modified);
      2. turn +/-inf into NaN;
      3. subtract the NaN-aware mean, then replace remaining NaNs with 0
         (i.e. missing values are placed at the mean);
      4. winsorize both tails by WIN_LIMIT;
      5. rescale with sklearn's ``preprocessing.scale``.

    :param a: numpy array of raw factor values.
    :return: the scaled array produced by ``preprocessing.scale``.
    """
    values = a.astype(np.float64)
    values[np.isinf(values)] = np.nan

    centred = values - np.nanmean(values)
    centred = np.nan_to_num(centred)

    clipped = winsorize(centred, limits=[WIN_LIMIT, WIN_LIMIT])
    return preprocessing.scale(clipped)

"""
TODO: consider replacing preprocess() with this pipeline-native version:
alpha_w2 = alpha_factor2.winsorize(min_percentile=0.02,
                                 max_percentile=0.98,
                                 mask=QTradableStocksUS() & alpha_factor2.isfinite())

alpha_z2 = alpha_w2.zscore()
alpha_weight2 = alpha_z2 / 100.0

outlier_filter2 = alpha_z2.abs() < ZSCORE_FILTER
zero_filter2 = alpha_weight2.abs() > ZERO_FILTER

universe2 = QTradableStocksUS() & \
           outlier_filter2 & \
           zero_filter2
"""


# Momentum

In [None]:
class Momentum(CustomFactor):
    """Price ratio over a 252-row window that leaves out the latest rows.

    The output is the close 20 rows before the end of the window divided by
    the oldest close in the window.
    """
    inputs = [USEquityPricing.close]
    window_length = 252

    def compute(self, today, assets, out, close):
        oldest_close = close[0]
        lagged_close = close[-20]
        out[:] = lagged_close / oldest_close
        
class AdvancedMomentum(CustomFactor):
    """Negated, volatility-adjusted momentum.

    Computes the return from the oldest row of the 252-row window up to row
    -21, subtracts the return over the last 21 rows, divides by the standard
    deviation (NaN-aware, over the window) of the 126-day ``Returns`` input,
    and negates the result.

    The previous implementation had misplaced parentheses: it evaluated
    ``(long_return - prices[-1] - prices[-21]) / prices[-21]``, i.e. it
    subtracted raw price levels from a fractional return.  The recent-return
    term is now computed as ``(prices[-1] - prices[-21]) / prices[-21]``
    before being subtracted.
    """
    inputs = (USEquityPricing.close, Returns(window_length=126))
    window_length = 252
    window_safe = True

    def compute(self, today, assets, out, prices, returns):
        # Return from the start of the window to 21 rows before the end.
        long_return = (prices[-21] - prices[-252]) / prices[-252]
        # Return over the most recent 21 rows.
        recent_return = (prices[-1] - prices[-21]) / prices[-21]

        am = np.divide(
            long_return - recent_return,
            np.nanstd(returns, axis=0),
        )
        # The sign flip is kept from the original implementation.
        out[:] = -am
            
class aa_momentum(CustomFactor):
    """Alpha Architect - Momentum factor.

    (Return from the oldest row of the window to row -21) minus (return over
    the last 21 rows), divided by the NaN-aware standard deviation of the
    126-day ``Returns`` input across the window.
    """
    inputs = [USEquityPricing.close, Returns(window_length=126)]
    window_length = 252

    def compute(self, today, assets, out, prices, returns):
        long_leg = (prices[-21] - prices[-252]) / prices[-252]
        short_leg = (prices[-1] - prices[-21]) / prices[-21]
        dispersion = np.nanstd(returns, axis=0)
        out[:] = (long_leg - short_leg) / dispersion

In [None]:
def make_pipeline():
    """Build the pipeline for the three momentum factors.

    Each factor is winsorized at the 2nd/98th percentiles, z-scored and
    divided by 100 to give a weight.  A security is kept only if, for every
    factor, it is in QTradableStocksUS, its |z-score| is below ZSCORE_FILTER
    and its |weight| is above ZERO_FILTER.

    :return: Pipeline with columns 'momentum', 'adv_momentum', 'aa_momentum'
             and 'sector'.
    """
    ZSCORE_FILTER = 3  # Maximum number of standard deviations to include before counting as outliers
    ZERO_FILTER = 0.001  # Minimum weight we allow before dropping security

    tradable = QTradableStocksUS()

    def weight_and_universe(factor, winsor_mask):
        # Shared winsorize -> zscore -> weight -> filter chain for one factor.
        z = factor.winsorize(min_percentile=0.02,
                             max_percentile=0.98,
                             mask=winsor_mask).zscore()
        weight = z / 100.0
        keep = tradable & (z.abs() < ZSCORE_FILTER) & (weight.abs() > ZERO_FILTER)
        return weight, keep

    # ALPHA FACTOR 1
    momentum = Momentum()
    momentum_weight, momentum_universe = weight_and_universe(
        momentum, tradable & momentum.isfinite())

    # ALPHA FACTOR 2
    adv_momentum = AdvancedMomentum()
    adv_weight, adv_universe = weight_and_universe(
        adv_momentum, tradable & adv_momentum.isfinite())

    # ALPHA FACTOR 3
    # NOTE(review): unlike the two factors above, the winsorize mask here is
    # not restricted to QTradableStocksUS -- confirm whether that is intended.
    aa = aa_momentum()
    aa_weight, aa_universe = weight_and_universe(aa, aa.isfinite())

    return Pipeline(
        columns={
            'momentum': momentum_weight,
            'adv_momentum': adv_weight,
            'aa_momentum': aa_weight,
            'sector': Sector(),
        },
        screen=momentum_universe & adv_universe & aa_universe)

In [None]:
# Run the pipeline defined above from 2015-01-01 to 2016-01-01 and preview it.
result = run_pipeline(
    make_pipeline(),
    start_date='2015-01-01',
    end_date='2016-01-01',
)
result.head()

In [None]:
# Distinct values of the second index level of the pipeline output
# (presumably the asset level of a (date, asset) MultiIndex -- confirm).
assets = result.index.levels[1].unique()
len(assets)

In [None]:
# Open prices for the selected assets; the date range is wider than the
# pipeline run on both sides.
pricing_data = get_pricing(
    assets,
    start_date='2014-01-01',
    end_date='2017-01-01',
    fields='open_price',
)

In [None]:
import alphalens
from alphalens.utils import get_clean_factor_and_forward_returns
from alphalens.tears import create_full_tear_sheet

# Sector code -> name lookup, with an extra "Unknown" entry under key -1.
sector_labels = dict(Sector.SECTOR_NAMES)
sector_labels[-1] = "Unknown"

# Alphalens input for the 'momentum' column: two quantiles, forward returns
# over 21/63/126 periods, grouped by sector.
factor_data1 = get_clean_factor_and_forward_returns(
    factor=result['momentum'],
    prices=pricing_data,
    groupby=result['sector'],
    groupby_labels=sector_labels,
    quantiles=2,
    periods=(21, 63, 126),
)

create_full_tear_sheet(factor_data1, by_group=True)

In [None]:
# Alphalens tear sheet for the 'adv_momentum' column (two quantiles,
# 21/63/126-period forward returns, grouped by sector).
factor_data2 = get_clean_factor_and_forward_returns(
    factor=result['adv_momentum'],
    prices=pricing_data,
    groupby=result['sector'],
    groupby_labels=sector_labels,
    quantiles=2,
    periods=(21, 63, 126),
)

create_full_tear_sheet(factor_data2, by_group=True)

##### Alpha Architect Momentum - Good Indicator

In [None]:
# Alphalens tear sheet for the 'aa_momentum' column (two quantiles,
# 21/63/126-period forward returns, grouped by sector).
factor_data2 = get_clean_factor_and_forward_returns(
    factor=result['aa_momentum'],
    prices=pricing_data,
    groupby=result['sector'],
    groupby_labels=sector_labels,
    quantiles=2,
    periods=(21, 63, 126),
)

create_full_tear_sheet(factor_data2, by_group=True)

# Trend, Volume, Volatility

In [None]:
class Volatility(CustomFactor):
    """Inverse volatility of log returns over a 252-row window.

    The standard deviation of row-to-row log-price changes is inverted so
    that a larger output means lower volatility (largest ranks best).
    """
    inputs = [USEquityPricing.close]
    window_length = 252

    def compute(self, today, assets, out, close):
        closes = pd.DataFrame(data=close, columns=assets)
        log_returns = np.log(closes).diff()
        out[:] = 1 / log_returns.std()

class Trendline(CustomFactor):
    """Trend of the close against the row index over a 252-row window.

    For each asset the close is centred on its NaN-aware mean, multiplied by
    the centred (and sign-flipped) row index, summed, divided by the variance
    of the index and by the window length, negated, and finally passed
    through preprocess().
    """
    inputs = [USEquityPricing.close]
    window_length = 252
    window_safe = True

    # Row index 0..window_length-1 and its variance, computed once per class.
    _x = np.arange(window_length)
    _x_var = np.var(_x)

    def compute(self, today, assets, out, close):
        n_rows = self.window_length

        # (midpoint - index), broadcast to one column per asset.
        centred_x = repeat_last_axis((n_rows - 1) / 2 - self._x, len(assets))

        # Close minus its per-asset mean, broadcast back over the rows.
        column_means = np.nanmean(close, axis=0)
        centred_y = close - repeat_first_axis(column_means, n_rows)

        cross_sum = (centred_x * centred_y).sum(axis=0)
        out[:] = preprocess(-np.divide(cross_sum / self._x_var, n_rows))
    
class Direction(CustomFactor):
    """Negated sum of (close - open) / close over a 21-row window.

    NaN entries are ignored by the sum.
    """
    inputs = [USEquityPricing.open, USEquityPricing.close]
    window_length = 21
    window_safe = True

    def compute(self, today, assets, out, open, close):
        open_to_close = (close - open) / close
        out[:] = np.nansum(-open_to_close, axis=0)
        
class MoneyflowVolume21d(CustomFactor):
    """Negated signed dollar-volume ratio over the last 21 rows.

    Each row's dollar volume (close * volume) is signed +1 when the close
    rose versus the previous row and -1 otherwise; the signed total is
    divided by the unsigned total and negated.
    """
    inputs = (USEquityPricing.close, USEquityPricing.volume)

    # 21 rows are evaluated; one extra leading row is requested so that the
    # first evaluated row also has a previous close to compare against.
    window_length = 22
    window_safe = True

    def compute(self, today, assets, out, close_extra, volume_extra):
        # Drop the extra leading row: it only serves as a reference close.
        dollar_volume = close_extra[1:] * volume_extra[1:]

        # +1 where the close increased, -1 otherwise.
        signs = np.where(np.diff(close_extra, axis=0) > 0, 1, -1)

        signed_total = (signs * dollar_volume).sum(axis=0)
        unsigned_total = dollar_volume.sum(axis=0)

        out[:] = -np.divide(signed_total, unsigned_total)

In [None]:
def make_pipeline():
    """Build the pipeline for Volatility, Trendline, Direction and MoneyflowVolume21d.

    Each factor is winsorized at the 2nd/98th percentiles, z-scored and
    divided by 100 to give a weight.  Per-factor universes are intersected to
    form the screen, and the four weights are summed into 'alpha_weight',
    which is split into two quantiles ('shorts' = quantile 0, 'longs' =
    quantile 1).

    The universe expressions for factors 3 and 4 previously ended a line with
    a bare ``&`` and no continuation, which is a SyntaxError; they are now
    written as parenthesised expressions.

    :return: Pipeline with columns 'alpha_weight', 'shorts', 'longs',
             'volatility', 'trendline', 'direction', 'MoneyflowVolume21d'
             and 'sector'.
    """
    ZSCORE_FILTER = 3  # Maximum number of standard deviations to include before counting as outliers
    ZERO_FILTER = 0.001  # Minimum weight we allow before dropping security
    OUTLIER_THRESHOLD = 3  # Maximum zscore that is not an outlier
    ZERO_THRESHOLD = 0.001  # Minimum zscore we allow before dropping security

    tradable = QTradableStocksUS()

    def standardize(factor, winsor_mask):
        # Winsorize -> zscore; returns (zscore, weight) with weight = z / 100.
        z = factor.winsorize(min_percentile=0.02,
                             max_percentile=0.98,
                             mask=winsor_mask).zscore()
        return z, z / 100.0

    # ALPHA FACTOR 1
    alpha_factor1 = Volatility()
    alpha_z1, alpha_weight1 = standardize(
        alpha_factor1, tradable & alpha_factor1.isfinite())
    universe1 = (
        tradable
        & (alpha_z1.abs() < ZSCORE_FILTER)
        & (alpha_weight1.abs() > ZERO_FILTER)
    )

    # ALPHA FACTOR 2
    alpha_factor2 = Trendline()
    alpha_z2, alpha_weight2 = standardize(
        alpha_factor2, tradable & alpha_factor2.isfinite())
    universe2 = (
        tradable
        & (alpha_z2.abs() < ZSCORE_FILTER)
        & (alpha_weight2.abs() > ZERO_FILTER)
    )

    # ALPHA FACTOR 3
    # NOTE(review): confirm that pipeline factors expose ``astype`` on this
    # platform; the call is kept exactly as in the original.
    alpha_factor3 = Direction().astype(np.float64)
    # NOTE(review): this winsorize mask is not restricted to QTradableStocksUS,
    # unlike the other factors -- confirm whether that is intended.
    alpha_z3, alpha_weight3 = standardize(alpha_factor3, alpha_factor3.isfinite())
    universe3 = (
        tradable
        & (alpha_z3.abs() < ZSCORE_FILTER)
        & (alpha_weight3.abs() > ZERO_FILTER)
        & alpha_factor3.notnull()
    )

    # ALPHA FACTOR 4
    alpha_factor4 = MoneyflowVolume21d().astype(np.float64)
    alpha_z4, alpha_weight4 = standardize(
        alpha_factor4, tradable & alpha_factor4.isfinite())
    # Here the near-zero filter is applied to the z-score, not the weight.
    universe4 = (
        tradable
        & (alpha_z4.abs() < OUTLIER_THRESHOLD)
        & (alpha_z4.abs() > ZERO_THRESHOLD)
        & alpha_factor4.notnull()
    )

    universe = universe1 & universe2 & universe3 & universe4

    alpha_weight = alpha_weight1 + alpha_weight2 + alpha_weight3 + alpha_weight4
    testing_quantiles = alpha_weight.quantiles(2)

    return Pipeline(
        columns={
            'alpha_weight': alpha_weight,
            'shorts': testing_quantiles.eq(0),
            'longs': testing_quantiles.eq(1),
            'volatility': alpha_weight1,
            'trendline': alpha_weight2,
            'direction': alpha_weight3,
            'MoneyflowVolume21d': alpha_weight4,
            'sector': Sector(),
        },
        screen=universe
    )

In [None]:
# Run the pipeline defined above from 2015-01-01 to 2016-01-01 and preview it.
result = run_pipeline(
    make_pipeline(),
    start_date='2015-01-01',
    end_date='2016-01-01',
)
result.head()

In [None]:
# Distinct values of the second index level of the pipeline output
# (presumably the asset level of a (date, asset) MultiIndex -- confirm).
assets = result.index.levels[1].unique()
len(assets)

In [None]:
# Open prices for the selected assets; the date range is wider than the
# pipeline run on both sides.
pricing_data = get_pricing(
    assets,
    start_date='2014-01-01',
    end_date='2017-01-01',
    fields='open_price',
)

##### MoneyflowVolume - BAD

In [None]:
import alphalens
from alphalens.utils import get_clean_factor_and_forward_returns
from alphalens.tears import create_full_tear_sheet

# Sector code -> name lookup, with an extra "Unknown" entry under key -1.
sector_labels = dict(Sector.SECTOR_NAMES)
sector_labels[-1] = "Unknown"

# Alphalens input for the 'MoneyflowVolume21d' column: two quantiles, forward
# returns over 21/63/126 periods, grouped by sector.
factor_data1 = get_clean_factor_and_forward_returns(
    factor=result['MoneyflowVolume21d'],
    prices=pricing_data,
    groupby=result['sector'],
    groupby_labels=sector_labels,
    quantiles=2,
    periods=(21, 63, 126),
)

create_full_tear_sheet(factor_data1, by_group=True)

##### Trendline - Good Indicator

In [None]:
# Alphalens tear sheet for the 'trendline' column (two quantiles,
# 21/63/126-period forward returns, grouped by sector).
factor_data2 = get_clean_factor_and_forward_returns(
    factor=result['trendline'],
    prices=pricing_data,
    groupby=result['sector'],
    groupby_labels=sector_labels,
    quantiles=2,
    periods=(21, 63, 126),
)

create_full_tear_sheet(factor_data2, by_group=True)

##### Direction - borderline good indicator

In [None]:
# Alphalens tear sheet for the 'direction' column (two quantiles,
# 21/63/126-period forward returns, grouped by sector).
factor_data2 = get_clean_factor_and_forward_returns(
    factor=result['direction'],
    prices=pricing_data,
    groupby=result['sector'],
    groupby_labels=sector_labels,
    quantiles=2,
    periods=(21, 63, 126),
)

create_full_tear_sheet(factor_data2, by_group=True)

##### Volatility - Good Indicator

In [None]:
# Alphalens tear sheet for the 'volatility' column (two quantiles,
# 21/63/126-period forward returns, grouped by sector).
factor_data2 = get_clean_factor_and_forward_returns(
    factor=result['volatility'],
    prices=pricing_data,
    groupby=result['sector'],
    groupby_labels=sector_labels,
    quantiles=2,
    periods=(21, 63, 126),
)

create_full_tear_sheet(factor_data2, by_group=True)

# Other

In [None]:
class MaxGap(CustomFactor):
    """Largest absolute close-to-close log move within a 90-row window.

    The per-asset maximum is passed through preprocess() before being
    written to the output.
    """
    inputs = [USEquityPricing.close]
    window_length = 90
    window_safe = True

    def compute(self, today, assets, out, close):
        log_close = np.log(close)
        abs_moves = np.abs(np.diff(log_close, axis=0))
        largest_move = np.max(abs_moves, axis=0)
        out[:] = preprocess(largest_move)
            
class TEM(CustomFactor):
    """
    TEM = standard deviation of past 6 quarters' reports

    The reported value is capex divided by total assets.  One value is taken
    per distinct as-of date in the window; the standard deviation of the last
    six is negated and passed through preprocess().
    """
    inputs = [
        factset.Fundamentals.capex_qf_asof_date,
        factset.Fundamentals.capex_qf,
        factset.Fundamentals.assets,
    ]
    window_length = 390
    window_safe = True

    def compute(self, today, assets, out, asof_date, capex, total_assets):
        n_reports = 6

        ratio = capex / total_assets
        ratio[np.isinf(ratio)] = np.nan

        dispersion = np.zeros_like(ratio[-1, :])
        for col in range(asof_date.shape[1]):
            # Row of the first occurrence of each distinct as-of date.
            first_rows = np.unique(asof_date[:, col], return_index=True)[1]
            per_report = ratio[first_rows, col]

            # Left-pad with NaN when fewer than six reports are available
            # (np.std then yields NaN for that asset).
            shortfall = n_reports - len(per_report)
            if shortfall > 0:
                per_report = np.concatenate(
                    (np.full(shortfall, np.nan), per_report))

            dispersion[col] = np.std(per_report[-n_reports:])

        out[:] = preprocess(-dispersion)
            
"""
Alternative sentiment factor (kept for reference, not used):
sentiment_score = SimpleMovingAverage(
        inputs=[stocktwits.bull_minus_bear],
        window_length=3,
    )
"""           

#Sentiment
class MessageSum(CustomFactor):
    """Range-weighted bearish message tilt over a 21-row window.

    The NaN-aware sum of (high - low) / close is multiplied by the NaN-aware
    sum of total_scanned * (bear_scored - bull_scored), and the product is
    passed through preprocess().
    """
    inputs = [
        USEquityPricing.high,
        USEquityPricing.low,
        USEquityPricing.close,
        stocktwits.bull_scored_messages,
        stocktwits.bear_scored_messages,
        stocktwits.total_scanned_messages,
    ]
    window_length = 21
    window_safe = True

    def compute(self, today, assets, out, high, low, close, bull, bear, total):
        range_sum = np.nansum((high - low) / close, axis=0)
        bearish_tilt = np.nansum(total * (bear - bull), axis=0)
        out[:] = preprocess(range_sum * bearish_tilt)

class mean_rev(CustomFactor):
    """Mean-reversion score built from the typical price over a 30-row window.

    For every lookback from 10 rows up to the full window, the NaN-aware mean
    typical price is divided by the latest typical price.  That ratio and its
    reciprocal are each accumulated, weighted by their NaN-aware sum across
    assets; the difference of the two accumulators goes through preprocess().
    """
    inputs = [USEquityPricing.high, USEquityPricing.low, USEquityPricing.close]
    window_length = 30
    window_safe = True

    def compute(self, today, assets, out, high, low, close):
        typical = (high + low + close) / 3

        n_rows, n_assets = close.shape
        ratio_acc = np.zeros(n_assets)
        inverse_acc = np.zeros(n_assets)

        latest = typical[-1, :]
        for lookback in range(10, n_rows + 1):
            ratio = np.nanmean(typical[-lookback:, :], axis=0) / latest
            ratio_acc += np.nansum(ratio) * ratio

            inverse = 1.0 / ratio
            inverse_acc += np.nansum(inverse) * inverse

        out[:] = preprocess(ratio_acc - inverse_acc)
            

In [None]:
def make_pipeline():
    """Build the pipeline for MaxGap, TEM, MessageSum and mean_rev.

    Each factor is winsorized at the 2nd/98th percentiles, z-scored and
    divided by 100 to give a weight.  Per-factor universes are intersected to
    form the screen.  'alpha_weight' is the sum of the MaxGap and TEM weights
    only, and is split into two quantiles ('shorts' = quantile 0, 'longs' =
    quantile 1).

    :return: Pipeline with columns 'maxgap', 'TEM', 'MessageSum', 'mean_rev',
             'alpha_weight', 'shorts', 'longs' and 'sector'.
    """
    ZSCORE_FILTER = 3  # Maximum number of standard deviations to include before counting as outliers
    ZERO_FILTER = 0.001  # Minimum weight we allow before dropping security
    OUTLIER_THRESHOLD = 3  # Maximum zscore that is not an outlier
    ZERO_THRESHOLD = 0.001  # Minimum zscore we allow before dropping security

    tradable = QTradableStocksUS()

    def standardize(factor, winsor_mask):
        # Winsorize -> zscore; returns (zscore, weight) with weight = z / 100.
        z = factor.winsorize(min_percentile=0.02,
                             max_percentile=0.98,
                             mask=winsor_mask).zscore()
        return z, z / 100.0

    # ALPHA FACTOR 1
    max_gap = MaxGap()
    z1, weight1 = standardize(max_gap, tradable & max_gap.isfinite())
    universe1 = tradable & (z1.abs() < ZSCORE_FILTER) & (weight1.abs() > ZERO_FILTER)

    # ALPHA FACTOR 2
    tem = TEM()
    z2, weight2 = standardize(tem, tradable & tem.isfinite())
    universe2 = tradable & (z2.abs() < ZSCORE_FILTER) & (weight2.abs() > ZERO_FILTER)

    # ALPHA FACTOR 3
    # NOTE(review): this winsorize mask is not restricted to QTradableStocksUS,
    # unlike the other factors -- confirm whether that is intended.
    message_sum = MessageSum()
    z3, weight3 = standardize(message_sum, message_sum.isfinite())
    universe3 = tradable & (z3.abs() < ZSCORE_FILTER) & (weight3.abs() > ZERO_FILTER)

    # ALPHA FACTOR 4
    reversion = mean_rev()
    z4, weight4 = standardize(reversion, tradable & reversion.isfinite())
    # Here the near-zero filter is applied to the z-score, not the weight.
    universe4 = tradable & (z4.abs() < OUTLIER_THRESHOLD) & (z4.abs() > ZERO_THRESHOLD)

    # Only the first two factors contribute to the combined weight.
    alpha_weight = weight1 + weight2
    testing_quantiles = alpha_weight.quantiles(2)

    return Pipeline(
        columns={
            'maxgap': weight1,
            'TEM': weight2,
            'MessageSum': weight3,
            'mean_rev': weight4,
            'alpha_weight': alpha_weight,
            'shorts': testing_quantiles.eq(0),
            'longs': testing_quantiles.eq(1),
            'sector': Sector(),
        },
        screen=universe1 & universe2 & universe3 & universe4
    )

In [None]:
# Run the pipeline defined above from 2015-01-01 to 2016-01-01 and preview it.
results = run_pipeline(
    make_pipeline(),
    start_date='2015-01-01',
    end_date='2016-01-01',
)
results.head()

In [None]:
# Distinct values of the second index level of the pipeline output
# (presumably the asset level of a (date, asset) MultiIndex -- confirm).
assets = results.index.levels[1].unique()
len(assets)

In [None]:
# Open prices for the selected assets; the date range is wider than the
# pipeline run on both sides.
pricing_data = get_pricing(
    assets,
    start_date='2014-01-01',
    end_date='2017-01-01',
    fields='open_price',
)

##### MaxGap - Good Negative 

In [None]:
import alphalens
from alphalens.utils import get_clean_factor_and_forward_returns
from alphalens.tears import create_full_tear_sheet

# Sector code -> name lookup, with an extra "Unknown" entry under key -1.
sector_labels = dict(Sector.SECTOR_NAMES)
sector_labels[-1] = "Unknown"

# Alphalens input for the 'maxgap' column: two quantiles, forward returns
# over 21/63/126 periods, grouped by sector.
factor_data1 = get_clean_factor_and_forward_returns(
    factor=results['maxgap'],
    prices=pricing_data,
    groupby=results['sector'],
    groupby_labels=sector_labels,
    quantiles=2,
    periods=(21, 63, 126),
)

create_full_tear_sheet(factor_data1, by_group=True)

##### TEM - Good 

In [None]:
# Alphalens tear sheet for the 'TEM' column (two quantiles,
# 21/63/126-period forward returns, grouped by sector).
factor_data1 = get_clean_factor_and_forward_returns(
    factor=results['TEM'],
    prices=pricing_data,
    groupby=results['sector'],
    groupby_labels=sector_labels,
    quantiles=2,
    periods=(21, 63, 126),
)

create_full_tear_sheet(factor_data1, by_group=True)

##### MessageSum - borderline

In [None]:
# Alphalens tear sheet for the 'MessageSum' column (two quantiles,
# 21/63/126-period forward returns, grouped by sector).
factor_data1 = get_clean_factor_and_forward_returns(
    factor=results['MessageSum'],
    prices=pricing_data,
    groupby=results['sector'],
    groupby_labels=sector_labels,
    quantiles=2,
    periods=(21, 63, 126),
)

create_full_tear_sheet(factor_data1, by_group=True)

##### Mean Reversion - weak

In [None]:
# Alphalens tear sheet for the 'mean_rev' column (two quantiles,
# 21/63/126-period forward returns, grouped by sector).
factor_data1 = get_clean_factor_and_forward_returns(
    factor=results['mean_rev'],
    prices=pricing_data,
    groupby=results['sector'],
    groupby_labels=sector_labels,
    quantiles=2,
    periods=(21, 63, 126),
)

create_full_tear_sheet(factor_data1, by_group=True)