# Sentiment Analysis Factors

In [None]:
#quantopian imports
from quantopian.research import run_pipeline
from quantopian.pipeline import Pipeline
from quantopian.pipeline.data.builtin import USEquityPricing
from quantopian.pipeline.filters import QTradableStocksUS

from quantopian.pipeline.factors import Latest
from quantopian.pipeline.data import morningstar, Fundamentals
from quantopian.pipeline.factors import CustomFactor, SimpleMovingAverage, AverageDollarVolume,SimpleBeta, Returns, RSI
from quantopian.pipeline.classifiers.morningstar import Sector
from quantopian.pipeline.classifiers.fundamentals import Sector
from quantopian.pipeline.data.zacks import EarningsSurprises
from quantopian.pipeline.data import factset
from quantopian.pipeline.data.psychsignal import stocktwits
from quantopian.interactive.data.sentdex import sentiment

#Python imports
import math
import talib
import numpy as np
import pandas as pd
import pyfolio as pf
from scipy import stats
import matplotlib.pyplot as plt
from sklearn import linear_model, decomposition, ensemble, preprocessing, isotonic, metrics
from scipy.stats.mstats import winsorize
from zipline.utils.numpy_utils import (
    repeat_first_axis,
    repeat_last_axis,
)
from scipy.stats.mstats import gmean
from sklearn.cluster import SpectralClustering
 
from collections import Counter

In [None]:
class AverageMonthlyArticleSentiment(CustomFactor):
    """NaN-aware mean of the Sentdex sentiment signal over a 30-row window.

    Economic hypothesis: article sentiment can reflect the public's mood
    about a given security, so the average sentiment over the trailing
    window is used to make decisions for the next 20 trading days.

    NOTE(review): ``sentiment`` is imported at module level from
    ``quantopian.interactive.data.sentdex``; pipeline inputs presumably need
    the ``quantopian.pipeline.data.sentdex`` dataset instead -- confirm.
    """

    window_length = 30
    inputs = [sentiment.sentiment_signal]

    def compute(self, today, assets, out, sentiment_signal):
        # Collapse axis 0 of the input window, skipping NaN observations,
        # and write one averaged value per remaining column into `out`.
        window_mean = np.nanmean(sentiment_signal, axis=0)
        out[:] = window_mean

In [None]:
def _standardize(factor, mask, zscore_limit, min_abs_weight):
    """Turn a raw factor into a scaled z-score weight plus its sanity filter.

    Parameters
    ----------
    factor : pipeline Factor
        Raw factor to standardize.
    mask : pipeline Filter
        Assets to include when winsorizing.
    zscore_limit : number
        Z-scores with absolute value at or above this count as outliers.
    min_abs_weight : number
        Weights with absolute value at or below this are treated as zero.

    Returns
    -------
    (weight, keep)
        ``weight`` is the winsorized z-score divided by 100. ``keep`` is a
        filter that is true where the z-score is inside ``zscore_limit`` and
        the weight is larger in magnitude than ``min_abs_weight``.
    """
    # Clip the 2% tails first so extreme values do not dominate the z-score.
    winsorized = factor.winsorize(min_percentile=0.02,
                                  max_percentile=0.98,
                                  mask=mask)
    zscored = winsorized.zscore()
    weight = zscored / 100.0
    keep = (zscored.abs() < zscore_limit) & (weight.abs() > min_abs_weight)
    return weight, keep


def make_pipeline(apply_factor_filters=False):
    """Build the sentiment research pipeline.

    Columns produced:

    * ``sentiment``         -- latest raw Sentdex sentiment signal
    * ``sentiment_monthly`` -- standardized 30-row average Sentdex sentiment
    * ``score``             -- standardized 21-day mean of StockTwits
      bull-minus-bear
    * ``msg_ratio``         -- standardized 21-day mean of StockTwits
      bull/bear message ratio
    * ``msg_total``         -- standardized 21-day mean of StockTwits total
      scanned messages

    Parameters
    ----------
    apply_factor_filters : bool, optional
        When False (default) the screen is the tradable universe restricted
        to assets with a non-null raw sentiment signal. When True, every
        standardized factor must additionally pass its outlier and
        near-zero-weight filters.

    Returns
    -------
    Pipeline
    """
    # Pipeline terms have to be built from the pipeline flavour of the
    # Sentdex dataset, so it is imported locally under its own name.
    # NOTE(review): the module-level `sentiment` comes from
    # quantopian.interactive, which is presumably not usable here -- confirm.
    from quantopian.pipeline.data.sentdex import sentiment as sentdex

    ZSCORE_FILTER = 3  # Maximum number of standard deviations to include before counting as outliers
    ZERO_FILTER = 0.001  # Minimum weight we allow before dropping security

    tradable = QTradableStocksUS()

    # ALPHA FACTOR 1: a bare dataset column is not a computable term, so take
    # its most recent value with `.latest`.
    sentiment_factor = sentdex.sentiment_signal.latest

    # ALPHA FACTORS 2-5 as (column name, raw factor, winsorize within the
    # tradable universe only?). No explicit float64 cast is needed: factors
    # default to float64 output.
    # NOTE(review): 'score' and 'msg_ratio' are winsorized over every finite
    # value rather than the tradable universe -- confirm this is intended.
    raw_factors = [
        ('sentiment_monthly',
         AverageMonthlyArticleSentiment(inputs=[sentdex.sentiment_signal]),
         True),
        ('score',
         SimpleMovingAverage(inputs=[stocktwits.bull_minus_bear],
                             window_length=21),
         False),
        ('msg_ratio',
         SimpleMovingAverage(inputs=[stocktwits.bull_bear_msg_ratio],
                             window_length=21),
         False),
        ('msg_total',
         SimpleMovingAverage(inputs=[stocktwits.total_scanned_messages],
                             window_length=21),
         True),
    ]

    columns = {'sentiment': sentiment_factor}
    universe = tradable & sentiment_factor.notnull()

    for column_name, factor, tradable_only in raw_factors:
        mask = factor.isfinite()
        if tradable_only:
            mask = tradable & mask
        weight, keep = _standardize(factor, mask, ZSCORE_FILTER, ZERO_FILTER)
        columns[column_name] = weight
        if apply_factor_filters:
            universe = universe & keep

    return Pipeline(columns=columns, screen=universe)

In [None]:
# Run the pipeline from 2015-01-01 to 2016-01-01 and preview the first rows.
result = run_pipeline(
    make_pipeline(),
    start_date='2015-01-01',
    end_date='2016-01-01',
)
result.head()

In [None]:
# Take the unique labels stored in the second level of the result's
# MultiIndex (presumably the asset level of a (date, asset) index -- verify)
# and show how many there are.
assets = result.index.levels[1]
len(assets)

In [None]:
# Fetch open prices for those assets over 2014-01-01 to 2017-01-01, a wider
# window than the pipeline run itself.
pricing_data = get_pricing(
    assets,
    start_date='2014-01-01',
    end_date='2017-01-01',
    fields='open_price',
)