# Checking for Correlation
###### https://www.quantopian.com/posts/checking-correlation-and-risk-exposure-of-alpha-factors

### Screens
#### Value 
###### EV/EBITDA
#### Quality
###### Long term FCF generation
###### Long term ROIC
###### Long term margin characteristics
###### Piotroski's F-score



### TECHNICAL INDICATOR ANALYSIS
###### Momentum +
###### Advanced Momentum +
###### Alpha Architect Momentum +
###### Trendline +
###### Volatility +
###### TEM +
###### MaxGap -
### More TA-Lib Indicators
###### LINEARREG_INTERCEPT +
###### MEDPRICE +
###### TRANGE +
###### TYPPRICE +
###### MINUS_DM +
###### PLUS_DM +
###### Price oscillator +
###### Mean Rev 1M -
###### Volatility 3M -
###### ADX - 
###### DX - 
### Fundamental Factors
###### FCF to EV +
###### CapEx Vol +
###### Debt to Total Assets +
###### *CapEx Vol and Debt to Total Assets are highly correlated

In [None]:
def make_factors():
    """Return the alpha factors under test, keyed by display name.

    The values are the factor objects referenced by name (defined elsewhere);
    nothing is instantiated or computed here.
    """
    return {
        'Mean Reversion 1M': Mean_Reversion_1M,
        'Price Momentum 3M': Price_Momentum_3M,
        'Price Oscillator': Price_Oscillator,
        'Trendline': Trendline,
        'Vol 3M': Vol_3M,
    }

#### Make Pipeline

In [None]:
n_fwd_days = 5  # number of days to compute returns over


def make_pipeline():
    """Build the pipeline that ranks every test factor and combines them.

    Output columns:
      * one column per entry of ``make_factors()``, holding that factor's
        rank computed with the universe screen as mask;
      * ``'test_alpha'``: the sum of all ranked factor columns;
      * ``'shorts'`` / ``'longs'``: boolean columns selecting quantile 0 and
        quantile 2 of ``test_alpha`` split into 3 quantiles;
      * ``'Returns'``: ``Returns`` over ``USEquityPricing.open`` with a
        window of ``n_fwd_days``;
      * ``'Sector'``: the ``Sector`` classifier.

    Returns:
        A ``Pipeline`` with the columns above, screened by the universe
        filter.
    """
    # Value filter (currently disabled)
    #value = morningstar.valuation_ratios.ev_to_ebitda.latest
    #market_cap = morningstar.valuation.market_cap.latest > 2e9
    #undervalued = value.bottom(50, mask = (QTradableStocksUS() & market_cap))

    # Before we do any other ranking, we want to throw away the bad assets.
    # NOTE(review): the disabled code above spells this filter
    # `QTradableStocksUS`; confirm that `QTradeableUS` is really defined in
    # this notebook and is not a typo.
    initial_screen = QTradeableUS()

    # Rank every factor inside the screened universe.  `.items()` is used
    # instead of `.iteritems()` so this runs on Python 3 as well as Python 2.
    pipe_cols = {
        name: factor().rank(mask=initial_screen)
        for name, factor in make_factors().items()
    }

    # Combine all the ranked alphas into one mega-alpha.  This must happen
    # before any non-factor column is added to `pipe_cols`.
    mega_alpha = sum(pipe_cols.values())
    pipe_cols['test_alpha'] = mega_alpha

    # Long and short legs: lowest and highest of three quantiles.
    alpha_quantiles = mega_alpha.quantiles(3)
    pipe_cols['shorts'] = alpha_quantiles.eq(0)
    pipe_cols['longs'] = alpha_quantiles.eq(2)

    # Cumulative returns over the last n_fwd_days days. We will later shift
    # these.
    pipe_cols['Returns'] = Returns(
        inputs=[USEquityPricing.open],
        mask=initial_screen,
        window_length=n_fwd_days,
    )

    # Sector data
    pipe_cols['Sector'] = Sector(mask=initial_screen)

    return Pipeline(columns=pipe_cols, screen=initial_screen)

In [None]:
# Run the pipeline over 2015 and drop rows with missing values.
result = run_pipeline(
    make_pipeline(),
    start_date='2015-01-01',
    end_date='2016-01-01',
).dropna()
result.head()

In [None]:
# Unique assets that still have rows in `result`.  They are read from the
# index values rather than from `index.levels[1]`: a MultiIndex keeps levels
# that are no longer used after rows are removed, so `levels` could still
# list assets whose rows were all dropped by `dropna()`.
assets = result.index.get_level_values(1).unique()
len(assets)

In [None]:
# Open prices for every asset in the pipeline output.  The date range is
# wider than the pipeline's on both sides.
pricing_data = get_pricing(
    assets,
    start_date='2014-06-01',
    end_date='2016-07-01',
    fields='open_price',
)

In [None]:
import alphalens as al

# Sector code -> sector name lookup, with an explicit entry for code -1
# (no dataset is perfect, better handle the unexpected).
sector_labels = dict(Sector.SECTOR_NAMES)
sector_labels.update({-1: "Unknown"})

##### Get clean factor and forward returns for all factors

In [None]:
def _clean_factor_data(factor_column):
    """Return alphalens clean factor data for one column of ``result``.

    The factor values come from ``result[factor_column]``, prices from
    ``pricing_data``, and assets are grouped by the pipeline's ``'Sector'``
    column.  Five quantiles and forward periods of 21, 63 and 126 are used.
    """
    return al.utils.get_clean_factor_and_forward_returns(
        result[factor_column],
        pricing_data,
        # The pipeline names this column 'Sector' (capitalised); looking up
        # "sector" raises KeyError.
        groupby=result["Sector"],
        quantiles=5,
        periods=(21, 63, 126),
    )


# NOTE(review): none of these four column names is produced by
# make_pipeline() as currently written (its factor columns come from
# make_factors()).  Confirm the pipeline that produced `result` actually
# contains them before running this cell.
factor1_data = _clean_factor_data("fcf_to_ev")
factor2_data = _clean_factor_data("debt_to_assets")
factor3_data = _clean_factor_data("capex_vol")
factor4_data = _clean_factor_data("sales_size")

In [None]:
def _pyfolio_input(factor_data):
    """Call ``create_pyfolio_input`` with the settings shared by all factors.

    Returns whatever ``al.performance.create_pyfolio_input`` returns; the
    callers below unpack it as (returns, positions, benchmark).
    """
    return al.performance.create_pyfolio_input(
        factor_data,
        period='63D',
        capital=1000000,
        long_short=True,
        group_neutral=False,
        equal_weight=True,
        quantiles=[1, 5],
        groups=None,
        benchmark_period='21D',
    )


factor1_returns, factor1_positions, factor1_benchmark = _pyfolio_input(factor1_data)
factor2_returns, factor2_positions, factor2_benchmark = _pyfolio_input(factor2_data)
factor3_returns, factor3_positions, factor3_benchmark = _pyfolio_input(factor3_data)
factor4_returns, factor4_positions, factor4_benchmark = _pyfolio_input(factor4_data)

In [None]:
import matplotlib.pyplot as plt

# Plot the four factor return series together, in legend order.
for returns_series in (factor1_returns, factor2_returns,
                       factor3_returns, factor4_returns):
    returns_series.plot()

plt.ylabel('Returns')
plt.legend(['Factor1', 'Factor2', 'Factor3','Factor4']);

In [None]:
import pandas as pd

# Pairwise correlation of the four factor return series.
# `np.corrcoef` accepts no `columns` keyword, so the original call raised
# TypeError.  Concatenating the series side by side aligns them on their
# index and gives a labelled correlation matrix.
factor_returns = pd.concat(
    [factor1_returns, factor2_returns, factor3_returns, factor4_returns],
    axis=1,
    keys=['factor1_returns', 'factor2_returns',
          'factor3_returns', 'factor4_returns'],
)
factor_returns.corr()