# Identify most cointegrated pair from the 10 largest companies in each of the 11 sectors

## Author: Malik Jabati (May 7, 2019)

In [5]:
#First import some things we will need to run pipeline and get our data
from quantopian.research import run_pipeline
from quantopian.pipeline import Pipeline

from quantopian.pipeline.data import Fundamentals

from quantopian.pipeline.classifiers.morningstar import Sector
from quantopian.pipeline.filters import Q1500US

from quantopian.pipeline.data import morningstar
from quantopian.pipeline.data import EquityPricing

from quantopian.pipeline.factors.morningstar import MarketCap
from quantopian.pipeline.factors import DailyReturns
from quantopian.pipeline.filters import  StaticAssets

import pandas as pd
import statsmodels.tsa.stattools as sm

In [6]:
#sector = Fundamentals.morningstar_sector_code.latest
#ticker = Fundamentals.primary_symbol.latest

# Morningstar codes for all eleven sectors
SectorCodes = {'Basic Materials': 101, 'Consumer Cyclical': 102, 'Financial Services': 103,
               'Real Estate': 104, 'Consumer Defensive': 205, 'Healthcare': 206, 'Utilities': 207,
               'Communication Services': 308, 'Energy': 309, 'Industrials': 310, 'Technology': 311}

# Create an array of the the keys in the sector and sort alphabetically
SectorKeys = SectorCodes.keys()
SectorKeys.sort()

# Going to fill this array with the top 500 largest firms (by market cap) separated by sector
CompaniesInSector_pipe = []



In [7]:

"""
Fill CompaniesInSector_pipe with pipelines containing sector
and market cap data for the top 500 companies by sector
"""

for i in range(len(SectorCodes)):
    def make_pipeline():
        top_500 = morningstar.valuation.market_cap.latest.top(500)

        return Pipeline(
            columns={
                "sector": Sector(),
                "market_cap": MarketCap()
                #,"ticker": ticker
            },
            screen=(top_500 & Sector().element_of([SectorCodes[SectorKeys[i]]]))
        )

    pipeline_output = run_pipeline(
        make_pipeline(),
        start_date="2018-01-01",
        end_date="2018-01-01"
    )

    CompaniesInSector_pipe.append(pipeline_output.sort_values(by="market_cap", ascending=False))

In [8]:
"""
Fill CompaniesInSector with Quantopian trading symbols and IDs for the top 500 companies by sector
"""

CompaniesInSector = []

for i in range(len(CompaniesInSector_pipe)):
    print(SectorKeys[i])        
    candidates = CompaniesInSector_pipe[i].iloc[:5].index #creates a multiindex
    candidates = candidates.levels[1].tolist() #gets equity level index and turns to list of equity objects
    
    CompaniesInSector.append(candidates)
    
    print(candidates)
    print('\n')

Basic Materials
[Equity(460 [APD]), Equity(863 [BHP]), Equity(1856 [CRH]), Equity(2427 [ECL]), Equity(5261 [NEM]), Equity(5488 [NUE]), Equity(6109 [POT]), Equity(6116 [PPG]), Equity(6272 [PX]), Equity(6584 [RIO]), Equity(6868 [SHW]), Equity(7998 [VMC]), Equity(8326 [WY]), Equity(12095 [PKX]), Equity(13197 [FCX]), Equity(14284 [SCCO]), Equity(17395 [MT]), Equity(22140 [MON]), Equity(22275 [SYT]), Equity(23536 [VALE]), Equity(24522 [TS]), Equity(25165 [BBL]), Equity(39546 [LYB]), Equity(45378 [VEDL]), Equity(51157 [DWDP])]


Communication Services
[Equity(766 [BCE]), Equity(1161 [BT]), Equity(1637 [CMCS_A]), Equity(1960 [CTL]), Equity(2938 [S]), Equity(6653 [T]), Equity(7386 [TEF]), Equity(8017 [VOD]), Equity(13017 [DISH]), Equity(13950 [TLK]), Equity(14298 [RCI]), Equity(15221 [SKM]), Equity(17657 [ORAN]), Equity(19258 [CCI]), Equity(19540 [VIV]), Equity(20281 [SBAC]), Equity(21596 [CHU]), Equity(21839 [VZ]), Equity(22176 [TU]), Equity(22414 [AMOV]), Equity(22416 [AMX]), Equity(24394 [C

In [65]:
print(SectorKeys)

['Basic Materials', 'Communication Services', 'Consumer Cyclical', 'Consumer Defensive', 'Energy', 'Financial Services', 'Healthcare', 'Industrials', 'Real Estate', 'Technology', 'Utilities']


In [66]:
# Get the latest daily close price for all equities.
yesterday_close = EquityPricing.close.latest

def prices_pipeline(asset):
# Get prices for a particular asset
    return Pipeline(
        columns={
            "close": yesterday_close
        },
        screen = asset
    )


CointMatrix = []

#CointMatrix = [[[0 for x in range(10)] for y in range(10)] for z in range(len(CompaniesInSector))]

for z in range(len(CompaniesInSector)):

    #Create empty 10x10 cointegration matrix
    CointMatrix_temp = [[0 for x in range(10)] for y in range(10)]

    CointMatrix.append(CointMatrix_temp)

    # Only look for pairs in the 10 largest firms (by market cap) in each sector
    for i in range(10):
        for j in range(10):    
            #First grab prices for data set
            asset_x = StaticAssets([CompaniesInSector[z][i]])
            asset_y = StaticAssets([CompaniesInSector[z][j]])

            ##Use daily returns for the four years before the start of the backtest
            pipeline_output_x = run_pipeline(
                prices_pipeline(asset_x),
                start_date="2006-01-01",
                end_date="2010-01-01"
            )

            pipeline_output_y = run_pipeline(
                prices_pipeline(asset_y),
                start_date="2006-01-01",
                end_date="2010-01-01"
            )

            prices_x = pipeline_output_x['close']
            prices_y = pipeline_output_y['close']

            try:
                #Then cointegrate price series
                t_stat, p_value, _ = sm.coint(prices_x, prices_y)

                if i==j:
                    CointMatrix[z][i][j] = 0
                elif p_value > 0.5:
                    CointMatrix[z][i][j] = 0
                else:
                    CointMatrix[z][i][j] = abs(t_stat)
            except:
                CointMatrix[z][i][j] = 0
                

    print(CointMatrix[z])
    print('\n')

Equity(460 [APD])
[[0, 2.5147075984854479, 0, 2.7926396272831555, 0, 0, 2.203682694123938, 0, 0, 3.0106647100769877], [2.5422890249097962, 0, 0, 0, 0, 0, 0, 0, 3.9565729218106283, 0], [0, 0, 0, 0, 0, 0, 0, 2.8703045289419369, 0, 0], [3.4878536917315746, 2.9011308437701389, 2.6723721444339206, 0, 2.8160486171223851, 2.6890117272212843, 3.0546857686353568, 2.9163759623022916, 2.6798763359966289, 3.2072561054489119], [3.2452072092425444, 3.1268750335174356, 3.174834370472523, 3.2196915584684915, 0, 3.6225522505608594, 3.139282324543395, 3.2377724016530713, 3.1310656858337138, 3.1947919823586295], [2.2565151886347525, 2.2546395621194426, 2.5746417650843529, 2.2501170232113235, 2.9156691936671719, 0, 2.3587532700025511, 2.4423100459207125, 2.3711927944938571, 2.3466060431653708], [2.7265470078845579, 2.2843881526468843, 2.3927152291777323, 2.7507509078050001, 2.3760537616505117, 2.4437504914309338, 0, 2.4082359212531137, 2.4930221808576891, 2.9451379125947637], [0, 0, 2.8450822563685421, 0,

[[0, 4.0406168961882099, 3.4684482241740424, 3.3803423146466662, 2.9421881006424413, 2.33599889383756, 5.1543800646186906, 4.0309744255531275, 2.7812090935376501, 3.2985781398543157], [3.3371269087849118, 0, 2.0792406418536351, 2.0772734111979521, 2.0646395823483346, 0, 2.8353041211954584, 4.4560794749701165, 0, 2.8715767599268514], [2.9672316460147554, 2.5038926426710155, 0, 2.6898734341633475, 0, 0, 3.1835294861669046, 2.3180294185146511, 2.8531459950226217, 2.0994247583065868], [2.7118113351239423, 2.3169517993482232, 2.5237548833330785, 0, 2.7349121498672258, 0, 0, 2.2525279418312896, 0, 2.7004005863088216], [2.4269272121935797, 2.5596805278994093, 0, 2.9539008922988002, 0, 0, 0, 2.7579436197757254, 0, 4.0703432178682082], [0, 0, 2.3789744646860771, 2.3853797869358178, 2.081126268900698, 0, 0, 0, 2.1366240511172689, 0], [4.7872594911277071, 3.102632489245297, 3.1290910759565724, 0, 0, 0, 0, 3.0158249650227327, 2.2433093819703998, 0], [3.5762490417253558, 4.6498154861489249, 2.26759

In [67]:
PairsLocation = []

for z in range(len(CompaniesInSector)):

    max = 0
    stock_a = stock_b = -1

    for i in range(10):
        for j in range(10):
            if CointMatrix[z][i][j] > max:
                max = CointMatrix[z][i][j]
                stock_a = i
                stock_b = j

    PairsLocation.append((stock_a, stock_b))
            
print(PairsLocation)


[(8, 1), (3, 8), (7, 3), (6, 0), (4, 9), (3, 1), (1, 7), (0, 6), (2, 7), (6, 0), (7, 4)]


In [72]:
SectorPairs = []

for z in range(len(CompaniesInSector)):
    i = PairsLocation[z][0]
    j = PairsLocation[z][1]
    SectorPairs.append((SectorKeys[z], CompaniesInSector[z][i],
                        CompaniesInSector[z][j], CointMatrix[z][i][j]))
    
    print(SectorPairs[z])
    print('\n')

('Basic Materials', Equity(6272 [PX]), Equity(863 [BHP]), 4.0889372566101372)


('Communication Services', Equity(1960 [CTL]), Equity(13017 [DISH]), 4.5983726711866542)


('Consumer Cyclical', Equity(3597 [HMC]), Equity(2190 [DIS]), 4.0269192360360044)


('Consumer Defensive', Equity(1616 [CLX]), Equity(76 [TAP]), 3.7648810883221864)


('Energy', Equity(3895 [IMO]), Equity(7538 [TOT]), 7.3135673879848389)


('Financial Services', Equity(679 [AXP]), Equity(239 [AIG]), 3.9013617293517919)


('Healthcare', Equity(168 [AET]), Equity(1376 [CAH]), 4.6108160739455668)


('Industrials', Equity(630 [ADP]), Equity(1985 [CMI]), 5.1543800646186906)


('Real Estate', Equity(9540 [EQR]), Equity(21475 [BAM]), 4.0656378948319967)


('Technology', Equity(1419 [CERN]), Equity(24 [AAPL]), 5.2417112516523829)


('Utilities', Equity(5792 [PCG]), Equity(2434 [ED]), 3.4946557830049634)


