In [1]:
import numpy as np
import pandas as pd
import os
import pywikibot
import pickle
import torch

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# Obtained from Temporal ranking Paper
#data = np.load("NASDAQ_wiki_relation.npy")
ticker_wikicode_map_df = pd.read_csv("NASDAQ_ticker_to_wiki.csv")

ticker_wikicode_map_nyse_df = pd.read_csv("NYSE_ticker_to_wiki.csv")


# Tickers that are selected for training the model
INDEX = "nasdaq100"

directory = "../../../data/" + INDEX + "/"

ticker_list = {}
for filename in os.listdir(directory):
    f = os.path.join(directory, filename)
    if os.path.isfile(f):
        ticker, name = filename.split("-")

        df = pd.read_csv(f)

        if df.shape[0] <= 2600: 
            continue 
        
        ticker_list[ticker] = [name]

INDEX = "sp500"

directory = "../../../data/" + INDEX + "/"

for filename in os.listdir(directory):
    f = os.path.join(directory, filename)
    if os.path.isfile(f):
        ticker, name = filename.split("-")

        df = pd.read_csv(f)

        if df.shape[0] <= 2600: 
            continue 
        
        ticker_list[ticker] = [name]

print(len(ticker_list.keys()))
print(ticker_list.keys())

# Getting the WikiBase Id for the companies
# Some of the wanted tickers are not available.
wikicode_map = {}
for index, row in ticker_wikicode_map_df.iterrows():
    if row[0] in ticker_list:
        if row[1] == 'unknown':
            continue
        wikicode_map[row[0]] = row[1]

for index, row in ticker_wikicode_map_nyse_df.iterrows():
    if row[0] in ticker_list:
        if row[1] == 'unknown':
            continue
        wikicode_map[row[0]] = row[1]

print(len(wikicode_map.values()), len(ticker_list.values()))

# Obtained from Wikibase Query builder, using filter
# key 'part of' and value 's&p500
sp500_id = pd.read_csv("query.csv")

ticker_maps_mine = {}
for i in range(528):
    id = sp500_id.loc[i]['item'].split("/")[-1]
    print(sp500_id.loc[i])
    site = pywikibot.Site("wikidata", "wikidata")
    repo = site.data_repository()
    item = pywikibot.ItemPage(repo, id)
    item_dict = item.get()
    try:
        d = item_dict['claims']['P414']
        ticker = d[0].qualifiers['P249'][0].toJSON()['datavalue']['value']

        ticker_maps_mine[ticker] = id
    except:
        print("Not available")

# Even then, IDs for some tickers were not available, so I manually searched wikibase with company names
# and obtained the ID
my_map = {
    'MNST': 'Q1945295',
    'WBA': 'Q18712620',
    'EBAY': 'Q58024',
    'ENPH': 'Q17012234',
    'MTCH': 'Q23133268',
    'CHTR': 'Q2961234',
    'TMUS': 'Q3511885',
    'SIRI': 'Q3277465',
    'BKNG': 'Q18674747',
    'AMD': 'Q128896',
    'AVGO': 'Q4827416',
    'KDP': 'Q3116111',
    'SPLK': 'Q1835753',
    'CTRA': 'Q4035714',
    'PENN': 'Q7163201',
    'NEM': 'Q1785405',
    'GNRC': 'Q5531627',
    'TFC': 'Q795486',
    'PTC': 'Q1760641',
    'BKR': 'Q804353',
    'UAA': 'Q2031485',
    'SPGI': 'Q868587'    
}

ticker_id_map = {}
for ticker, val in ticker_list.items():
    if ticker in ticker_maps_mine:
        ticker_id_map[ticker] = ticker_maps_mine[ticker]
    elif ticker in my_map:
        ticker_id_map[ticker] = my_map[ticker]
    elif ticker in wikicode_map:
        ticker_id_map[ticker] = wikicode_map[ticker]
print(len(ticker_id_map.keys()))

with open('ticker_wiki_map.pkl', 'wb') as f:
    pickle.dump(ticker_id_map, f)

In [3]:
with open('ticker_wiki_map.pkl', 'rb') as f:
    ticker_wiki_map = pickle.load(f)

In [4]:
inverse_ticker_wiki_map = {ticker_wiki_map[k] : k for k in ticker_wiki_map}

In [5]:
# Relations to consider
relation_map = {
    'P127': 'Owned By - FO',
    'P155': 'Follows - FO - immediately prior item in a series of which the subject is a part, preferably use as qualifier of P179 [if the subject has replaced the preceding item, e.g. political offices',
    'P156': 'Followed By - FO',
    'P355': 'Subsidiary - FO',
    'P749': 'Parent Organisation - FO',
    'P31': 'Instance of',
    'P366': 'Use',
    'P452': 'Industry', 
    'P1056': 'Product or Material Produced',
    'P112': 'Founded By',
    'P169': 'CEO',
    'P113': 'Airline Hub',
    'P114': 'Airline Alliance',
    'P121': "Item Operated",
    'P1830': 'Owner of - FO',
    'P3320': 'Board Member',
    'P166': 'Award received',
    'P199': 'Business Division',
    'P361': 'Part of',
    'P400': 'Platform',
    'P2770': 'Source of Income',
    'P463': 'Member of',
    'P306': 'Operating System',
    'P1344': 'Participant of'
}

In [3]:
wiki_relations = {}

with open('wiki_relations_value.pkl', 'rb') as f:
    wiki_relations = pickle.load(f)

In [6]:
for relation, rel_label in relation_map.items():
    for ticker, wiki_id in ticker_wiki_map.items():
        if (ticker, relation) in wiki_relations:
            continue

        site = pywikibot.Site("wikidata", "wikidata")
        repo = site.data_repository()
        item_dict = pywikibot.ItemPage(repo, wiki_id).get()
        claims = item_dict['claims']

        try:
            value = claims[relation]
            for snaks in value:
                snak = snaks.toJSON()
                snak_value = snak['mainsnak']['datavalue']['value']['numeric-id']
                wiki_relations[(ticker, relation)] = wiki_relations.get((ticker, relation), []) + [snak_value]
            print(ticker, relation, rel_label, wiki_relations[(ticker, relation)])
        except:
            wiki_relations[(ticker, relation)] = None

TTWO P463 Member of [55524865]
RMD P463 Member of [7389502]
BA P463 Member of [37033, 1361045]
PNC P463 Member of [15731702]
BR P463 Member of [858851]
WDC P463 Member of [60535625, 858851]
KR P463 Member of [79100838]
DIS P463 Member of [37033]
JNPR P463 Member of [858851, 67904121, 1685617]
GS P463 Member of [858851, 15731702, 1663567]
MMM P463 Member of [1203027]
JCI P463 Member of [67904121]
CRM P463 Member of [858851, 37033, 67904121, 2025721, 852297]
STT P463 Member of [858851]
ITW P463 Member of [7389502]
T P463 Member of [37033, 455048]
NOW P463 Member of [852297]
STX P463 Member of [858851, 1361045]
MRK P463 Member of [2513915, 422557, 463681, 1008895, 1454899, 2513764, 16974387, 1005586, 1005597, 111512539, 30094500]
CHTR P1344 Participant of [62562301]
PEP P1344 Participant of [1107563]
META P1344 Participant of [55696286]
BAX P1344 Participant of [928528]
LUMN P1344 Participant of [62562301]
KO P1344 Participant of [1107563]


In [7]:
with open('wiki_relations_value.pkl', 'wb') as f:
    pickle.dump(wiki_relations, f)

In [8]:
with open('wiki_relations_value.pkl', 'rb') as f:
    wiki_relations = pickle.load(f)

In [19]:
relation_knowledge_graph = []

In [20]:
# First-order relation
for key, value in wiki_relations.items():
    if value is None:
        continue
    for ids in value:
        if 'Q'+str(ids) in inverse_ticker_wiki_map:
            print(key, inverse_ticker_wiki_map['Q'+str(ids)])
            relation_knowledge_graph.append((key[0], key[1], inverse_ticker_wiki_map['Q'+str(ids)]))

('NVDA', 'P127') BLK
('AAPL', 'P127') BLK
('MNST', 'P127') KO
('EBAY', 'P127') BLK
('AMZN', 'P127') BLK
('AZN', 'P127') BLK
('MSFT', 'P127') BLK
('MSFT', 'P127') STT
('MSFT', 'P127') BLK
('TSLA', 'P127') TROW
('TSLA', 'P127') TROW
('NFLX', 'P127') BLK
('NFLX', 'P127') MS
('NFLX', 'P127') BLK
('GILD', 'P127') BLK
('GOOGL', 'P127') BLK
('GOOGL', 'P127') BLK
('GOOG', 'P127') BLK
('GOOG', 'P127') BLK
('PEP', 'P127') BLK
('META', 'P127') BLK
('GE', 'P127') TROW
('GE', 'P127') BLK
('GE', 'P127') NTRS
('WFC', 'P127') BLK
('JPM', 'P127') BLK
('C', 'P127') BLK
('WU', 'P127') BLK
('WU', 'P127') STT
('MS', 'P127') STT
('MS', 'P127') BLK
('MS', 'P127') TROW
('CAT', 'P127') STT
('CAT', 'P127') BLK
('JNJ', 'P127') BLK
('JNJ', 'P127') STT
('MCD', 'P127') BLK
('MCD', 'P127') STT
('IBM', 'P127') STT
('VZ', 'P127') BLK
('XOM', 'P127') STT
('XOM', 'P127') BLK
('XOM', 'P127') BLK
('BAC', 'P127') BLK
('DRI', 'P127') GIS
('GLW', 'P127') BLK
('COP', 'P127') BLK
('PFE', 'P127') BLK
('PFE', 'P127') STT
('DHR',

In [27]:
second_order_relation_types = ['P452_P452', 'P127_P127', 'P355_P355', 'P355_P199', 
                  'P112_P112', 'P112_P127', 'P112_P169', 'P169_P112', 'P169_P127', 'P169_P169', 'P127_P112',
                  'P127_P169', 'P31_P366', 'P127_P3320', 'P463_P463', 'P1056_P306', 'P1056_P1056',
                  'P1056_P452', 'P31_P452', 'P199_P355', 'P452_P2770', 'P1344_P1344',
                  'P169_P3320', 'P452_P31', 'P452_P1056', 'P306_P1056', 'P121_P121', 'P31_P1056', 'P166_P166',
                  'P2770_P452', 'P366_P31', 'P1056_P31', 'P400_P1056', 'P3320_P127', 'P3320_P169', 'P1056_P400',
                  'P114_P114', 'P121_P1056', 'P113_P113', 'P127_P749', 'P127_P355', 'P749_P127', 'P1830_P127', 
                  'P1830_P749', 'P355_P127', 'P155_P355', 'P155_P155', 'P355_P155', 'P127_P1830', 'P749_P1830',
                  'P1056_P121']
first_order_relation_types = ['P355', 'P155', 'P127', 'P156', 'P749', 'P1830']

together = ['P355', 'P155', 'P127', 'P156', 'P749', 'P1830', 
            'P452_P452', 'P127_P127', 'P355_P355', 'P355_P199', 
                  'P112_P112', 'P112_P127', 'P112_P169', 'P169_P112', 'P169_P127', 'P169_P169', 'P127_P112',
                  'P127_P169', 'P31_P366', 'P127_P3320', 'P463_P463', 'P1056_P306', 'P1056_P1056',
                  'P1056_P452', 'P31_P452', 'P199_P355', 'P452_P2770', 'P1344_P1344',
                  'P169_P3320', 'P452_P31', 'P452_P1056', 'P306_P1056', 'P121_P121', 'P31_P1056', 'P166_P166',
                  'P2770_P452', 'P366_P31', 'P1056_P31', 'P400_P1056', 'P3320_P127', 'P3320_P169', 'P1056_P400',
                  'P114_P114', 'P121_P1056', 'P113_P113', 'P127_P749', 'P127_P355', 'P749_P127', 'P1830_P127', 
                  'P1830_P749', 'P355_P127', 'P155_P355', 'P155_P155', 'P355_P155', 'P127_P1830', 'P749_P1830',
                  'P1056_P121'
]
print(len(second_order_relation_types))

51


In [21]:
for relation in second_order_relation_types:
    print(relation)
    r1, r2 = relation.split('_')
    for ticker, wiki_id in ticker_wiki_map.items():
        value1 = wiki_relations[(ticker, r1)]
        if value1 is None:
            continue
        for ticker2, wiki_id2 in ticker_wiki_map.items():
            value2 = wiki_relations[(ticker2, r2)]
            if value2 is None:
                continue
            if set(value1).intersection(set(value2)) and ticker != ticker2:
                if (ticker, relation, ticker2) in relation_knowledge_graph:
                    continue
                print(ticker, relation, ticker2)
                relation_knowledge_graph.append((ticker, relation, ticker2))

P452_P452
ADBE P452_P452 AAPL
ADBE P452_P452 ADSK
ADBE P452_P452 MSFT
ADBE P452_P452 CDNS
ADBE P452_P452 GOOGL
ADBE P452_P452 GOOG
ADBE P452_P452 SNPS
ADBE P452_P452 META
ADBE P452_P452 ORCL
ADBE P452_P452 GRMN
ADBE P452_P452 ACN
ADBE P452_P452 IBM
ADBE P452_P452 HPQ
ADBE P452_P452 TYL
ADBE P452_P452 CRM
NVDA P452_P452 QCOM
NVDA P452_P452 SWKS
NVDA P452_P452 MCHP
NVDA P452_P452 TXN
NVDA P452_P452 ASML
NVDA P452_P452 AMD
NVDA P452_P452 AVGO
NVDA P452_P452 INTC
AMGN P452_P452 REGN
AMGN P452_P452 SGEN
AMGN P452_P452 AZN
AMGN P452_P452 GILD
AMGN P452_P452 BIIB
AMGN P452_P452 VRTX
AMGN P452_P452 PG
AMGN P452_P452 LLY
AMGN P452_P452 VTRS
AMGN P452_P452 WST
AMGN P452_P452 JNJ
AMGN P452_P452 PFE
AMGN P452_P452 BMY
AMGN P452_P452 ABC
AMGN P452_P452 INCY
AMGN P452_P452 MRK
AAPL P452_P452 ADBE
AAPL P452_P452 ADSK
AAPL P452_P452 MCHP
AAPL P452_P452 NXPI
AAPL P452_P452 TXN
AAPL P452_P452 TMUS
AAPL P452_P452 MSFT
AAPL P452_P452 CDNS
AAPL P452_P452 GOOGL
AAPL P452_P452 MRVL
AAPL P452_P452 GOOG
AAPL P

In [24]:
relation_stat = {}
for head, relation, tail in relation_knowledge_graph:
    relation_stat[relation] = relation_stat.get(relation, 0) + 1
print(relation_stat)

{'P127': 73, 'P155': 1, 'P156': 1, 'P355': 2, 'P749': 1, 'P1830': 5, 'P452_P452': 4104, 'P127_P127': 2590, 'P355_P355': 16, 'P112_P112': 6, 'P112_P127': 3, 'P112_P169': 2, 'P169_P112': 2, 'P169_P127': 2, 'P169_P169': 6, 'P127_P112': 3, 'P127_P169': 2, 'P127_P3320': 8, 'P463_P463': 3220, 'P1056_P1056': 1188, 'P1056_P452': 219, 'P31_P452': 601, 'P1344_P1344': 4, 'P169_P3320': 69, 'P452_P31': 601, 'P452_P1056': 219, 'P121_P121': 2, 'P31_P1056': 48, 'P166_P166': 734, 'P1056_P31': 48, 'P400_P1056': 3, 'P3320_P127': 8, 'P3320_P169': 69, 'P1056_P400': 3, 'P121_P1056': 2, 'P127_P749': 3, 'P127_P355': 43, 'P749_P127': 3, 'P1830_P127': 40, 'P355_P127': 43, 'P155_P355': 4, 'P155_P155': 6, 'P355_P155': 4, 'P127_P1830': 40, 'P1056_P121': 2}


In [25]:
nasdaq_c_id = {'SBUX': 0, 'VRSK': 1, 'ADBE': 2, 'NVDA': 3, 'AMGN': 4, 'ISRG': 5, 'KLAC': 6, 'AAPL': 7, 'REGN': 8, 'BIDU': 9, 'QCOM': 10, 'XEL': 11, 'MNST': 12, 'CTSH': 13, 'EA': 14, 'WBA': 15, 'ADSK': 16, 'EBAY': 17, 'ILMN': 18, 'NTES': 19, 'SWKS': 20, 'ADP': 21, 'MU': 22, 'AMZN': 23, 'MCHP': 24, 'NXPI': 25, 'FTNT': 26, 'SGEN': 27, 'TXN': 28, 'DXCM': 29, 'AZN': 30, 'VRSN': 31, 'MTCH': 32, 'CHTR': 33, 'AMAT': 34, 'FISV': 35, 'TMUS': 36, 'MSFT': 37, 'CTAS': 38, 'IDXX': 39, 'TSLA': 40, 'MAR': 41, 'AEP': 42, 'NFLX': 43, 'CDNS': 44, 'ALGN': 45, 'MELI': 46, 'GILD': 47, 'SIRI': 48, 'EXC': 49, 'CSCO': 50, 'LULU': 51, 'GOOGL': 52, 'MRVL': 53, 'ASML': 54, 'BIIB': 55, 'GOOG': 56, 'BKNG': 57, 'FAST': 58, 'ORLY': 59, 'ROST': 60, 'VRTX': 61, 'PCAR': 62, 'CMCSA': 63, 'PEP': 64, 'ATVI': 65, 'AMD': 66, 'ANSS': 67, 'AVGO': 68, 'INTU': 69, 'SNPS': 70, 'COST': 71, 'DLTR': 72, 'HON': 73, 'INTC': 74, 'PAYX': 75, 'CSX': 76, 'LRCX': 77, 'ADI': 78, 'MDLZ': 79, 'KDP': 80, 'CPRT': 81, 'ODFL': 82}
sp500_c_id = {'APH': 0, 'PG': 1, 'OMC': 2, 'VRTX': 3, 'EXC': 4, 'KMX': 5, 'STE': 6, 'SPG': 7, 'GE': 8, 'TMO': 9, 'EA': 10, 'CTAS': 11, 'PNR': 12, 'EIX': 13, 'SCHW': 14, 'ADI': 15, 'DLTR': 16, 'EQR': 17, 'GIS': 18, 'TRMB': 19, 'FCX': 20, 'UPS': 21, 'FDX': 22, 'SYY': 23, 'COF': 24, 'LEN': 25, 'WELL': 26, 'HSIC': 27, 'BDX': 28, 'GOOGL': 29, 'BEN': 30, 'LLY': 31, 'VFC': 32, 'WFC': 33, 'WHR': 34, 'UNP': 35, 'FMC': 36, 'GPC': 37, 'ABT': 38, 'IVZ': 39, 'WAT': 40, 'WEC': 41, 'ROK': 42, 'TSLA': 43, 'BRO': 44, 'LNC': 45, 'AME': 46, 'TDY': 47, 'DAL': 48, 'CTRA': 49, 'ROP': 50, 'PENN': 51, 'PEP': 52, 'REGN': 53, 'UNH': 54, 'NFLX': 55, 'DISH': 56, 'DHI': 57, 'ORLY': 58, 'BBWI': 59, 'JPM': 60, 'NWL': 61, 'A': 62, 'PFG': 63, 'MCHP': 64, 'ORCL': 65, 'NEM': 66, 'HBI': 67, 'AMT': 68, 'CDNS': 69, 'RE': 70, 'ED': 71, 'TDG': 72, 'ARE': 73, 'XEL': 74, 'PHM': 75, 'PEG': 76, 'CME': 77, 'CTSH': 78, 'ES': 79, 'AES': 80, 'GRMN': 81, 'PNW': 82, 'FTNT': 83, 'MO': 84, 'ICE': 85, 'MRO': 86, 'PLD': 87, 'DOV': 88, 'BSX': 89, 'ISRG': 90, 'VLO': 91, 'PPL': 92, 'DFS': 93, 'DUK': 94, 'ADP': 95, 'NVDA': 96, 'MAS': 97, 'ZBH': 98, 'C': 99, 'LEG': 100, 'WU': 101, 'IEX': 102, 'NXPI': 103, 'ANSS': 104, 'ADBE': 105, 'LVS': 106, 'ILMN': 107, 'CRL': 108, 'EXPE': 109, 'IFF': 110, 'DVN': 111, 'CNC': 112, 'WRB': 113, 'VTRS': 114, 'CNP': 115, 'MS': 116, 'LNT': 117, 'ROST': 118, 'CAT': 119, 'AMZN': 120, 'GNRC': 121, 'NTAP': 122, 'KEY': 123, 'PWR': 124, 'PRU': 125, 'HES': 126, 'DD': 127, 'F': 128, 'SBUX': 129, 'MLM': 130, 'WST': 131, 'MPWR': 132, 'IRM': 133, 'NVR': 134, 'JNJ': 135, 'AKAM': 136, 'LKQ': 137, 'REG': 138, 'SEE': 139, 'FISV': 140, 'WYNN': 141, 'PVH': 142, 'DPZ': 143, 'QCOM': 144, 'ACN': 145, 'INTU': 146, 'HSY': 147, 'LUV': 148, 'MHK': 149, 'TJX': 150, 'GWW': 151, 'UHS': 152, 'MSI': 153, 'BK': 154, 'ROL': 155, 'AFL': 156, 'EXR': 157, 'SWKS': 158, 'LH': 159, 'NSC': 160, 'VTR': 161, 'GOOG': 162, 'MSFT': 163, 'BBY': 164, 'AEP': 165, 'XRAY': 166, 'GD': 167, 'CVX': 168, 'HST': 169, 'AVGO': 170, 'WMB': 171, 'SIVB': 172, 'HBAN': 173, 'MKC': 174, 'GM': 175, 'TGT': 176, 'AIZ': 177, 'MDLZ': 178, 'IT': 179, 'MCD': 180, 'IPGP': 181, 'EBAY': 182, 'BAX': 183, 'NKE': 184, 'MGM': 185, 'IBM': 186, 'BKNG': 187, 'CAH': 188, 'RF': 189, 'FRT': 190, 'VZ': 191, 'WM': 192, 'TFC': 193, 'AVB': 194, 'MOS': 195, 'TER': 196, 'CCL': 197, 'DXCM': 198, 'AMGN': 199, 'MKTX': 200, 'CMI': 201, 'AJG': 202, 'PSA': 203, 'MA': 204, 'PPG': 205, 'UAL': 206, 'XOM': 207, 'ALB': 208, 'EMR': 209, 'EQIX': 210, 'CI': 211, 'CE': 212, 'AWK': 213, 'TSN': 214, 'ESS': 215, 'FLT': 216, 'SNA': 217, 'AON': 218, 'LDOS': 219, 'MTCH': 220, 'AAL': 221, 'BWA': 222, 'TPR': 223, 'EMN': 224, 'SO': 225, 'ALGN': 226, 'DE': 227, 'PM': 228, 'DXC': 229, 'SHW': 230, 'CMCSA': 231, 'MAR': 232, 'BAC': 233, 'DRI': 234, 'ZION': 235, 'CMS': 236, 'PKG': 237, 'PKI': 238, 'NI': 239, 'ALL': 240, 'ATVI': 241, 'SJM': 242, 'AIG': 243, 'LYV': 244, 'GLW': 245, 'SYK': 246, 'DLR': 247, 'UDR': 248, 'AXP': 249, 'PH': 250, 'TT': 251, 'EVRG': 252, 'WY': 253, 'LRCX': 254, 'ETN': 255, 'BIIB': 256, 'NUE': 257, 'PGR': 258, 'LMT': 259, 'RJF': 260, 'ODFL': 261, 'V': 262, 'RSG': 263, 'COP': 264, 'JBHT': 265, 'HON': 266, 'PTC': 267, 'SLB': 268, 'HPQ': 269, 'GPN': 270, 'FFIV': 271, 'CHTR': 272, 'CMG': 273, 'BKR': 274, 'ZBRA': 275, 'CLX': 276, 'EXPD': 277, 'TSCO': 278, 'SRE': 279, 'AVY': 280, 'LYB': 281, 'PCAR': 282, 'RTX': 283, 'MPC': 284, 'APD': 285, 'EW': 286, 'PFE': 287, 'EOG': 288, 'L': 289, 'COO': 290, 'SNPS': 291, 'USB': 292, 'FITB': 293, 'DHR': 294, 'CINF': 295, 'TTWO': 296, 'MAA': 297, 'GL': 298, 'RMD': 299, 'CMA': 300, 'INTC': 301, 'HUM': 302, 'BA': 303, 'PNC': 304, 'ULTA': 305, 'GPS': 306, 'NTRS': 307, 'BR': 308, 'K': 309, 'TMUS': 310, 'CSX': 311, 'UAA': 312, 'AMD': 313, 'HRL': 314, 'KLAC': 315, 'FAST': 316, 'ALK': 317, 'MET': 318, 'AMAT': 319, 'MCK': 320, 'RL': 321, 'BLK': 322, 'CBRE': 323, 'CL': 324, 'WDC': 325, 'LHX': 326, 'KMI': 327, 'PEAK': 328, 'TFX': 329, 'CVS': 330, 'KR': 331, 'VMC': 332, 'POOL': 333, 'TAP': 334, 'VRSN': 335, 'LOW': 336, 'DIS': 337, 'HD': 338, 'AMP': 339, 'TXT': 340, 'MTD': 341, 'MDT': 342, 'COST': 343, 'FIS': 344, 'FRC': 345, 'MNST': 346, 'CCI': 347, 'KIM': 348, 'D': 349, 'NEE': 350, 'CBOE': 351, 'TROW': 352, 'IP': 353, 'EL': 354, 'SBAC': 355, 'JNPR': 356, 'GS': 357, 'HAS': 358, 'LUMN': 359, 'GILD': 360, 'PXD': 361, 'MMM': 362, 'HII': 363, 'SWK': 364, 'ATO': 365, 'JCI': 366, 'TYL': 367, 'CAG': 368, 'AAPL': 369, 'CF': 370, 'ECL': 371, 'CPRT': 372, 'CRM': 373, 'EFX': 374, 'ADSK': 375, 'YUM': 376, 'DG': 377, 'DTE': 378, 'PAYX': 379, 'DGX': 380, 'STT': 381, 'CB': 382, 'TECH': 383, 'SPGI': 384, 'WMT': 385, 'VNO': 386, 'URI': 387, 'XYL': 388, 'MCO': 389, 'JKHY': 390, 'KO': 391, 'ITW': 392, 'VRSK': 393, 'HIG': 394, 'FE': 395, 'NOC': 396, 'CHRW': 397, 'KMB': 398, 'HCA': 399, 'AEE': 400, 'HAL': 401, 'RHI': 402, 'HOLX': 403, 'MSCI': 404, 'CPB': 405, 'BMY': 406, 'T': 407, 'MU': 408, 'IDXX': 409, 'NRG': 410, 'J': 411, 'LIN': 412, 'WAB': 413, 'CSCO': 414, 'TEL': 415, 'OXY': 416, 'BXP': 417, 'AZO': 418, 'ABC': 419, 'APA': 420, 'OKE': 421, 'AAP': 422, 'CHD': 423, 'O': 424, 'INCY': 425, 'NDAQ': 426, 'TRV': 427, 'STX': 428, 'WBA': 429, 'RCL': 430, 'ADM': 431, 'ETR': 432, 'STZ': 433, 'MRK': 434, 'MMC': 435, 'MTB': 436, 'AOS': 437, 'IPG': 438, 'DVA': 439, 'TXN': 440, 'BIO': 441}

In [28]:
nasdaq_rkg = [[], [], []]
for head, relation, tail in relation_knowledge_graph:
    if head in nasdaq_c_id and tail in nasdaq_c_id:
        nasdaq_rkg[0].append(nasdaq_c_id[head])
        
        rel_pos = together.index(relation)
        nasdaq_rkg[1].append(rel_pos)

        nasdaq_rkg[2].append(nasdaq_c_id[tail])
nasdaq_rkg[0] = torch.tensor(nasdaq_rkg[0])
nasdaq_rkg[1] = torch.tensor(nasdaq_rkg[1])
nasdaq_rkg[2] = torch.tensor(nasdaq_rkg[2])

with open("nasdaq100_relations_kg.pkl", "wb") as f:
    pickle.dump({'kg': nasdaq_rkg}, f)

In [30]:
sp_rkg = [[], [], []]
for head, relation, tail in relation_knowledge_graph:
    if head in sp500_c_id and tail in sp500_c_id:
        sp_rkg[0].append(sp500_c_id[head])
        
        rel_pos = together.index(relation)
        sp_rkg[1].append(rel_pos)

        sp_rkg[2].append(sp500_c_id[tail])
sp_rkg[0] = torch.tensor(sp_rkg[0])
sp_rkg[1] = torch.tensor(sp_rkg[1])
sp_rkg[2] = torch.tensor(sp_rkg[2])

with open("sp500_relations_kg.pkl", "wb") as f:
    pickle.dump({'kg': sp_rkg}, f)

In [6]:
wiki_relations = {}

with open('wiki_relations_value.pkl', 'rb') as f:
    wiki_relations = pickle.load(f)

In [11]:
print(relation_map)

{'P127': 'Owned By - FO', 'P155': 'Follows - FO', 'P156': 'Followed By - FO', 'P355': 'Subsidiary - FO', 'P749': 'Parent Organisation - FO', 'P31': 'Instance of', 'P366': 'Use', 'P452': 'Industry', 'P1056': 'Product or Material Produced', 'P112': 'Founded By', 'P169': 'CEO', 'P113': 'Airline Hub', 'P114': 'Airline Alliance', 'P121': 'Item Operated', 'P1830': 'Owner of - FO', 'P3320': 'Board Member', 'P166': 'Award received', 'P199': 'Business Division', 'P361': 'Part of', 'P400': 'Platform', 'P2770': 'Source of Income', 'P463': 'Member of', 'P306': 'Operating System', 'P1344': 'Participant of'}


In [25]:
wiki_id_to_node_name_map = {}
non_english_nodes = []

In [30]:
with open('wiki_id_to_node_name_map.pkl', 'rb') as f:
    wiki_id_to_node_name_map = pickle.load(f)['map']
    non_english_nodes = pickle.load(f)['non_english_nodes']

In [32]:
for k, v in wiki_relations.items():
    if v == None:
        continue
    ticker, relation = k
    for nodes_id in v:
        if "Q"+str(nodes_id) in wiki_id_to_node_name_map or nodes_id in non_english_nodes:
            continue
            
        site = pywikibot.Site("wikidata", "wikidata")
        repo = site.data_repository()
        item_dict = pywikibot.ItemPage(repo, "Q"+str(nodes_id)).get()
        claims = item_dict['claims']
        try:
            node_name = item_dict['labels'].toJSON()['en']['value']
        except:
            non_english_nodes.append(nodes_id)
        print(relation, node_name)
        if "Q"+str(nodes_id) not in wiki_id_to_node_name_map:
            wiki_id_to_node_name_map["Q"+str(nodes_id)] = node_name



P127 BlackRock
P127 The Coca-Cola Company
P127 State Street Corporation
P127 T. Rowe Price
P127 Morgan Stanley
P127 Northern Trust
P127 General Mills
P127 JPMorgan Chase
P127 General Electric
P156 Darden Restaurants
P355 Yum! Brands
P749 Altria
P1830 Baker Hughes
P1830 Wabtec
P1830 Netflix


In [34]:
for ticker, wiki_id in ticker_wiki_map.items():
    print(wiki_id)
    if str(wiki_id) in wiki_id_to_node_name_map or wiki_id in non_english_nodes:
        continue
        
    site = pywikibot.Site("wikidata", "wikidata")
    repo = site.data_repository()
    item_dict = pywikibot.ItemPage(repo, str(wiki_id)).get()
    claims = item_dict['claims']
    try:
        node_name = item_dict['labels'].toJSON()['en']['value']
    except:
        non_english_nodes.append(wiki_id)
    print(node_name)
    if str(nodes_id) not in wiki_id_to_node_name_map:
        wiki_id_to_node_name_map[str(nodes_id)] = node_name

Q37158
Starbucks
Q7921370
Verisk Analytics
Q11463
Adobe
Q182477
Nvidia
Q470517
Amgen
Q2743863
Intuitive Surgical
Q624649
KLA Corporation
Q312
Apple
Q7308054
Regeneron Pharmaceuticals
Q14772
Baidu
Q544847
Qualcomm Inc.
Q1486956
Xcel Energy
Q1945295
Monster Beverage
Q1107035
Cognizant
Q173941
Electronic Arts
Q18712620
Walgreens Boots Alliance
Q628051
Autodesk
Q58024
eBay
Q2068984
Illumina
Q196259
NetEase
Q7538146
Skyworks Solutions
Q1835753
Splunk
Q489080
Automatic Data Processing
Q1197548
Micron Technology
Q3884
Amazon
Q1545076
Microchip Technology
Q1155668
NXP Semiconductors
Q2749364
Fortinet
Q17155112
Seagen
Q193412
Texas Instruments
Q17012234
Enphase Energy
Q15109865
Dexcom
Q731938
AstraZeneca
Q734338
Verisign
Q23133268
Match Group
Q2961234
Charter Communications
Q621610
Applied Materials
Q1420513
Fiserv
Q3511885
T-Mobile US
Q2283
Microsoft
Q1092571
Cintas
Q1758392
IDEXX Laboratories, Inc.
Q478214
Tesla, Inc.
Q1141173
Marriott International
Q464092
American Electric Power
Q907311
Q60

In [35]:
with open('wiki_id_to_node_name_map.pkl', 'wb') as f:
    pickle.dump({'map': wiki_id_to_node_name_map, 'non_english_nodes': non_english_nodes}, f)