# The purpose of this document is to show how to query and navigate the reference files for information. When you train a model or develop a signal, these files will be joined with job records, and reference rows that have no matching job records will be dropped.

# Import reference files

1. PIT Ticker
2. PIT Company Reference
3. Scrape Log

In [7]:
import pandas as pd

# Choose a company to look at.
# Ticker is compared for exact equality against the 'stock_ticker' column of the
# ticker file; Company_name is searched for (case-insensitively) inside the
# 'company_name' column of the company reference file.
Ticker = 'AMZN'
Company_name = 'Amazon'

In [8]:
# Load the three reference files (gzip-compressed CSVs) into DataFrames.
# "PIT" presumably stands for point-in-time: rows carry start_date/end_date
# columns in the outputs shown in this document — confirm against the data docs.
# NOTE(review): the displayed outputs contain an 'Unnamed: 0' column, which
# suggests the CSVs were written together with their index; passing
# index_col=0 to read_csv would likely avoid that extra column — confirm.
PIT_Ticker = pd.read_csv('../raw-sample/raw_pit_ticker_full.csv.gz')  # ticker listings per company_id
PIT_Company = pd.read_csv('../raw-sample/raw_pit_company_reference_full.csv.gz')  # name/url/identifiers per company_id
ScrapeLog = pd.read_csv('../raw-sample/raw_company_scrape_log_full.csv.gz')  # one row per company_id and scrape date

# Look up Company ID and other information based on company name

In [9]:
# Keep the company reference rows whose name contains Company_name, ignoring case.
# na=False makes rows with a missing company_name count as non-matches.
# Note that str.contains interprets Company_name as a regular expression by default.
PIT_Company.loc[
    PIT_Company['company_name'].str.contains(Company_name, case=False, na=False)
]

Unnamed: 0,company_id,start_date,end_date,company_name,company_url,lei,open_perm_id,naics_code
411,469,2016-02-24,,"Amazon.com, Inc.",http://www.amazon.com,ZXTILKJKG63JELOEG630,4295905494.0,454110.0
19413,19202,2016-02-24,,Amazon Lab126,http://www.lab126.com,ZXTILKJKG63JELOEG630,4295905494.0,541715.0
34088,33625,2016-02-24,,"Amazon.com, Inc.",http://amazon.com,ZXTILKJKG63JELOEG630,4295905494.0,454113.0
41695,41053,2016-11-29,,Amazon,http://www.amazondelivers.jobs,,4296241666.0,454110.0
58218,58103,2018-11-21,,Amazon Contracted Delivery Service Partner,http://logistics.amazon.com,,,
59639,59981,2019-04-30,,Amazon AWS - Technical,http://aws.amazon.com,,,
59671,60064,2019-05-03,2019-05-07,Amazon Flex,http://flex.amazon.com,,,
59672,60064,2019-05-08,,Amazon Flex,http://flex.amazon.com,,4295905494.0,454110.0
59673,60065,2019-05-03,2019-05-07,Amazon HVH,http://amazon.force.com,,,
59674,60065,2019-05-08,,Amazon HVH,http://amazon.force.com,,,454110.0


# Look up Company IDs based on Ticker

In [10]:
# Collect the distinct company ids that have at least one row listed under Ticker.
# The inner selection finds ids with a matching 'stock_ticker'; the outer
# selection then takes every ticker row of those companies and returns their
# unique ids in the order they first appear in PIT_Ticker.
Company_IDs = PIT_Ticker.loc[
    PIT_Ticker['company_id'].isin(
        PIT_Ticker.loc[PIT_Ticker['stock_ticker'] == Ticker, 'company_id'].unique()
    ),
    'company_id',
].unique()
Company_IDs

array([  227,   469,   945,  2868, 21652, 25784, 33164, 33625, 35288,
       35368, 41053, 42767])

# Look for all scrape changes based on Company IDs

In [14]:
# First ten scrape-log rows that belong to one of the selected companies
# and are flagged as a changed scrape.
ScrapeLog.loc[
    ScrapeLog['company_id'].isin(Company_IDs)
    & ScrapeLog['scrape_changed'].eq(True)
].head(10)

Unnamed: 0,company_id,date,scrape_run_complete,scrape_changed
158254,227,2012-05-07,True,True
158258,227,2012-05-14,True,True
158266,227,2012-05-29,True,True
158361,227,2013-01-14,True,True
158560,227,2014-08-13,True,True
158742,227,2019-04-17,True,True
158792,227,2019-07-29,True,True
295589,469,2010-05-17,False,True
295590,469,2010-05-18,False,True
295737,469,2011-08-02,False,True


# Look up company reference file with Company IDs

In [15]:
# Company reference rows for every company id collected in Company_IDs.
PIT_Company.loc[PIT_Company['company_id'].isin(Company_IDs)]

Unnamed: 0,company_id,start_date,end_date,company_name,company_url,lei,open_perm_id,naics_code
217,227,2016-02-24,,ACME Tools,http://www.acmetool.com,,,
411,469,2016-02-24,,"Amazon.com, Inc.",http://www.amazon.com,ZXTILKJKG63JELOEG630,4295905494.0,454110.0
906,945,2017-06-02,,"Zappos.com, Inc.",http://www.zappos.com,,4296946300.0,448210.0
2923,2868,2016-02-24,,Whole Foods Market,http://www.wholefoods.com,,4295908472.0,
21667,21652,2016-02-24,2016-06-28,Justin.TV,http://www.justin.tv,,5042932082.0,517311.0
21668,21652,2016-06-28,2017-02-12,Twitch,http://www.justin.tv,,5042932082.0,517311.0
21669,21652,2017-02-13,,Twitch,http://www.twitch.tv,,5042932082.0,517311.0
25958,25784,2016-02-24,,Kiva Systems,http://www.kivasystems.com,,4297527928.0,
33622,33164,2016-02-24,,Souq.com,http://www.souq.com,,5050292230.0,448120.0
34088,33625,2016-02-24,,"Amazon.com, Inc.",http://amazon.com,ZXTILKJKG63JELOEG630,4295905494.0,454113.0


# Look up all history for companies that have ever had a specified ticker

In [17]:
# All ticker rows (every exchange and ticker symbol) for any company that has
# at least one row whose 'stock_ticker' equals Ticker.
PIT_Ticker.loc[
    PIT_Ticker['company_id'].isin(
        PIT_Ticker.loc[PIT_Ticker['stock_ticker'] == Ticker, 'company_id'].unique()
    )
]

Unnamed: 0,company_id,start_date,end_date,stock_ticker,stock_exchange_country,stock_exchange_name,primary_flag
442,227,1999-11-09,,AMZN,CL,SGO,False
443,227,1999-11-09,,AMZN,IT,MIL,False
444,227,1999-11-09,,AMZ,DE,ETR,False
445,227,1999-11-09,,0R1O,GB,LON,False
446,227,1999-11-09,,AMZN,CH,SWX,False
447,227,1999-11-09,,AMZN,CO,BOG,False
448,227,1999-11-09,,AMZN,MX,MEX,False
449,227,1999-11-09,,AMZN,PE,LIM,False
450,227,1999-11-09,,AMZN,US,NAS,True
451,227,1999-11-09,,AMZN,AT,WBO,False


# Look up Ticker history based on Company ID

In [18]:
# Company ID to inspect; 469 appears as "Amazon.com, Inc." (http://www.amazon.com)
# in the company reference output earlier in this document.
company_id = 469

In [19]:
# Every ticker row recorded for the single chosen company id.
PIT_Ticker.loc[PIT_Ticker['company_id'].eq(company_id)]

Unnamed: 0,company_id,start_date,end_date,stock_ticker,stock_exchange_country,stock_exchange_name,primary_flag
1031,469,,,AMZN,CL,SGO,False
1032,469,,,AMZN,IT,MIL,False
1033,469,,,AMZ,DE,ETR,False
1034,469,,,0R1O,GB,LON,False
1035,469,,,AMZN,CH,SWX,False
1036,469,,,AMZN,CO,BOG,False
1037,469,,,AMZN,MX,MEX,False
1038,469,,,AMZN,PE,LIM,False
1039,469,,,AMZN,AT,WBO,False
1040,469,1997-05-15,,AMZN,US,NAS,True
