# Unique Active Company Signal

This tutorial covers the methodology of the most basic company signal using our raw analytics file. It is recommended that this be used as a starting point for a more complex signal.

In [1]:
#library import
import pandas as pd
import numpy as np
import os
import gc
import datetime
import sqlite3
import pathlib

In [2]:
# Read the daily unique-active market report directly from the zipped CSV.
UA = pd.read_csv("../../../LinkUp_Data/Market Reports/Post Process/Raw_Quant_Company_Daily_Unique_Active_PP_2019-10-30.zip")
# Keep only the date range the signal is produced for: rows whose `day`
# string sorts strictly after '2019-01-01'.
UA = UA.loc[UA['day'] > '2019-01-01']

# Ticker file load

A company id in our system is a unique career portal. Oftentimes, it makes sense to group these together by ticker or some other measure.

In [3]:
# Read File
Ticker = pd.read_csv("../../../LinkUp_Data/Feeds/raw_pit_ticker_full_2019-11-01.csv.gz")

# Get rid of secondary tickers. `.copy()` makes the result an independent
# frame so the column assignment below does not write into a slice of the
# original (avoids SettingWithCopyWarning).
Ticker = Ticker[Ticker.primary_flag == True].copy()

# Concatenate Ticker information to use as a key and get rid of unneeded columns
Ticker['stock_ticker'] = (Ticker['stock_ticker'] + ':' +
                          Ticker['stock_exchange_name'] + ':' +
                          Ticker['stock_exchange_country'])
Ticker = Ticker[['company_id','start_date','end_date','stock_ticker']].copy()

# Deal with null values: a missing start_date becomes '2007-01-01' and a
# missing end_date becomes today's date.
# The fallback end date is built with a single call and rendered as
# zero-padded ISO 'YYYY-MM-DD', so it has the same format as the
# '2007-01-01' start-date fallback and compares consistently as a string.
# (Concatenating year/month/day by hand produced values such as '2019-11-1'
# and read the clock three separate times.)
Ticker['start_date'] = Ticker['start_date'].fillna('2007-01-01')
Ticker['end_date'] = Ticker['end_date'].fillna(datetime.date.today().isoformat())

# Get job record counts by Ticker

In [4]:
# Use a throw-away in-memory SQLite database to perform the date-range join.
conn = sqlite3.connect(':memory:')
try:
    # Write the tables
    UA.to_sql('UA', conn, index=False)
    Ticker.to_sql('Ticker', conn, index=False)

    # Drop the pandas copies before querying; the data now lives in SQLite.
    UA = None
    Ticker = None
    gc.collect()

    # Query and create new joined table.
    # LEFT JOIN keeps every UA row; stock_ticker is NULL when no Ticker row
    # for the same company_id has a [start_date, end_date] range containing
    # the UA day. BETWEEN here compares the stored values as-is.
    qry = '''
    SELECT 
        UA.*,
        Ticker.stock_ticker   
     
    FROM UA
    
    LEFT JOIN Ticker
        ON (UA.day between Ticker.start_date and Ticker.end_date and
               UA.company_id = Ticker.company_id)
       
    '''
    UA = pd.read_sql_query(qry, conn)
finally:
    # Closing the connection discards the in-memory database, releasing the
    # copies of both tables even if one of the steps above raised.
    conn.close()

In [5]:
# Preview the first 100 rows of the joined table to check that a
# stock_ticker was attached to each company/day row.
UA.head(100)

Unnamed: 0,day,company_id,company_name,unique_active_job_count,stock_ticker
0,2019-01-02,1,Target,9446,TGT:NYS:US
1,2019-01-03,1,Target,9195,TGT:NYS:US
2,2019-01-04,1,Target,9005,TGT:NYS:US
3,2019-01-05,1,Target,8941,TGT:NYS:US
4,2019-01-06,1,Target,8908,TGT:NYS:US
...,...,...,...,...,...
95,2019-04-07,1,Target,11418,TGT:NYS:US
96,2019-04-08,1,Target,11578,TGT:NYS:US
97,2019-04-09,1,Target,11106,TGT:NYS:US
98,2019-04-10,1,Target,11268,TGT:NYS:US


# Filter out companies that have never had a day with at least 1,000 active jobs

In [6]:
# Collapse to one row per (day, ticker) by summing the job counts of every
# company_id mapped to that ticker.
UA = UA.groupby(['day', 'stock_ticker'], as_index=False)['unique_active_job_count'].sum()

# For each ticker, test whether its highest daily total ever reached 1000.
Filter = UA.groupby('stock_ticker')['unique_active_job_count'].max().ge(1000).reset_index()
# Keep just the ticker labels that passed the test.
Filter = Filter.loc[Filter['unique_active_job_count'], 'stock_ticker']

# Restrict the daily totals to the qualifying tickers.
UA = UA.loc[UA['stock_ticker'].isin(Filter)].reset_index(drop=True)

# Compute job-count growth over 90 days (ratio of the current count to the count 90 rows earlier)

In [7]:
# Reshape to a wide matrix: one row per day, one column per ticker.
UA_t = UA.pivot(index='day', columns='stock_ticker', values='unique_active_job_count')
# The same matrix moved down by 90 rows, so each row lines up with the value
# from 90 rows earlier (rows, not calendar days, are what gets shifted).
UA_s = UA_t.shift(90)
# Growth ratio: current count divided by the count 90 rows earlier.
UA_f = UA_t.div(UA_s)

# Release the references to the intermediate matrices.
UA_t = UA_s = None

# This function is used to get the top and bottom in terms of job growth

In [8]:
def get_top_n(prev_returns, top_n):
    top_stocks = pd.DataFrame(0, index=prev_returns.index, columns=prev_returns.columns)
    
    for index, row in prev_returns.iterrows():
        top_performers = row.nlargest(top_n).index
        top_stocks.loc[index, top_performers] = 1
    
    return top_stocks

In [9]:
# Flag, for every day, the 10 tickers with the largest growth ratio.
top = get_top_n(UA_f, 10)
# Flag the 10 smallest ratios by ranking the negated values. Exact zeros are
# swapped for 50 before negation, which turns them into -50.
# NOTE(review): presumably this keeps tickers whose job count fell to zero
# out of the bottom list - confirm that this is the intent.
bottom = get_top_n(UA_f.replace(0, 50).mul(-1), 10)

# Put into usable format and print daily signal

In [10]:
def _flags_to_long(flags, value_name):
    """Turn a wide day x ticker 0/1 matrix into a long table of flagged rows.

    Parameters
    ----------
    flags : pandas.DataFrame
        Wide frame whose index is named ``day`` and whose columns are tickers.
    value_name : str
        Name given to the column that holds the flag values.

    Returns
    -------
    pandas.DataFrame
        Columns ``day``, ``stock_ticker`` and ``value_name``, restricted to
        rows where the flag equals 1 and sorted by ``day``.
    """
    long_form = flags.reset_index().melt(id_vars=['day'],
                                         var_name='stock_ticker',
                                         value_name=value_name)
    return long_form[long_form[value_name] == 1].sort_values('day')


# Both signals go through the same reshaping; only the flag column name differs.
top_unpivoted = _flags_to_long(top, 'Top')
bottom_unpivoted = _flags_to_long(bottom, 'Bottom')

# Here you see top 10 and bottom 10 tickers in terms of growth

In [11]:
# The table is sorted by day, so the last 25 rows are the flagged tickers
# for the most recent days.
top_unpivoted.tail(25)

Unnamed: 0,day,stock_ticker,Top
83322,2019-10-29,L:TSE:CA,1
77565,2019-10-29,JMT:LIS:PT,1
76959,2019-10-29,JHTXQ:OTC:US,1
139680,2019-10-29,WW:NAS:US,1
137862,2019-10-29,WKL:AMS:NL,1
137863,2019-10-30,WKL:AMS:NL,1
139681,2019-10-30,WW:NAS:US,1
110896,2019-10-30,RT:NYS:US,1
106654,2019-10-30,QSP:TSE:CA,1
103927,2019-10-30,PMC:NYS:US,1


In [12]:
# The table is sorted by day, so the last 25 rows are the flagged tickers
# for the most recent days.
bottom_unpivoted.tail(25)

Unnamed: 0,day,stock_ticker,Bottom
28782,2019-10-29,BMW:ETR:DE,1
89079,2019-10-29,MAN:NYS:US,1
23631,2019-10-29,BCOLOMBIA:BOG:CO,1
111501,2019-10-29,RUE:NAS:US,1
111198,2019-10-29,RTW:NYS:US,1
12421,2019-10-30,AGN:AMS:NL,1
89080,2019-10-30,MAN:NYS:US,1
111199,2019-10-30,RTW:NYS:US,1
53932,2019-10-30,EHC:NYS:US,1
37570,2019-10-30,CINE:LON:GB,1
