In [1]:
import numpy as np
import pandas as pd
from datetime import datetime
from time import time
import pickle
import os
from pdb import set_trace

In [4]:
def univ_setup_from_table(big_table_dir, fund_table_dir=None):
    '''
    Read the big_table format cleaned data and set universe base on that
    return type: dictionary, keys are dates in datetime.datetime format
    values are corresponding security information
    '''
    big_table = pd.read_csv(big_table_dir)
    big_table['date'] = pd.to_datetime(big_table['date'])
    if fund_table_dir:
        fund_table = pd.read_csv(fund_table_dir)
        fund_table['date'] = pd.to_datetime(fund_table['date'])
        fund_table['annc_date'] = pd.to_datetime(fund_table['annc_date'])
    
    try:
        big_table.drop('Unnamed: 0', axis=1, inplace=True)
    except:
        pass

    if fund_table_dir:
        ix = np.isnat(fund_table.annc_date.values)
        fund_table.loc[ix, 'annc_date'] = fund_table.loc[ix, 'date'] # fillna of annc_date
        fund_table.drop('date', axis=1, inplace=True)
        fund_table.fillna(0, inplace=True)
        fund_table = fund_table.sort_values('annc_date')
        big_table = pd.merge_asof(big_table, fund_table, left_on='date', right_on='annc_date', by='ticker', tolerance=pd.Timedelta('182d'))

    set_trace()
    datecol = big_table.date.unique()

    N_T = datecol.shape[0]
    subtable = [0] * N_T

    for ti in range(N_T):
        t = datecol[ti]
        subtable[ti] = big_table.loc[big_table.date == t,:]

    return dict(zip(datecol, subtable))

def univ_setup(datadir, version=4, wFund=True):
    print('Setup R3000 universe')
    
    datadir = '/home/derek-qi/Documents/R3000_Data/data/r3000/'
    if os.path.exists(datadir + 'univ_v%d.pkl' % version):
        print('use existing binary file')
        with open(datadir + 'univ_v%d.pkl' % version, 'rb') as univ_fh:
            univ = pickle.load(univ_fh)
    
    else:
        print('construct from csv')
        big_table_dir = datadir + 'big_table_full_v%d.csv' % version
        if wFund:
            fund_table_dir = datadir + 'fund_data_v%d.csv' % version
        else:
            fund_table_dir = None
        univ = univ_setup_from_table(big_table_dir, fund_table_dir)
        with open(datadir + 'univ_v%d.pkl' % version,'wb') as fh:
            pickle.dump(univ, fh)
    
    return univ

In [5]:
datadir = '/home/derek-qi/Documents/R3000_Data/data/r3000/'
univ = univ_setup(datadir, version=5, wFund=True)

Setup R3000 universe
construct from csv
> <ipython-input-4-5059c5fdc630>(28)univ_setup_from_table()
-> datecol = big_table.date.unique()
(Pdb) c


In [6]:
from setup.utils import *

In [7]:
df = get_val(univ, 100)

In [9]:
df.dropna(inplace=True)

In [10]:
df.shape

(977, 22)

In [11]:
df.head()

Unnamed: 0,date,ticker,price,vol10,vol60,vol90,vol360,market_cap,beta,volume,...,gross_profit,f_log_ret_1,f_log_ret_4,in_r3000,gross_profit_ttm,ebitda_ttm,eps_ttm,annc_date,total_debt_to_total_capital,net_debt_to_ebitda
518430,2012-09-04,0948669D UN Equity,13.9018,31.557,28.976,15.059,40.191,486.5558,1.0259,394554.0,...,217.104,0.0794,0.15833,1,936.974,149.347,2.0218,2012-06-28,24.8925,1.1048
518433,2012-09-04,0965087D UN Equity,18.6027,23.693,28.882,22.988,36.324,389.5373,0.8366,60539.0,...,48.003,-0.006369,-0.016531,1,183.786,160.002,2.2628,2012-08-06,83.8292,7.5614
518461,2012-09-04,1284849D UN Equity,86.0256,21.727,21.146,12.4,23.432,25997.6147,0.8712,2139722.0,...,1254.8,0.033245,0.071905,1,4773.3001,1800.6,3.5552,2012-08-01,22.1879,-0.6199
518472,2012-09-04,1377691D UW Equity,13.2626,31.222,32.939,27.625,35.794,125.0381,1.0641,29167.0,...,3.316,0.021712,-0.017295,1,20.736,6.388,0.5666,2012-08-21,0.0,-3.853
518473,2012-09-04,1384851D UW Equity,32.1308,45.575,57.519,19.29,57.306,745.5873,1.0484,107277.0,...,34.255,0.027601,0.032132,1,136.384,34.409,0.78,2012-08-07,0.0,-2.9752


In [12]:
df.columns

Index(['date', 'ticker', 'price', 'vol10', 'vol60', 'vol90', 'vol360',
       'market_cap', 'beta', 'volume', 'div_ratio', 'eps', 'gross_profit',
       'f_log_ret_1', 'f_log_ret_4', 'in_r3000', 'gross_profit_ttm',
       'ebitda_ttm', 'eps_ttm', 'annc_date', 'total_debt_to_total_capital',
       'net_debt_to_ebitda'],
      dtype='object')

In [13]:
univ.keys()

dict_keys([numpy.datetime64('2010-10-05T00:00:00.000000000'), numpy.datetime64('2010-10-12T00:00:00.000000000'), numpy.datetime64('2016-08-23T00:00:00.000000000'), numpy.datetime64('2014-04-29T00:00:00.000000000'), numpy.datetime64('2016-02-09T00:00:00.000000000'), numpy.datetime64('2015-07-28T00:00:00.000000000'), numpy.datetime64('2012-12-25T00:00:00.000000000'), numpy.datetime64('2017-08-15T00:00:00.000000000'), numpy.datetime64('2012-06-12T00:00:00.000000000'), numpy.datetime64('2015-01-13T00:00:00.000000000'), numpy.datetime64('2017-01-31T00:00:00.000000000'), numpy.datetime64('2014-07-01T00:00:00.000000000'), numpy.datetime64('2016-07-19T00:00:00.000000000'), numpy.datetime64('2013-12-17T00:00:00.000000000'), numpy.datetime64('2011-05-17T00:00:00.000000000'), numpy.datetime64('2016-01-05T00:00:00.000000000'), numpy.datetime64('2013-06-04T00:00:00.000000000'), numpy.datetime64('2010-11-02T00:00:00.000000000'), numpy.datetime64('2015-06-23T00:00:00.000000000'), numpy.datetime64('20