In [1]:
# Import Modules 
import pandas as pd
import numpy as np
import math
import datetime
import statistics

import os
from tqdm import tqdm
from tqdm import trange

from linearmodels.panel import PanelOLS
from linearmodels.panel import RandomEffects
from linearmodels.panel import FamaMacBeth
from linearmodels.panel import PooledOLS
import statsmodels.api as sm  # Adding constant
import statsmodels.formula.api as smf

import matplotlib.pyplot as plt
%matplotlib inline
%config InlineBackend.figure_format='retina'
# from jupyterthemes import jtplot
# jtplot.style()

import warnings
warnings.filterwarnings('ignore')

import jupyternotify
ip = get_ipython()
ip.register_magics(jupyternotify.JupyterNotifyMagics(ip))

# from notify_run import Notify
# notify = Notify()
# notify.register()

<IPython.core.display.Javascript object>

In [2]:
def _portfolio(time,
               group_root='/Volumes/LaCie/Grouping_1',
               processed_root='/Volumes/LaCie/Processed',
               save_root='/Volumes/LaCie/5_4/reg_Portfolio'):
    """Write per-day portfolio files for every group and phase at one interval.

    For each trading-day folder under ``<processed_root>/<phase>`` the stock
    CSVs belonging to a group are read, one column per stock is collected for
    each measure, and the row-wise mean (NaNs ignored) is appended as
    ``'Portfolio <measure>'``.  One CSV per measure is written to
    ``<save_root>/<time>/<groupname>/<phasename>/<day folder>/<measure>.csv``.

    Parameters
    ----------
    time : str
        Interval suffix used in the input column names (e.g. ``'10'`` selects
        ``'Return-10'``, ``'OIB #-10'``, ...).
    group_root : str, optional
        Folder holding the group membership listings
        (``Post_Phase_1/A_1/2014-01-14`` etc.).  Only the file names found
        there are used.
    processed_root : str, optional
        Folder holding ``Pre_Phase_1``, ``Post_Phase_1`` and ``Post_Phase_2``,
        each containing one sub-folder per day.
    save_root : str, optional
        Root folder for the generated portfolio files.

    Raises
    ------
    FileNotFoundError
        If a membership folder or a phase folder does not exist.
    KeyError
        If a stock CSV lacks ``'DateTime'`` or one of the measure columns.
    """
    # Each measure name doubles as input column prefix, output file stem and
    # suffix of the aggregated 'Portfolio ...' column.
    measures = ('Return', 'Volume', 'Volume Yen', 'Illiquidity',
                'OIB #', 'OIB shr', 'OIB Yen')

    def _members(phase, group, date):
        # A set keeps the per-file membership test O(1).
        return set(os.listdir(os.path.join(group_root, phase, group, date)))

    A_1 = _members('Post_Phase_1', 'A_1', '2014-01-14')
    N_1 = _members('Post_Phase_1', 'N_1', '2014-01-14')

    A_2 = _members('Post_Phase_2', 'A_2', '2014-07-22')
    B_12 = _members('Post_Phase_2', 'B_12', '2014-07-22')
    O_2 = _members('Post_Phase_2', 'O_2', '2014-07-22')
    N_2 = _members('Post_Phase_2', 'N_2', '2014-07-22')

    def _summarise(group, path, groupname, phasename):
        """Aggregate one group's stocks for the day folder ``path``."""
        stock_files = [os.path.join(fpathe, f)
                       for fpathe, dirs, fs in os.walk(path)
                       for f in fs
                       if f[-9:] in group]

        # Collect one Series per stock and measure; concatenating once at the
        # end avoids re-copying the growing frame for every stock.
        per_measure = {m: [] for m in measures}
        for csv_path in stock_files:
            # Column label = file name without its 4-character extension.
            code = os.path.basename(csv_path)[-9:-4]
            df = pd.read_csv(csv_path).set_index('DateTime')
            for m in measures:
                per_measure[m].append(df['%s-%s' % (m, time)].rename(code))

        savepath = '%s/%s/%s/%s/%s' % (save_root, time, groupname, phasename,
                                       os.path.basename(path))
        os.makedirs(savepath, exist_ok=True)

        for m in measures:
            cols = per_measure[m]
            # Every measure is built from its OWN columns.  The previous code
            # rebuilt the three OIB frames from the volume / yen-volume /
            # illiquidity frames, so their portfolio means were wrong.
            frame = pd.concat(cols, axis=1) if cols else pd.DataFrame([])
            frame['Portfolio %s' % m] = np.nanmean(frame, axis=1)
            frame.to_csv('%s/%s.csv' % (savepath, m))

    # (input phase folder, [(members, output group name, output phase name)]).
    # Days stored under Post_Phase_1 are also written out as 'Pre_Phase_2'
    # for the four phase-2 groups.
    plan = [
        ('Pre_Phase_1', [(A_1, 'A1', 'Pre_Phase_1'),
                         (N_1, 'N1', 'Pre_Phase_1')]),
        ('Post_Phase_1', [(A_1, 'A1', 'Post_Phase_1'),
                          (N_1, 'N1', 'Post_Phase_1'),
                          (A_2, 'A2', 'Pre_Phase_2'),
                          (N_2, 'N2', 'Pre_Phase_2'),
                          (O_2, 'O2', 'Pre_Phase_2'),
                          (B_12, 'B12', 'Pre_Phase_2')]),
        ('Post_Phase_2', [(A_2, 'A2', 'Post_Phase_2'),
                          (N_2, 'N2', 'Post_Phase_2'),
                          (O_2, 'O2', 'Post_Phase_2'),
                          (B_12, 'B12', 'Post_Phase_2')]),
    ]

    for phase_dir, jobs in plan:
        basepath = os.path.join(processed_root, phase_dir)
        pbar = tqdm(os.listdir(basepath))
        for pth in pbar:
            # Skip Finder metadata entries such as '.DS_Store'.
            if pth[-5:] == 'Store':
                continue
            pbar.set_description("Processing %s" % (pth))
            path = os.path.join(basepath, pth)
            for members, groupname, phasename in jobs:
                _summarise(members, path, groupname, phasename)

In [3]:
%%notify
# Build the per-day portfolio files once for each interval suffix.
for t in ('10', '15', '30', '45', '60'):
    _portfolio(time=t)

Processing 2014-01-10: 100%|██████████| 131/131 [06:11<00:00,  2.84s/it]
Processing 2014-07-18: 100%|██████████| 130/130 [14:59<00:00,  6.92s/it]
Processing 2014-12-30: 100%|██████████| 111/111 [08:45<00:00,  4.73s/it]
Processing 2014-01-10: 100%|██████████| 131/131 [06:12<00:00,  2.84s/it]
Processing 2014-07-18: 100%|██████████| 130/130 [14:54<00:00,  6.88s/it]
Processing 2014-12-30: 100%|██████████| 111/111 [08:13<00:00,  4.45s/it]
Processing 2014-01-10: 100%|██████████| 131/131 [06:33<00:00,  3.00s/it]
Processing 2014-07-18: 100%|██████████| 130/130 [14:51<00:00,  6.86s/it]
Processing 2014-12-30: 100%|██████████| 111/111 [08:12<00:00,  4.44s/it]
Processing 2014-01-10: 100%|██████████| 131/131 [06:05<00:00,  2.79s/it]
Processing 2014-07-18: 100%|██████████| 130/130 [14:26<00:00,  6.67s/it]
Processing 2014-12-30: 100%|██████████| 111/111 [08:16<00:00,  4.47s/it]
Processing 2014-01-10: 100%|██████████| 131/131 [06:04<00:00,  2.78s/it]
Processing 2014-07-18: 100%|██████████| 130/130 [13

<IPython.core.display.Javascript object>

In [4]:
def _portsum(time, base_root='/Volumes/LaCie/5_4/reg_Portfolio'):
    """Merge each day's per-measure files into a single ``reg_Portfolio.csv``.

    Walks ``<base_root>/<time>/<group>/<phase>/<date>``.  In every date folder
    the first column (``'DateTime'``) and the last column of each CSV are
    taken, the last columns are joined side by side on ``'DateTime'``, and the
    result is written to ``<date folder>/reg_Portfolio.csv``.

    Parameters
    ----------
    time : str
        Interval sub-folder to process (e.g. ``'10'``).
    base_root : str, optional
        Root folder containing one sub-folder per interval.

    Notes
    -----
    Files ending in ``'Store'`` (e.g. ``.DS_Store``) and files ending in
    ``'folio.csv'`` (a previously written ``reg_Portfolio.csv``) are not used
    as input.  A date folder with no usable input gets no output file.
    """
    basepath = '%s/%s' % (base_root, time)

    def _subdirs(parent):
        # Immediate sub-folders only; plain files at this level are ignored.
        return [parent + '/' + name
                for name in os.listdir(parent)
                if os.path.isdir(os.path.join(parent, name))]

    datepath = [date_dir
                for group_dir in _subdirs(basepath)
                for phase_dir in _subdirs(group_dir)
                for date_dir in _subdirs(phase_dir)]

    pbar = tqdm(datepath)
    for i in pbar:
        pbar.set_description("Processing %s" % (i))
        # Sorted so the column order of the output does not depend on the
        # directory listing order of the filesystem.
        files = sorted(os.path.join(fpathe, f)
                       for fpathe, dirs, fs in os.walk(i)
                       for f in fs
                       if f[-5:] != 'Store' and f[-9:] != 'folio.csv')
        if not files:
            continue
        columns = [pd.read_csv(f).iloc[:, [0, -1]].set_index('DateTime')
                   for f in files]
        # Join once and write once instead of rewriting after every input.
        pd.concat(columns, axis=1).to_csv('%s/reg_Portfolio.csv' % i)

In [5]:
%%notify
# Merge the per-measure files of every day folder, one interval at a time.
for t in ('10', '15', '30', '45', '60'):
    _portsum(time=t)

Processing /Volumes/LaCie/5_4/reg_Portfolio/10/O2/Pre_Phase_2/2014-07-18: 100%|██████████| 1474/1474 [03:28<00:00,  7.06it/s] 
Processing /Volumes/LaCie/5_4/reg_Portfolio/15/O2/Pre_Phase_2/2014-07-18: 100%|██████████| 1474/1474 [03:21<00:00,  7.31it/s] 
Processing /Volumes/LaCie/5_4/reg_Portfolio/30/O2/Pre_Phase_2/2014-07-18: 100%|██████████| 1474/1474 [03:27<00:00,  7.10it/s] 
Processing /Volumes/LaCie/5_4/reg_Portfolio/45/O2/Pre_Phase_2/2014-07-18: 100%|██████████| 1474/1474 [03:31<00:00,  6.96it/s] 
Processing /Volumes/LaCie/5_4/reg_Portfolio/60/O2/Pre_Phase_2/2014-07-18: 100%|██████████| 1474/1474 [03:12<00:00,  7.65it/s] 


<IPython.core.display.Javascript object>

In [6]:
def _wholeperiod(time,
                 portfolio_root='/Volumes/LaCie/5_4/reg_Portfolio',
                 panel_root='/Volumes/LaCie/5_4/reg_Panel'):
    """Stack the daily ``reg_Portfolio.csv`` files into one panel per group/phase.

    Every file ending in ``'folio.csv'`` under ``<portfolio_root>/<time>`` is
    collected.  For each of the twelve group/phase combinations the matching
    files are concatenated row-wise (indexed by ``'DateTime'``) and written to
    ``<panel_root>/<time>/<group>_<phase>.csv``.

    Parameters
    ----------
    time : str
        Interval sub-folder to process (e.g. ``'10'``).
    portfolio_root : str, optional
        Root folder of the per-day portfolio files.
    panel_root : str, optional
        Root folder for the stacked panel files.

    Notes
    -----
    Files are stacked in sorted path order, so the rows of a panel follow the
    sorted names of the day folders instead of the filesystem's listing order.
    A combination with no matching file still produces an (empty) CSV.
    """
    source = '%s/%s' % (portfolio_root, time)
    target = '%s/%s' % (panel_root, time)

    def _groupsum(files, groupname, phasename):
        # Match whole folder names rather than substrings of the full path.
        selected = [full for full, parts in files
                    if groupname in parts and phasename in parts]
        frames = [pd.read_csv(full).set_index('DateTime') for full in selected]
        pan = pd.concat(frames, axis=0) if frames else pd.DataFrame([])

        os.makedirs(target, exist_ok=True)
        pan.to_csv('%s/%s_%s.csv' % (target, groupname, phasename))

    files = []
    for fpathe, dirs, fs in os.walk(source):
        for f in fs:
            if f[-9:] == 'folio.csv':
                folders = os.path.relpath(fpathe, source).split(os.sep)
                files.append((os.path.join(fpathe, f), folders))
    files.sort()

    combinations = [
        ('A1', 'Pre_Phase_1'), ('A1', 'Post_Phase_1'),
        ('N1', 'Pre_Phase_1'), ('N1', 'Post_Phase_1'),
        ('A2', 'Pre_Phase_2'), ('A2', 'Post_Phase_2'),
        ('N2', 'Pre_Phase_2'), ('N2', 'Post_Phase_2'),
        ('B12', 'Pre_Phase_2'), ('B12', 'Post_Phase_2'),
        ('O2', 'Pre_Phase_2'), ('O2', 'Post_Phase_2'),
    ]
    for groupname, phasename in combinations:
        _groupsum(files, groupname, phasename)

In [7]:
%%notify
# Stack the daily files into one panel per group/phase for each interval.
for t in ('10', '15', '30', '45', '60'):
    _wholeperiod(time=t)

<IPython.core.display.Javascript object>

In [5]:
def _tsplot(style,time,group,phase,groupname,phasename,ylim,
            panel_root='/Volumes/LaCie/5_4/reg_Panel',
            out_dir='/Volumes/LaCie/5_4',
            close=True):
    """Plot pre- and post-period portfolio illiquidity and save it as a PNG.

    Reads the ``'Portfolio Illiquidity'`` column of
    ``<panel_root>/<time>/<group>_Pre_<phase>.csv`` and
    ``<panel_root>/<time>/<group>_Post_<phase>.csv``.  The pre series is drawn
    from x = 0, the post series directly after it, and a horizontal line marks
    the NaN-ignoring mean of each.  The figure is saved to
    ``<out_dir>/ts_plot_<group>_<time>.png``.

    Parameters
    ----------
    style : str
        Matplotlib style name.  A legacy ``'seaborn*'`` name that the
        installed matplotlib no longer provides is mapped to its
        ``'seaborn-v0_8*'`` replacement when that one is available.
    time : str
        Interval sub-folder of the panel files (also part of the PNG name).
    group : str
        Group code used in the file names (e.g. ``'A1'``).
    phase : str
        Phase key used in the file names (e.g. ``'Phase_1'``).
    groupname, phasename : str
        Display strings combined into the title ``'<groupname> - <phasename>'``.
    ylim : float
        Upper limit of the y axis; the lower limit is 0.
    panel_root : str, optional
        Root folder of the panel CSV files.
    out_dir : str, optional
        Folder the PNG is written to.
    close : bool, optional
        Close the figure after saving (default) so repeated calls do not keep
        every figure open.  Pass ``False`` to leave it open, e.g. for inline
        display.
    """
    column = 'Portfolio Illiquidity'
    pre = pd.read_csv(
        '%s/%s/%s_Pre_%s.csv'%(panel_root,time,group,phase)
        )[column]
    post = pd.read_csv(
        '%s/%s/%s_Post_%s.csv'%(panel_root,time,group,phase)
        )[column]

    n_pre = len(pre)
    n_all = n_pre + len(post)

    # The bundled seaborn styles were renamed to 'seaborn-v0_8-*' in newer
    # matplotlib releases; fall back to the new name when the old one is gone.
    if (isinstance(style, str) and style.startswith('seaborn')
            and style not in plt.style.available):
        renamed = style.replace('seaborn', 'seaborn-v0_8', 1)
        if renamed in plt.style.available:
            style = renamed

    # style.context keeps the style change local to this figure.
    with plt.style.context(style):
        fig, ax = plt.subplots(figsize=(20,9))
        ax.set_title('%s - %s'%(groupname,phasename))
        ax.set_ylim(0, ylim)
        ax.plot(np.arange(n_pre), pre.to_numpy(), alpha=0.5, linewidth=0.2)
        # Offset the post series on the x axis so it continues after the pre series.
        ax.plot(np.arange(n_pre, n_all), post.to_numpy(), alpha=0.5, linewidth=0.2)
        ax.hlines(np.nanmean(pre), 0, n_pre, linestyles='solid')
        ax.hlines(np.nanmean(post), n_pre, n_all, linestyles='solid')
        fig.savefig('%s/ts_plot_%s_%s.png'%(out_dir,group,time), orientation='portrait')

    if close:
        plt.close(fig)

In [9]:
# Matplotlib style names available for the time-series plots.
style_list = [
    # seaborn family
    'seaborn-dark',
    'seaborn-darkgrid',
    'seaborn-ticks',
    'seaborn-whitegrid',
    'seaborn-talk',
    'seaborn-dark-palette',
    'seaborn-bright',
    'seaborn-pastel',
    'seaborn-notebook',
    'seaborn-white',
    'seaborn-poster',
    'seaborn-deep',
    'seaborn-colorblind',
    'seaborn-muted',
    'seaborn-paper',
    'seaborn',
    # other bundled styles
    'fivethirtyeight',
    'classic',
    '_classic_test',
    'fast',
    'grayscale',
    'ggplot',
    'Solarize_Light2',
    'bmh',
    'tableau-colorblind10',
    'dark_background',
]

In [None]:
%%notify
# One row per figure:
# (group code, phase key, display name, title template, y-axis upper limit)
panels = [
    ('A1',  'Phase_1', 'Group A', 'Phase 1 - %smin', 0.2),
    ('N1',  'Phase_1', 'Group B', 'Phase 1 - %smin', 0.8),
    ('A2',  'Phase_2', 'Group C', 'Phase 2 - %smin', 0.6),
    ('N2',  'Phase_2', 'Group D', 'Phase 2 - %smin', 0.1),
    ('O2',  'Phase_2', 'Group E', 'Phase 2 - %smin', 0.8),
    ('B12', 'Phase_2', 'Group F', 'Phase 2 - %smin', 0.08),
]

# NOTE(review): '05' is not one of the intervals ('10'..'60') that the pipeline
# cells of this notebook build — confirm reg_Panel/05 exists before running.
pbar = tqdm(['05','10','15','30','45','60'])
for t in pbar:
    for group, phase, label, title, top in panels:
        _tsplot('seaborn-deep', t, group, phase, label, title % t, top)