In [1]:
# Import modules

import pandas as pd
import numpy as np
import math
import datetime
import statistics

import os
from tqdm import tqdm
from tqdm import trange

import matplotlib.pyplot as plt
%matplotlib inline
%config InlineBackend.figure_format='retina'
from jupyterthemes import jtplot
jtplot.style()

import warnings
warnings.filterwarnings('ignore')

import jupyternotify
ip = get_ipython()
ip.register_magics(jupyternotify.JupyterNotifyMagics(ip))

<IPython.core.display.Javascript object>

In [2]:
# Number of trading days 
basepaths = ['/Volumes/LaCie/Grouping_1/Pre_Phase_1/A_1',
             '/Volumes/LaCie/Grouping_1/Pre_Phase_1/N_1',
             '/Volumes/LaCie/Grouping_1/Post_Phase_1/A_1',
             '/Volumes/LaCie/Grouping_1/Post_Phase_1/N_1',
             '/Volumes/LaCie/Grouping_1/Pre_Phase_2/A_2',
             '/Volumes/LaCie/Grouping_1/Pre_Phase_2/O_2',
             '/Volumes/LaCie/Grouping_1/Pre_Phase_2/B_12',
             '/Volumes/LaCie/Grouping_1/Pre_Phase_2/N_2',
             '/Volumes/LaCie/Grouping_1/Post_Phase_2/A_2',
             '/Volumes/LaCie/Grouping_1/Post_Phase_2/O_2',
             '/Volumes/LaCie/Grouping_1/Post_Phase_2/B_12',
             '/Volumes/LaCie/Grouping_1/Post_Phase_2/N_2']
trdt_smry = pd.DataFrame({'Group':[],'Num':[]})
group = []; num = []
for i in basepaths:
    dates = []
    for date in os.listdir(i):
        if os.path.isdir(os.path.join(i, date)):
            dates.append(i +'/'+ date)
    group.append(i[26:])
    num.append(len(dates))
trdt_smry['Group'] = group; trdt_smry['Num'] = num
# trdt_smry

In [5]:
def _tradesize(files):
    daily_trdsz = pd.DataFrame({'DateTime':[],'Code':[],
                                'Volume':[],'Volume Yen':[],'Transactions':[],
                                'Trade Size_Avg':[],'Trade Size_Med':[],
                                'Yen Trade Size_Avg':[],'Yen Trade Size_Med':[]})
    date = []; code = []
    vol = []; vol_y = []; trans_num = []
    trdsz_Avg = []; trdsz_Med = []; trdsz_Y_Avg = []; trdsz_Y_Med = []
    
    pbar = tqdm(files)
    for f in pbar:
        df = pd.read_csv(f)[['Issue code','Date','Time','Price','Trading volume']]
        df = df.rename(columns={"Issue code":"Code", 'Trading volume':'Volume'})
        
        c = pd.Series(map(int.__add__, df.iloc[:,2], (df.iloc[:,1])*10**9))
        col_1 = pd.to_datetime(c, format='%Y%m%d%H%M%S%f', errors='ignore')
        df['DateTime'] = col_1
        df['Volume Yen'] = df['Price']*df['Volume']
        
        date.append(df['DateTime'][0])
        code.append(df['Code'][0])
        
        df_am = df[(df['DateTime']>=datetime.datetime(int(f[-20:-16]),int(f[-15:-13]),int(f[-12:-10]),9,0,0)) &
                   (df['DateTime']<datetime.datetime(int(f[-20:-16]),int(f[-15:-13]),int(f[-12:-10]),11,30,0))]
        df_am.reset_index(inplace = True, drop = True)
        df_pm = df[(df['DateTime']>=datetime.datetime(int(f[-20:-16]),int(f[-15:-13]),int(f[-12:-10]),12,30,0)) & 
                   (df['DateTime']<datetime.datetime(int(f[-20:-16]),int(f[-15:-13]),int(f[-12:-10]),15,0,0))]
        df_pm.reset_index(inplace = True, drop = True)
        
        df_am = df_am.drop([0])
        df_pm = df_pm.drop([0])

        df = pd.concat([df_am, df_pm], axis =0)
        df.reset_index(inplace = True, drop = True)
        df = df.drop(columns=['Date','Time'])
        
        vol.append(np.nansum(df['Volume']))
        vol_y.append(np.nansum(df['Volume Yen']))
        trans_num.append(len(df)-2)
        trdsz_Avg.append(np.nanmean(df['Volume']))
        trdsz_Med.append(np.nanmedian(df['Volume']))
        trdsz_Y_Avg.append(np.nanmean(df['Volume Yen']))
        trdsz_Y_Med.append(np.nanmean(df['Volume Yen']))
        
    daily_trdsz['DateTime'] = date
    daily_trdsz['Code'] = code
    daily_trdsz['Volume'] = vol
    daily_trdsz['Volume Yen'] = vol_y
    daily_trdsz['Transactions'] = trans_num
    daily_trdsz['Trade Size_Avg'] = trdsz_Avg
    daily_trdsz['Trade Size_Med'] = trdsz_Med
    daily_trdsz['Yen Trade Size_Avg'] = trdsz_Y_Avg
    daily_trdsz['Yen Trade Size_Med'] = trdsz_Y_Med

    path = '/Volumes/LaCie/Sum_Stat/'
    if not os.path.exists(path):
        os.makedirs(path)
    pd.DataFrame(daily_trdsz).to_csv('%s/Trade_Summary.csv'%(path), index=False)

In [3]:
files = []
path_13 = '/Volumes/LaCie/Transactions_2013'
for fpathe, dirs,fs in os.walk(path_13):
    for f in fs:
        if f[-5:]!= 'Store':
            files.append(os.path.join(fpathe,f))
path_14 = '/Volumes/LaCie/Transactions_2014'
for fpathe, dirs,fs in os.walk(path_14):
    for f in fs:
        if f[-5:]!= 'Store':
            files.append(os.path.join(fpathe,f))

In [6]:
_tradesize(files)

100%|██████████| 34307/34307 [23:56<00:00, 23.89it/s]


In [2]:
trd_sum = pd.read_csv('/Volumes/LaCie/Sum_Stat/Trade_Summary.csv')

In [3]:
dt = []
for i in trange(len(trd_sum)):
    trd_sum['DateTime'][i] = trd_sum['DateTime'][i][:-4]
    trd_sum['DateTime'][i] = datetime.datetime.strptime(trd_sum['DateTime'][i], 
                                                        '%Y-%m-%d %H:%M:%S')

100%|██████████| 34307/34307 [30:04<00:00, 19.02it/s]


In [16]:
pre_1 = trd_sum[trd_sum['DateTime'] <= datetime.datetime(2014,1,14,8,0,0)]
post_1 = trd_sum[(trd_sum['DateTime'] > datetime.datetime(2014,1,14,8,0,0)) & 
                 (trd_sum['DateTime'] <= datetime.datetime(2014,7,22,8,0,0))] 
post_2 = trd_sum[trd_sum['DateTime'] > datetime.datetime(2014,7,22,8,0,0)] 

In [31]:
trd_sum_stat = pd.DataFrame({'Info':[],'Pre_1':[],'Post_1':[],'Post_2':[],'Overall':[]})
info = ['Daily Volume Avg',
        'Daily Volume Med',
        'Daily Yen Avg',
        'Daily Yen Med',
        'Transactions Avg',
        'Transactions Med',
        'Trade Size Avg',
        'Trade Size Med',
        'Yen TrdSz Avg',
        'Yen TrdSz Med'
        ]

pre_1_stat = [
    np.nanmean(pre_1['Volume']),
    np.nanmedian(pre_1['Volume']),
    np.nanmean(pre_1['Volume Yen']),
    np.nanmedian(pre_1['Volume Yen']),
    np.nanmean(pre_1['Transactions']),
    np.nanmedian(pre_1['Transactions']),
    np.nanmean(pre_1['Trade Size_Avg']),
    np.nanmedian(pre_1['Trade Size_Med']),
    np.nanmean(pre_1['Yen Trade Size_Avg']),
    np.nanmedian(pre_1['Yen Trade Size_Med'])]

post_1_stat = [
    np.nanmean(post_1['Volume']),
    np.nanmedian(post_1['Volume']),
    np.nanmean(post_1['Volume Yen']),
    np.nanmedian(post_1['Volume Yen']),
    np.nanmean(post_1['Transactions']),
    np.nanmedian(post_1['Transactions']),
    np.nanmean(post_1['Trade Size_Avg']),
    np.nanmedian(post_1['Trade Size_Med']),
    np.nanmean(post_1['Yen Trade Size_Avg']),
    np.nanmedian(post_1['Yen Trade Size_Med'])]

post_2_stat = [
    np.nanmean(post_2['Volume']),
    np.nanmedian(post_2['Volume']),
    np.nanmean(post_2['Volume Yen']),
    np.nanmedian(post_2['Volume Yen']),
    np.nanmean(post_2['Transactions']),
    np.nanmedian(post_2['Transactions']),
    np.nanmean(post_2['Trade Size_Avg']),
    np.nanmedian(post_2['Trade Size_Med']),
    np.nanmean(post_2['Yen Trade Size_Avg']),
    np.nanmedian(post_2['Yen Trade Size_Med'])]

overall_stat = [
    np.nanmean(trd_sum['Volume']),
    np.nanmedian(trd_sum['Volume']),
    np.nanmean(trd_sum['Volume Yen']),
    np.nanmedian(trd_sum['Volume Yen']),
    np.nanmean(trd_sum['Transactions']),
    np.nanmedian(trd_sum['Transactions']),
    np.nanmean(trd_sum['Trade Size_Avg']),
    np.nanmedian(trd_sum['Trade Size_Med']),
    np.nanmean(trd_sum['Yen Trade Size_Avg']),
    np.nanmedian(trd_sum['Yen Trade Size_Med'])]

trd_sum_stat['Info'] = info
trd_sum_stat['Pre_1'] = pre_1_stat
trd_sum_stat['Post_1'] = post_1_stat
trd_sum_stat['Post_2'] = post_2_stat
trd_sum_stat['Overall'] = overall_stat

In [33]:
trd_sum_stat

Unnamed: 0,Info,Pre_1,Post_1,Post_2,Overall
0,Daily Volume Avg,7518100.0,7265775.0,6440036.0,7108395.0
1,Daily Volume Med,2880950.0,2923000.0,2876500.0,2897500.0
2,Daily Yen Avg,9375267000.0,9348114000.0,9449539000.0,9387919000.0
3,Daily Yen Med,5599021000.0,5841764000.0,6250745000.0,5891709000.0
4,Transactions Avg,2678.668,3530.458,5121.138,3704.855
5,Transactions Med,2135.5,2692.0,3959.5,2755.0
6,Trade Size Avg,2659.698,2625.746,1203.291,2213.539
7,Trade Size Med,300.0,300.0,200.0,300.0
8,Yen TrdSz Avg,3414352.0,2732188.0,1973166.0,2746055.0
9,Yen TrdSz Med,3026956.0,2250843.0,1599912.0,2205444.0
