# In[2]:
# Import the required libraries and define the module constants
import pandas as pd
import time
import math

from datetime import datetime


# Pipe-delimited tick data file read by import_data() (its first row is skipped).
FILE = './data/tick_price_file.csv'
# Pipe-delimited per-Id reference file that import_data() joins onto the ticks by 'Id'.
BASE_FILE = './data/tick_base_file.csv'
# Output path template; write_data() fills '{}' with the chunk number.
CHRONOGRAF_PATH = './data/chronograf_{}.txt'

# NOTE: despite the name, this is used as a divisor in import_data(): each
# output file receives ceil(total_lines / CHUNK_SIZE) lines, so it is the
# maximum number of output files rather than a per-file line count.
CHUNK_SIZE = 1

# In[3]:
def _strip_spaces(value):
    """Return ``str(value)`` without surrounding whitespace or inner spaces."""
    return str(value).strip().replace(' ', '')


def _to_line_protocol(row, field_names):
    """Format one joined tick row as a ``findata`` line-protocol record.

    Args:
        row: A named tuple from ``DataFrame.itertuples`` exposing the tag
            columns, the columns in *field_names* and ``TradeDateTime``.
        field_names: Columns to emit as line-protocol fields, in order.

    Returns:
        The formatted line, or ``None`` when no field has a usable value
        (a record with an empty field set is not valid line protocol).
    """
    fields = []
    for name in field_names:
        value = getattr(row, name)
        # 'Nil' placeholders and missing values are omitted, not zero-filled.
        if pd.isna(value) or 'Nil' in str(value):
            continue
        fields.append('{}={}'.format(name, _strip_spaces(value)))
    if not fields:
        return None

    # str() is applied to every tag (including Type) so that a non-string or
    # missing cell cannot raise AttributeError on .strip().
    # NOTE(review): only spaces are removed from tag values; commas and '='
    # are not escaped - confirm the source data never contains them.
    tags = ','.join(
        '{}={}'.format(tag, _strip_spaces(getattr(row, tag)))
        for tag in ('Type', 'Id', 'SeqNo', 'SIC', 'Ex')
    )
    return 'findata,{} {} {}'.format(
        tags, ','.join(fields), _strip_spaces(row.TradeDateTime))


def import_data():
    """Convert the tick CSV files into line-protocol text files.

    Reads FILE and BASE_FILE (pipe-delimited, first row skipped), derives a
    nanosecond epoch timestamp per tick with convert_to_epoch_time(), drops
    ticks whose date/time cannot be parsed, joins the base data on ``Id`` and
    writes the resulting ``findata`` records through write_data().

    The records are split into consecutive chunks of
    ``ceil(total / CHUNK_SIZE)`` lines, numbered from 1. Nothing is written
    when there are no records.
    """
    # Extract main ticks data
    tick_columns = ['Id', 'SeqNo', 'TradeDate', 'TimeStamp', 'TradePrice',
                    'TradeSize', 'AskPrice', 'AskSize', 'BidPrice', 'BidSize',
                    'Type']
    df = pd.read_csv(FILE, names=tick_columns, skiprows=1, sep='|')

    # Build "<date> <time>" explicitly. Joining the raw cells with '' only
    # parses when the CSV cells happen to be padded with spaces.
    date_text = (df['TradeDate'].astype(str).str.strip() + ' ' +
                 df['TimeStamp'].astype(str).str.strip())
    df['TradeDateTime'] = date_text.apply(convert_to_epoch_time)
    # Unparseable timestamps come back as None; drop those ticks.
    df = df[df['TradeDateTime'].notna()]

    # Extract base file data
    base_columns = ['Id', 'Ex', 'Descr', 'SIC', 'Cu']
    base_df = pd.read_csv(BASE_FILE, names=base_columns, skiprows=1, sep='|')
    df = df.join(base_df.set_index('Id'), on='Id')

    # Convert to line protocol format
    field_names = ['TradePrice', 'TradeSize', 'AskPrice', 'AskSize',
                   'BidPrice', 'BidSize']
    lines = []
    for row in df.itertuples(index=False):
        line = _to_line_protocol(row, field_names)
        if line is not None:
            lines.append(line)

    # Write files
    if not lines:
        return
    # CHUNK_SIZE acts as the number of output files: each chunk holds
    # ceil(total / CHUNK_SIZE) lines and the last one may be shorter.
    chunk_len = int(math.ceil(len(lines) / CHUNK_SIZE))
    starts = range(0, len(lines), chunk_len)
    for chunk_no, start in enumerate(starts, start=1):
        write_data(lines[start:start + chunk_len], chunk_no)

def write_data(lines, chunk):
    """Write every entry of *lines*, one per line, to the file for *chunk*.

    The destination is CHRONOGRAF_PATH with *chunk* substituted into the
    template; an existing file at that path is truncated.
    """
    target = CHRONOGRAF_PATH.format(chunk)
    with open(target, 'w+') as out:
        out.writelines("%s\n" % entry for entry in lines)
                     
def convert_to_epoch_time(date_time):
    """Convert a ``MM/DD/YYYY HH:MM:SS`` string to epoch nanoseconds.

    The value is interpreted in the local time zone of the machine running
    the code (``time.mktime``), at whole-second resolution.

    Args:
        date_time: Date/time text. Leading and trailing whitespace is ignored
            and runs of whitespace are collapsed to a single space.

    Returns:
        The epoch seconds as a decimal string followed by nine zeros
        (nanosecond precision), or ``None`` when *date_time* is not a string
        or does not match the expected format.
    """
    try:
        normalized = ' '.join(date_time.split())
        parsed = datetime.strptime(normalized, '%m/%d/%Y %H:%M:%S')
    except (AttributeError, TypeError, ValueError):
        # Only parse and type failures mean "no timestamp". Anything else
        # (e.g. KeyboardInterrupt) must propagate, not be turned into None.
        return None

    # Convert it to nanoseconds precision
    return str(int(time.mktime(parsed.timetuple()))) + "000000000"

# Run the conversion: read both CSV files and write the chunked output file(s).
import_data()