In [521]:
import pandas as pd
import numpy as np
import json
import warnings

# NOTE(review): this silences every warning for the rest of the session,
# including any pandas warnings raised by the cells below.
warnings.filterwarnings('ignore')

# Load the raw statement export. The cells below select rows through its
# 'Statement' column.
data = pd.read_csv('/home/andrey/Desktop/full_statement.csv')

In [522]:
# Per-currency conversion factors keyed by currency code. USD carries the
# factor 1; every other entry is a hard-coded snapshot value.
base_currency = dict(
    USD=1,
    AUD=0.77135,
    GBP=1.4112,
    EUR=1.21212,
    NZD=0.71454,
    CAD=0.81,
    CHF=1.09,
    JPY=0.0090,
    HKD=0.13,
)

In [523]:
# with open('/home/andrey/Desktop/Features.json', 'w') as f:
#     json.dump(features, f)

In [524]:
def data_separation(data, output_dir='/home/andrey/Desktop/full/'):
    """Split a raw statement frame into one frame per known section.

    Rows are grouped by the value of the ``Statement`` column. For every
    section that is present, the first row of the section is used as the
    column header and columns whose header is missing are dropped. The header
    row itself is kept as row 0 of the returned frame.

    Parameters
    ----------
    data : pandas.DataFrame
        Raw statement; must contain a ``Statement`` column.
    output_dir : str or None, optional
        Directory that receives one ``<section>.csv`` file per section. The
        default keeps the historical location. Pass ``None`` to skip writing.

    Returns
    -------
    dict
        Maps section name to its frame. Sections absent from ``data`` are
        not included.
    """
    import os  # local import so this cell does not depend on the import cell

    sections = ('Trades',
                'Interest',
                'Dividends',
                'Financial Instrument Information')

    all_data = {}
    for name in sections:
        in_section = data['Statement'] == name
        if not in_section.any():
            continue

        section = data[in_section].reset_index(drop=True)
        # The first row of a section carries that section's column names.
        section.columns = section.iloc[0]
        # A boolean mask (rather than selecting by label) keeps each column
        # exactly once even if a header name is repeated.
        section = section.loc[:, section.columns.notna()]

        all_data[name] = section
        if output_dir is not None:
            section.to_csv(os.path.join(output_dir, name + '.csv'), index=False)
    return all_data

# Split the raw statement into per-section frames, keyed by section name.
all_data = data_separation(data)

In [525]:
def preprocessing_of_trades(data, output_path='/home/andrey/Desktop/full/preprocess_trades.csv'):
    """Keep the 'Order' rows of the trades frame and make its numbers numeric.

    Steps, in order:

    1. keep rows whose ``DataDiscriminator`` equals ``'Order'``;
    2. for rows whose ``Asset Category`` is ``'Bonds'``, drop the last
       space-separated token of ``Symbol``;
    3. strip thousands separators from ``Quantity`` and cast it to float
       (missing values stay NaN);
    4. cast ``Proceeds`` and ``T. Price`` to float;
    5. write the result to ``output_path`` unless it is ``None``.

    The input frame is not modified; the original row index is preserved.

    Parameters
    ----------
    data : pandas.DataFrame
        Trades section with at least the columns named above.
    output_path : str or None, optional
        CSV destination. The default keeps the historical location. Pass
        ``None`` to skip writing.

    Returns
    -------
    pandas.DataFrame
        The filtered, type-converted copy.
    """
    # Explicit copy: the assignments below must never write through to the
    # caller's frame, and must not rely on chained indexing.
    orders = data[data['DataDiscriminator'] == 'Order'].copy()

    is_bond = orders['Asset Category'] == 'Bonds'
    if is_bond.any():
        orders.loc[is_bond, 'Symbol'] = orders.loc[is_bond, 'Symbol'].apply(
            lambda symbol: ' '.join(symbol.split(' ')[:-1]))

    # Only strings can carry a thousands separator; NaN/None/numeric values
    # are passed through unchanged to the float cast.
    orders['Quantity'] = orders['Quantity'].apply(
        lambda value: value.replace(',', '') if isinstance(value, str) else value
    ).astype(float)

    for column in ('Proceeds', 'T. Price'):
        orders[column] = orders[column].astype(float)

    if output_path is not None:
        orders.to_csv(output_path, index=False)
    return orders

# Clean the 'Trades' section; raises KeyError if the statement had no such section.
data_preprocess = preprocessing_of_trades(all_data['Trades'])

In [526]:
def multiplier_comparison(df):
    """Build a per-asset-category table of instrument multipliers.

    ``df`` is read as a stack of sub-tables: every row whose ``Header``
    column equals ``'Header'`` starts a new sub-table and supplies its column
    names; the rows up to the next such row are its data. Each sub-table is
    stored under the ``Asset Category`` value of its first data row.

    For the categories ``'Stocks'``, ``'Futures'`` and ``'Bonds'`` the
    instrument key is the ``Symbol`` column; for every other category the
    ``Description`` column is used and renamed to ``Symbol``.

    Sub-tables that contain a header row but no data rows are skipped.

    Parameters
    ----------
    df : pandas.DataFrame
        Must have a ``Header`` column and a default 0..n-1 row index (row
        labels are used as positions when slicing).

    Returns
    -------
    dict
        Maps asset category to a frame with the columns ``Symbol`` and
        ``Multiplier`` (float). If a category occurs in several sub-tables,
        the last one wins.
    """
    header_rows = df.index[df['Header'] == 'Header']
    # A sentinel one past the last row closes the final sub-table.
    boundaries = header_rows.append(pd.Index([df.shape[0]]))

    multiplier = {}
    for start, stop in zip(boundaries[:-1], boundaries[1:]):
        section = df.loc[start:stop - 1].reset_index(drop=True)
        section.columns = section.iloc[0]
        section = section.iloc[1:].reset_index(drop=True)
        if section.empty:
            # Header without data rows: there is no category to file it under.
            continue

        # Only strings can carry a thousands separator; other values go
        # straight to the float cast.
        section['Multiplier'] = section['Multiplier'].apply(
            lambda value: value.replace(',', '') if isinstance(value, str) else value
        ).astype(float)

        category = section['Asset Category'][0]
        if category in ['Stocks', 'Futures', 'Bonds']:
            multiplier[category] = section[['Symbol', 'Multiplier']]
        else:
            multiplier[category] = section[['Description', 'Multiplier']].rename(
                columns={'Description': 'Symbol'})
    return multiplier

# Build the multiplier lookup from the 'Financial Instrument Information'
# section; raises KeyError if the statement had no such section.
financial_instrument = all_data['Financial Instrument Information']
multiplier = multiplier_comparison(financial_instrument)

In [527]:
# NOTE(review): this cell re-defines preprocessing_of_trades, which an earlier
# cell of this notebook already defines; one of the two definitions can be dropped.
def preprocessing_of_trades(data, output_path='/home/andrey/Desktop/full/preprocess_trades.csv'):
    """Keep the 'Order' rows of the trades frame and make its numbers numeric.

    Steps, in order:

    1. keep rows whose ``DataDiscriminator`` equals ``'Order'``;
    2. for rows whose ``Asset Category`` is ``'Bonds'``, drop the last
       space-separated token of ``Symbol``;
    3. strip thousands separators from ``Quantity`` and cast it to float
       (missing values stay NaN);
    4. cast ``Proceeds`` and ``T. Price`` to float;
    5. write the result to ``output_path`` unless it is ``None``.

    The input frame is not modified; the original row index is preserved.

    Parameters
    ----------
    data : pandas.DataFrame
        Trades section with at least the columns named above.
    output_path : str or None, optional
        CSV destination. The default keeps the historical location. Pass
        ``None`` to skip writing.

    Returns
    -------
    pandas.DataFrame
        The filtered, type-converted copy.
    """
    # Explicit copy: the assignments below must never write through to the
    # caller's frame, and must not rely on chained indexing.
    orders = data[data['DataDiscriminator'] == 'Order'].copy()

    is_bond = orders['Asset Category'] == 'Bonds'
    if is_bond.any():
        orders.loc[is_bond, 'Symbol'] = orders.loc[is_bond, 'Symbol'].apply(
            lambda symbol: ' '.join(symbol.split(' ')[:-1]))

    # Only strings can carry a thousands separator; NaN/None/numeric values
    # are passed through unchanged to the float cast.
    orders['Quantity'] = orders['Quantity'].apply(
        lambda value: value.replace(',', '') if isinstance(value, str) else value
    ).astype(float)

    for column in ('Proceeds', 'T. Price'):
        orders[column] = orders[column].astype(float)

    if output_path is not None:
        orders.to_csv(output_path, index=False)
    return orders

# NOTE(review): repeats the preprocessing call made in an earlier cell;
# data_preprocess is recomputed from the same all_data['Trades'] input.
data_preprocess = preprocessing_of_trades(all_data['Trades'])

In [528]:
def signum(num):
    """Return -1 for a negative number and 1 otherwise (zero maps to 1)."""
    if num < 0:
        return -1
    return 1

In [534]:
def _lookup_multiplier(multiplier, category, instrument):
    """Return the multiplier of ``instrument``, or 1 if ``category`` has no table.

    Raises ValueError (from ``Series.item``) when the category table does not
    contain exactly one row for the instrument.
    """
    if category not in multiplier:
        return 1
    table = multiplier[category]
    return table.loc[table['Symbol'] == instrument, 'Multiplier'].item()


def matching_trades(data, multiplier, base_currency):
    """Match opening and closing trades per instrument into closed-trade rows.

    Trades are processed in row order. For every instrument a signed open
    quantity and an average entry price are tracked independently:

    * a trade on a flat instrument opens a position at the trade price;
    * a trade in the direction of the open position increases it and updates
      the quantity-weighted average entry price;
    * a trade against the open position closes (part of) it and emits one
      output row. The closed quantity is capped at the open quantity; any
      excess opens a new position in the opposite direction at the trade
      price and trade time.

    ``Result`` is ``closed quantity * (closure - entry) * multiplier``, where
    the closed quantity carries the sign of the position being closed
    (positive for long, negative for short), so a short covered at a lower
    price yields a positive result. ``Result in BC`` is ``Result`` multiplied
    by ``base_currency[currency]``.

    Currency, multiplier and asset category of an instrument are taken from
    its first trade. Rows whose quantity is zero or missing are ignored.

    Parameters
    ----------
    data : pandas.DataFrame
        Needs the columns ``Symbol``, ``Quantity``, ``T. Price``,
        ``Asset Category``, ``Currency`` and ``Date/Time``; ``Quantity`` and
        ``T. Price`` must already be numeric.
    multiplier : dict
        Maps asset category to a frame with ``Symbol`` and ``Multiplier``
        columns. Categories missing from the dict use a multiplier of 1.
    base_currency : dict
        Maps currency code to its conversion factor.

    Returns
    -------
    pandas.DataFrame
        One row per closing event, in the order they occurred.

    Raises
    ------
    KeyError
        If a traded currency is missing from ``base_currency``.
    ValueError
        If a category table does not hold exactly one row for an instrument.
    """
    columns = ['Instrument',
               'Currency',
               'Result',
               'Quantity',
               'EntryDt',
               'Entry',
               'Closure Dt',
               'Closure',
               'Result in BC',
               'Multiplier',
               'Type']

    positions = {}      # instrument -> state of its open position
    closed_trades = []  # collected rows; the frame is built once at the end

    trade_fields = zip(data['Symbol'], data['Quantity'], data['T. Price'],
                       data['Asset Category'], data['Currency'], data['Date/Time'])
    for instrument, quantity, price, category, currency, timestamp in trade_fields:
        if pd.isna(quantity) or quantity == 0:
            # Nothing to match, and a NaN would poison the running position.
            continue

        state = positions.get(instrument)
        if state is None:
            state = {'currency': currency,
                     'multiplier': _lookup_multiplier(multiplier, category, instrument),
                     'type': category,
                     'open': 0,
                     'entry': None,
                     'entry_dt': None}
            positions[instrument] = state

        open_qty = state['open']
        if open_qty == 0:
            # Flat: this trade starts a fresh position.
            state['open'] = quantity
            state['entry'] = price
            state['entry_dt'] = timestamp
        elif open_qty * quantity > 0:
            # Same direction: grow the position at a weighted average price.
            state['entry'] = (open_qty * state['entry'] + quantity * price) / (open_qty + quantity)
            state['open'] = open_qty + quantity
        else:
            # Opposite direction: close at most what is currently open.
            matched = min(abs(quantity), abs(open_qty))
            closed_qty = matched if open_qty > 0 else -matched
            result = closed_qty * (price - state['entry']) * state['multiplier']
            closed_trades.append({
                'Instrument': instrument,
                'Currency': state['currency'],
                'Result': result,
                'Quantity': closed_qty,
                'EntryDt': state['entry_dt'],
                'Entry': state['entry'],
                'Closure Dt': timestamp,
                'Closure': price,
                'Result in BC': result * base_currency[state['currency']],
                'Multiplier': state['multiplier'],
                'Type': state['type'],
            })

            remainder = open_qty + quantity
            state['open'] = remainder
            if remainder * open_qty < 0:
                # The trade was larger than the position: the excess opens a
                # new position in the opposite direction at this trade's price.
                state['entry'] = price
                state['entry_dt'] = timestamp

    return pd.DataFrame(closed_trades, columns=columns)

In [535]:
# Match the preprocessed trades into closed-trade rows and export them as CSV.
result = matching_trades(data_preprocess, multiplier, base_currency)
result.to_csv('/home/andrey/Desktop/matching_trades.csv', index=False)