In [346]:
import pandas as pd
import numpy as np
import json
import warnings

warnings.filterwarnings('ignore')

data = pd.read_csv('/home/andrey/Desktop/Trades/data/full_statement.csv')

In [347]:
base_currency = {
    'USD': 1,
    'AUD': 0.77135,
    'GBP': 1.4112,
    'EUR': 1.21212,
    'NZD': 0.71454,
    'CAD': 0.81,
    'CHF': 1.09,
    'JPY': 0.0090,
    'HKD': 0.13
}

In [348]:
# with open('/home/andrey/Desktop/Features.json', 'w') as f:
#     json.dump(features, f)

In [349]:
def data_separation(data):
    all_data = {}
    for names in ['Trades',
                  'Interest',
                  'Dividends',
                  'Financial Instrument Information']:
        if names in data.Statement.values:
            d = data[data.Statement == names].reset_index(drop=True)
            d.columns = d.iloc[0]
            d = d[d.columns.dropna()]
            all_data[names] = d
            d.to_csv('/home/andrey/Desktop/'+names+'.csv', index=False)
    return all_data

all_data = data_separation(data)

In [350]:
def preprocessing_of_trades(data):
    data = data[data['DataDiscriminator'] == 'Order']
    if 'Bonds' in data['Asset Category'].values:
        data.Symbol[data['Asset Category'] == 'Bonds'] = data.Symbol[data['Asset Category'] == 'Bonds'].apply(lambda x: ' '.join(x.split(' ')[:-1]))
    data['Quantity'] = data['Quantity'].apply(
        lambda x: x.replace(',', '') if x is not np.nan else x).astype(float)
    data[['Proceeds', 'T. Price']] = data[['Proceeds', 'T. Price']].astype(float)
    data['Date/Time'] = pd.to_datetime(data['Date/Time']).dt.date

    data.to_csv('/home/andrey/Desktop/preprocess_trades.csv', index=False)
    return data

data_preprocess = preprocessing_of_trades(all_data['Trades'])

In [351]:
def preprocessing_of_dividends(div):
    div = div.dropna()
    div.drop(index=0, inplace=True)
    div.Description = div.Description.apply(lambda x: x[x.find('(')+1:x.find(')')])
    div.reset_index(drop=True, inplace=True)
    div = div.rename(columns={'Description': 'Security ID'})
    div.Amount = div.Amount.astype(float)
    div.Amount = div.Amount.apply(lambda x: x*base_currency[div.Currency[div.Amount==x].item()])
    
    return div

div = preprocessing_of_dividends(all_data['Dividends'])

In [352]:
def multiplier_comparison(df):
    index = df[df.Header == 'Header'].index.append(pd.Index([df.shape[0]]))

    j = index[0]
    multiplier = {}
    for i in index[1:]:
        d = df.loc[j:i - 1].reset_index(drop=True)
        d.columns = d.iloc[0]
        d.drop(index=0, inplace=True)
        d = d.reset_index(drop=True)
        d['Multiplier'] = d['Multiplier'].apply(
            lambda x: x.replace(',', '')).astype(float)
        if d['Asset Category'][0] in ['Stocks', 'Futures', 'Bonds']:
            multiplier[d['Asset Category'][0]] = d[['Symbol', 'Multiplier']]
        else:
            multiplier[d['Asset Category'][0]] = d[['Description', 'Multiplier']].rename(
                columns={'Description': 'Symbol'})
        j = i
    return multiplier

financial_instrument = all_data['Financial Instrument Information']
multiplier = multiplier_comparison(financial_instrument)

In [353]:
def signum(num):
    return -1 if num<0 else 1

In [354]:
def matching_trades(data, div, multiplier, base_currency):
    d = {}
    count = 0
    predict = pd.DataFrame(
        columns=['Instrument',
                 'Currency',
                 'Result',
                 'Quantity',
                 'EntryDt',
                 'Entry',
                 'Closure Dt',
                 'Closure',
                 'Result in BC',
                 'Dividends',
                 'Multiplier',
                 'Type']
    )
    for i in data.index:
        instrument = data.Symbol[i]
        quantity = data.Quantity[i]
        price = data['T. Price'][i]
        if instrument not in d:
            category = data['Asset Category'][i]
            d[instrument] = {'Instrument': instrument,
                             'Currency': data['Currency'][i],
                             'Result': None,
                             'Quantity': None,
                             'EntryDt': data['Date/Time'][i],
                             'Entry': price,
                             'Closure Dt': None,
                             'Closure': None,
                             'Result in BC': None,
                             'Dividends': None,
                             'Multiplier': multiplier[category][
                                 multiplier[category][
                                     'Symbol'] == instrument].Multiplier.item() if category in multiplier.keys() else 1,
                             'Type': category}

            sign = signum(data.Quantity[i])
            count = quantity
        elif signum(quantity) * count < 0:
            d[instrument]['Closure Dt'] = data['Date/Time'][i]
            d[instrument]['Quantity'] = -quantity
            d[instrument]['Closure'] = price
            d[instrument]['Result'] = np.abs(quantity) * (
                    d[instrument]['Closure'] - d[instrument]['Entry']) * d[instrument]['Multiplier']
            d[instrument]['Result in BC'] = d[instrument]['Result'] * base_currency[d[instrument]['Currency']]
            predict = predict.append(pd.Series(d[instrument].values(), index=predict.columns),
            ignore_index=True)
            count += quantity
        else:
            d[instrument]['Entry'] = (count * d[instrument]['Entry'] + quantity * price) / (count + quantity)
            count += quantity
    
    predict = sort_by_day(predict)
    
    for symbol in predict[predict.Type=='Stocks'].Instrument:
        sec_id = financial_instrument[financial_instrument.Symbol==symbol]['Security ID'].item()
        if sec_id in div['Security ID'].values:
            index = div[div['Security ID']==sec_id].index
            amount = div.Amount[index].item()
            predict['Dividends'][predict.Instrument==symbol] = amount
    
    return predict

In [355]:
def sort_by_day(data):
    j = 0 
    drop_index = []
    for i in data.index[1:]:
        value_1 = data.iloc[j][['Instrument', 'Closure Dt']]
        value_2 = data.iloc[i][['Instrument', 'Closure Dt']]
        if value_2.equals(value_1) and data.Quantity[i]*data.Quantity[j]>0:
            data['Closure'][i] = (data['Closure'][i]*data['Quantity'][i]+data['Closure'][j]*data['Quantity'][j])/(data['Quantity'][i]+data['Quantity'][j])
            data['Quantity'][i] += data['Quantity'][j]
            data['Result'][i] += data['Result'][j]
            data['Result in BC'][i] += data['Result in BC'][j]
            drop_index.append(j)
        j = i
    data.drop(index=drop_index, inplace=True)
    data.reset_index(drop=True, inplace=True)
    return data

In [356]:
result = matching_trades(data_preprocess, div, multiplier, base_currency)
result.to_csv('/home/andrey/Desktop/matching_trades.csv', index=False)
result

Unnamed: 0,Instrument,Currency,Result,Quantity,EntryDt,Entry,Closure Dt,Closure,Result in BC,Dividends,Multiplier,Type
0,NCM,AUD,2700.000000,2000.0,2021-02-21,24.340000,2021-04-06,25.69000,2082.645000,,1.0,Stocks
1,TYR,AUD,2500.000000,10000.0,2021-02-02,2.650000,2021-02-21,2.90000,1928.375000,,1.0,Stocks
2,SSRM,CAD,857.999996,3000.0,2021-02-18,19.500000,2021-04-06,19.78600,694.979997,153.9162,1.0,Stocks
3,ABN,EUR,840.000000,3000.0,2021-02-01,8.720000,2021-02-16,9.00000,1018.180800,,1.0,Stocks
4,AGS,EUR,442.500000,250.0,2021-01-28,42.530000,2021-02-03,44.30000,536.363100,,1.0,Stocks
...,...,...,...,...,...,...,...,...,...,...,...,...
106,EUR.USD,USD,596.250000,125000.0,2021-01-07,1.184900,2021-03-09,1.18967,596.250000,,1.0,Forex
107,GBP.USD,USD,1266.000000,-300000.0,2021-02-18,1.397100,2021-02-19,1.40132,1266.000000,,1.0,Forex
108,NZD.USD,USD,1241.000000,200000.0,2021-03-23,0.700615,2021-04-05,0.70682,1241.000000,,1.0,Forex
109,MUR 6 7/8 08/15/24,USD,15000.000000,10000.0,2021-02-04,100.500000,2021-03-29,102.00000,15000.000000,,1.0,Bonds
