In [259]:
import pandas as pd
import numpy as np
import json
import warnings

# NOTE(review): this silences every warning for the rest of the session,
# which also hides pandas warnings raised by the cells below; consider
# narrowing the filter to specific categories.
warnings.filterwarnings('ignore')

# Load the raw statement export that every later cell derives its data from.
# NOTE(review): absolute, user-specific path; the notebook cannot be re-run
# on another machine without editing it.
data = pd.read_csv('/home/andrey/PycharmProjects/Trades/data/full_statement.csv')

In [260]:
# Multipliers that convert an amount quoted in the given currency into the
# base currency: later cells compute `amount * base_currency[currency]`.
# USD maps to 1, so USD acts as the base currency here.
# NOTE(review): the rates are hard-coded; presumably a snapshot of FX rates
# for the statement period. Confirm their date and source.
base_currency = {
    'USD': 1,
    'AUD': 0.77135,
    'GBP': 1.4112,
    'EUR': 1.21212,
    'NZD': 0.71454,
    'CAD': 0.81,
    'CHF': 1.09,
    'JPY': 0.0090,
    'HKD': 0.13
}

In [261]:
def data_separation(data):
    """Split the raw statement into one DataFrame per known section.

    Rows are selected by the value of the ``Statement`` column. For each
    section the first selected row supplies the column labels, and columns
    whose label is missing (NaN) are discarded. That first row is NOT
    removed; it stays as row 0 of the returned frame.

    Parameters
    ----------
    data : pandas.DataFrame
        Raw statement with a ``Statement`` column naming the section of
        every row.

    Returns
    -------
    dict
        Maps section name to its DataFrame. Only sections that actually
        occur in ``data`` are present.
    """
    wanted_sections = ['Trades',
                       'Interest',
                       'Dividends',
                       'Financial Instrument Information']
    all_data = {}
    for section in wanted_sections:
        if section not in data.Statement.values:
            continue
        frame = data[data.Statement == section].reset_index(drop=True)
        # The first row of a section carries that section's own header labels.
        frame.columns = frame.iloc[0]
        labelled_columns = frame.columns.dropna()
        all_data[section] = frame[labelled_columns]
    return all_data

# Split the raw statement into a dict of per-section DataFrames.
all_data = data_separation(data)

In [262]:
def preprocessing_of_trades(data):
    """Clean the 'Trades' section and keep only order-level rows.

    Steps:
      * keep rows whose ``DataDiscriminator`` is ``'Order'``;
      * cast ``Proceeds`` and ``T. Price`` to float;
      * add a ``Date`` column holding the calendar date of ``Date/Time``;
      * parse ``Quantity`` to float, removing thousands separators from
        string values (non-string values such as NaN or numbers are cast
        as they are);
      * for rows with ``Asset Category == 'Bonds'``: drop the last
        space-separated token of ``Symbol`` and divide ``T. Price`` by 100.

    The input frame is not modified; the original row index is preserved.

    Parameters
    ----------
    data : pandas.DataFrame
        The 'Trades' section with columns ``DataDiscriminator``,
        ``Proceeds``, ``T. Price``, ``Date/Time``, ``Quantity``,
        ``Asset Category`` and ``Symbol``.

    Returns
    -------
    pandas.DataFrame
        A new, cleaned frame.
    """
    # Work on an explicit copy so the assignments below never write into a
    # view of the caller's frame.
    trades = data[data['DataDiscriminator'] == 'Order'].copy()

    for column in ('Proceeds', 'T. Price'):
        trades[column] = trades[column].astype(float)

    trades['Date'] = pd.to_datetime(trades['Date/Time']).dt.date

    # Only strings can carry a thousands separator; leaving other values
    # untouched avoids an AttributeError on NaN or already-numeric cells.
    trades['Quantity'] = trades['Quantity'].map(
        lambda x: x.replace(',', '') if isinstance(x, str) else x).astype(float)

    is_bond = trades['Asset Category'] == 'Bonds'
    if is_bond.any():
        trades.loc[is_bond, 'Symbol'] = trades.loc[is_bond, 'Symbol'].map(
            lambda x: ' '.join(x.split(' ')[:-1]))
        trades.loc[is_bond, 'T. Price'] = trades.loc[is_bond, 'T. Price'] / 100

    return trades

# Clean the 'Trades' section and display the result as this cell's output.
data_trades = preprocessing_of_trades(all_data['Trades'])
data_trades

Unnamed: 0,Trades,Header,DataDiscriminator,Asset Category,Currency,Symbol,Date/Time,Quantity,T. Price,C. Price,Proceeds,Comm/Fee,Basis,Realized P/L,MTM P/L,Code,Date
1,Trades,Data,Order,Stocks,AUD,NCM,"2021-02-21, 18:06:50",2000.0,24.34000,24.37,-48680.0,-38.944,48718.944,0,60,O,2021-02-21
2,Trades,Data,Order,Stocks,AUD,NCM,"2021-04-06, 20:06:30",-2000.0,25.69000,25.91,51380.0,-41.104,-48718.944,2619.952,-440,C;P,2021-04-06
4,Trades,Data,Order,Stocks,AUD,TYR,"2021-02-02, 18:12:18",10000.0,2.65000,2.62,-26500.0,-21.2,26521.2,0,-300,O,2021-02-02
5,Trades,Data,Order,Stocks,AUD,TYR,"2021-02-21, 18:17:59",-10000.0,2.90000,3.04,29000.0,-23.2,-26521.2,2455.6,-1400,C,2021-02-21
9,Trades,Data,Order,Stocks,CAD,SSRM,"2021-02-18, 09:30:05",3000.0,19.50000,19.1,-58500.0,-30,58530,0,-1200,O;P,2021-02-18
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
420,Trades,Data,Order,Bonds,USD,FLR 3 1/2 12/15/24,"2021-03-23, 10:49:21",-50000.0,1.02100,102.279999,51050.0,-20.75,-49522,1507.25,-89.9995,C,2021-03-23
422,Trades,Data,Order,Bonds,USD,MUR 6 7/8 08/15/24,"2021-02-04, 15:45:37",10000.0,1.00500,100.5,-10050.0,-10,10060,0,0,O,2021-02-04
423,Trades,Data,Order,Bonds,USD,MUR 6 7/8 08/15/24,"2021-03-29, 10:17:04",-10000.0,1.02000,102.498001,10200.0,-19.75,-10060,120.25,-49.8001,C,2021-03-29
425,Trades,Data,Order,Bonds,USD,SIG 4.7 06/15/24,"2021-03-10, 09:53:11",50000.0,1.00250,100,-50125.0,-47.5,50172.5,0,-125,O,2021-03-10


In [263]:
def preprocessing_of_dividends(div, bc):
    """Clean the 'Dividends' section and convert amounts to the base currency.

    Rows containing any missing value are dropped, then the row labelled 0
    (the section's header row) is removed. ``Description`` is reduced to the
    text between its first ``'('`` and first ``')'`` and renamed to
    ``Security ID``. ``Amount`` is cast to float and multiplied by the rate
    of the row's own ``Currency``.

    The rate is looked up per row from ``Currency``. Looking the currency up
    by amount value fails as soon as two rows share the same amount.

    Parameters
    ----------
    div : pandas.DataFrame
        The 'Dividends' section with columns ``Currency``, ``Description``
        and ``Amount``; row label 0 must be the header row.
    bc : dict
        Maps currency code to its multiplier into the base currency.

    Returns
    -------
    pandas.DataFrame
        A new frame with a fresh 0..n-1 index; the input is not modified.

    Raises
    ------
    KeyError
        If row label 0 is absent after dropping incomplete rows, or if a
        currency is missing from ``bc``.
    """
    div = div.dropna().drop(index=0).reset_index(drop=True)
    div['Description'] = div['Description'].map(
        lambda text: text[text.find('(') + 1:text.find(')')])
    div = div.rename(columns={'Description': 'Security ID'})

    # Explicit dict indexing keeps the KeyError for unknown currencies
    # instead of silently producing NaN.
    rates = div['Currency'].map(lambda currency: bc[currency]).astype(float)
    div['Amount'] = div['Amount'].astype(float) * rates

    return div

# data_separation only adds sections that occur in the statement, so guard
# the lookup and fall back to None when there is no 'Dividends' section.
data_dividends = (
    preprocessing_of_dividends(all_data['Dividends'], base_currency)
    if 'Dividends' in all_data
    else None
)
# data_dividends

In [264]:
def preprocessing_of_interest(data, bc):
    """Convert interest amounts to the base currency and total them.

    Rows containing any missing value are dropped, then the row labelled 0
    (the section's header row) is removed. ``Amount`` is cast to float and
    multiplied by the rate of the row's own ``Currency``; the converted
    amounts are then summed per ``Description``.

    The rate is looked up per row from ``Currency``. Looking the currency up
    by amount value fails as soon as two rows share the same amount.

    Parameters
    ----------
    data : pandas.DataFrame
        The 'Interest' section with columns ``Currency``, ``Description``
        and ``Amount``; row label 0 must be the header row.
    bc : dict
        Maps currency code to its multiplier into the base currency.

    Returns
    -------
    pandas.DataFrame
        Columns ``Description`` and ``Amount``, one row per description.

    Raises
    ------
    KeyError
        If row label 0 is absent after dropping incomplete rows, or if a
        currency is missing from ``bc``.
    """
    interest = data.dropna().drop(index=0)

    # Explicit dict indexing keeps the KeyError for unknown currencies
    # instead of silently producing NaN.
    rates = interest['Currency'].map(lambda currency: bc[currency]).astype(float)
    interest['Amount'] = interest['Amount'].astype(float) * rates

    group_data = (interest[['Description', 'Amount']]
                  .groupby('Description')
                  .sum()
                  .reset_index())

    return group_data

# Call the function under the name it is defined with in this notebook
# (preprocessing_of_interest). data_separation only adds sections that occur
# in the statement, so fall back to None when 'Interest' is missing.
data_interest = (
    preprocessing_of_interest(all_data['Interest'], base_currency)
    if 'Interest' in all_data
    else None
)
data_interest

Unnamed: 0,Description,Amount
0,AUD Debit Interest for Apr-2021,-11.739947
1,AUD Debit Interest for Feb-2021,-23.325624
2,AUD Debit Interest for Mar-2021,-45.455655
3,Bond Coupon Payment (ATI 5 7/8 08/15/23),984.38
4,Bond Coupon Payment (BRASKM 6.45 02/03/24),6450.0
5,Bond Coupon Payment (DSPORT 5 3/8 08/15/26),268.75
6,Bond Coupon Payment (MUR 6 7/8 08/15/24),343.75
7,CAD Debit Interest for Apr-2021,-13.4946
8,CAD Debit Interest for Feb-2021,-13.5675
9,CAD Debit Interest for Mar-2021,-62.2971


In [265]:
def multiplier_comparison(df):
    """Build one Symbol/Multiplier table per asset category.

    ``df`` is cut into consecutive chunks, each starting at a row whose
    ``Header`` value is ``'Header'``. Within a chunk that first row supplies
    the column labels and is then removed. ``Multiplier`` is parsed to float
    after stripping commas. The chunk is keyed by the ``Asset Category`` of
    its first data row.

    For 'Stocks', 'Futures' and 'Bonds' the table uses the ``Symbol``
    column; for every other category the ``Description`` column is used and
    renamed to ``Symbol``.

    Parameters
    ----------
    df : pandas.DataFrame
        The 'Financial Instrument Information' section; rows are addressed
        by label, so a 0..n-1 index is expected.

    Returns
    -------
    dict
        Maps asset category to a DataFrame with columns ``Symbol`` and
        ``Multiplier``.
    """
    header_labels = df[df.Header == 'Header'].index
    # Append the frame length as a sentinel so the last chunk gets an end.
    boundaries = header_labels.append(pd.Index([df.shape[0]]))
    symbol_categories = ('Stocks', 'Futures', 'Bonds')

    multiplier = {}
    for start, stop in zip(boundaries[:-1], boundaries[1:]):
        section = df.loc[start:stop - 1].reset_index(drop=True)
        section.columns = section.iloc[0]
        section = section.drop(index=0).reset_index(drop=True)
        section['Multiplier'] = section['Multiplier'].apply(
            lambda raw: raw.replace(',', '')).astype(float)

        category = section['Asset Category'][0]
        if category in symbol_categories:
            table = section[['Symbol', 'Multiplier']]
        else:
            table = section[['Description', 'Multiplier']].rename(
                columns={'Description': 'Symbol'})
        multiplier[category] = table
    return multiplier

# Keep the raw instrument section (it is passed to matching_trades later) and
# derive the per-category Symbol/Multiplier tables from it.
financial_instrument = all_data['Financial Instrument Information']
multiplier = multiplier_comparison(financial_instrument)
# financial_instrument

In [266]:
# multiplier

In [267]:
def signum(num):
    """Return -1 for a negative number and 1 otherwise (zero included)."""
    if num < 0:
        return -1
    return 1

In [268]:
def group_by_day(data):
    """Merge same-day closures of one instrument into a single row.

    The frame is sorted by ``Type``, ``Instrument``, ``Closure Dt`` and
    ``Quantity``. Each row is then compared with the row directly before it:
    when both share ``Instrument`` and ``Closure Dt`` and their quantities
    have the same sign, the earlier row is folded into the later one and
    dropped. In the surviving row ``Closure`` becomes the quantity-weighted
    average, and ``Quantity``, ``Result`` and ``Result in BC`` become sums.

    Cells are written with ``.loc``. Chained assignment
    (``data[col][i] = ...``) does not update the frame when pandas
    Copy-on-Write is active, which would drop rows without merging them.

    Parameters
    ----------
    data : pandas.DataFrame
        Closed trades with columns ``Type``, ``Instrument``, ``Closure Dt``,
        ``Quantity``, ``Closure``, ``Result`` and ``Result in BC``.

    Returns
    -------
    pandas.DataFrame
        A new, sorted frame with a fresh 0..n-1 index; the input is not
        modified.
    """
    data = data.sort_values(
        ['Type', 'Instrument', 'Closure Dt', 'Quantity']).reset_index(drop=True)
    key_columns = ['Instrument', 'Closure Dt']
    drop_index = []

    # Walk adjacent row pairs (j = previous row, i = current row). A merged
    # row carries its accumulated totals into the next comparison.
    for j, i in zip(data.index[:-1], data.index[1:]):
        same_key = data.loc[i, key_columns].equals(data.loc[j, key_columns])
        qty_i = data.loc[i, 'Quantity']
        qty_j = data.loc[j, 'Quantity']
        if same_key and qty_i * qty_j > 0:
            data.loc[i, 'Closure'] = (
                data.loc[i, 'Closure'] * qty_i + data.loc[j, 'Closure'] * qty_j
            ) / (qty_i + qty_j)
            data.loc[i, 'Quantity'] = qty_i + qty_j
            data.loc[i, 'Result'] += data.loc[j, 'Result']
            data.loc[i, 'Result in BC'] += data.loc[j, 'Result in BC']
            drop_index.append(j)

    return data.drop(index=drop_index).reset_index(drop=True)

In [269]:
def matching_trades(data_trades,
                    data_dividends,
                    data_interest,
                    financial_instrument,
                    multiplier,
                    base_currency):
    """Match opening and closing trades into one row per closure.

    Trades are processed in index order. The first trade of an instrument
    opens a position. A later trade in the same direction updates the
    quantity-weighted average entry price. A trade in the opposite direction
    of the open quantity emits a closed-trade row.

    The open quantity is tracked per instrument, so the result does not
    depend on all trades of an instrument being adjacent in ``data_trades``.

    Closed rows are merged per instrument and day by ``group_by_day``. The
    ``Dividends`` column is then filled: for 'Stocks' from
    ``data_dividends`` via the instrument's ``Security ID``, for 'Bonds'
    from any ``data_interest`` description containing the symbol.

    Parameters
    ----------
    data_trades : pandas.DataFrame
        Cleaned trades with columns ``Symbol``, ``Quantity``, ``T. Price``,
        ``Asset Category``, ``Currency`` and ``Date``.
    data_dividends : pandas.DataFrame or None
        Columns ``Security ID`` and ``Amount``; ``None`` skips the step.
        Several rows for one security are summed.
    data_interest : pandas.DataFrame or None
        Columns ``Description`` and ``Amount``; ``None`` skips the step.
    financial_instrument : pandas.DataFrame
        Frame with ``Symbol`` and ``Security ID`` columns; each stock symbol
        must match exactly one row.
    multiplier : dict
        Maps asset category to a frame with ``Symbol`` and ``Multiplier``.
        Categories absent from the dict use a multiplier of 1.
    base_currency : dict
        Maps currency code to its multiplier into the base currency.

    Returns
    -------
    pandas.DataFrame
        One row per (merged) closure with the columns listed in ``columns``.

    Raises
    ------
    ValueError
        From ``.item()`` when a multiplier, security-id or interest lookup
        does not match exactly one row.
    """
    columns = ['Instrument',
               'Currency',
               'Result',
               'Quantity',
               'Entry Dt',
               'Entry',
               'Closure Dt',
               'Closure',
               'Result in BC',
               'Dividends',
               'Multiplier',
               'Type']

    positions = {}      # instrument -> running record of the position
    open_quantity = {}  # instrument -> signed quantity currently open
    closed_rows = []    # snapshots of records, one per closing trade

    for i in data_trades.index:
        instrument = data_trades.Symbol[i]
        quantity = data_trades.Quantity[i]
        price = data_trades['T. Price'][i]

        if instrument not in positions:
            category = data_trades['Asset Category'][i]
            position = {'Instrument': instrument,
                        'Currency': data_trades['Currency'][i],
                        'Result': None,
                        'Quantity': None,
                        'Entry Dt': data_trades['Date'][i],
                        'Entry': price,
                        'Closure Dt': None,
                        'Closure': None,
                        'Result in BC': None,
                        'Dividends': None,
                        'Multiplier': 1,
                        'Type': category}
            if category in multiplier:
                table = multiplier[category]
                position['Multiplier'] = table.loc[
                    table['Symbol'] == instrument, 'Multiplier'].item()
            positions[instrument] = position
            open_quantity[instrument] = quantity
            continue

        position = positions[instrument]
        held = open_quantity[instrument]

        if signum(quantity) * held < 0:
            # Opposite direction to the open quantity: a closing trade.
            position['Closure Dt'] = data_trades['Date'][i]
            position['Quantity'] = -quantity
            position['Closure'] = price
            # NOTE(review): np.abs drops the position's direction, so a short
            # position closed at a lower price yields a negative Result.
            # Confirm this sign convention is intended.
            position['Result'] = (np.abs(quantity)
                                  * (position['Closure'] - position['Entry'])
                                  * position['Multiplier'])
            position['Result in BC'] = (position['Result']
                                        * base_currency[position['Currency']])
            # Snapshot: the record keeps being updated by later trades.
            closed_rows.append(dict(position))
        else:
            # Same direction (or flat): quantity-weighted average entry price.
            # NOTE(review): 'Entry Dt' is not refreshed when a position is
            # reopened after being fully closed. Confirm this is intended.
            position['Entry'] = (held * position['Entry']
                                 + quantity * price) / (held + quantity)

        open_quantity[instrument] = held + quantity

    # Build the frame once; DataFrame.append no longer exists in pandas 2.x.
    predict = pd.DataFrame(closed_rows, columns=columns)
    predict = group_by_day(predict)

    if data_dividends is not None:
        stock_symbols = predict.loc[predict.Type == 'Stocks', 'Instrument'].unique()
        for symbol in stock_symbols:
            sec_id = financial_instrument.loc[
                financial_instrument.Symbol == symbol, 'Security ID'].item()
            amounts = data_dividends.loc[
                data_dividends['Security ID'] == sec_id, 'Amount']
            if not amounts.empty:
                # .loc writes into predict itself; chained assignment may
                # write into a temporary copy instead.
                predict.loc[predict.Instrument == symbol,
                            'Dividends'] = amounts.sum()

    if data_interest is not None:
        bond_symbols = predict.loc[predict.Type == 'Bonds', 'Instrument'].values
        for symbol in bond_symbols:
            for name in data_interest.Description.values:
                if symbol in name:
                    amount = data_interest.loc[
                        data_interest.Description == name, 'Amount'].item()
                    predict.loc[predict.Instrument == symbol,
                                'Dividends'] = amount

    return predict

In [270]:
# Dividends and interest are optional statement sections: use the cleaned
# frame when the section exists, otherwise pass None to matching_trades.
data_dividends = (
    preprocessing_of_dividends(all_data['Dividends'], base_currency)
    if 'Dividends' in all_data
    else None
)
data_interest = (
    preprocessing_of_interest(all_data['Interest'], base_currency)
    if 'Interest' in all_data
    else None
)

# Match the trades, save the table to disk and show it as the cell output.
result = matching_trades(data_trades, data_dividends, data_interest,
                         financial_instrument, multiplier, base_currency)

result.to_csv('/home/andrey/PycharmProjects/Trades/data/result2.csv', index=False)
result

Unnamed: 0,Instrument,Currency,Result,Quantity,Entry Dt,Entry,Closure Dt,Closure,Result in BC,Dividends,Multiplier,Type
0,MUR 6 7/8 08/15/24,USD,150.000000,10000.0,2021-02-04,1.0050,2021-03-29,1.02000,150.000000,87.85,1.0,Bonds
1,SIG 4.7 06/15/24,USD,62.500000,50000.0,2021-03-10,1.0025,2021-04-06,1.00375,62.500000,737.64,1.0,Bonds
2,CI 16APR21 190.0 P,USD,-170.000000,-1.0,2021-02-22,6.0000,2021-02-25,4.30000,-170.000000,,100.0,Equity and Index Options
3,GME 05FEB21 400.0 C,USD,-8690.000000,-2.0,2021-02-01,43.4500,2021-02-05,0.00000,-8690.000000,,100.0,Equity and Index Options
4,GME 05MAR21 45.0 C,USD,-2560.000000,-2.0,2021-02-12,12.8000,2021-03-05,0.00000,-2560.000000,,100.0,Equity and Index Options
...,...,...,...,...,...,...,...,...,...,...,...,...
104,SSRM,CAD,857.999996,3000.0,2021-02-18,19.5000,2021-04-06,19.78600,694.979997,153.9162,1.0,Stocks
105,TYR,AUD,2500.000000,10000.0,2021-02-02,2.6500,2021-02-21,2.90000,1928.375000,,1.0,Stocks
106,UNVB,EUR,166.800000,800.0,2021-02-05,46.1315,2021-03-15,46.34000,202.181616,413.866253,1.0,Stocks
107,VIV,EUR,870.000000,1000.0,2021-02-22,29.2800,2021-05-18,30.15000,1054.544400,,1.0,Stocks


In [271]:
# NOTE(review): this expression always raises TypeError (see the cell output).
# It looks like a leftover scratch cell; remove it or confirm it is intentional.
None + 1

TypeError: unsupported operand type(s) for +: 'NoneType' and 'int'