In [1]:
import pandas as pd
import numpy as np

import plotly.express as px
from datetime import datetime

In [2]:



def get_data(path):
    data = pd.read_excel('../dataset/ReportHistory.xlsx', header=5, engine='openpyxl')
    data.rename(columns=data.iloc[0], inplace= True)
    data = data.drop(labels=0, axis=0)
    return data



def set_feature(data):

    data = data[['Horário',    'Position',       'Ativo',        'Tipo',
            'Volume',    'Preço In',       'S / L',       'T / P',
       'Horário Out',   'Preço Out', 'Lucro']]
    
    # change dtypes
    data['Horário'] = pd.to_datetime(data['Horário'], format= '%Y-%m-%d ')
    data['Horário Out'] = pd.to_datetime(data['Horário Out'], format= '%Y-%m-%d ')

    data['Position'] = data['Position'].astype( np.int64)
    data['Preço In'] = data['Preço In'].astype( float )
    data['Preço Out'] = data['Preço Out'].astype( float )

    data['Lucro'] = data['Lucro'].astype( np.int64)
    
    
    data['Date'] = data['Horário'].dt.normalize()
    data['year'] =  data['Horário'].dt.year
    data['month'] = data['Horário'].dt.month
    data['day'] =   data['Horário'].dt.day

    # Market classification

    data['Market']= data['Ativo'].apply(lambda x: 'futures' if x.startswith(('WIN', 'WDO')) else

                       'stock' if len(x) == 5 else 'options' )


    # day trade = 1 if day trade, 0 if not
    data['day_trade']= data.apply(lambda x: 1 if x['day'] == x['Horário Out'].day else 0, axis=1 )
    
    return data

def cash_flow(df1):
    
    #Filtering Variables
    daily_spend = df1.groupby('Date').agg({'Lucro':['count','sum']}).reset_index()
    daily_spend= daily_spend.rename(columns={"count": "Nr. Trades", "sum": "Profit/Loss"})
    
    #preparing DataFrame
    c_flow = pd.DataFrame()

    # start initial blance
    initial = 10000

    initial_balance=[]
    initial_balance.append(initial)


    #c_flow.values
    lucro = daily_spend['Lucro']['Profit/Loss'].values
    tax = abs(lucro*0.01)

    for i in range(daily_spend.shape[0] -1):
        initial = initial + lucro[i] - tax[i]
        initial_balance.append(initial)

    #initial_balance
    c_flow['Date'] = daily_spend['Date']
    c_flow['initial_balance'] = initial_balance
    c_flow['Lucro'] = daily_spend['Lucro']['Profit/Loss']
    c_flow['withholding_tax']= c_flow['Lucro'].apply(lambda x: abs(0.01*x) )
    c_flow['final_balance'] = c_flow['initial_balance'] + c_flow['Lucro']- c_flow['withholding_tax']

    c_flow.sort_values('Date', ascending = False)
   
    return c_flow



data = get_data('ReportHistory.xlsx')

data = set_feature(data)
c_flow = cash_flow(data)
c_flow.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['Horário'] = pd.to_datetime(data['Horário'], format= '%Y-%m-%d ')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['Horário Out'] = pd.to_datetime(data['Horário Out'], format= '%Y-%m-%d ')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['Position'] = data['Position'].astype( np.int64)
A

Unnamed: 0,Date,initial_balance,Lucro,withholding_tax,final_balance
0,2020-10-01,10000.0,2225,22.25,12202.75
1,2020-10-02,12202.75,4427,44.27,16585.48
2,2020-10-05,16585.48,10813,108.13,27290.35
3,2020-10-06,27290.35,7603,76.03,34817.32
4,2020-10-07,34817.32,7083,70.83,41829.49


In [8]:
cash_flow.head()
#px.line( c_flow, x ='Date', y = 'final_balance', title = '----' , color_discrete_sequence = px.colors.colorbrewer.Paired )

AttributeError: 'function' object has no attribute 'head'

In [90]:
### Tax Rules
# each trade = 1% withholding tax if profit > 0 
# income tax is calculated by month = 20% * profit - withholdtax if it´s a day trade, normal trade 15%, if loss no tax

data['withholding_tax'] = data['Lucro'].apply(lambda x: abs(0.01*x) if x > 0 else 0 )

data['Profit_minus_withholding_tax'] = data.apply (lambda x: x['Lucro'] - x['withholding_tax'] if x['Lucro'] > 0 else x['Lucro'], axis =1 )



aux_tax_0 =  data.groupby(['year','month', 'Market', 'day_trade']).agg( Profit= ('Lucro', 'sum'),Withholding_tax=('withholding_tax','sum'),Deals=('Ativo','count'),Profit_minus_tax= ('Profit_minus_withholding_tax', 'sum')).reset_index()


aux_tax_0['income_tax'] = aux_tax_0.apply(lambda x: 0.20 * x['Profit_minus_tax'] if (x['day_trade'] == 1) & (x['Profit_minus_tax'] > 0) else 0.15* x['Profit_minus_tax'] if   (x['day_trade'] == 0 )& (x['Profit_minus_tax'] > 0)   else 0, axis=1 )
aux_tax_0['Profit_net'] = aux_tax_0.apply(lambda x: x['Profit_minus_tax'] - x['income_tax'], axis=1 )


aux_tx_summary = aux_tax_0.groupby(['year','month', 'day_trade']).agg(Deals=('Deals','count'),Profit= ('Profit', 'sum'),Withholding_tax=('Withholding_tax','sum'), Income_tax = ('income_tax', 'sum'), Profit_net = ('Profit_net', 'sum') ).sort_values('month')

aux_tx_summary


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Deals,Profit,Withholding_tax,Income_tax,Profit_net
year,month,day_trade,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2021,2,0,3,4500,45.0,668.25,3786.75
2021,2,1,3,5938,72.62,1173.076,4692.304
2021,3,0,3,937,20.4,162.84,753.76
2021,3,1,3,16314,177.09,3227.382,12909.528
2020,10,1,1,105197,1128.45,20813.71,83254.84
2020,11,1,1,111954,1214.95,22147.81,88591.24
2020,12,1,1,57995,644.21,11470.158,45880.632
