In [1]:
import boto3
import numpy as np
import pandas as pd
import json

import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score


In [2]:
# S3 bucket and object keys of the CSV inputs loaded by the cells below.
bucket_name = "team-forex"
login_key = "dataset_updated/login/login.csv"
trades_key = "dataset_updated/trades/trades.csv"
symbol_key = "dataset_updated/symbol/symbol.csv"
reason_key = "dataset_updated/reason/reason.csv"
# NOTE(review): unlike the other keys, this prefix contains a space
# ("daily report") — confirm it matches the actual object key in the bucket.
daily_report_key = "dataset_updated/daily report/daily_report.csv"



In [3]:
def get_csv_from_s3(bucket, key, s3_client=None):
    """Download a CSV object from S3 and parse it into a DataFrame.

    Parameters
    ----------
    bucket : str
        Name of the S3 bucket.
    key : str
        Object key of the CSV file inside the bucket.
    s3_client : optional
        Any object exposing ``get_object(Bucket=..., Key=...)``. When omitted,
        a new ``boto3`` S3 client is created for this call. Passing one lets
        several downloads share a single client (and lets a stub be injected).

    Returns
    -------
    pandas.DataFrame or None
        The parsed CSV, or ``None`` if anything failed. The error is printed
        rather than raised, so callers must check for ``None`` themselves.
    """
    try:
        if s3_client is None:
            s3_client = boto3.client("s3")
        response = s3_client.get_object(Bucket=bucket, Key=key)
        body = response["Body"]
        try:
            return pd.read_csv(body)
        finally:
            # Release the underlying stream even when parsing fails.
            body.close()
    except Exception as e:
        print(f"Error retrieving file from S3: {e}")
        return None

login = get_csv_from_s3(bucket_name, login_key)
# get_csv_from_s3 returns None on failure; stop here with a clear message
# instead of failing later with "'NoneType' object is not subscriptable".
if login is None:
    raise RuntimeError(f"Could not load s3://{bucket_name}/{login_key}")


In [4]:
trades = get_csv_from_s3(bucket_name, trades_key)
# get_csv_from_s3 returns None on failure; stop here with a clear message
# instead of failing later with "'NoneType' object is not subscriptable".
if trades is None:
    raise RuntimeError(f"Could not load s3://{bucket_name}/{trades_key}")


In [5]:
symbol = get_csv_from_s3(bucket_name, symbol_key)
# get_csv_from_s3 returns None on failure; stop here with a clear message
# instead of letting the later merge fail on a None operand.
if symbol is None:
    raise RuntimeError(f"Could not load s3://{bucket_name}/{symbol_key}")


In [6]:
reason = get_csv_from_s3(bucket_name, reason_key)
# get_csv_from_s3 returns None on failure; stop here with a clear message
# instead of letting the later merge fail on a None operand.
if reason is None:
    raise RuntimeError(f"Could not load s3://{bucket_name}/{reason_key}")


In [7]:
# Daily report table; arguments passed by keyword so their roles are explicit.
daily = get_csv_from_s3(bucket=bucket_name, key=daily_report_key)


In [8]:
# The time columns are parsed with unit='s', i.e. as seconds since the Unix
# epoch, and replaced in place by pandas datetimes.
for time_col in ('open_time', 'close_time'):
    trades[time_col] = pd.to_datetime(trades[time_col], unit='s')
login['reg_date'] = pd.to_datetime(login['reg_date'], unit='s')



In [9]:
# Enrich every trade with its account, instrument and reason-code attributes.
# validate='many_to_one' makes pandas raise MergeError when a lookup table has
# duplicate keys; without it such duplicates would silently duplicate trade
# rows and inflate any per-account sums computed from this frame.
# The joins are inner joins (pandas default), so trades with no match in a
# lookup table are dropped.
# NOTE: `trades` is rebound from itself, so this cell cannot be re-run without
# first reloading the raw frames.
trades = (
    trades
    .merge(login, on='login', validate='many_to_one')
    .merge(symbol, on='symbol', validate='many_to_one')
    .merge(reason, left_on='reason', right_on='code', validate='many_to_one')
)


In [10]:
# DPM = profit per one million USD of traded volume.
# A zero volume_usd would yield +/-inf, which poisons any mean taken over DPM;
# mask those rows so DPM is NaN there (pandas aggregations skip NaN).
volume_musd = trades['volume_usd'].where(trades['volume_usd'] != 0) / 1_000_000
trades['DPM'] = trades['profit'] / volume_musd


In [11]:
# Display the merged trades frame (pandas truncates the rendering of a frame
# this large to its first and last rows).
# NOTE(review): consider trades.head() to keep the saved output small.
trades


Unnamed: 0,ticket,login,symbol,cmd,volume,open_time,open_price,close_time,close_price,tp,...,profit,volume_usd,country,account_currency,reg_date,description,type,code,reason_y,DPM
0,68880703,7062462,XAUUSD,0,0.01,2024-02-13 17:05:41,1991.35000,2024-02-15 16:05:14,2003.01000,0.00000,...,10.83,3994.36,Latvia,EUR,2023-12-22 18:51:59,Gold vs US Dollar,Metal,1,Expert,2711.322965
1,68880910,7062462,XAUUSD,0,0.02,2024-02-13 17:08:49,1990.30000,2024-02-15 16:05:10,2002.76000,0.00000,...,23.16,7986.12,Latvia,EUR,2023-12-22 18:51:59,Gold vs US Dollar,Metal,1,Expert,2900.031555
2,68120690,813125,US2000,0,1.00,2024-01-24 07:56:45,1983.22000,2024-01-24 16:41:43,1996.68000,2012.34000,...,105.42,39799.00,United Kingdom,GBP,2023-05-20 18:43:18,US Small Cap 2000 Cash,Index,1,Expert,2648.810272
3,68169249,813125,US2000,0,2.00,2024-01-24 23:36:11,1960.62000,2024-01-25 16:29:03,1991.51000,2331.85000,...,485.26,79042.60,United Kingdom,GBP,2023-05-20 18:43:18,US Small Cap 2000 Cash,Index,1,Expert,6139.221129
4,68186877,88945036,NZDUSD,0,0.13,2024-01-25 08:00:00,0.61062,2024-01-25 15:38:26,0.61317,0.61312,...,33.15,15909.27,Nigeria,USD,2023-05-01 04:36:26,New Zealand Dollar vs US Dollar,Forex,1,Expert,2083.690829
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4521772,68795989,810950,XAUUSD,0,0.10,2024-02-12 11:53:03,2020.74000,2024-02-13 07:53:25,2021.51000,2026.00000,...,7.70,40422.50,United Kingdom,USD,2023-04-14 19:29:33,Gold vs US Dollar,Metal,5,Mobile,190.487971
4521773,68796026,810950,XAUUSD,0,0.10,2024-02-12 11:53:14,2020.70000,2024-02-13 07:53:29,2021.50000,2026.00000,...,8.00,40422.00,United Kingdom,USD,2023-04-14 19:29:33,Gold vs US Dollar,Metal,5,Mobile,197.912028
4521774,68854039,810799,EURUSD,1,0.10,2024-02-13 11:56:54,1.07654,2024-02-13 12:00:16,1.07693,0.00000,...,-3.62,21534.70,France,EUR,2023-04-12 14:29:35,Euro vs US Dollar,Forex,5,Mobile,-168.100786
4521775,68785760,810710,GER30,1,0.03,2024-02-12 09:09:05,16994.30000,2024-02-13 11:41:03,16934.00000,0.00000,...,18.09,10931.50,France,EUR,2023-04-11 18:29:39,GER30 Cash,Index,5,Mobile,1654.850661


In [12]:
# Collapse trades to one row per account (login): mean DPM, summed profit,
# volume, commission and swaps, plus the first country / account currency
# encountered for that login.
account_df = (
    trades
    .groupby('login')
    .agg(
        DPM=('DPM', 'mean'),
        profit=('profit', 'sum'),
        volume=('volume', 'sum'),
        commission=('commission', 'sum'),
        swaps=('swaps', 'sum'),
        country=('country', 'first'),
        account_currency=('account_currency', 'first'),
    )
    .reset_index()
)


In [13]:
# Display the per-account aggregate built in the previous cell.
account_df


Unnamed: 0,login,DPM,profit,volume,commission,swaps,country,account_currency
0,504326,-1427.027553,-7319.78,155.20,0.00,0.00,CA,USD
1,504336,-81.179288,-89.60,2.18,0.00,-0.18,CI,USD
2,504357,-0.027770,-3.04,0.02,-0.14,0.00,CA,CAD
3,504359,-0.006641,-27717.86,1127.46,0.00,-472.40,CA,CAD
4,504361,-0.190902,-8.66,0.01,0.00,-1.04,CA,USD
...,...,...,...,...,...,...,...,...
11971,88945036,78.987175,20488.73,517.04,-3619.00,-885.58,Nigeria,USD
11972,88945038,190.075689,-192.64,103.06,-1236.72,-221.86,Singapore,SGD
11973,1000054825,14.333698,-5114.25,32.34,0.00,-296.69,Singapore,SGD
11974,1000054838,-48.437138,-9003.28,437.17,0.00,-109.33,Saudi Arabia,USD


In [17]:

# Distribution of DPM
#sns.histplot(account_df['DPM'])
#plt.title('Distribution of DPM')
#plt.show()
