In [4]:
import pandas as pd
import glob

import warnings
warnings.filterwarnings('ignore')

In [5]:
# Gather every monthly top-100 CSV under data/ in ascending file-name order.
top100_file_list = sorted(glob.glob("data/top100*csv"))
top100_file_list

['data/top100_traders_20220101_20220131.csv',
 'data/top100_traders_20220201_20220228.csv',
 'data/top100_traders_20220301_20220331.csv',
 'data/top100_traders_20220401_20220430.csv',
 'data/top100_traders_20220501_20220531.csv',
 'data/top100_traders_20220601_20220630.csv',
 'data/top100_traders_20220701_20220731.csv',
 'data/top100_traders_20220801_20220831.csv',
 'data/top100_traders_20220901_20220930.csv',
 'data/top100_traders_20221001_20221031.csv',
 'data/top100_traders_20221101_20221130.csv',
 'data/top100_traders_20221201_20221231.csv',
 'data/top100_traders_20230101_20230131.csv',
 'data/top100_traders_20230201_20230228.csv',
 'data/top100_traders_20230301_20230331.csv',
 'data/top100_traders_20230401_20230430.csv',
 'data/top100_traders_20230501_20230531.csv',
 'data/top100_traders_20230601_20230630.csv',
 'data/top100_traders_20230701_20230731.csv',
 'data/top100_traders_20230801_20230831.csv',
 'data/top100_traders_20230901_20230930.csv',
 'data/top100_traders_20231001_202

## STEP1: アカウント抽出 
- 前月のアカウント一覧から上位200と下位200を抽出
- 過去6ヶ月のアカウント一覧から6ヶ月連続でプラスとなっているアカウントを全て抽出

このデータをOANDAのエンジニアに渡して、ユーザーごとの取引データを抽出してもらう

In [6]:
# Load the most recent monthly file (last entry of the sorted file list).
latest_path = top100_file_list[-1]
print(latest_path)

# account_id is read as text on purpose: the column mixes purely numeric IDs
# with dashed IDs, and letting read_csv infer the type chunk by chunk can
# produce a mix of ints and strings in the same column.
df_top100 = (
    pd.read_csv(latest_path, encoding="shift-jis", dtype={"account_id": str})
    # Rows without an account ID cannot be handed over, so drop them.
    .dropna(subset=["account_id"])
    # Shift by 100 so the value becomes a signed change.
    # NOTE(review): assumes the raw column is a percentage where 100 means
    # "unchanged" -- confirm with the data owner.
    .assign(percent_change=lambda d: d["percent_change"].astype(float) - 100)
    .dropna(subset=["percent_change"])
    # Exclude accounts whose shifted change is exactly zero.
    .loc[lambda d: d["percent_change"] != 0]
)


data/top100_traders_20240901_20240930.csv


## 上位200

In [7]:
# Order accounts from the largest percent_change downwards and keep the first 200.
top200 = df_top100.sort_values(by="percent_change", ascending=False).iloc[:200]

# Preview the first rows of the selection.
top200[["account_id", "percent_change"]].head()

Unnamed: 0,account_id,percent_change
167667,001-009-6570488-001,349.679012
25140,811646100,262.221893
80200,001-009-11096135-001,182.776937
136810,001-009-400924-001,163.092845
121518,001-009-3351587-001,145.912186


## 下位200

In [8]:
# Order accounts from the smallest percent_change upwards and keep the first 200.
worst200 = df_top100.sort_values(by="percent_change").iloc[:200]

# Preview the first rows of the selection.
worst200[["account_id", "percent_change"]].head()

Unnamed: 0,account_id,percent_change
43680,811664648,-89.014917
19295,811640201,-88.740138
86478,001-009-1381402-001,-88.560693
158654,001-009-5509204-001,-88.226235
180137,001-009-8160181-001,-86.293785


## 6ヶ月連続でプラス成績

In [9]:
# Re-scan data/ and keep only the six newest monthly files (by file name).
top100_file_list = sorted(glob.glob("data/top100*csv"))[-6:]
top100_file_list

['data/top100_traders_20240401_20240430.csv',
 'data/top100_traders_20240501_20240531.csv',
 'data/top100_traders_20240601_20240630.csv',
 'data/top100_traders_20240701_20240731.csv',
 'data/top100_traders_20240801_20240831.csv',
 'data/top100_traders_20240901_20240930.csv']

In [10]:
# Read each selected monthly file and tag its rows with the month it covers.
df_list = []
for path in top100_file_list:
    # account_id is read as text on purpose: with type inference, a chunk that
    # holds only numeric IDs (plus blanks) can be parsed as a number, and the
    # later astype(str) would then give a different key for the same account
    # than a file where the ID was parsed as text.
    df_tmp = pd.read_csv(path, encoding="shift-jis", dtype={"account_id": str})
    # File names end in ..._<start YYYYMMDD>_<end YYYYMMDD>.csv; the first six
    # characters of the start date give the YYYYMM period label.
    df_tmp["period"] = path.split("_")[-2][:6]
    df_tmp.dropna(subset=["account_id"], inplace=True)
    df_list.append(df_tmp)

# Stack all months into one long frame with a fresh row index.
df = pd.concat(df_list, ignore_index=True)

# Normalise column types used by the aggregation that follows.
df["percent_change"] = df["percent_change"].astype(float)
df["account_id"] = df["account_id"].astype(str)

# Rows without a percent_change cannot be evaluated, so drop them.
df.dropna(subset=["percent_change"], inplace=True)

In [11]:
# Per-account summary over the loaded months: spread of percent_change, number
# of rows, the latest month the account appears in, and how many distinct
# months it appears in.
df_agg = df.groupby("account_id").agg(
    {"percent_change": ["min", "mean", "max", "count"], "period": ["max", "nunique"]}
)

# Take the newest period from the combined frame itself rather than from a
# loop variable left over from the loading cell.
latest_period = df["period"].max()

# Number of consecutive months an account must be present in.
required_months = 6

# Keep accounts that
#   * never dropped to 100 or below in any row
#     (NOTE(review): assumes 100 is the break-even value -- confirm),
#   * are present in the newest month, and
#   * appear in six distinct months. Distinct months are counted instead of
#     rows so that a duplicated row in one month cannot mask a missing month.
df_6months = df_agg[
    (df_agg[("percent_change", "min")] > 100)
    & (df_agg[("period", "max")] == latest_period)
    & (df_agg[("period", "nunique")] == required_months)
]

In [12]:
# List the qualifying account IDs: move the groupby index back into a column,
# keep only that column, and show each ID once.
(
    df_6months["percent_change"]
    .reset_index()
    .loc[:, ["account_id"]]
    .drop_duplicates()
)

Unnamed: 0,account_id
0,001-009-10169545-001
1,001-009-11164392-001
2,001-009-1166306-001
3,001-009-1247478-002
4,001-009-1567287-002
5,001-009-1613858-002
6,001-009-1843675-001
7,001-009-2458940-002
8,001-009-2675730-002
9,001-009-2814456-002


# Excelで保存

In [15]:
# Label the workbook with the YYYYMM taken from the end date in the name of
# the newest file in the list.
period = top100_file_list[-1].split("_")[-1][:6]

# Write the three account lists into one workbook, one sheet per list.
# Using the writer as a context manager closes (and thereby saves) the file
# even if one of the sheet writes raises.
with pd.ExcelWriter("accounts_{}.xlsx".format(period), engine='xlsxwriter') as writer:
    top200[["account_id"]].to_excel(writer, sheet_name="Top200", index=False)
    worst200[["account_id"]].to_excel(writer, sheet_name="Worst200", index=False)
    df_6months["percent_change"].reset_index()[["account_id"]].to_excel(writer, sheet_name="6months", index=False)