In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 

### 复原event data
本 notebook 用于复原系统记录下来的 event 文件，以便据此推测其他竞争者的策略。

In [2]:
### Give every competitor order an id that cannot collide with a market order id
target = 'match488'
ORDER_OPERATIONS = ['Insert', 'Cancel', 'Amend']

# Read the event log once; the market-side and competitor-side views below
# are both derived from this single filtered frame.
events = pd.read_csv(target+'_events.csv')
order_events = events.loc[events.Operation.isin(ORDER_OPERATIONS)]

### Largest order id among rows without a competitor, plus one
is_market_row = order_events.Competitor.isna()
max_id = order_events.loc[is_market_row, 'OrderId'].max()+1
if pd.isna(max_id):
    # No market rows at all: nothing to collide with, so any offset is valid.
    max_id = 0

## Handle the orders placed by competitors
df = order_events.loc[~is_market_row].reset_index(drop=True)

# Each competitor gets its own slot number so that its order ids can be made
# independent of every other competitor's ids.
competitorlist = df.Competitor.unique().tolist()
print('competitors are:', competitorlist)
competitor_dict = {name: slot for slot, name in enumerate(competitorlist)}

# New id = old id * number_of_competitors + competitor slot + max_id.
# The slot is always smaller than the multiplier, so two different
# (old id, competitor) pairs never map to the same new id, and adding max_id
# places every new id above the ids of the rows without a competitor.
df['OrderId'] = df.OrderId*len(competitorlist) + df.Competitor.map(competitor_dict) + max_id
competitorOrder = df.copy()

competitors are: ['AlphaPL_411325', 'Algorizz_12321', 'StanfordCardinal_666', 'SolowomanTrader_350014', 'usctradergo_202201', '10L_120212', 'Ni_Club_202131', 'PipInstallLiquidity_80271']


In [3]:
# This is the event file we want to reconstruct.
### The file is made of five kinds of rows ['Insert', 'Cancel', 'Amend', 'Trade', 'hedge']; they are restored separately.
target = 'match488'
df = pd.read_csv(target+'_events.csv')
df = df.loc[df.Operation.isin(['Insert', 'Cancel', 'Amend'])]
df = df.loc[df.Competitor.isna(),].reset_index(drop=True)
# Both inputs carry their own 0..n-1 index; ignore_index rebuilds a single
# unique index so later label-based operations never see duplicate labels.
df = pd.concat([df, competitorOrder], ignore_index=True)

### Template whose column layout the reconstructed file follows
sample = pd.read_csv('market_data1.csv')
sample

Unnamed: 0,Time,Instrument,Operation,OrderId,Side,Volume,Price,Lifespan
0,0.000000,0,Insert,177349435,B,1000.0,1469.0,G
1,0.000000,0,Insert,177349439,A,8000.0,1470.0,G
2,0.000000,0,Insert,177349467,A,3000.0,1472.0,G
3,0.000000,0,Insert,177349475,A,2000.0,1471.0,G
4,0.000000,0,Insert,177349487,A,2000.0,1816.0,G
...,...,...,...,...,...,...,...,...
461479,899.901512,1,Insert,7034062,A,10000.0,1522.0,G
461480,899.901512,1,Insert,7034063,B,10000.0,1510.0,G
461481,899.902420,0,Insert,205198743,B,1000.0,1501.0,G
461482,899.955806,0,Insert,205199427,A,1000.0,1533.0,G


In [4]:
# Rebuild Cancel orders
# Cancel rows take their Instrument and Lifespan from the Insert row that
# shares the same OrderId.
tmp = df.loc[df['Operation'] == 'Insert', ]
cancel_rows = df.loc[
    df['Operation'] == 'Cancel',
    ['Time', 'Competitor', 'Operation', 'OrderId', 'Side', 'Volume', 'Price', 'Fee'],
]
cancelorder = cancel_rows.merge(
    tmp[['OrderId', 'Instrument', 'Lifespan']],
    on='OrderId',
    how='left',
)
# Blank the Volume column, then keep only the template columns, in template order.
cancelorder = cancelorder.assign(Volume=np.nan)[sample.columns.tolist()]

cancelorder

Unnamed: 0,Time,Instrument,Operation,OrderId,Side,Volume,Price,Lifespan
0,0.000000,0.0,Cancel,546346383,,,,G
1,0.000000,0.0,Cancel,546346387,,,,G
2,0.000000,0.0,Cancel,546347235,,,,G
3,0.000000,0.0,Cancel,546347367,,,,G
4,0.000000,0.0,Cancel,546348523,,,,G
...,...,...,...,...,...,...,...,...
265843,899.734164,1.0,Cancel,576265986,,,,G
265844,899.734441,1.0,Cancel,576265978,,,,G
265845,899.734502,1.0,Cancel,576266002,,,,G
265846,899.734512,1.0,Cancel,576265994,,,,G


In [5]:
# Rebuild Insert orders
# Select rows and template columns in a single .loc call and take an explicit
# copy, so the Price assignment below writes to an independent frame instead
# of a chained slice of df (which can raise SettingWithCopyWarning).
insertorder = df.loc[df.Operation=='Insert', sample.columns.tolist()].copy()
# Cast the raw price to int and scale it by 1/100
# (presumably the event file stores prices in hundredths - TODO confirm).
insertorder['Price'] = insertorder.Price.astype(int)/100

insertorder

Unnamed: 0,Time,Instrument,Operation,OrderId,Side,Volume,Price,Lifespan
0,0.000000,0.0,Insert,546346379,A,640,1281.0,G
1,0.000000,0.0,Insert,546346383,A,2000,1624.0,G
3,0.000000,0.0,Insert,546346387,A,2000,1625.0,G
4,0.000000,0.0,Insert,546346523,A,8000,1281.0,G
5,0.000000,0.0,Insert,546346551,A,1000,1284.0,G
...,...,...,...,...,...,...,...,...
72527,899.734493,1.0,Insert,576266026,A,15,1271.0,G
72530,899.734521,1.0,Insert,576266034,B,15,1265.0,G
72531,899.734529,1.0,Insert,576266042,B,15,1266.0,G
72532,899.734553,1.0,Insert,576204819,B,2,1266.0,F


In [6]:
# Rebuild Amend orders
template_cols = sample.columns.tolist()
amend_rows = df.loc[df['Operation'] == 'Amend', ]
Amendorder = (
    amend_rows[template_cols]
    .drop(columns=['Instrument'])
    # every Amend row is written out with Lifespan 'G'
    .assign(Lifespan='G')
    # Instrument is taken from the Insert row with the same OrderId
    .merge(insertorder[['OrderId', 'Instrument']], on='OrderId', how='left')
)
Amendorder = Amendorder[template_cols]

Amendorder

Unnamed: 0,Time,Instrument,Operation,OrderId,Side,Volume,Price,Lifespan
0,6.121116,0.0,Amend,546636311,,-20000,,G
1,13.499495,0.0,Amend,546859979,,-20000,,G
2,16.688915,0.0,Amend,547318003,,-20000,,G
3,19.579683,0.0,Amend,547398683,,-20000,,G
4,22.460849,0.0,Amend,547424439,,-20000,,G
...,...,...,...,...,...,...,...,...
221,889.995073,0.0,Amend,575011639,,-20000,,G
222,890.033756,0.0,Amend,575762643,,-20000,,G
223,890.425079,0.0,Amend,575764475,,-20000,,G
224,896.740216,0.0,Amend,575962111,,-1000,,G


In [7]:
# Stack the three rebuilt pieces, put the columns in template order and sort
# by time; within one timestamp Operation is sorted descending, i.e.
# Insert, then Cancel, then Amend.
column_order = sample.columns.tolist()
finaldata = (
    pd.concat([insertorder, Amendorder, cancelorder])
    .loc[:, column_order]
    .sort_values(by=['Time', 'Operation'], ascending=[True, False])
)
# Instrument arrives as a float after the merges; write it out as an integer string.
finaldata['Instrument'] = finaldata['Instrument'].astype(int).astype(str)

finaldata.to_csv(target + '_competitor_orders.csv', index=False)
finaldata

Unnamed: 0,Time,Instrument,Operation,OrderId,Side,Volume,Price,Lifespan
0,0.000000,0,Insert,546346379,A,640.0,1281.0,G
1,0.000000,0,Insert,546346383,A,2000.0,1624.0,G
3,0.000000,0,Insert,546346387,A,2000.0,1625.0,G
4,0.000000,0,Insert,546346523,A,8000.0,1281.0,G
5,0.000000,0,Insert,546346551,A,1000.0,1284.0,G
...,...,...,...,...,...,...,...,...
230663,899.943216,1,Cancel,7094366,,,,G
230664,899.943216,1,Cancel,7094367,,,,G
469350,899.950413,0,Insert,576140035,B,1000.0,1253.0,G
469351,899.959912,0,Insert,576140179,B,1000.0,1264.0,G


## 查看两个队伍下单时间之间的差别

In [8]:
# Export the raw event rows of two competitors to separate CSV files so their
# order timing can be compared.
competitor01 = '10L_120212'
competitor02 = 'AlphaPL_411325'
temp = pd.read_csv(target + '_events.csv')
for team in (competitor01, competitor02):
    team_rows = temp.loc[temp.Competitor == team, ]
    team_rows.to_csv(target + team + 'orders.csv', index=False)

In [9]:
# Display the prefix that the per-competitor export file names are built from;
# `target` and the competitor name are joined without any separator.
target+competitor01

'match48810L_120212'