In [1]:
import pandas as pd
import plotly.graph_objects as go
from dateutil.parser import parse

In [2]:
buy_path = r"C:\Users\byqpz\PycharmProjects\数据分析\量化\招财猫量化分析\BTC-USD-200327多头仓位记录.txt"
sell_path = r"C:\Users\byqpz\PycharmProjects\数据分析\量化\招财猫量化分析\BTC-USD-200327空头仓位记录.txt"

In [3]:
buy_records = pd.read_json(buy_path, lines=True)
sell_records = pd.read_json(sell_path, lines=True)

多单交易记录处理

In [4]:
buy_records.head()

Unnamed: 0,time,amount,price,direction
0,01-19 14:54,30张,$9243,买入开多
1,01-19 14:54,50张,$9228,买入开多
2,01-19 14:54,50张,$9238,买入开多
3,01-19 14:54,50张,$9245.31,卖出平多
4,01-19 15:19,30张,$9263.58,买入开多


In [5]:
buy_records['price'] = buy_records['price'].str.replace('$', '').astype(float)
buy_records['amount'] = buy_records['amount'].str.replace('张', '').astype(float)
buy_records['time'] = pd.Series(['2020']*buy_records.shape[0]).str.cat(buy_records['time'], sep='-')
buy_records['time'] = pd.to_datetime(buy_records['time'])

In [6]:
buy_records.head()

Unnamed: 0,time,amount,price,direction
0,2020-01-19 14:54:00,30.0,9243.0,买入开多
1,2020-01-19 14:54:00,50.0,9228.0,买入开多
2,2020-01-19 14:54:00,50.0,9238.0,买入开多
3,2020-01-19 14:54:00,50.0,9245.31,卖出平多
4,2020-01-19 15:19:00,30.0,9263.58,买入开多


In [7]:
# buy_records.to_excel(r"C:\Users\byqpz\PycharmProjects\数据分析\量化\招财猫量化分析\BTC-USD-200327多头仓位记录.xlsx", index=False) 

空单交易记录处理

In [8]:
sell_records['price'] = sell_records['price'].str.replace('$', '').astype(float)
sell_records['amount'] = sell_records['amount'].str.replace('张', '').astype(float)
sell_records['time'] = pd.Series(['2020']*sell_records.shape[0]).str.cat(sell_records['time'], sep='-')
sell_records['time'] = pd.to_datetime(sell_records['time'])

In [9]:
sell_records.head()

Unnamed: 0,time,amount,price,direction
0,2020-01-19 14:47:00,30.0,9285.15,卖出开空
1,2020-01-19 14:47:00,30.0,9280.61,买入平空
2,2020-01-19 14:54:00,50.0,9245.0,卖出开空
3,2020-01-19 14:54:00,30.0,9246.0,卖出开空
4,2020-01-19 15:17:00,30.0,9263.64,卖出开空


现货原始数据处理

In [10]:
def spot_data_processing(path):
    '''okex现货原始数据处理'''
    df = pd.read_json(path, lines=True)
    df.columns = ['timestamp', 'open', 'high', 'low', 'close', 'vol']
    df['timestamp'] = df['timestamp'].map(lambda x: parse(x[:10]+' '+x[11:19]))
    df.set_index('timestamp', inplace=True)
    df.dropna(inplace=True)
    df.drop_duplicates(inplace=True)
    
    return df

In [11]:
spot_path = r"C:\Users\byqpz\PycharmProjects\数据分析\量化\招财猫量化分析\okex_btc_usdt_1min_kline_现货.txt"

In [12]:
spot_data = spot_data_processing(spot_path)

In [13]:
spot_data.head()

Unnamed: 0_level_0,open,high,low,close,vol
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-02-05 15:00:00,9444.2,9448.5,9444.2,9445.4,17.459382
2020-02-05 15:01:00,9445.4,9445.4,9440.5,9441.3,2.404055
2020-02-05 15:02:00,9441.3,9441.3,9431.2,9431.2,30.405252
2020-02-05 15:03:00,9431.0,9437.3,9430.3,9436.4,7.431054
2020-02-05 15:04:00,9436.4,9437.3,9435.4,9435.4,5.850868


期货原始数据处理

In [14]:
def future_data_processing(path):
    '''okex期货原始数据处理'''
    df = pd.read_json(path, lines=True)
    df.columns = ['timestamp', 'open', 'high', 'low', 'close', 'vol1', 'vol2']
    df['timestamp'] = df['timestamp'].map(lambda x: parse(x[:10]+' '+x[11:19]))
    df.set_index('timestamp', inplace=True)
    df.dropna(inplace=True)
    df.drop_duplicates(inplace=True)
    
    return df

In [15]:
future_path = r"C:\Users\byqpz\PycharmProjects\数据分析\量化\招财猫量化分析\okex_btc_usdt_200327_1min_kline_期货.txt"

In [16]:
future_data = future_data_processing(future_path)

In [17]:
future_data

Unnamed: 0_level_0,open,high,low,close,vol1,vol2
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-02-05 15:00:00,9706.76,9714.22,9706.76,9708.36,3386,34.8438
2020-02-05 15:01:00,9708.36,9708.36,9700.76,9702.71,1857,19.1303
2020-02-05 15:02:00,9702.71,9702.71,9691.11,9691.11,2446,25.2131
2020-02-05 15:03:00,9691.11,9699.21,9691.11,9699.20,4089,42.1608
2020-02-05 15:04:00,9699.20,9699.46,9699.20,9699.45,1093,11.2668
...,...,...,...,...,...,...
2020-02-06 06:01:00,9937.02,9938.11,9935.92,9938.11,1897,19.0767
2020-02-06 06:02:00,9938.12,9938.23,9935.57,9938.23,1063,10.6865
2020-02-06 06:03:00,9938.23,9938.69,9937.45,9937.50,2272,22.8484
2020-02-06 06:04:00,9937.50,9939.04,9930.00,9930.00,5337,53.6977


# 1. 多单分析

## 1.1 合约张数唯一值计数（频数+百分比）

In [18]:
freq = buy_records['amount'].value_counts()
norm = buy_records['amount'].value_counts(normalize=True)
result = pd.concat([freq, norm], axis=1, keys=['频数', '百分比'])
result.index.name = '合约张数'

In [19]:
result

Unnamed: 0_level_0,频数,百分比
合约张数,Unnamed: 1_level_1,Unnamed: 2_level_1
30.0,968,0.712813
50.0,261,0.192194
80.0,22,0.0162
19.0,8,0.005891
4.0,7,0.005155
23.0,7,0.005155
8.0,6,0.004418
26.0,6,0.004418
12.0,5,0.003682
10.0,5,0.003682


数量为30和50的合约占所有多单合约数的90.5%

In [20]:
0.712813 + 0.192194

0.905007

## 1.2 操作统计

In [21]:
buy_records['direction'].value_counts()

卖出平多    705
买入开多    653
Name: direction, dtype: int64

## 1.3 多单开单点标注

In [35]:
fig = go.Figure(layout = {'title': {'text': '多单开单点标注（期货）', 'xref': 'paper', 'x': 0.5, 'font_size': 22},
                          'template': 'plotly_white', 
                          'xaxis_range': ['2020-02-05 15:00:00', '2020-02-06 06:05:00'],
                          'xaxis_rangeslider_visible': False,
                          })

In [36]:
fig.add_traces([go.Candlestick(x = future_data.index,
                               open = future_data['open'],
                               high = future_data['high'],
                               low = future_data['low'],
                               close = future_data['close'],
                               name = 'K线'),
                
                go.Scatter(x = buy_records['time'],
                           y = buy_records['price'],
                           mode = 'markers',
                           marker_size = 5,
                           marker_color = 'rgb(106,137,204)',
                           name = '多单开单点标注（期货）')
               ])

# 2 空单分析

## 2.1 合约张数唯一值计数（频数+百分比）

In [24]:
freq = sell_records['amount'].value_counts()
norm = sell_records['amount'].value_counts(normalize=True)
result = pd.concat([freq, norm], axis=1, keys=['频数', '百分比'])
result.index.name = '合约张数'

In [25]:
result

Unnamed: 0_level_0,频数,百分比
合约张数,Unnamed: 1_level_1,Unnamed: 2_level_1
30.0,1120,0.716113
50.0,313,0.200128
80.0,21,0.013427
18.0,9,0.005754
2.0,8,0.005115
8.0,8,0.005115
7.0,8,0.005115
4.0,7,0.004476
22.0,6,0.003836
6.0,6,0.003836


数量为30和50的合约占所有多单合约数的91.6%

In [26]:
0.716113 + 0.200128

0.916241

## 2.2 操作统计

In [27]:
sell_records['direction'].value_counts()

买入平空    799
卖出开空    765
Name: direction, dtype: int64

## 2.3 空单开单标注

In [39]:
fig2 = go.Figure(layout = {'title': {'text': '空单开单点标注（期货）', 'xref': 'paper', 'x': 0.5, 'font_size': 22},
                           'template': 'plotly_white', 
                           'xaxis_range': ['2020-02-05 15:00:00', '2020-02-06 06:05:00'],
                           'xaxis_rangeslider_visible': False,
                           })

In [40]:
fig2.add_traces([go.Candlestick(x = future_data.index,
                                open = future_data['open'],
                                high = future_data['high'],
                                low = future_data['low'],
                                close = future_data['close'],
                                name = 'K线'),
                
                 go.Scatter(x = sell_records['time'],
                            y = sell_records['price'],
                            mode = 'markers',
                            marker_size = 5,
                            marker_color = 'rgb(139, 0, 139)',
                            name = '空单开单点标注（期货）')
                ])