In [6]:
# new data
import json
import pandas as pd

# Path to your file
file_path = "/home/asevlad/new_data_14.json"

# Read line by line
records = []
with open(file_path, "r") as f:
    for line in f:
        wallet, trade = json.loads(line)
        trade['wallet'] = wallet  # add wallet to the dict
        records.append(trade)

# Convert to DataFrame
df_new = pd.DataFrame(records)

# Show the first few rows
df_new

Unnamed: 0,coin,px,sz,side,time,startPosition,dir,closedPnl,hash,oid,crossed,fee,tid,cloid,feeToken,wallet,builderFee,liquidation
0,ETH,2511.2,0.486,B,1748183573109,-946.9372,Close Short,-2.5515,0x71fe8c1aa48f3a43971504242beb2502023600245b77...,97128733943,False,-0.036613,995829841758483,0x00000000000000002bca2ffb27459396,USDC,0xe3b6e3443c8f2080704e7421bad9340f13950acb,,
1,ETH,2511.2,0.486,A,1748183573109,0.486,Close Long,10.5462,0x71fe8c1aa48f3a43971504242beb2502023600245b77...,97128568129,True,0.649275,995829841758483,,USDC,0xb82f1e1aae992c7e617a6783daa08e1f6e55e6be,0.122044,
2,XRP,2.2978,103.0,B,1748183744120,-30998.0,Close Short,0.775899,0x00000000000000000000000000000000000000000000...,97128751935,False,-0.002366,824984087202891,0x00000000000000000000019707dd031d,USDC,0x8e80c4b533dd977cf716b5c24fd9223129272804,,
3,XRP,2.2978,103.0,A,1748183744120,-29681.0,Open Short,0.0,0x00000000000000000000000000000000000000000000...,97128861714,True,0.094669,824984087202891,,USDC,0xbae5e1fd212efdfae8a9ff44d5667c95f10f7902,,
4,@107,38.428,1.18,B,1748183744120,37.46044495,Buy,0.0,0x00000000000000000000000000000000000000000000...,97128861715,True,0.00082599,295005460095865,,HYPE,0xde99036334b052dac34a26ff4e56815d8060641c,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
166589,@107,37.954,1.05,A,1748185199611,67169.04869607,Sell,1.78932,0x18032977cc6d85ea62c404242c355c01ed00d13ea752...,97132359607,False,-0.00003149,857534405650200,0x7e1afb945ede40bc945d82d6da5e9320,HYPE,0xf9109ada2f73c62e9889b45453065f0d99260a2d,,
166590,ONDO,0.91804,272.0,B,1748185199611,-204540.0,Close Short,-1.191904,0x268fda06323e1877049104242c355c01f200c133b78b...,97132364308,True,0.079906,162299875292987,0x00003230323530353235383534333438,USDC,0x5647249079e257329ecb6f46ed41821d3c69e7de,,
166591,ONDO,0.91804,272.0,A,1748185199611,-42270.0,Open Short,0.0,0x268fda06323e1877049104242c355c01f200c133b78b...,97132234733,False,0.0,162299875292987,0x20c267004988831601471ac9b395541b,USDC,0x59891acadc0f8b2bcab10c043ac5fc63628b1716,,
166592,BLAST,0.003127,5921.0,B,1748185199748,-704542.0,Close Short,0.023684,0xc35e13434b30915f85e904242c355d014400bb6fc753...,97132335493,False,-0.000185,99808174810734,,USDC,0xdcac85ecae7148886029c20e661d848a4de99ce2,,


# Features

#### Useful features:
- coin
- px (the execution price)
- sz (The size of the cryptocurrency traded)
- time (executing time)
- side (The market **side** of the trade. `B` = Bid (Buy side), `A` = Ask (Sell side).) (correlated with `dir`)
- wallet (wallet that executed the operation)

#### Maybe useful features:
- dir (Trade direction like `Open Long`, `Close Long`, `Open Short`, `Close Short`, `Buy`, `Sell`, `Long > Short`, `Net Child Vaults`)
- startPosition (The wallet’s position before this trade. Positive value = long position, Negative value = short position)
- closedPnl (Realized profit/loss from the trade)
- oid (Unique identifier for the order) two per transaction for sell person and buy person
- crossed (`True` = Aggressive order (market taker), `False` = Passive order)
- feeToken (The token in which the fee was paid)

#### Maybe not useful:
- hash (The transaction hash)
- fee (Trading fee paid)
- builderFee (A specialized fee)
- tid (Unique Trade ID.)
- cloid
- liquidation (information about the liquidation. I could be info about the person whose wallet were liquidated or ifo about the person whose order was triggered by executing the liquidation of another person)

In [7]:
# check if side correlates with dir

In [9]:
df_new[~df_new["liquidation"].isna()]

Unnamed: 0,coin,px,sz,side,time,startPosition,dir,closedPnl,hash,oid,crossed,fee,tid,cloid,feeToken,wallet,builderFee,liquidation
20258,HYPE,38.501,2.63,B,1748183818596,-7.96,Close Short,-8.51331,0x07834d8c34f320d014f404242bf05701010092bb505b...,97129050816,True,0.043742,492149147660631,,USDC,0x91c04663019c213865faa19cc2007f1afcdbfb72,,{'liquidatedUser': '0x91c04663019c213865faa19c...
20259,HYPE,38.501,2.63,A,1748183818596,728.52,Close Long,1.474378,0x07834d8c34f320d014f404242bf05701010092bb505b...,97127595287,False,0.00324,492149147660631,,USDC,0x493364de7f4a39ed24f9c68ece229973f324d369,,{'liquidatedUser': '0x91c04663019c213865faa19c...
20260,HYPE,38.502,5.33,B,1748183818596,-5.33,Close Short,-17.25854,0x07834d8c34f320d014f404242bf05701010092bb505b...,97129050816,True,0.088653,825545611410633,,USDC,0x91c04663019c213865faa19cc2007f1afcdbfb72,,{'liquidatedUser': '0x91c04663019c213865faa19c...
20261,HYPE,38.502,5.33,A,1748183818596,150.0,Close Long,9.594,0x07834d8c34f320d014f404242bf05701010092bb505b...,96849264764,False,0.024625,825545611410633,,USDC,0xe8e2eed844d827dd449cc103de7bc6467457a69b,,{'liquidatedUser': '0x91c04663019c213865faa19c...
20676,HYPE,38.546,0.86,B,1748183821501,-0.86,Close Short,-9.09192,0x3ae3abaf306d5c3c5e5604242bf07b010200140417bc...,97129058683,True,0.014917,778886468968895,,USDC,0x5c5a37e4ee62af10e42e9a961d58796715422a90,,{'liquidatedUser': '0x5c5a37e4ee62af10e42e9a96...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95731,TAO,413.23,1.903,A,1748184450498,1.903,Close Long,-98.51831,0x5250b746166516a3492204242c0f98010800002989d3...,97130562464,True,0.339714,413278255017040,,USDC,0x252b0d1c7077894e5ba58b22c83118b26805e6b5,,{'liquidatedUser': '0x252b0d1c7077894e5ba58b22...
98582,VINE,0.042967,5867.0,B,1748184467079,-674415.0,Close Short,6.412631,0xae7c44bec1dce6d3dd0c04242c1076014900e322540e...,97130631209,False,0.0,234323492868977,,USDC,0x31ca8395cf837de08b24da3f660e77761dfb974b,,{'liquidatedUser': '0x98686732059cb99421e16055...
98583,VINE,0.042967,5867.0,A,1748184467079,5867.0,Close Long,-48.091799,0xae7c44bec1dce6d3dd0c04242c1076014900e322540e...,97130633082,True,0.113439,234323492868977,,USDC,0x98686732059cb99421e16055769b0cd2f2795268,,{'liquidatedUser': '0x98686732059cb99421e16055...
138680,PURR,0.33431,2086.0,B,1748184849753,-2086.0,Close Short,-143.404156,0xda781a5f4e62c615541904242c23ad01ef00ee4f9cde...,97131630473,True,0.313816,303593767265141,,USDC,0x884ead38e5e6132e20ac28bf2b21244c2a6c409b,,{'liquidatedUser': '0x884ead38e5e6132e20ac28bf...


In [10]:
df_new.head()

Unnamed: 0,coin,px,sz,side,time,startPosition,dir,closedPnl,hash,oid,crossed,fee,tid,cloid,feeToken,wallet,builderFee,liquidation
0,ETH,2511.2,0.486,B,1748183573109,-946.9372,Close Short,-2.5515,0x71fe8c1aa48f3a43971504242beb2502023600245b77...,97128733943,False,-0.036613,995829841758483,0x00000000000000002bca2ffb27459396,USDC,0xe3b6e3443c8f2080704e7421bad9340f13950acb,,
1,ETH,2511.2,0.486,A,1748183573109,0.486,Close Long,10.5462,0x71fe8c1aa48f3a43971504242beb2502023600245b77...,97128568129,True,0.649275,995829841758483,,USDC,0xb82f1e1aae992c7e617a6783daa08e1f6e55e6be,0.122044,
2,XRP,2.2978,103.0,B,1748183744120,-30998.0,Close Short,0.775899,0x00000000000000000000000000000000000000000000...,97128751935,False,-0.002366,824984087202891,0x00000000000000000000019707dd031d,USDC,0x8e80c4b533dd977cf716b5c24fd9223129272804,,
3,XRP,2.2978,103.0,A,1748183744120,-29681.0,Open Short,0.0,0x00000000000000000000000000000000000000000000...,97128861714,True,0.094669,824984087202891,,USDC,0xbae5e1fd212efdfae8a9ff44d5667c95f10f7902,,
4,@107,38.428,1.18,B,1748183744120,37.46044495,Buy,0.0,0x00000000000000000000000000000000000000000000...,97128861715,True,0.00082599,295005460095865,,HYPE,0xde99036334b052dac34a26ff4e56815d8060641c,,


In [11]:
df_new["crossed"] = df_new["crossed"].astype("boolean")

In [12]:
df_new["time"] = pd.to_datetime(df_new["time"], unit="ms")

df_new["crossed"] = df_new["crossed"].astype("boolean")

df_new["px"] = df_new["px"].astype("float")
df_new["sz"] = df_new["sz"].astype("float")
df_new["startPosition"] = df_new["startPosition"].astype("float")
df_new["closedPnl"] = df_new["closedPnl"].astype("float")
df_new["fee"] = df_new["fee"].astype("float")
df_new["builderFee"] = df_new["builderFee"].astype("float")

df_new["oid"] = df_new["oid"].astype("int")
df_new["tid"] = df_new["tid"].astype("int")

In [13]:
df_new.info()
# cloid, builderFee, liquidation could be NULL

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 166594 entries, 0 to 166593
Data columns (total 18 columns):
 #   Column         Non-Null Count   Dtype         
---  ------         --------------   -----         
 0   coin           166594 non-null  object        
 1   px             166594 non-null  float64       
 2   sz             166594 non-null  float64       
 3   side           166594 non-null  object        
 4   time           166594 non-null  datetime64[ns]
 5   startPosition  166594 non-null  float64       
 6   dir            166594 non-null  object        
 7   closedPnl      166594 non-null  float64       
 8   hash           166594 non-null  object        
 9   oid            166594 non-null  int64         
 10  crossed        166594 non-null  boolean       
 11  fee            166594 non-null  float64       
 12  tid            166594 non-null  int64         
 13  cloid          77877 non-null   object        
 14  feeToken       166594 non-null  object        
 15  

In [14]:
df_new.dtypes

coin                     object
px                      float64
sz                      float64
side                     object
time             datetime64[ns]
startPosition           float64
dir                      object
closedPnl               float64
hash                     object
oid                       int64
crossed                 boolean
fee                     float64
tid                       int64
cloid                    object
feeToken                 object
wallet                   object
builderFee              float64
liquidation              object
dtype: object

In [15]:
df_new[df_new["hash"] == "0x0000000000000000000000000000000000000000000000000000000000000000"]
# 27426 out of 166594 has has 0x0000...0000. This is 16.46% out of all transactions

Unnamed: 0,coin,px,sz,side,time,startPosition,dir,closedPnl,hash,oid,crossed,fee,tid,cloid,feeToken,wallet,builderFee,liquidation
2,XRP,2.29780,103.00000,B,2025-05-25 14:35:44.120,-30998.000000,Close Short,0.775899,0x00000000000000000000000000000000000000000000...,97128751935,False,-0.002366,824984087202891,0x00000000000000000000019707dd031d,USDC,0x8e80c4b533dd977cf716b5c24fd9223129272804,,
3,XRP,2.29780,103.00000,A,2025-05-25 14:35:44.120,-29681.000000,Open Short,0.000000,0x00000000000000000000000000000000000000000000...,97128861714,True,0.094669,824984087202891,,USDC,0xbae5e1fd212efdfae8a9ff44d5667c95f10f7902,,
4,@107,38.42800,1.18000,B,2025-05-25 14:35:44.120,37.460445,Buy,0.000000,0x00000000000000000000000000000000000000000000...,97128861715,True,0.000826,295005460095865,,HYPE,0xde99036334b052dac34a26ff4e56815d8060641c,,
5,@107,38.42800,1.18000,A,2025-05-25 14:35:44.120,908.817533,Sell,1.271512,0x00000000000000000000000000000000000000000000...,97128674303,False,0.009522,295005460095865,,USDC,0x4c226d5a21ae3b9883f98cbda1a3be63e565f308,,
1262,HYPE,38.47300,2.98000,B,2025-05-25 14:35:45.933,16002.860000,Open Long,0.000000,0x00000000000000000000000000000000000000000000...,97128863460,True,0.036687,926265636395986,,USDC,0x290ab3128a03835b8383be688bec64ea993ff539,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
166557,@142,107412.00000,0.00017,A,2025-05-25 14:59:59.006,1.227704,Sell,-0.179509,0x00000000000000000000000000000000000000000000...,97132363505,True,0.010226,722013394781947,,USDC,0x563bf0731cf71ad7cea05b9c48a802d8c53e7dc4,,
166558,@163,0.86482,44.55000,B,2025-05-25 14:59:59.006,1568.561238,Buy,0.000000,0x00000000000000000000000000000000000000000000...,97132363506,True,0.031185,890822400389376,,FUND,0x6635159add6b4c53fddf64002c2db7905ff52e7c,,
166559,@163,0.86482,44.55000,A,2025-05-25 14:59:59.006,169170.460000,Sell,22.759369,0x00000000000000000000000000000000000000000000...,97132361480,False,0.000000,890822400389376,,USDC,0xffffffffffffffffffffffffffffffffffffffff,,
166560,@107,37.95400,8.32000,B,2025-05-25 14:59:59.006,25148.709793,Buy,0.000000,0x00000000000000000000000000000000000000000000...,97132363507,True,0.003328,871266623867994,,HYPE,0x2ac1fa38a82738c3875b3ac385fbade067e3d123,,


In [16]:
# share of market by coins
df_new["coin"].value_counts() / df_new["coin"].value_counts().sum()

coin
HYPE    0.280766
@107    0.098623
BTC     0.077698
SOL     0.037324
ETH     0.036136
          ...   
@122    0.000012
@148    0.000012
@11     0.000012
@12     0.000012
@61     0.000012
Name: count, Length: 263, dtype: float64

In [20]:
df_new[df_new["side"] == "B"]["dir"].value_counts()

dir
Open Long           32154
Close Short         29339
Buy                 20688
Short > Long         1054
Net Child Vaults       62
Name: count, dtype: int64

In [21]:
df_new["dir"].value_counts()

dir
Open Long           32154
Close Long          31898
Open Short          29560
Close Short         29339
Buy                 20688
Sell                20688
Long > Short         1089
Short > Long         1054
Net Child Vaults      124
Name: count, dtype: int64

In [1]:
import json
import pandas as pd

file_path = "/home/asevlad/old_data_19_small_part.json"
records = []

with open(file_path) as f:
    for line in f:
        trade = json.loads(line)
        records.append(trade)

df_old = pd.DataFrame(records)
df_old.head()


Unnamed: 0,coin,side,time,px,sz,hash,trade_dir_override,side_info
0,TURBO,B,2025-03-25T18:59:59.950677625,0.002467,16809.0,0x4f3245a0bdf08f688efb0420427ae901a4003adf3ad5...,Na,[{'user': '0x31ca8395cf837de08b24da3f660e77761...
1,HYPE,B,2025-03-25T19:00:00.045215016,16.071,0.8,0x00000000000000000000000000000000000000000000...,Na,[{'user': '0x2d689f316cd3b7a0faa03f341ef0137fb...
2,ETH,B,2025-03-25T19:00:00.134682625,2061.5,0.0052,0xba3a24d893fefec058c90420427aec0194007adf533f...,Na,[{'user': '0xb2aa9c1f85fc6db0a07ec0485a9e10c24...
3,ETH,A,2025-03-25T19:00:00.134682625,2061.4,0.0057,0x68f892fe5611d65786900420427aec01980083911b50...,Na,[{'user': '0x5e6a7450ee1eb7719bdcba90c29c3c24c...
4,ETH,A,2025-03-25T19:00:00.134682625,2061.4,0.0064,0x68f892fe5611d65786900420427aec01980083911b50...,Na,[{'user': '0x6ecba7527448bb56caba8ca7768d271de...


In [4]:
df_old["time"] = pd.to_datetime(df_old["time"], unit="ms")

df_new["crossed"] = df_new["crossed"].astype("boolean")

df_new["px"] = df_new["px"].astype("float")
df_new["sz"] = df_new["sz"].astype("float")
df_new["startPosition"] = df_new["startPosition"].astype("float")
df_new["closedPnl"] = df_new["closedPnl"].astype("float")
df_new["fee"] = df_new["fee"].astype("float")
df_new["builderFee"] = df_new["builderFee"].astype("float")

df_new["oid"] = df_new["oid"].astype("int")
df_new["tid"] = df_new["tid"].astype("int")

ValueError: non convertible value 2025-03-25T18:59:59.950677625 with the unit 'ms', at position 0

In [5]:
df_new

NameError: name 'df_new' is not defined