In [1]:
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split

In [3]:
# Read the raw rounds export into a DataFrame
file_path = 'rounds.csv'  # Point this at your own copy of the file
bnb_data = pd.read_csv(filepath_or_buffer=file_path)

# Sanity check: print the first five rows
print(bnb_data.head(n=5))

   epoch  startTimestamp  lockTimestamp  closeTimestamp    lockPrice  \
0      1      1629927751     1629928051      1629928363  50081000000   
1      2      1629928063     1629928363      1629928672  50027000000   
2      3      1629928372     1629928672      1629928984  50208000000   
3      4      1629928684     1629928984      1629929293  50399415626   
4      5      1629928993     1629929293      1629929602  50143000000   

    closePrice          lockOracleId         closeOracleId totalAmount  \
0  50027000000  18446744073709807007  18446744073709807020           0   
1  50208000000  18446744073709807020  18446744073709807033           0   
2  50399415626  18446744073709807033  18446744073709807045           0   
3  50143000000  18446744073709807045  18446744073709807060           0   
4  50222000000  18446744073709807060  18446744073709807071           0   

  bullAmount bearAmount rewardBaseCalAmount rewardAmount  oracleCalled  \
0          0          0                   0     

  bnb_data = pd.read_csv(file_path)


In [4]:
# Check the column data types and non-null counts.
# DataFrame.info() writes its report to stdout itself and returns None, so
# wrapping it in print() would only append a stray "None" line to the output.
bnb_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 361433 entries, 0 to 361432
Data columns (total 15 columns):
 #   Column               Non-Null Count   Dtype  
---  ------               --------------   -----  
 0   epoch                361433 non-null  int64  
 1   startTimestamp       361433 non-null  int64  
 2   lockTimestamp        361433 non-null  int64  
 3   closeTimestamp       361433 non-null  int64  
 4   lockPrice            361433 non-null  int64  
 5   closePrice           361433 non-null  int64  
 6   lockOracleId         361433 non-null  object 
 7   closeOracleId        361433 non-null  object 
 8   totalAmount          361433 non-null  object 
 9   bullAmount           361433 non-null  object 
 10  bearAmount           361433 non-null  object 
 11  rewardBaseCalAmount  361433 non-null  object 
 12  rewardAmount         361433 non-null  object 
 13  oracleCalled         361433 non-null  bool   
 14  Unnamed: 14          0 non-null       float64
dtypes: bool(1), float

In [17]:
# Re-read the file with low_memory=False so pandas does not parse it in chunks
bnb_data = pd.read_csv(file_path, low_memory=False)

# Cast the amount columns to numeric in one column-wise pass; any value that
# cannot be parsed becomes NaN (errors='coerce')
columns_to_convert = ['totalAmount', 'bullAmount', 'bearAmount', 'rewardBaseCalAmount', 'rewardAmount']
bnb_data[columns_to_convert] = bnb_data[columns_to_convert].apply(pd.to_numeric, errors='coerce')

In [18]:
# Drop rows whose amount fields could not be converted (NaN after coercion),
# then renumber the index so it stays a contiguous 0..n-1 range; dropna keeps
# the original labels, which would leave gaps wherever a row was removed.
bnb_data = bnb_data.dropna(subset=columns_to_convert).reset_index(drop=True)

# Remove the columns that are not carried forward. errors='ignore' keeps this
# cell re-runnable: without it a second run raises KeyError because the
# columns are already gone.
bnb_data = bnb_data.drop(
    columns=['Unnamed: 14', 'oracleCalled', 'lockOracleId', 'closeOracleId'],
    errors='ignore',
)

# Save the cleaned DataFrame to a CSV file (index=False excludes the index column)
bnb_data.to_csv('cleaned.csv', index=False)

# Verify the conversion. info() prints its own report and returns None, so it
# is called directly rather than through print(), which would add a "None" line.
bnb_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 361433 entries, 0 to 361432
Data columns (total 11 columns):
 #   Column               Non-Null Count   Dtype  
---  ------               --------------   -----  
 0   epoch                361433 non-null  int64  
 1   startTimestamp       361433 non-null  int64  
 2   lockTimestamp        361433 non-null  int64  
 3   closeTimestamp       361433 non-null  int64  
 4   lockPrice            361433 non-null  int64  
 5   closePrice           361433 non-null  int64  
 6   totalAmount          361433 non-null  float64
 7   bullAmount           361433 non-null  float64
 8   bearAmount           361433 non-null  float64
 9   rewardBaseCalAmount  361433 non-null  float64
 10  rewardAmount         361433 non-null  float64
dtypes: float64(5), int64(6)
memory usage: 30.3 MB
None


In [19]:
# Label each row with the outcome of the round two rows ahead: True when that
# row's closePrice is greater than its lockPrice, False otherwise.
#
# The comparison is evaluated once for the whole frame and then moved up by two
# positions. This replaces a per-row apply with two .loc lookups per row, and it
# works by position, so it does not require the index labels to be a contiguous
# 0..n-1 range (e.g. after rows have been dropped).
rounds_ahead = 2
round_went_up = (bnb_data['closePrice'] > bnb_data['lockPrice']).tolist()

# The final rows have no row `rounds_ahead` positions later; they get None.
padding = [None] * min(rounds_ahead, len(round_went_up))

bnb_data['2round_price_increased'] = pd.Series(
    round_went_up[rounds_ahead:] + padding,
    index=bnb_data.index,
    dtype=object,  # keep the values as True / False / None without coercion
)

In [25]:
# Show the first five rows of the updated DataFrame
bnb_data.head(n=5)

Unnamed: 0,epoch,startTimestamp,lockTimestamp,closeTimestamp,lockPrice,closePrice,totalAmount,bullAmount,bearAmount,rewardBaseCalAmount,rewardAmount,2round_price_increased
0,1,1629927751,1629928051,1629928363,50081000000,50027000000,0.0,0.0,0.0,0.0,0.0,True
1,2,1629928063,1629928363,1629928672,50027000000,50208000000,0.0,0.0,0.0,0.0,0.0,False
2,3,1629928372,1629928672,1629928984,50208000000,50399415626,0.0,0.0,0.0,0.0,0.0,True
3,4,1629928684,1629928984,1629929293,50399415626,50143000000,0.0,0.0,0.0,0.0,0.0,False
4,5,1629928993,1629929293,1629929602,50143000000,50222000000,0.0,0.0,0.0,0.0,0.0,False


In [26]:
# bnb_data_log = bnb_data.drop(columns=['startTimestamp','lockTimestamp','closeTimestamp'])


In [27]:
# Hold out the last 20% of rows as the test set; shuffle=False keeps the
# original row order, so the split is a simple head/tail cut
train_df, test_df = train_test_split(
    bnb_data,
    test_size=0.2,
    shuffle=False,
    random_state=45,
)

# Report the resulting (rows, columns) of each partition
print(f"Training data shape: {train_df.shape}")
print(f"Testing data shape: {test_df.shape}")

Training data shape: (289146, 12)
Testing data shape: (72287, 12)


In [28]:
# Show the first five rows of the training partition
train_df.head(n=5)

Unnamed: 0,epoch,startTimestamp,lockTimestamp,closeTimestamp,lockPrice,closePrice,totalAmount,bullAmount,bearAmount,rewardBaseCalAmount,rewardAmount,2round_price_increased
0,1,1629927751,1629928051,1629928363,50081000000,50027000000,0.0,0.0,0.0,0.0,0.0,True
1,2,1629928063,1629928363,1629928672,50027000000,50208000000,0.0,0.0,0.0,0.0,0.0,False
2,3,1629928372,1629928672,1629928984,50208000000,50399415626,0.0,0.0,0.0,0.0,0.0,True
3,4,1629928684,1629928984,1629929293,50399415626,50143000000,0.0,0.0,0.0,0.0,0.0,False
4,5,1629928993,1629929293,1629929602,50143000000,50222000000,0.0,0.0,0.0,0.0,0.0,False
