In [15]:
pip install mongopy

Collecting mongopy
  Downloading mongopy-0.01.tar.gz (3.8 kB)
  Preparing metadata (setup.py) ... [?25ldone
[?25hCollecting nose (from mongopy)
  Downloading nose-1.3.7-py3-none-any.whl.metadata (1.7 kB)
Downloading nose-1.3.7-py3-none-any.whl (154 kB)
Building wheels for collected packages: mongopy
  Building wheel for mongopy (setup.py) ... [?25ldone
[?25h  Created wheel for mongopy: filename=mongopy-0.1-py3-none-any.whl size=2988 sha256=08b89186cbb389a7721274f852709e0b88cca68ae4cd2bd09de140bcdeab0b31
  Stored in directory: /Users/samahita/Library/Caches/pip/wheels/c6/40/d5/9fd62fe0098ab4a90fb61eabeaffa7221bf18c86c7e0685a2f
Successfully built mongopy
Installing collected packages: nose, mongopy
Successfully installed mongopy-0.1 nose-1.3.7
Note: you may need to restart the kernel to use updated packages.


In [None]:
import os
from dotenv import load_dotenv
from pymongo import MongoClient
import pandas as pd

# Load variables from .env
load_dotenv()

# Get values from environment
MONGO_URI = os.getenv("MONGO_URI")
MONGO_DB = os.getenv("MONGO_DB")
MONGO_COLLECTION = os.getenv("MONGO_COLLECTION")

# Example connection
client = MongoClient(MONGO_URI)

# Choose DB and collection
db = client[MONGO_DB]
collection = db[MONGO_COLLECTION]


In [19]:
# Fetch N documents (e.g., 100 for testing)
cursor = collection.find().limit(100)

# Convert to DataFrame
df = pd.DataFrame(list(cursor))

# Convert block_time if stored as ISO string
if "block_time" in df.columns:
    df["block_time"] = pd.to_datetime(df["block_time"])

print(df.head())


                        _id  block_number          block_time  \
0  68811f156e21c4e4792e8d64      22983426 2025-07-23 17:42:35   
1  68811f166e21c4e4792e8d65      22983426 2025-07-23 17:42:35   
2  68811f1e6e21c4e4792e8d66      22983426 2025-07-23 17:42:35   
3  68811f206e21c4e4792e8d67      22983426 2025-07-23 17:42:35   
4  68811f226e21c4e4792e8d68      22983426 2025-07-23 17:42:35   

                                             tx_hash  \
0  a9bad274ed42d321b2f0abf147d2e61f22a9611ed78272...   
1  4196e4299599f6e8fad16abb4ea8ee3f3cc39ac4cb5015...   
2  24ba43651a8be89131a585af965b4338ef6374a146aff5...   
3  70b33b2905672b5bdfdade0be7f0393eab96055400aac8...   
4  444dd322cf9f002bd0dfb89ade994675e4db4f2fc39479...   

                                         from  \
0  0xb58555FCBa6479FcED7dE1485eB054943a09af7b   
1  0xa7565354851c34cffc94F024867bfE814FA5f3c0   
2  0xa7565354851c34cffc94F024867bfE814FA5f3c0   
3  0xb58555FCBa6479FcED7dE1485eB054943a09af7b   
4  0x9E1c7Cc9f4EBF41ef131cC

In [93]:

columns_to_keep = [
    "block_number", "block_time", "tx_hash", "from", "to", 
    "gas_price", "max_fee_per_gas", "max_priority_fee_per_gas", 
    "nonce", "type", "status", "function"
]

# This will keep only available columns and add missing ones as NaN
df = df.reindex(columns=columns_to_keep)


df.rename(columns={"hash": "tx_hash"}, inplace=True)

print(df.head()) 


   block_number          block_time  \
0      22983426 2025-07-23 17:42:35   
1      22983426 2025-07-23 17:42:35   
2      22983426 2025-07-23 17:42:35   
3      22983426 2025-07-23 17:42:35   
4      22983426 2025-07-23 17:42:35   

                                             tx_hash  \
0  a9bad274ed42d321b2f0abf147d2e61f22a9611ed78272...   
1  4196e4299599f6e8fad16abb4ea8ee3f3cc39ac4cb5015...   
2  24ba43651a8be89131a585af965b4338ef6374a146aff5...   
3  70b33b2905672b5bdfdade0be7f0393eab96055400aac8...   
4  444dd322cf9f002bd0dfb89ade994675e4db4f2fc39479...   

                                         from  \
0  0xb58555FCBa6479FcED7dE1485eB054943a09af7b   
1  0xa7565354851c34cffc94F024867bfE814FA5f3c0   
2  0xa7565354851c34cffc94F024867bfE814FA5f3c0   
3  0xb58555FCBa6479FcED7dE1485eB054943a09af7b   
4  0x9E1c7Cc9f4EBF41ef131cCe3B8888EC8E0204Ecd   

                                           to  gas_price  max_fee_per_gas  \
0  0x00000000003b3cc22aF3aE1EAc0440BcEe416B40   0.000000

In [108]:
print(df.columns.tolist())
df["function"] = df["function"].fillna("unknown")
from sklearn.preprocessing import LabelEncoder

func_encoder = LabelEncoder()
df["function_encoded"] = func_encoder.fit_transform(df["function"])


['block_number', 'block_time', 'tx_hash', 'from', 'to', 'gas_price', 'max_fee_per_gas', 'max_priority_fee_per_gas', 'nonce', 'type', 'status', 'function', 'confirmation_delay', 'effective_gas_fee', 'block_utilization']


In [110]:
feature_cols = [
    "gas_price", "effective_gas_fee", "block_utilization", 
    "nonce", "confirmation_delay", "function_encoded"
]
target_col = "gas_price"   


scaler_X = MinMaxScaler()
scaler_y = MinMaxScaler()

df[feature_cols] = scaler_X.fit_transform(df[feature_cols])
df[[target_col]] = scaler_y.fit_transform(df[[target_col]])


In [113]:
import torch
from torch.utils.data import Dataset, DataLoader

class TxDataset(Dataset):
    def __init__(self, df, feature_cols, target_col, window_size=10):
        self.features = df[feature_cols].values
        self.targets = df[target_col].values
        self.window_size = window_size

    def __len__(self):
        return len(self.features) - self.window_size

    def __getitem__(self, idx):
        X = self.features[idx:idx+self.window_size]
        y = self.targets[idx+self.window_size]
        return torch.tensor(X, dtype=torch.float32), torch.tensor(y, dtype=torch.float32)

# Create dataset & loader
dataset = TxDataset(df, feature_cols, target_col, window_size=10)
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)


In [133]:
import torch
import torch.nn as nn

class AttentionLSTM(nn.Module):
    def __init__(self, input_dim, hidden_dim=64, num_layers=2):
        super(AttentionLSTM, self).__init__()
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True, bidirectional=True)
        self.attention = nn.Linear(hidden_dim*2, 1)   
        self.fc = nn.Linear(hidden_dim*2, 1)

    def forward(self, x):
        lstm_out, _ = self.lstm(x)               
        weights = torch.softmax(self.attention(lstm_out), dim=1)  
        context = torch.sum(weights * lstm_out, dim=1)            
        return out


In [135]:
import torch.optim as optim

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = AttentionLSTM(input_dim=len(feature_cols)).to(device)

criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)


epochs = 20
for epoch in range(epochs):
    for X, y in dataloader:
        X, y = X.to(device), y.to(device).unsqueeze(1) 

        optimizer.zero_grad()
        y_pred = model(X)
        loss = criterion(y_pred, y)
        loss.backward()
        optimizer.step()

    print(f"Epoch {epoch+1}/{epochs}, Loss: {loss.item():.6f}")


Epoch 1/20, Loss: 0.001180
Epoch 2/20, Loss: 0.001528
Epoch 3/20, Loss: 0.000720
Epoch 4/20, Loss: 0.000962
Epoch 5/20, Loss: 0.000696
Epoch 6/20, Loss: 0.001656
Epoch 7/20, Loss: 0.000731
Epoch 8/20, Loss: 0.000846
Epoch 9/20, Loss: 0.001185
Epoch 10/20, Loss: 0.000533
Epoch 11/20, Loss: 0.001734
Epoch 12/20, Loss: 0.001276
Epoch 13/20, Loss: 0.000390
Epoch 14/20, Loss: 0.000908
Epoch 15/20, Loss: 0.000725
Epoch 16/20, Loss: 0.000295
Epoch 17/20, Loss: 0.001253
Epoch 18/20, Loss: 0.000466
Epoch 19/20, Loss: 0.000441
Epoch 20/20, Loss: 0.000773


In [137]:
model.eval()
with torch.no_grad():
    sample_X, _ = dataset[0]  # first sequence
    sample_X = sample_X.unsqueeze(0).to(device)  # add batch dim
    prediction = model(sample_X).cpu().numpy()

    # inverse transform to original gas price scale
    predicted_price = scaler_y.inverse_transform(prediction)
    print("Predicted Next Gas Price:", predicted_price)


Predicted Next Gas Price: [[0.10055843]]


In [None]:
#accuracy write function
#difference between alstm and bi
#providing graph on simulation and testing for the users 

In [69]:
import torch.nn as nn

class LSTMModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_layers, output_dim):
        super(LSTMModel, self).__init__()
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        out, _ = self.lstm(x)
        out = out[:, -1, :]   
        out = self.fc(out)
        return out


input_dim = len(feature_cols)   
hidden_dim = 128
num_layers = 4
output_dim = 1

model = LSTMModel(input_dim, hidden_dim, num_layers, output_dim)
print(model)


LSTMModel(
  (lstm): LSTM(3, 128, num_layers=4, batch_first=True)
  (fc): Linear(in_features=128, out_features=1, bias=True)
)


In [73]:
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.0005) 
epochs = 10
for epoch in range(epochs):
    epoch_loss = 0
    for X, y in dataloader:
        optimizer.zero_grad()
        outputs = model(X)

        # reshape to same dimension
        outputs = outputs.view(-1)
        y = y.view(-1)

        loss = criterion(outputs, y)
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()

    print(f"Epoch {epoch+1}, Loss: {epoch_loss/len(dataloader):.4f}")


Epoch 1, Loss: 0.2416
Epoch 2, Loss: 0.2387
Epoch 3, Loss: 0.2342
Epoch 4, Loss: 0.2387
Epoch 5, Loss: 0.2381
Epoch 6, Loss: 0.2305
Epoch 7, Loss: 0.2351
Epoch 8, Loss: 0.2382
Epoch 9, Loss: 0.2297
Epoch 10, Loss: 0.2331
