In [None]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score
from google.oauth2 import service_account
from pandas_gbq import read_gbq

# Prefer CUDA when PyTorch can see a GPU; otherwise run everything on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

# Service-account credentials for Google Cloud, read from a local key file.
credentials = service_account.Credentials.from_service_account_file('service_account.json')

# Function to load data from Google BigQuery
def load_data(query: str, credentials):
    """
    Run a SQL query against Google BigQuery and return the result set.

    Parameters:
        query (str): SQL statement to execute.
        credentials: Google authentication credentials forwarded to
            ``read_gbq``.

    Returns:
        pd.DataFrame: Whatever ``read_gbq`` returns for the query.
    """
    result_frame = read_gbq(query, credentials=credentials)
    return result_frame

# Function to preprocess the data
def preprocess_data(df: pd.DataFrame):
    """
    Preprocess the transaction log data for model training.

    Rows containing any missing value are dropped, the ``label`` column is
    integer-encoded with ``LabelEncoder``, and every other column is
    standardised with ``StandardScaler``. The frame passed in is not
    modified.

    Parameters:
        df (pd.DataFrame): Raw transaction data. Must contain a ``label``
            column. All remaining columns are handed to ``StandardScaler``,
            so they are assumed to be numeric (TODO confirm against the
            query schema).

    Returns:
        tuple: Processed feature tensor, label tensor, label encoder

    Raises:
        KeyError: If ``df`` has no ``label`` column.
        ValueError: If no rows are left after dropping missing values.
    """
    # dropna() without inplace returns a new frame, so the caller's data is
    # left exactly as it was handed in.
    clean = df.dropna()
    raw_labels = clean['label']
    if clean.empty:
        raise ValueError("No rows left to preprocess after dropping missing values")

    # Encode the target into integer class ids without writing it back into
    # the frame.
    label_encoder = LabelEncoder()
    labels = label_encoder.fit_transform(raw_labels)

    # Feature scaling.
    # NOTE(review): the scaler is fitted on every row passed in; if the data
    # is split into train/test afterwards, test rows influence the scaling.
    scaler = StandardScaler()
    features = clean.drop(columns=['label'])
    features_scaled = scaler.fit_transform(features)

    # Convert to tensors on the module-level device.
    X_tensor = torch.tensor(features_scaled, dtype=torch.float32).to(device)
    y_tensor = torch.tensor(labels, dtype=torch.long).to(device)

    return X_tensor, y_tensor, label_encoder

# Define Transformer-based classification model
class TransactionClassifier(nn.Module):
    """
    Transformer-based model for classifying transactions as malicious or benign.

    Each input row is linearly projected to ``d_model`` dimensions, passed
    through a Transformer encoder as a sequence of length one, and mapped to
    one score per class.

    ``forward`` returns raw, unnormalised class scores (logits). Losses such
    as ``nn.CrossEntropyLoss`` apply log-softmax themselves, so feeding them
    already-normalised probabilities flattens the gradients. Use
    ``predict_proba`` when probabilities are needed; ``argmax`` gives the
    same class on either output.

    Parameters:
        input_dim (int): Number of features per transaction.
        num_classes (int): Number of target classes.
        d_model (int): Width of the embedding and encoder layers.
        nhead (int): Number of attention heads; must divide ``d_model``.
        dim_feedforward (int): Hidden width of each encoder feed-forward block.
        num_layers (int): Number of stacked encoder layers.
    """
    def __init__(self, input_dim, num_classes, d_model=128, nhead=8,
                 dim_feedforward=256, num_layers=2):
        super().__init__()
        self.embedding = nn.Linear(input_dim, d_model)
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=d_model, nhead=nhead, dim_feedforward=dim_feedforward
        )
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.fc = nn.Linear(d_model, num_classes)
        # Kept as a module attribute; applied only in predict_proba.
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """
        Compute class logits for a batch of feature rows.

        Parameters:
            x (Tensor): Features of shape (batch, input_dim).

        Returns:
            Tensor: Unnormalised class scores of shape (batch, num_classes).
        """
        # The encoder layer uses the default batch_first=False, so the new
        # leading axis is a sequence axis of length one and rows stay
        # independent of each other.
        x = self.embedding(x).unsqueeze(0)
        x = self.transformer(x)
        return self.fc(x.squeeze(0))

    def predict_proba(self, x):
        """
        Return class probabilities (softmax over the logits) for ``x``.

        Parameters:
            x (Tensor): Features of shape (batch, input_dim).

        Returns:
            Tensor: Probabilities of shape (batch, num_classes); each row sums to 1.
        """
        return self.softmax(self(x))

# Function to train the model
def train_model(model, X_train, y_train, epochs=10, lr=0.001):
    """
    Train the transaction classifier.

    Runs ``epochs`` full-batch Adam updates: each step feeds all of
    ``X_train`` through the model at once and minimises cross-entropy
    against ``y_train``. The model is moved to the module-level ``device``
    and left in training mode.

    Parameters:
        model (nn.Module): Transformer model.
        X_train (Tensor): Training features.
        y_train (Tensor): Training labels.
        epochs (int): Number of training epochs.
        lr (float): Learning rate.

    Returns:
        list[float]: Loss value recorded at each epoch, in order.
    """
    # Move the model before building the optimizer, as the torch.optim docs
    # recommend, and make sure the data lives on the same device.
    model.to(device)
    X_train = X_train.to(device)
    y_train = y_train.to(device)

    # Switch to training mode explicitly: a model that was previously put in
    # eval mode (e.g. by an evaluation pass) would otherwise keep layers such
    # as dropout disabled.
    model.train()

    # CrossEntropyLoss applies log-softmax internally, so it is meant to
    # receive unnormalised class scores.
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    loss_history = []
    for epoch in range(epochs):
        optimizer.zero_grad()
        outputs = model(X_train)
        loss = criterion(outputs, y_train)
        loss.backward()
        optimizer.step()

        epoch_loss = loss.item()
        loss_history.append(epoch_loss)
        print(f"Epoch [{epoch+1}/{epochs}], Loss: {epoch_loss:.4f}")

    return loss_history
    
# Function to evaluate the model
def evaluate_model(model, X_test, y_test):
    """
    Score the model on held-out data.

    Puts the model in eval mode, predicts the highest-scoring class for
    each row of ``X_test`` without tracking gradients, and compares the
    predictions with ``y_test``.

    Parameters:
        model (nn.Module): Trained model.
        X_test (Tensor): Test features.
        y_test (Tensor): Test labels.

    Returns:
        float: Accuracy of the model.
    """
    model.eval()
    with torch.no_grad():
        predicted_classes = model(X_test).argmax(dim=1)
    # accuracy_score works on host memory, so bring both tensors to the CPU.
    return accuracy_score(y_test.cpu(), predicted_classes.cpu())

# Example run: fetch the transaction table and turn it into model-ready tensors.
query = "SELECT * FROM transactions"  # Modify as needed
df = load_data(query=query, credentials=credentials)
X, y, encoder = preprocess_data(df=df)

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Size the classifier to the data, then fit it.
model = TransactionClassifier(
    input_dim=X.size(1),
    num_classes=len(encoder.classes_),
)
train_model(model=model, X_train=X_train, y_train=y_train, epochs=10)

# Report accuracy on the held-out rows.
accuracy = evaluate_model(model=model, X_test=X_test, y_test=y_test)
print(f"Model Accuracy: {accuracy:.2%}")
