In [None]:
!pip install torch
!pip install torchvision
!pip install torchaudio
!pip install torch-geometric
!pip install pymongo
!pip install --upgrade pymongo

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import GraphConv
from torch_geometric.data import Data
from pymongo import MongoClient
import random
import json
import csv
import pandas as pd
from bson import ObjectId
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import torch.nn as nn
import torch.optim as optim
import numpy as np

# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

import os

import certifi
from pymongo import MongoClient

# SECURITY: database credentials must not live in source control. The
# connection string is now taken from the MONGODB_URI environment variable.
# The literal below is kept ONLY so existing runs keep working; the password
# it contains has been committed and should be rotated, after which this
# fallback should be deleted.
MONGODB_URI = os.environ.get("MONGODB_URI") or (
    "mongodb+srv://gauravtemp123:GrnDPnMKIEsnBqGk@cluster0.eu8dw.mongodb.net/?retryWrites=true&w=majority"
)

# TLS is enforced and certificates are validated against certifi's CA bundle.
client = MongoClient(
    MONGODB_URI,
    tlsCAFile=certifi.where(),
    tls=True,
    tlsAllowInvalidCertificates=False,
    tlsAllowInvalidHostnames=False,
)
db = client["topology"]
devices_collection = db["devices"]

def json_to_csv(json_file, csv_file):
    """
    Convert a JSON file holding a list of objects to a CSV file.

    The CSV header is the union of the keys of all records, in first-seen
    order, so records with differing key sets can be written together; a
    key that a record lacks is written as an empty cell. An empty list
    produces an empty CSV file.

    Args:
        json_file (str): Path to the JSON file.
        csv_file (str): Path to the CSV file.
    """
    with open(json_file, 'r', encoding='utf-8') as json_data:
        data = json.load(json_data)

    # dict.fromkeys de-duplicates while preserving insertion order.
    header = list(dict.fromkeys(key for record in data for key in record))

    with open(csv_file, 'w', newline='', encoding='utf-8') as csv_data:
        if not header:
            # Nothing to write: leave an empty file rather than failing on data[0].
            return
        writer = csv.DictWriter(csv_data, fieldnames=header, restval='')
        writer.writeheader()
        writer.writerows(data)

def fetch_device_data():
    """
    Load every document of the devices collection into memory.

    Returns:
        A list with one entry per document yielded by
        ``devices_collection.find()``, in cursor order.
    """
    # Draining the cursor with list() materialises all documents at once.
    return list(devices_collection.find())

def _encode_object_id(value):
    """JSON ``default`` hook: render ObjectId as its string form, reject anything else."""
    if isinstance(value, ObjectId):
        return str(value)
    raise TypeError(f"Object of type {type(value).__name__} is not JSON serializable")


def convert_json_to_csv():
    """
    Export the device collection to 'device_data.json' and 'device_data.csv'.

    The documents returned by fetch_device_data() are dumped to JSON, then
    that file is converted to CSV with json_to_csv(). ObjectId values are
    written as strings at any nesting depth (not only at the top level of
    a document); other non-JSON types still raise TypeError.
    """
    device_data = fetch_device_data()
    json_file = 'device_data.json'
    csv_file = 'device_data.csv'

    with open(json_file, 'w') as file:
        # The hook is invoked by json for every value it cannot encode itself,
        # which covers ObjectIds inside sub-documents and lists as well.
        json.dump(device_data, file, default=_encode_object_id)

    json_to_csv(json_file, csv_file)

def process_device_data_for_gnn(device):
    """
    Build a graph for one device and its EIGRP neighbours.

    Node 0 is always the device itself; each distinct neighbour IP gets the
    next free index in the order it appears in 'eigrpNeighbors'. One directed
    edge (device -> neighbour) is created per neighbour entry. A device with
    no neighbours yields a single-node graph with an empty edge_index of
    shape (2, 0), so the function always returns a Data object.

    Args:
        device (dict): Device data. Must contain 'ipAddress'; may contain
            'eigrpNeighbors', a list of dicts each holding 'neighborIP'.

    Returns:
        A Data object containing node features and edge connections.

    Raises:
        KeyError: If 'ipAddress' is missing, or a neighbour entry has no
            'neighborIP'.
    """
    ip_address = device['ipAddress']

    # Deterministic numbering: the device first, neighbours in first-seen order.
    ip_to_index = {ip_address: 0}
    numerical_edges = []
    for neighbor in device.get('eigrpNeighbors') or []:
        neighbor_index = ip_to_index.setdefault(neighbor['neighborIP'], len(ip_to_index))
        numerical_edges.append([0, neighbor_index])

    if numerical_edges:
        # (num_edges, 2) -> (2, num_edges), the layout edge_index is built in here.
        edge_index = torch.tensor(numerical_edges, dtype=torch.long).t().contiguous()
    else:
        edge_index = torch.empty((2, 0), dtype=torch.long)

    # NOTE(review): node features are all zeros, as before. Richer features
    # (AS number, routing table, ...) would have to be added here.
    node_features = torch.zeros((len(ip_to_index), 1))

    # The Data class stores node features (x) and edge connections (edge_index)
    return Data(x=node_features, edge_index=edge_index)

# GNN Model for Anomaly Detection
class AnomalyDetectionGNN(nn.Module):
    """
    Two GraphConv layers followed by a linear layer.

    forward() returns, for every row of ``x``, log-probabilities over
    ``output_dim`` classes (log-softmax along dim 1).
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        # Attribute names and creation order are kept stable so saved
        # state_dicts keep loading.
        self.conv1 = GraphConv(input_dim, hidden_dim)
        self.conv2 = GraphConv(hidden_dim, hidden_dim)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x, edge_index):
        hidden = self.conv1(x, edge_index)
        hidden = F.relu(hidden)
        # Dropout is only active while the module is in training mode.
        hidden = F.dropout(hidden, p=0.2, training=self.training)
        hidden = self.conv2(hidden, edge_index)
        hidden = F.relu(hidden)
        logits = self.fc(hidden)
        return F.log_softmax(logits, dim=1)

class DeviceDataset(Dataset):
    """
    Dataset over a CSV file: the first column is the sample, the last
    column is its label.
    """

    def __init__(self, csv_file):
        frame = pd.read_csv(csv_file)
        self.data = frame

        # Columns are picked by position, not by name.
        self.device_data = frame.iloc[:, 0]   # first column
        self.labels = frame.iloc[:, -1]       # last column

    def __len__(self):
        return len(self.device_data)

    def __getitem__(self, idx):
        # Positional lookup; returns the (sample, label) pair for row idx.
        return self.device_data.iloc[idx], self.labels.iloc[idx]

from torch.utils.data import DataLoader

import numpy as np

def _parse_finite_float(value):
    """Return ``value`` as a finite float, or None when it is not numeric."""
    try:
        number = float(value)
    except (TypeError, ValueError):
        return None
    return number if np.isfinite(number) else None


def preprocess_data(device_data):
    """
    Turn one raw sample into a 1-D tensor of numeric values.

    Args:
        device_data: Either a list/tuple of values (strings or numbers), or
            a single value that ``float()`` accepts (e.g. a number or a
            one-element tensor).

    Returns:
        For a list/tuple: a float64 tensor holding every element that parses
        as a finite number; non-numeric elements are ignored, so the result
        may be empty. For a single value: a one-element tensor.

    Raises:
        TypeError, ValueError: If a single (non-sequence) value cannot be
            converted with ``float()``.
    """
    # Lists are handled like tuples: a DataLoader hands string columns over
    # as a list, which previously fell through to float(list) and crashed.
    if isinstance(device_data, (list, tuple)):
        # Parsing with float() instead of a digit check accepts forms such as
        # "1e3" and skips malformed ones such as "1-2" instead of raising.
        numbers = [
            number
            for number in map(_parse_finite_float, device_data)
            if number is not None
        ]
        return torch.tensor(np.array(numbers, dtype=np.float64))
    return torch.tensor([float(device_data)])

def _label_to_class_index(label, label_map):
    """Map one raw label (string, number, 1-element tensor, or a list/tuple
    wrapping one of those) to an integer class index; unparseable labels map to 0."""
    if isinstance(label, (list, tuple)):
        if not label:
            return 0
        label = label[0]
    if isinstance(label, str):
        return label_map.get(label, 0)
    try:
        return int(label)
    except (TypeError, ValueError, OverflowError):
        return 0


def train_on_stream(csv_file, num_epochs=10):
    """
    Train an AnomalyDetectionGNN one sample at a time and save its weights.

    Each CSV row is treated as a single-node graph with a self-loop. Rows
    whose sample is empty, or empty after preprocess_data(), are skipped.

    Args:
        csv_file (str): Path of the CSV file read through DeviceDataset.
        num_epochs (int): Number of passes over the data.

    Returns:
        A tuple ``(model, path)``: the trained model and the path
        ('trained_model.pth') its state_dict was saved to.
    """
    # Create a data loader with a batch size of 1
    data_loader = DataLoader(dataset=DeviceDataset(csv_file), batch_size=1, shuffle=True)

    # Create the model, optimizer, and loss function
    model = AnomalyDetectionGNN(input_dim=1, hidden_dim=16, output_dim=2).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
    criterion = nn.CrossEntropyLoss()

    # NOTE(review): the model has 2 output classes; a label that maps to an
    # index >= 2 (e.g. 'label3') will make the loss raise.
    label_map = {'label1': 0, 'label2': 1, 'label3': 2}  # Replace with your actual labels

    # Loop-invariant dummy graph structure: one node with a self-loop.
    edge_index = torch.tensor([[0], [0]], dtype=torch.long, device=device)
    model_path = 'trained_model.pth'

    model.train()
    for _epoch in range(num_epochs):
        for device_data, label in data_loader:
            # Skip samples that are empty before or after preprocessing
            if len(device_data) == 0:
                continue
            device_data = preprocess_data(device_data)
            if len(device_data) == 0:
                continue

            # preprocess_data already returns a tensor; just reshape and move it
            node_features = device_data.unsqueeze(0).to(device, dtype=torch.float32)
            target = torch.tensor(
                [_label_to_class_index(label, label_map)],
                dtype=torch.long,
                device=device,
            )

            optimizer.zero_grad()
            output = model(node_features, edge_index)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()

    # Save the trained model
    torch.save(model.state_dict(), model_path)

    return model, model_path

def infer_on_stream(model, csv_file='device_data.csv'):
    """
    Run the model over every row of a CSV file and print each prediction.

    The model is put in evaluation mode for the duration of the call (so
    dropout is disabled) and its previous mode is restored afterwards.
    Rows that are empty after preprocess_data() are skipped.

    Args:
        model: The trained network; called as ``model(x, edge_index)``.
        csv_file (str): CSV file read through DeviceDataset.

    Returns:
        A list with the predicted class index of every row that was scored.
    """
    # Create a data loader with a batch size of 1
    data_loader = DataLoader(dataset=DeviceDataset(csv_file), batch_size=1, shuffle=False)

    # Loop-invariant dummy graph structure: one node with a self-loop.
    edge_index = torch.tensor([[0], [0]], dtype=torch.long, device=device)

    results = []
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for device_data, _label in data_loader:
                device_data = preprocess_data(device_data)
                if len(device_data) == 0:
                    continue

                # preprocess_data already returns a tensor; just reshape and move it
                node_features = device_data.unsqueeze(0).to(device, dtype=torch.float32)

                predictions = model(node_features, edge_index)
                anomaly_status = torch.argmax(predictions, dim=1)

                print(anomaly_status)
                results.extend(anomaly_status.tolist())
    finally:
        model.train(was_training)

    return results

def watch_device_data(model=None):
    """
    Score devices as they change and write the verdict back to MongoDB.

    Opens a change stream on the devices collection for insert, update and
    replace events, builds a graph for the changed document, runs the model
    on it and stores the result in the document's 'anomaly_detected' field.
    This call blocks while the change stream is open.

    Args:
        model: The trained network. When omitted, the module-level name
            ``model`` (then ``trained_model``) is used.

    Raises:
        RuntimeError: If no model was passed and none is defined at module level.
    """
    if model is None:
        module_globals = globals()
        model = module_globals.get('model')
        if model is None:
            model = module_globals.get('trained_model')
    if model is None:
        raise RuntimeError(
            "watch_device_data() needs a trained model: pass it as the 'model' argument."
        )

    pipeline = [
        {'$match': {'operationType': {'$in': ['insert', 'update', 'replace']}}}
    ]

    # Disable dropout so the same document always gets the same verdict.
    model.eval()

    # 'updateLookup' makes update events carry the full document too; without
    # it 'fullDocument' is absent on updates.
    with devices_collection.watch(pipeline, full_document='updateLookup') as change_stream:
        for change in change_stream:
            print("Change detected:", change)

            # Ignore the update this function itself performs below.
            updated_fields = (change.get('updateDescription') or {}).get('updatedFields') or {}
            if set(updated_fields) == {'anomaly_detected'}:
                continue

            device_data = change.get('fullDocument')
            if not device_data or 'ipAddress' not in device_data:
                continue

            gnn_data = process_device_data_for_gnn(device_data)
            if gnn_data is None:
                continue
            gnn_data = gnn_data.to(device)

            with torch.no_grad():
                predictions = model(gnn_data.x, gnn_data.edge_index)

            # One verdict per device: average the per-node outputs, then pick
            # the class. For a single-node graph this equals the plain argmax.
            anomaly_status = predictions.mean(dim=0).argmax().item()

            devices_collection.update_one(
                {'ipAddress': device_data['ipAddress']},
                {'$set': {'anomaly_detected': bool(anomaly_status)}}
            )

            if anomaly_status == 1:
                print(f"Real-time anomaly detected for device: {device_data['ipAddress']}")

# Script entry point: export the data, train, run batch inference, then
# keep scoring devices as they change.
if __name__ == "__main__":
    print("Converting JSON data to CSV...")
    convert_json_to_csv()

    print("Starting training...")
    trained_model, best_model_path = train_on_stream('device_data.csv')

    # Load the best model for inference. weights_only=True restricts
    # unpickling to tensors/containers, and map_location keeps the load
    # working when the file was written on a different device.
    state_dict = torch.load(best_model_path, map_location=device, weights_only=True)
    trained_model.load_state_dict(state_dict)
    trained_model.to(device)
    # Inference must run with dropout disabled.
    trained_model.eval()

    print("Starting inference...")
    infer_on_stream(trained_model)

    # watch_device_data() looks the network up through the module-level
    # name `model`, so bind it before starting the watcher.
    model = trained_model

    print("Watching for real-time changes...")
    watch_device_data()


Collecting torch-geometric
  Downloading torch_geometric-2.6.1-py3-none-any.whl.metadata (63 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m63.1/63.1 kB[0m [31m998.8 kB/s[0m eta [36m0:00:00[0m
Downloading torch_geometric-2.6.1-py3-none-any.whl (1.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m10.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: torch-geometric
Successfully installed torch-geometric-2.6.1
Collecting pymongo
  Downloading pymongo-4.10.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (22 kB)
Collecting dnspython<3.0.0,>=1.16.0 (from pymongo)
  Downloading dnspython-2.7.0-py3-none-any.whl.metadata (5.8 kB)
Downloading pymongo-4.10.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.4/1.4 MB[0m [31m10.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dnspython-2.7.0-py3-none-any.whl (31

  trained_model.load_state_dict(torch.load(best_model_path))


Starting inference...
Watching for real-time changes...
