In [None]:
pip install node2vec


In [None]:
from kafka3 import KafkaConsumer
from json import loads
import pandas as pd
import networkx as nx
from IPython.display import clear_output
import time
from node2vec import Node2Vec

# Host address of the Kafka broker; connect_kafka_consumer() appends port 9092.
hostip = "192.168.1.111"

class TransactionGraphUpdater:
    """Build an undirected account graph from transaction batches.

    Every transaction contributes an edge between its two accounts and adds
    its received amount to a running total kept in ``edge_amounts``.  After
    each batch the node2vec embeddings are retrained on the whole graph.
    """

    def __init__(self):
        # Undirected: (a, b) and (b, a) share a single graph edge.
        self.transaction_graph = nx.Graph()
        # Running totals keyed by the directed (from_account, to_account) pair.
        self.edge_amounts = {}
        # Timestamp of the last throttled output refresh (see _update_plot).
        self.last_update_time = time.time()
        # Most recently fitted node2vec model, or None before the first fit.
        self.node2vec_model = None

    def _print_counts(self):
        """Print the current node and edge counts of the graph."""
        print("Node count:", len(self.transaction_graph.nodes))
        print("Edge count:", len(self.transaction_graph.edges))

    def _update_plot(self):
        """Clear the cell output and reprint the counts at most every 10 s."""
        current_time = time.time()
        if current_time - self.last_update_time >= 10:
            clear_output(wait=True)
            self._print_counts()
            self.last_update_time = current_time

    @staticmethod
    def _coerce_amount(amount):
        """Return ``amount`` as a float, treating missing/unparseable as 0.0."""
        try:
            value = float(amount)
        except (TypeError, ValueError):
            return 0.0
        # NaN is how pandas represents a missing numeric cell.
        return 0.0 if pd.isna(value) else value

    def _record_transaction(self, from_account, to_account, amount):
        """Add one transaction to the graph and to the running totals.

        Returns:
            True when the transaction was recorded, False when it was skipped
            because either account is missing (None or NaN).
        """
        # pandas fills absent cells with NaN, which an ``is not None`` check
        # would let through and turn into a bogus node.
        if pd.isna(from_account) or pd.isna(to_account):
            return False

        edge = (from_account, to_account)
        # add_edge is a no-op for an edge that already exists.
        self.transaction_graph.add_edge(*edge)
        # The graph is undirected but the totals are keyed by direction, so
        # the reverse pair may be an existing edge with no entry yet; using
        # .get() avoids the KeyError the has_edge()-based check caused.
        self.edge_amounts[edge] = (
            self.edge_amounts.get(edge, 0.0) + self._coerce_amount(amount)
        )
        return True

    def update_transaction_graph(self, df):
        """Fold a batch of transactions into the graph and retrain embeddings.

        Args:
            df: DataFrame whose rows may carry the columns ``Account``,
                ``To_Account`` and ``Amount_Received``.  Rows lacking either
                account are skipped.
        """
        for _, row in df.iterrows():
            self._record_transaction(
                row.get('Account'),
                row.get('To_Account'),
                row.get('Amount_Received'),
            )

        self._print_counts()
        self._update_plot()

        self.generate_node_embeddings()

    def generate_node_embeddings(self):
        """Fit node2vec on the current graph and print the embeddings.

        Returns:
            Dict mapping each graph node to its embedding vector; empty when
            the graph has no nodes yet.
        """
        # Same copy the original code made before handing the graph over.
        nx_graph = nx.Graph(self.transaction_graph)

        # Nothing to walk over yet; skip training instead of failing inside
        # the embedding library on an empty vocabulary.
        if nx_graph.number_of_nodes() == 0:
            node_embeddings = {}
            print("Node embeddings:", node_embeddings)
            return node_embeddings

        node2vec = Node2Vec(nx_graph, dimensions=64, walk_length=30, num_walks=200, workers=4)

        self.node2vec_model = node2vec.fit(window=10, min_count=1, batch_words=4)

        # node2vec stores node names as strings in the fitted model, so look
        # them up by str(node) to support non-string account identifiers.
        node_embeddings = {
            node: self.node2vec_model.wv[str(node)] for node in nx_graph.nodes
        }

        print("Node embeddings:", node_embeddings)
        return node_embeddings

def connect_kafka_consumer():
    """Create a Kafka consumer for the broker at ``hostip``:9092.

    Message values are decoded with ``json.loads``.  The consumer is not
    subscribed to any topic here; the caller does that.

    Returns:
        The connected ``KafkaConsumer``, or ``None`` when the connection
        attempt raised an ``Exception`` (the error is printed).
    """
    try:
        return KafkaConsumer(
            bootstrap_servers=[f'{hostip}:9092'],
            auto_offset_reset='earliest',
            enable_auto_commit=True,
            group_id='my-group',
            value_deserializer=loads,
            api_version=(0, 10),
        )
    except Exception as ex:
        # Returning from the except clause (rather than from ``finally``)
        # lets KeyboardInterrupt/SystemExit propagate instead of being
        # silently discarded.
        print('Exception while connecting Kafka consumer.')
        print(str(ex))
        return None

def _numeric_column(df, candidates, default=0.0):
    """Return the first column of ``candidates`` found in ``df`` as numbers.

    Values that cannot be parsed become NaN.  When none of the candidate
    columns exists, a Series filled with ``default`` is returned instead.
    """
    for name in candidates:
        if name in df.columns:
            return pd.to_numeric(df[name], errors='coerce')
    return pd.Series(default, index=df.index, dtype='float64')


def create_features(df):
    """Add derived transaction features to ``df`` in place and return it.

    A feature column is only created when it is not already present:

    * ``Transaction_Direction`` - ``'Incoming'`` when the received amount is
      greater than zero, otherwise ``'Outgoing'``.
    * ``Currency_Conversion`` - whether ``Receiving_Currency`` differs from
      ``Payment_Currency``.
    * ``Amount_Ratio`` - received amount divided by ``Amount_Paid``; 0 when
      either amount is zero or its column is absent.

    The received amount is read from ``Amount_Received`` (the column the graph
    updater uses), falling back to the older ``Received_Amount`` spelling.

    Args:
        df: DataFrame of transaction records; may be empty.

    Returns:
        The same DataFrame object, with the feature columns added.
    """
    # Column-wise operations instead of row-wise apply(): this also works on
    # an empty frame and with numeric values delivered as strings.
    received = _numeric_column(df, ('Amount_Received', 'Received_Amount'))

    if 'Transaction_Direction' not in df.columns:
        # NaN > 0 is False, so missing amounts are labelled 'Outgoing'.
        df['Transaction_Direction'] = received.gt(0).map(
            {True: 'Incoming', False: 'Outgoing'}
        )

    if 'Currency_Conversion' not in df.columns:
        df['Currency_Conversion'] = df.get('Receiving_Currency') != df.get('Payment_Currency')

    if 'Amount_Ratio' not in df.columns:
        paid = _numeric_column(df, ('Amount_Paid',))
        # A zero on either side yields 0 rather than inf/NaN from division.
        usable = received.ne(0) & paid.ne(0)
        df['Amount_Ratio'] = (received / paid).where(usable, 0.0)

    return df

def preprocess_and_plot(data, graph_updater):
    """Validate one decoded message and push it into the transaction graph.

    Args:
        data: Expected to be a list of dicts, one dict per transaction.
        graph_updater: Object whose ``update_transaction_graph`` method
            receives the feature-enriched DataFrame.

    Returns:
        None.  Input that is not a list of dicts is reported and ignored.
    """
    is_record_list = isinstance(data, list) and all(
        isinstance(item, dict) for item in data
    )
    if not is_record_list:
        print("Invalid data format. Expecting a list of dictionaries.")
        return

    # Tabulate the records, derive the extra columns, then update the graph.
    enriched = create_features(pd.DataFrame(data))
    graph_updater.update_transaction_graph(enriched)

# Script entry point: consume the 'Transaction' topic and feed every message
# into a single TransactionGraphUpdater until an error or interrupt occurs.
if __name__ == '__main__':
    topic = 'Transaction'
    print('Consuming records..')
    consumer = connect_kafka_consumer()

    if consumer is None:
        print('Exiting script due to connection error.')
        # SystemExit instead of the interactive-only exit() helper; the
        # non-zero status signals the failure to the calling process.
        raise SystemExit(1)

    updater = TransactionGraphUpdater()

    try:
        consumer.subscribe([topic])
        # Iterating the consumer blocks and yields one record at a time.
        for message in consumer:
            preprocess_and_plot(message.value, updater)
    except KeyboardInterrupt:
        print('Stopped by user.')
    except Exception as ex:
        print('Exception in consuming messages.')
        print(str(ex))
    finally:
        # Always release the broker connection, however the loop ended.
        consumer.close()
