In [4]:
import asyncio
import getpass
import os
import pickle
import re
import time
import xml.etree.ElementTree as ET
from collections import deque

import joblib
import numpy as np
import pandas as pd
import psycopg2
import torch
import torch.nn as nn
import torch.optim as optim
import websockets
import win32evtlog

from river.drift import ADWIN
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

**GAN section**

In [5]:
class Generator(nn.Module):
    """Fully connected network that turns a noise vector into a feature row.

    Args:
        noise_dim: width of the input noise vector.
        out_dim: width of the produced output vector.

    The stack is Linear+ReLU through hidden widths 32, 64, 256, 128, 32 and
    ends with a plain Linear layer (no output activation).
    """

    def __init__(self, noise_dim, out_dim):
        super().__init__()
        widths = (noise_dim, 32, 64, 256, 128, 32)
        layers = []
        # Layers are created in forward order so that module indices inside
        # `self.net` (and therefore state-dict keys) stay 0, 2, 4, ...
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            layers.append(nn.Linear(fan_in, fan_out))
            layers.append(nn.ReLU())
        layers.append(nn.Linear(widths[-1], out_dim))
        self.net = nn.Sequential(*layers)

    def forward(self, z):
        """Run the noise batch `z` through the layer stack."""
        return self.net(z)

In [6]:
class Classifier(nn.Module):
    """Fully connected binary scorer producing one sigmoid output per row.

    Args:
        in_dim: width of each input feature vector.

    The stack is Linear+ReLU through hidden widths 32, 64, 256, 128, 32,
    followed by Linear(32, 1) and a Sigmoid, so outputs lie in [0, 1].
    """

    def __init__(self, in_dim):
        super().__init__()
        widths = (in_dim, 32, 64, 256, 128, 32)
        layers = []
        # Built in forward order to keep module indices (and state-dict keys)
        # inside `self.net` at 0, 2, 4, ...
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            layers.append(nn.Linear(fan_in, fan_out))
            layers.append(nn.ReLU())
        layers.append(nn.Linear(widths[-1], 1))
        layers.append(nn.Sigmoid())
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Score the feature batch `x`; result has a trailing dimension of 1."""
        return self.net(x)

In [7]:
# Use the GPU when torch can see one, otherwise stay on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

df = pd.read_csv("base files/creditcard_train.csv")

# Separate the binary "Class" target from the remaining feature columns.
y = df["Class"].values
X = df.drop(columns=["Class"]).values

# Standardise every feature column; the fitted transformer stays in `scaler`.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Class-0 rows as a float32 tensor, paired with an all-zeros label vector.
X_normal = torch.tensor(X_scaled[y == 0], dtype=torch.float32)
y_normal = torch.zeros(len(X_normal))
normal_data = X_normal

# Class-1 rows as a float32 tensor, paired with an all-ones label vector.
X_fraud = torch.tensor(X_scaled[y == 1], dtype=torch.float32)
y_fraud = torch.ones(len(X_fraud))
fraud_data = X_fraud

# Full real dataset, class-0 rows first and class-1 rows after them.
X_real = torch.cat([X_normal, X_fraud])
y_real = torch.cat([y_normal, y_fraud])

In [8]:
# Width of the noise vector fed to the generator, and of one feature row.
noise_dim = 16
n_features = X_real.size(1)

# The generator is constructed before the discriminator; construction order
# decides which random initial weights each network draws.
G = Generator(noise_dim=noise_dim, out_dim=n_features).to(device)
D = Classifier(in_dim=n_features).to(device)

# One learning rate shared by all three optimizers; binary cross-entropy
# matches the sigmoid output of `Classifier`.
lr = 1e-5
loss_fn = nn.BCELoss()
opt_D = optim.Adam(D.parameters(), lr=lr)
opt_G = optim.Adam(G.parameters(), lr=lr)
# NOTE(review): a second, independent Adam over the same generator
# parameters. No cell visible in this notebook steps it - confirm whether it
# was meant for a later training phase.
opt_G_2 = optim.Adam(G.parameters(), lr=lr)

In [9]:
# Adversarial training of G against D using only the class-1 ("fraud") rows.
# The real batch is identical on every epoch, so it is moved to the device
# once here instead of being re-transferred inside the loop.
real = fraud_data.to(device)

for epoch in range(10000):
    # One noise vector per real row, decoded into a fake batch.
    z = torch.randn(len(real), noise_dim).to(device)
    fake = G(z)

    # Discriminator step: real rows should score 1, generated rows 0.
    # `fake.detach()` keeps this backward pass out of the generator.
    D_real = D(real)
    D_fake = D(fake.detach())
    loss_D = loss_fn(D_real, torch.ones_like(D_real)) + loss_fn(D_fake, torch.zeros_like(D_fake))
    opt_D.zero_grad()
    loss_D.backward()
    opt_D.step()

    # Generator step: re-score the same fake batch with the updated D and
    # push those scores towards 1.
    D_fake = D(fake)
    loss_G = loss_fn(D_fake, torch.ones_like(D_fake))
    opt_G.zero_grad()
    loss_G.backward()
    opt_G.step()

    # "\r" rewrites a single status line instead of printing one per epoch.
    print(f"\r{epoch+1}: loss_D {loss_D.item():.3f}, loss_G {loss_G.item():.3f}", end=20*" ")

10000: loss_D 0.138, loss_G 3.107                    

In [10]:
# Sample as many synthetic rows as there are real class-1 rows and bring the
# result back to the CPU, detached from the autograd graph.
z_synth = torch.randn(len(fraud_data), noise_dim).to(device)
synthetic_frauds = G(z_synth).detach().cpu()
synthetic_frauds_np = synthetic_frauds.numpy()

# Header line followed by the (numpy-abbreviated) array on the next line.
print("Generated Synthetic Fraud Data:", synthetic_frauds_np, sep="\n")


Generated Synthetic Fraud Data:
[[ 0.5172903   0.6042827   0.35832328 ...  0.76952034  0.43773496
  -0.48459098]
 [ 0.49007067  0.5452349   0.31360126 ...  0.7076121   0.39527908
  -0.47065982]
 [ 1.1941773   1.3294133   0.8513359  ...  1.6827614   0.8992215
  -1.177026  ]
 ...
 [ 0.34244776  0.4226125   0.25303853 ...  0.51284426  0.37447464
  -0.34407666]
 [ 0.81841546  0.9063668   0.5811417  ...  1.1524384   0.61096716
  -0.7533277 ]
 [ 1.7485393   1.9323717   1.2353566  ...  2.4651153   1.2507966
  -1.6772698 ]]


In [11]:
# Fresh draw of synthetic rows (one per real class-1 row), all labelled 1.
z_synth = torch.randn(len(fraud_data), noise_dim).to(device)
synthetic_frauds = G(z_synth).detach().cpu()
synthetic_labels = torch.ones(synthetic_frauds.shape[0])

# Augmented training set: every real row followed by the synthetic rows.
X_aug = torch.cat((X_real, synthetic_frauds), dim=0)
y_aug = torch.cat((y_real, synthetic_labels), dim=0)

# A new classifier, separate from the discriminator D, with its own Adam.
C = Classifier(in_dim=n_features).to(device)
opt_C = optim.Adam(C.parameters(), lr=1e-3)

In [12]:
# Train C on the augmented set. Each epoch is a single full-batch gradient
# step over a freshly permuted copy of the data.
for epoch in range(100):
    idx = torch.randperm(len(X_aug))
    x_batch, y_batch = X_aug[idx].to(device), y_aug[idx].unsqueeze(1).to(device)

    opt_C.zero_grad()
    y_pred = C(x_batch)
    # Labels were unsqueezed to (N, 1) above to match C's output shape.
    loss = loss_fn(y_pred, y_batch)
    loss.backward()
    opt_C.step()

    print(f"\r{epoch+1}: loss {loss.item():.3f}", end=20*" ")

100: loss 0.009                    

In [13]:
# Alternating updates of the classifier C and the generator G:
#   * C is trained to score generated rows as 1,
#   * G is trained so that C scores its rows as 0.
for epoch in range(2000):
    z = torch.randn(len(fraud_data), noise_dim).to(device)
    fake_frauds = G(z)

    # Classifier step. The batch is detached so this backward pass stops at
    # C's input: no gradients are pushed into G (they were zeroed before G's
    # own step anyway) and the graph does not need `retain_graph=True`.
    # This mirrors the `D(fake.detach())` pattern of the GAN loop.
    labels = torch.ones(len(fake_frauds), 1).to(device)
    preds = C(fake_frauds.detach())
    loss_C = loss_fn(preds, labels)
    opt_C.zero_grad()
    loss_C.backward()
    opt_C.step()

    # Generator step: re-score the attached batch with the updated C and
    # back-propagate through C into G.
    preds = C(fake_frauds)
    loss_G = loss_fn(preds, torch.zeros_like(preds))
    opt_G.zero_grad()
    loss_G.backward()
    opt_G.step()

    print(f"\r{epoch+1}: loss_C {loss_C.item():.3f}, loss_G {loss_G.item():.3f}", end=20*" ")

1512: loss_C 0.000, loss_G 19.255                    

KeyboardInterrupt: 

In [None]:
# Write the generator and discriminator weights (state dicts only) to disk.
for _path, _net in (('fraud_detection_G.pth', G), ('fraud_detection_D.pth', D)):
    torch.save(_net.state_dict(), _path)

**Online-learning section**

In [None]:
# Fixed seed so the shuffle below and the SGD updates are reproducible on
# Restart & Run All.
ONLINE_SEED = 42

df = pd.read_csv('base files/creditcard_train.csv')
# Shuffle the rows once; the loop that follows consumes them in this order.
df = df.sample(frac=1, random_state=ONLINE_SEED).reset_index(drop=True)

X_all = df.drop(columns=['Class'])
y_all = df['Class'].values

# NOTE(review): the scaler is fitted on the full file, including rows that are
# later replayed as a "stream" - confirm that this look-ahead is acceptable.
scaler = StandardScaler()
X_all = scaler.fit_transform(X_all)

# Logistic-regression SGD model, warm-started on the first 1000 rows.
# `classes` must be passed on the first partial_fit call because that slice
# is not guaranteed to contain both labels.
model = SGDClassifier(loss='log_loss', max_iter=1, warm_start=True, random_state=ONLINE_SEED)
model.partial_fit(X_all[:1000], y_all[:1000], classes=np.array([0, 1]))

# NOTE(review): this ADWIN detector is created but never updated in the cells
# visible here - confirm whether drift detection was meant to use it.
adwin = ADWIN()
uncertainty_threshold = 0.3   # |p1 - p0| above which a prediction is self-trained on
window_size = 500             # number of recent predictions tracked for accuracy
threshold_drop = 0.05         # refit when windowed accuracy falls below 1 - this

recent_accuracies = []

In [None]:
# Replay the remaining rows one at a time: predict, track rolling accuracy,
# refit on the recent window when accuracy drops, and self-train on confident
# predictions.

# A bounded deque discards its oldest entry automatically in O(1), replacing
# the manual `list.pop(0)` (which is O(window_size) per sample).
recent_accuracies = deque(recent_accuracies, maxlen=window_size)

for i in range(1000, len(X_all)):
    x = X_all[i].reshape(1, -1)
    y_true = y_all[i]

    y_proba = model.predict_proba(x)[0]
    y_pred = np.argmax(y_proba)
    # Gap between the two class probabilities: 0 = undecided, 1 = certain.
    confidence = abs(y_proba[1] - y_proba[0])

    recent_accuracies.append(int(y_pred == y_true))

    if len(recent_accuracies) == window_size:
        avg_accuracy = sum(recent_accuracies) / window_size
        if avg_accuracy < (1 - threshold_drop):
            # The accuracy window covers samples i-window_size+1 .. i
            # (inclusive of the current one), so refit on exactly those rows.
            window_start = i - window_size + 1
            model.partial_fit(X_all[window_start:i + 1], y_all[window_start:i + 1])
            recent_accuracies.clear()

    # NOTE(review): this trains on the model's OWN prediction whenever it is
    # confident, even though the threshold is named "uncertainty" - confirm
    # that pseudo-labelling (not querying true labels when unsure) is intended.
    if confidence > uncertainty_threshold:
        model.partial_fit(x, [y_pred])

In [None]:
# Persist the online-trained `model` to disk with joblib.
joblib.dump(model, 'fraud_detection_model.pkl')

**In-kernel processing section**

In [None]:
# Rebuild the two torch networks with the same dimensions used for training
# (`noise_dim` and `n_features` come from the GAN setup cell).
G_loaded = Generator(noise_dim, n_features)
D_loaded = Classifier(n_features)

# `map_location` remaps the stored tensors onto the current device, so weights
# saved on a GPU machine still load on a CPU-only one.
G_loaded.load_state_dict(torch.load('fraud_detection_G.pth', map_location=device))
D_loaded.load_state_dict(torch.load('fraud_detection_D.pth', map_location=device))

G_loaded.to(device)
D_loaded.to(device)

# The loaded networks are only used for scoring from here on.
G_loaded.eval()
D_loaded.eval()

online_model = joblib.load('fraud_detection_model.pkl')
# NOTE(review): this replaces the fitted scaler from the earlier cells with an
# UNFITTED one; calling `scaler.transform` after this point would fail.
# Confirm whether the fitted scaler should be persisted and reloaded instead.
scaler = StandardScaler()

name = "lr"
# SECURITY: unpickling executes arbitrary code embedded in the file - only
# load model files that come from a trusted source.
with open(f"model_{name}.pkl", "rb") as f:
    basic_model = pickle.load(f)

In [None]:
# Make the pickled baseline model the active `model` read by the cells below.
model = basic_model
def prep_val(model, f):
    """Score one feature row with a model exposing a scikit-learn style ``predict``.

    Args:
        model: object with ``predict(batch)`` returning one result per row.
        f: a single feature row (sequence of numbers).

    Returns:
        The prediction for ``f`` (first element of ``model.predict([f])``).
        Previously the prediction was computed and thrown away and the input
        row was returned instead.
    """
    # `predict` expects a batch, so wrap the row in a list and unwrap the result.
    return model.predict([f])[0]

In [None]:
# Make the reloaded torch network `D_loaded` the active `model` read by the cells below.
model = D_loaded
def prep_val(model, f):
    """Score one feature row with a torch module.

    Args:
        model: a ``torch.nn.Module`` taking a 1-D float tensor.
        f: a single feature row (sequence of numbers).

    Returns:
        The module's output tensor for ``f``, computed without autograd.
    """
    # Build the input on the same device as the model's weights; a CPU tensor
    # fed to a CUDA model raises a device-mismatch error.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else None
    # Force float32 so integer-valued rows do not become int64 tensors.
    x = torch.tensor(f, dtype=torch.float32, device=target_device)
    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        return model(x)

In [None]:
# Connection settings for the local PostgreSQL instance that holds the rows.
server = "localhost"
database = "db_02"
username = "postgres"
# Never keep credentials in the notebook: take the password from the
# PGPASSWORD environment variable, or prompt for it interactively.
password = os.environ.get("PGPASSWORD") or getpass.getpass("PostgreSQL password: ")
source = "creditcard_train"

try:
    conn = psycopg2.connect(dbname=database, user=username, password=password, host=server, port="5432", sslmode='disable')
except psycopg2.Error as e:
    print("Error connecting to PostgreSQL database:", e)
    # Re-raise: without a connection the statements below cannot run, and
    # continuing would only fail later with an unrelated NameError on `conn`.
    raise
else:
    print("Connection established successfully!")

# `source` is a constant defined above, not user input, so interpolating it
# into the statement is safe here.
query = f"SELECT * FROM {source}"
# The cursor is closed as soon as the rows are fetched; `conn` is left open.
with conn.cursor() as cursor:
    cursor.execute(query)
    rows = cursor.fetchall()

In [None]:
def extract_features(row):
    """Convert one fetched row into a list of float features.

    The first column and the last column are dropped (the original comment
    attributes the dropped last column to the layout of creditcard_train.csv);
    every remaining value is converted with ``float``.

    Args:
        row: sequence of column values for one record.

    Returns:
        list[float]: ``row[1:-1]`` converted element-wise. Empty when the row
        has two or fewer columns.

    Raises:
        ValueError / TypeError: if a kept column cannot be converted to float.
    """
    # Only the kept columns are converted. The previous version first converted
    # row[1:] as well and discarded the result, which doubled the work and
    # could raise on a non-numeric last column.
    return [float(value) for value in row[1:-1]]

In [None]:
def handle_event(row):
    """Score one database row with the currently active global ``model``.

    Args:
        row: one record as accepted by ``extract_features``.

    Returns:
        For models exposing ``predict``: the prediction for the row
        (``model.predict([features])[0]``). For any other model (for example
        a torch module, which has no ``predict``): whatever
        ``prep_val(model, features)`` returns. Previously the result was
        computed and discarded.
    """
    features = extract_features(row)

    # scikit-learn style models: same call as before, now returned.
    predict = getattr(model, "predict", None)
    if predict is not None:
        return predict([features])[0]

    # `model` may have been rebound to a torch module, where `.predict` would
    # raise AttributeError; use the notebook's scoring helper for it.
    return prep_val(model, features)

In [None]:
def monitor_sysmon_logs(upto=None):
    """Score one database row per matching event-log record and time it.

    Records are read from the event log in batches; every record whose
    ``EventID`` is 1, 3 or 11 triggers ``handle_event`` on the next unused
    entry of the global ``rows``. Reading stops when the log is exhausted or
    when the row budget is used up.

    Args:
        upto: maximum number of rows to process. ``None`` (default) resolves
            to ``max(10000, len(rows))`` at call time, i.e. the previous
            default, but no longer frozen at function-definition time.

    Returns:
        float: elapsed wall-clock seconds for the read/score loop.
    """
    if upto is None:
        upto = max(10000, len(rows))
    # Never index past the rows that were actually fetched.
    limit = min(upto, len(rows))

    server = 'localhost'
    log_type = 'Microsoft-Windows-Sysmon/Operational'
    read_flags = win32evtlog.EVENTLOG_BACKWARDS_READ | win32evtlog.EVENTLOG_SEQUENTIAL_READ
    hand = win32evtlog.OpenEventLog(server, log_type)

    # `count` is the index of the next row to score. It advances only when a
    # row is consumed (it used to be bumped once per batch as well, which
    # skipped rows[0] and one further row for every batch read).
    count = 0
    s = time.time()
    try:
        while count < limit:
            events = win32evtlog.ReadEventLog(hand, read_flags, 0)
            if not events:
                break

            for ev_obj in events:
                if count >= limit:
                    # Budget used up: the outer `while` condition ends reading
                    # too, instead of draining the rest of the log.
                    break
                if ev_obj.EventID in (1, 3, 11):
                    handle_event(rows[count])
                    count += 1
        elapsed = time.time() - s
    finally:
        # Always release the event-log handle, even if scoring raised.
        win32evtlog.CloseEventLog(hand)

    return elapsed

In [None]:
# Run the event-log-driven scoring path; `k_time` is the elapsed time in
# seconds returned by monitor_sysmon_logs().
k_time = monitor_sysmon_logs()

In [None]:
# Baseline: score every fetched row directly in a plain Python loop and
# measure the elapsed wall-clock seconds as `u_time`.
s = time.time()
for row in rows:
    f = extract_features(row)
    # The result is assigned but not used afterwards; only the timing matters.
    p = prep_val(model, f)
u_time = time.time() - s

In [None]:
# Display both timings plus the percentage by which k_time is lower than
# u_time, i.e. (u_time - k_time) / u_time * 100, rounded to 3 decimals.
# NOTE(review): the two paths are not guaranteed to score the same number of
# rows (the event-log path stops when the log runs out) - confirm before
# reading the percentage as a speed-up.
k_time, u_time, round((u_time - k_time) * 100 / u_time, 3)

(3.2229907512664795, 17.734492540359497, 81.826)