# Graph Anomaly Detection


### Load data

In [2]:
# Import packages
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import networkx as nx
import pickle as pkl

In [3]:
# Load the training table (CSV) and the pickled training graph from the data folder
path = "dades_arnau/"
df = pd.read_csv(path + "train_set.csv")
# NOTE(review): unpickling can execute arbitrary code -- only load files from a trusted source
with open(path + "TrainSet_Grpah.pkl", 'rb') as graph_file:
    G = pkl.load(graph_file)


## Graph analysis

#### Working with timesteps

We will use the subgraph for time step 29 for our first analysis, since it contains a good number of both licit and illicit transactions.

In [9]:
# Restrict the graph to the nodes whose 'Time Step' attribute equals 29
# (nodes without that attribute yield None and are therefore left out)
desired_nodes = [
    node
    for node, time_step in G.nodes(data='Time Step')
    if time_step == 29
]
G_29 = G.subgraph(desired_nodes)

In [10]:
# Split the nodes of the time-step subgraph by their 'class' attribute
# (1 -> ilicit, 2 -> licit, 3 -> unknown, as the list names indicate)
ilicit_nodes, licit_nodes, unknown_nodes = [], [], []
for node, attrs in G_29.nodes(data=True):
    node_class = attrs['class']
    # Independent checks, one per class value
    if node_class == 1:
        ilicit_nodes.append(node)
    if node_class == 2:
        licit_nodes.append(node)
    if node_class == 3:
        unknown_nodes.append(node)

# Report how many nodes fall into each group
for label, group in (
    ("ilicit:", ilicit_nodes),
    ("licit:", licit_nodes),
    ("unknown:", unknown_nodes),
):
    print(label, len(group))

ilicit: 329
licit: 845
unknown: 3101


In [13]:
# Average degree over all nodes of the time-step subgraph
degrees = [G_29.degree(n) for n in G_29]
np.mean(degrees)

2.1244444444444444

In [52]:
# For every node whose class is not 3, store the node itself followed by its neighbours
neighbours = [
    [node, *G_29.neighbors(node)]
    for node, attrs in G_29.nodes(data=True)
    if attrs['class'] != 3
]

In [64]:
# Keep only the neighbourhoods in which every listed node has the same class
# as the first node of the list (the node the neighbourhood was built from).
node_data = G_29.nodes(data=True)
crossing = []

for elem in neighbours:
    centre_class = node_data[elem[0]]['class']
    # Every member is looked up (no short-circuit), matching a full scan of the list
    mismatches = [node_data[member]['class'] != centre_class for member in elem]
    if not any(mismatches):
        crossing.append(elem)


In [66]:
# Display the list of kept neighbourhoods (each entry: a node followed by its neighbours)
crossing

[[163815428.0, 166499589, 163815413],
 [163667978.0, 70015681, 163667986],
 [163667986.0, 163667978, 163667989],
 [163586066.0, 163585967, 163586069],
 [163586069.0, 163586066, 163586071],
 [163667989.0, 163667986, 166495212],
 [163586071.0, 163586072, 163586069],
 [163651610.0, 163651655, 163651605],
 [164118555.0, 163832331, 80352177, 163685462],
 [163586078.0, 98992940, 30022545],
 [163586089.0, 163694471],
 [163651655.0, 163651610, 163651932],
 [157319250.0, 23881442],
 [163586130.0, 135600096, 98992940],
 [21627002.0, 165850116, 165850173],
 [168116373.0, 167703048, 164757136],
 [168116391.0, 168116387, 168116419],
 [168116414.0, 168116407, 168135843],
 [166068433.0, 163655547, 163584758, 167124770],
 [164774193.0, 164097575, 166496373],
 [78348608.0, 163720373, 159939687],
 [163586393.0, 17763829, 163586398],
 [163651932.0, 163806297, 163651655],
 [163586398.0, 163586406, 163586393],
 [166093156.0, 165875286, 163670721, 164183369],
 [163586406.0, 163586417, 163586398],
 [16358641