## GNN

In [2]:
# Install the PyTorch Geometric companion packages from the wheel index built for
# torch 2.0.0 + CUDA 11.8 (the -f URL), then torch-geometric itself.
# NOTE(review): none of these installs pin a version, and the wheel index assumes the
# runtime's torch is 2.0.0+cu118 — confirm both before re-running on a fresh kernel.
!pip install torch-scatter -f https://data.pyg.org/whl/torch-2.0.0+cu118.html
!pip install torch-sparse -f https://data.pyg.org/whl/torch-2.0.0+cu118.html
!pip install torch-cluster -f https://data.pyg.org/whl/torch-2.0.0+cu118.html
!pip install torch-spline-conv -f https://data.pyg.org/whl/torch-2.0.0+cu118.html
!pip install torch-geometric

Looking in links: https://data.pyg.org/whl/torch-2.0.0+cu118.html
Collecting torch-scatter
  Downloading https://data.pyg.org/whl/torch-2.0.0%2Bcu118/torch_scatter-2.1.2%2Bpt20cu118-cp310-cp310-linux_x86_64.whl (10.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.2/10.2 MB[0m [31m22.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: torch-scatter
Successfully installed torch-scatter-2.1.2+pt20cu118
Looking in links: https://data.pyg.org/whl/torch-2.0.0+cu118.html
Collecting torch-sparse
  Downloading https://data.pyg.org/whl/torch-2.0.0%2Bcu118/torch_sparse-0.6.18%2Bpt20cu118-cp310-cp310-linux_x86_64.whl (4.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.9/4.9 MB[0m [31m36.8 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: torch-sparse
Successfully installed torch-sparse-0.6.18+pt20cu118
Looking in links: https://data.pyg.org/whl/torch-2.0.0+cu118.html
Collecting torch-cluster
  Downloading https://data.p

In [3]:
import pandas as pd
import torch
from torch_geometric.data import Data
from sklearn.preprocessing import LabelEncoder
import numpy as np



In [None]:
import torch
import torch.nn.functional as F
from torch_geometric.nn import GraphSAGE
from torch_geometric.loader import DataLoader
import pandas as pd
import numpy as np

In [None]:
# Load the case records; every missing cell is replaced by the string 'Unknown'.
df = pd.read_csv('your_file.csv').fillna('Unknown')

# Settlement amounts: anything non-numeric (including the 'Unknown' placeholder) is
# coerced to NaN and then set to 0, so those rows take part in the mean as zeros.
df['settlement_amount'] = pd.to_numeric(df['settlement_amount'], errors='coerce').fillna(0)
average_settlement_amount = df['settlement_amount'].mean()

# Dates are parsed as day-month-year; values that do not match become NaT.
for date_column in ('filing_date', 'decision_date'):
    df[date_column] = pd.to_datetime(df[date_column], errors='coerce', format='%d-%m-%Y')

# Days between filing and decision (NaN when either date is NaT; mean() skips NaN).
df['settlement_duration'] = (df['decision_date'] - df['filing_date']).dt.days
average_settlement_duration = df['settlement_duration'].mean()

# Number of cases per petitioner.
claimant_frequency = df['Petitioner'].value_counts()

# Despite its name, this is the mean settlement amount per petitioner-side advocate.
lawyer_frequency = df.groupby('Advocate_Petitioner')['settlement_amount'].mean()

# Number of cases per location, most frequent first.
geographical_hotspot = df['location'].value_counts()

In [None]:
# Each flag below is a 0/1 integer column computed column-wise instead of row by row.

# 1 when the petitioner appears in more than five cases.
petitioner_case_counts = df['Petitioner'].map(claimant_frequency)
df['frequent_claimant'] = (petitioner_case_counts > 5).astype('int64')

# 1 when the petitioner's advocate has an above-average mean settlement
# (advocates missing from the lookup are treated as 0).
advocate_mean_settlement = df['Advocate_Petitioner'].map(lawyer_frequency).fillna(0)
df['frequent_lawyer'] = (advocate_mean_settlement > average_settlement_amount).astype('int64')

# 1 when the case location is among the ten most frequent locations.
top_locations = geographical_hotspot.head(10).index
df['geographical_hotspot'] = df['location'].isin(top_locations).astype('int64')

# 1 when the settlement amount is above the overall average.
df['high_settlement'] = (df['settlement_amount'] > average_settlement_amount).astype('int64')

# 1 when the case was decided faster than average; a missing duration compares
# False and therefore yields 0.
df['quick_duration'] = (df['settlement_duration'] < average_settlement_duration).astype('int64')

In [None]:
import torch
from torch_geometric.data import Data

# One graph node per CSV row (case); these binary indicators are the node features.
FEATURE_COLUMNS = ['frequent_claimant', 'frequent_lawyer', 'geographical_hotspot',
                   'high_settlement', 'quick_duration']
# Columns whose shared values relate two cases to each other.
ENTITY_COLUMNS = ['Petitioner', 'Advocate_Petitioner', 'Advocate_Respondent', 'location']


def build_shared_entity_edges(frame, entity_columns, placeholder='Unknown'):
    """Build a ``[2, E]`` ``edge_index`` linking rows of ``frame`` that share an entity.

    The previous version put raw entity strings (petitioner / advocate / location
    names) into the edge list, which ``torch.tensor(..., dtype=torch.long)`` cannot
    convert, and those entities were not graph nodes anyway: ``x`` and ``y`` hold one
    row per case. Here node ids are row positions, so ``edge_index`` always agrees
    with ``x``.

    Modelling choice: within each group of cases sharing a value, consecutive cases
    are chained rather than fully connected. This keeps the group connected while the
    edge count stays linear in the number of cases.

    Args:
        frame: DataFrame with one row per case.
        entity_columns: Column names whose equal values connect two cases.
        placeholder: Value standing in for missing data; it is not a real shared
            entity, so it never creates edges.

    Returns:
        ``torch.long`` tensor of shape ``[2, E]`` containing each link in both
        directions (``E`` may be 0).
    """
    links = set()
    for column in entity_columns:
        # GroupBy.indices maps each distinct value to the positional row numbers holding it.
        for value, positions in frame.groupby(column).indices.items():
            if value == placeholder or len(positions) < 2:
                continue
            links.update(zip(positions[:-1].tolist(), positions[1:].tolist()))

    # Store both directions so message passing treats every link as undirected.
    undirected = sorted(links | {(dst, src) for src, dst in links})
    if not undirected:
        return torch.empty((2, 0), dtype=torch.long)
    return torch.tensor(undirected, dtype=torch.long).t().contiguous()


node_features = torch.tensor(df[FEATURE_COLUMNS].values, dtype=torch.float)
edge_index = build_shared_entity_edges(df, ENTITY_COLUMNS)

# Heuristic label: a case is flagged when any of these four indicators is set.
# NOTE(review): the label is a deterministic function of four of the five input
# features, so a high accuracy only shows the model can reproduce this rule.
df['fraud_label'] = (
    df[['frequent_claimant', 'frequent_lawyer', 'high_settlement', 'quick_duration']]
    .any(axis=1)
    .astype(int)
)
labels = torch.tensor(df['fraud_label'].values, dtype=torch.long)

data = Data(x=node_features, edge_index=edge_index, y=labels)

In [None]:
import torch
import torch.nn.functional as F
from torch_geometric.nn import GraphSAGE

class FraudGraphSAGE(torch.nn.Module):
    """Two stacked GraphSAGE stages followed by a linear two-class head.

    Args:
        in_channels: Number of input features per node.
        hidden_channels: Width of the first GraphSAGE stage.
        out_channels: Width of the second GraphSAGE stage (input to the classifier).
    """

    def __init__(self, in_channels, hidden_channels, out_channels):
        super().__init__()
        # torch_geometric.nn.GraphSAGE requires ``num_layers``; omitting it raises
        # TypeError. Each stage is a single SAGE layer, and the ReLU between the two
        # stages is applied explicitly in forward().
        self.sage1 = GraphSAGE(in_channels, hidden_channels, num_layers=1)
        self.sage2 = GraphSAGE(hidden_channels, out_channels, num_layers=1)
        self.classifier = torch.nn.Linear(out_channels, 2)

    def forward(self, data):
        """Return per-node log-probabilities over the two classes.

        Args:
            data: Object exposing ``x`` (node features) and ``edge_index``.

        Returns:
            Tensor with one row per node and two columns, normalised with
            ``log_softmax`` along ``dim=1``.
        """
        x, edge_index = data.x, data.edge_index
        x = self.sage1(x, edge_index)
        x = F.relu(x)
        x = self.sage2(x, edge_index)
        out = self.classifier(x)
        return F.log_softmax(out, dim=1)

# Seed the RNG so weight initialisation, and therefore the training curve, is repeatable.
torch.manual_seed(42)

model = FraudGraphSAGE(in_channels=node_features.shape[1], hidden_channels=16, out_channels=16)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
# FraudGraphSAGE.forward already returns log-probabilities (log_softmax), so the
# matching loss is NLLLoss; CrossEntropyLoss would apply log_softmax a second time.
criterion = torch.nn.NLLLoss()

def train(data):
    """Run one full-graph optimisation step and return the loss as a Python float.

    Uses the notebook-level ``model``, ``optimizer`` and ``criterion``.
    """
    model.train()
    optimizer.zero_grad()
    predictions = model(data)
    step_loss = criterion(predictions, data.y)
    step_loss.backward()
    optimizer.step()
    return step_loss.item()

def test(data):
    """Return the fraction of nodes whose predicted class equals ``data.y``.

    Uses the notebook-level ``model`` in eval mode with gradients disabled.
    """
    model.eval()
    with torch.no_grad():
        predicted_classes = model(data).argmax(dim=1)
    num_correct = int((predicted_classes == data.y).sum())
    num_nodes = int(data.y.size(0))
    return num_correct / num_nodes

# Train for 200 full-graph steps, reporting every 10th epoch.
# Accuracy is measured on the same `data` object that is used for training.
for epoch in range(200):
    loss = train(data)
    if epoch % 10 != 0:
        continue
    acc = test(data)
    print(f'Epoch {epoch}, Loss: {loss:.4f}, Accuracy: {acc:.4f}')

In [None]:
import networkx as nx
from torch_geometric.utils import to_networkx
import matplotlib.pyplot as plt

# Convert the PyG graph into an undirected NetworkX graph for drawing.
G = to_networkx(data, to_undirected=True)

plt.figure(figsize=(10, 10))
# NOTE(review): 'Contested_Label' is not created in the cells visible here;
# presumably it is a numeric CSV column with one value per row — confirm.
node_colors = df['Contested_Label'].tolist()
nx.draw(
    G,
    with_labels=True,
    node_size=700,
    node_color=node_colors,
    cmap=plt.get_cmap('coolwarm'),
)
plt.show()

## XAI

In [None]:
# Install SHAP, which the next cell imports.
# NOTE(review): the version is not pinned — confirm which release is expected.
!pip install shap

In [None]:
import shap

from torch_geometric.explain import Explainer, GNNExplainer


class _DataInputAdapter(torch.nn.Module):
    """Give a model whose forward takes a ``Data`` object an ``(x, edge_index)`` signature."""

    def __init__(self, wrapped):
        super().__init__()
        self.wrapped = wrapped

    def forward(self, x, edge_index):
        return self.wrapped(Data(x=x, edge_index=edge_index))


# shap.DeepExplainer(model, data.x) cannot work here: it calls model(<tensor>), but the
# model's forward reads `.x` and `.edge_index` from its argument, and DeepExplainer
# treats every row as an independent sample while the rows here are nodes coupled
# through edge_index. PyG's Explainer is used instead, since it perturbs features and
# edges with the graph structure in place.
feature_names = ['frequent_claimant', 'frequent_lawyer', 'geographical_hotspot',
                 'high_settlement', 'quick_duration']

model.eval()
explainer = Explainer(
    model=_DataInputAdapter(model),
    algorithm=GNNExplainer(epochs=200),
    explanation_type='model',          # explain the model's own predictions
    node_mask_type='attributes',       # one importance score per node and feature
    edge_mask_type='object',           # one importance score per edge
    model_config=dict(
        mode='multiclass_classification',
        task_level='node',
        return_type='log_probs',       # the model's forward ends in log_softmax
    ),
)
explanation = explainer(data.x, data.edge_index)

# Bar chart of feature importance summed over all nodes; labels are only passed when
# they match the feature dimension of data.x.
explanation.visualize_feature_importance(
    feature_labels=feature_names if data.x.size(1) == len(feature_names) else None
)