<a href="https://colab.research.google.com/github/Pranshu244/Government_Spending_Fraud_Detection/blob/main/Website_Code/Government_Fraud_Detection_App.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install streamlit

In [None]:
!pip install torch-geometric

In [None]:
%%writefile government_fraud_detection_app.py
# Imports
import streamlit as st
import pickle
import torch
import pandas as pd
import numpy as np
import torch.nn.functional as F
import torch.nn as nn
from torch_geometric.nn import GCNConv



# Page header: a centred title followed by a centred tagline, both rendered as
# raw HTML (hence unsafe_allow_html).
for header_html in (
    "<h1 style='text-align: center; font-weight: bold; font-style: italic;'>Government Spending Fraud Detection</h1>",
    "<p style='text-align: center; font-style: italic;'>AI-assisted risk screening for public transactions</p>",
):
    st.markdown(header_html, unsafe_allow_html=True)


class GNN_AE(nn.Module):
    """Two-layer GCN autoencoder that reconstructs its input node features.

    The layer names and sizes (``conv1``: in_channels -> 32, ``conv2``:
    32 -> in_channels) have to match the uploaded state dict, because
    ``load_state_dict`` is called with its default strict key matching.
    """

    def __init__(self, in_channels):
        super().__init__()
        self.conv1 = GCNConv(in_channels, 32)
        self.conv2 = GCNConv(32, in_channels)

    def forward(self, x, edge_index):
        """Return the reconstruction of ``x`` given the graph ``edge_index``."""
        x = F.relu(self.conv1(x, edge_index))
        return self.conv2(x, edge_index)


# Model artifacts: all four files are required before anything is loaded.
uploaded_stage1 = st.file_uploader("Upload stage1.pkl (Encoder + Scaler + Isolation Forest)", type="pkl")
uploaded_config = st.file_uploader("Upload config.pkl", type="pkl")
uploaded_gnn = st.file_uploader("Upload gnn_ae.pt", type="pt")
uploaded_edge = st.file_uploader("Upload edge_index.pt", type="pt")

model_uploads = (uploaded_stage1, uploaded_config, uploaded_gnn, uploaded_edge)
if all(upload is not None for upload in model_uploads):
    try:
        # SECURITY: pickle.load (and torch.load, which may unpickle as well)
        # can execute arbitrary code embedded in the file. Only upload
        # artifacts that you produced yourself or otherwise fully trust.
        # NOTE(review): consider torch.load(..., weights_only=True) if the
        # saved .pt files contain only tensors - confirm against the training
        # notebook before enabling.
        encoder, scaler, iso = pickle.load(uploaded_stage1)
        config = pickle.load(uploaded_config)

        # The autoencoder width is the number of feature columns in the config.
        gnn_model = GNN_AE(len(config["feature_cols"]))
        gnn_model.load_state_dict(torch.load(uploaded_gnn, map_location="cpu"))
        gnn_model.eval()
        edge_index = torch.load(uploaded_edge, map_location="cpu")
    except Exception as exc:
        # UI boundary: a wrong or corrupt artifact (bad pickle, unexpected
        # tuple length, missing "feature_cols", state-dict mismatch, ...)
        # should produce a readable message instead of aborting the script.
        st.error(f"Could not load the model files: {exc}")
    else:
        # Keep the loaded artifacts in session state so the detection step can
        # pick them up.
        st.session_state.update(
            {
                "encoder": encoder,
                "scaler": scaler,
                "iso": iso,
                "gnn": gnn_model,
                "edge_index": edge_index,
                "config": config,
            }
        )
        st.success("All models loaded successfully!")
else:
    st.info("Please upload all model files")


# Transaction data: parse the uploaded CSV into a DataFrame. `df` stays None
# until a file has been provided, which the detection step checks for.
df = None
uploaded_data = st.file_uploader("Upload transaction CSV file", type="csv")
if uploaded_data is not None:
    df = pd.read_csv(uploaded_data)
    st.success("Transaction data loaded!")


# Categorical columns that are passed through the uploaded encoder.
CAT_COLS = ["sector_type", "purpose", "payment_method"]
# Identifier / label columns removed (when present) before the models see the data.
NON_FEATURE_COLS = ["transaction_id", "transaction_date", "is_fraud", "fraud_type", "vendor_id", "bank_account_id"]
# Raw columns read directly by the rule-based risk indicators.
RULE_COLS = ["amount", "vendor_id", "bank_account_id"]


class _DetectionInputError(ValueError):
    """Raised when the uploaded CSV cannot be scored with the uploaded models."""


def _score_transactions(df: pd.DataFrame, encoder, scaler, iso, gnn, edge_index, config) -> pd.DataFrame:
    """Score every transaction and return a copy of ``df`` with result columns.

    Added columns: ``anomaly_score``, ``gnn_score``, ``stage2_flag``,
    ``r_high_amount``, ``r_repeat_vendor``, ``r_shared_account``,
    ``risk_score`` and ``final_fraud``. The input frame is not modified.

    Args:
        df: Transactions read from the uploaded CSV.
        encoder, scaler, iso: Fitted stage-1 objects from ``stage1.pkl``.
        gnn: The loaded graph autoencoder.
        edge_index: Graph connectivity loaded from ``edge_index.pt``.
        config: Mapping with ``feature_cols``, ``if_percentile``,
            ``gnn_percentile``, ``amount_quantile``, ``vendor_repeat`` and
            ``shared_account``.

    Raises:
        _DetectionInputError: If the CSV is empty, lacks required columns, or
            has fewer rows than the node ids referenced by ``edge_index``.
    """
    if df.empty:
        raise _DetectionInputError("The uploaded CSV contains no transactions.")

    missing = [col for col in CAT_COLS + RULE_COLS if col not in df.columns]
    if missing:
        raise _DetectionInputError("The uploaded CSV is missing required columns: " + ", ".join(missing))

    scored = df.copy()

    # --- Feature matrix: encode categoricals, drop ids/labels -------------
    encoded = encoder.transform(scored[CAT_COLS])
    encoded_df = pd.DataFrame(encoded, columns=encoder.get_feature_names_out(CAT_COLS), index=scored.index)
    features = pd.concat([scored.drop(CAT_COLS, axis=1), encoded_df], axis=1)
    features = features.drop(columns=[col for col in NON_FEATURE_COLS if col in features.columns])

    # --- Isolation-forest score -------------------------------------------
    # Negated so that larger values count as more suspicious; the score is
    # compared with `>` against its threshold below.
    X_scaled = scaler.transform(features.values)
    scored["anomaly_score"] = -iso.decision_function(X_scaled)

    # --- GNN reconstruction error -----------------------------------------
    feature_cols = config["feature_cols"]
    absent = [col for col in feature_cols if col not in features.columns]
    if absent:
        raise _DetectionInputError(
            "The uploaded CSV does not provide the model feature columns: " + ", ".join(map(str, absent))
        )
    x_tensor = torch.tensor(features[feature_cols].values, dtype=torch.float)

    # Each CSV row is one graph node, so every node id in edge_index must be a
    # valid row position; otherwise the GCN layers fail with an opaque
    # indexing error.
    if torch.is_tensor(edge_index) and edge_index.numel() > 0:
        max_node = int(edge_index.max())
        if max_node >= len(scored):
            raise _DetectionInputError(
                f"edge_index.pt refers to node {max_node}, but the CSV has only {len(scored)} rows. "
                "Upload the transactions the graph was built from."
            )

    with torch.no_grad():
        x_hat = gnn(x_tensor, edge_index)
        # Per-row mean squared reconstruction error.
        scored["gnn_score"] = ((x_tensor - x_hat) ** 2).mean(dim=1).numpy()

    # --- Stage 2: flag rows above either model's quantile threshold --------
    if_thr = scored["anomaly_score"].quantile(config["if_percentile"])
    gnn_thr = scored["gnn_score"].quantile(config["gnn_percentile"])
    scored["stage2_flag"] = ((scored["anomaly_score"] > if_thr) | (scored["gnn_score"] > gnn_thr)).astype(int)

    # --- Rule-based risk indicators ----------------------------------------
    amt_thr = scored["amount"].quantile(config["amount_quantile"])
    scored["r_high_amount"] = (scored["amount"] > amt_thr).astype(int)

    # Vendors that appear more often than the configured repeat limit.
    vendor_cnt = scored["vendor_id"].value_counts()
    frequent_vendors = vendor_cnt[vendor_cnt > config["vendor_repeat"]].index
    scored["r_repeat_vendor"] = scored["vendor_id"].isin(frequent_vendors).astype(int)

    # Bank accounts used by more distinct vendors than the configured limit.
    acct_cnt = scored.groupby("bank_account_id")["vendor_id"].nunique()
    shared_accounts = acct_cnt[acct_cnt > config["shared_account"]].index
    scored["r_shared_account"] = scored["bank_account_id"].isin(shared_accounts).astype(int)

    scored["risk_score"] = scored[["r_high_amount", "r_repeat_vendor", "r_shared_account"]].sum(axis=1)

    # Final decision: flagged by a model AND at least two rule indicators hit.
    scored["final_fraud"] = ((scored["stage2_flag"] == 1) & (scored["risk_score"] >= 2)).astype(int)
    return scored


if st.button("Run Fraud Detection"):
    # Artifacts are stored in session state by the model-upload step; the
    # keys match the parameter names of _score_transactions.
    artifact_keys = ("encoder", "scaler", "iso", "gnn", "edge_index", "config")
    artifacts = {key: st.session_state.get(key) for key in artifact_keys}

    if df is None or any(value is None for value in artifacts.values()):
        st.error("Please upload models and transaction data")
    else:
        try:
            results = _score_transactions(df, **artifacts)
        except _DetectionInputError as exc:
            st.error(str(exc))
        else:
            # Output
            st.subheader("🚨 High-Risk Transactions")
            st.dataframe(results[results["final_fraud"] == 1])
            st.metric("Total Transactions", len(results))
            st.metric("Flagged High-Risk Cases", int(results["final_fraud"].sum()))


# Styling: inject a CSS rule that sets the background colour of the `.stApp`
# container to #F1F8E9.
page_css = """
    <style>
    .stApp {
        background-color: #F1F8E9;
    }
    </style>
    """
st.markdown(page_css, unsafe_allow_html=True)


In [None]:
!pip install pyngrok

In [None]:
import os

from pyngrok import ngrok

# Prefer the NGROK_AUTH_TOKEN environment variable so the real token does not
# have to be saved inside the notebook; otherwise replace the placeholder.
# .strip() removes stray whitespace (the placeholder previously carried a
# trailing space that would have been sent as part of the token).
auth_token = os.environ.get("NGROK_AUTH_TOKEN", "YOUR_NGROK_AUTH_TOKEN").strip()
ngrok.set_auth_token(auth_token)

# Open a tunnel to port 8501, Streamlit's default port; the app is launched
# in the next cell without a port override.
public_url = ngrok.connect(8501)
print(public_url)

In [None]:
!streamlit run government_fraud_detection_app.py