<a href="https://colab.research.google.com/github/BayramovaNazrin/anomaly_detection/blob/main/notebook/full_project_nb1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# --- 1. Setup ---
# --- Mount Google Drive  ---
from google.colab import drive
drive.mount('/content/drive')

# --- Clone the repository ---
!git clone https://github.com/BayramovaNazrin/anomaly_detection.git
%cd anomaly_detection

# --- Install dependencies ---
!pip install -r requirements.txt

# --- 2. Imports  ---
import sys

# Put the cloned repository on the module search path so the `from src...` imports
# resolve. Guarded so that re-running the cell does not keep appending duplicate
# entries to sys.path.
if '/content/anomaly_detection' not in sys.path:
    sys.path.append('/content/anomaly_detection')

from src.utils.helpers import setup_directories, set_seed
from src.data_loader import load_and_explore_data
from src.models.classical import train_random_forest, train_svm
from src.models.graph_models import train_node2vec_rf, train_graphsage
from src.visualization.eda import run_eda
from src.visualization.evaluation import plot_model_comparison, plot_bar_comparison

# --- 3. Project initialisation ---
# Run the project's directory setup helper, then seed with 42.
# NOTE(review): what setup_directories() creates and which RNGs set_seed() covers
# is defined in src.utils.helpers, not visible here — confirm before relying on it.
setup_directories()
set_seed(42)

# --- 4. Dataset loading ---
# The three CSV inputs live on the mounted Drive; the same paths are reused later
# by the graph-based training calls.
features_path, edges_path, classes_path = (
    "/content/drive/MyDrive/anomaly/elliptic_txs_features.csv",
    "/content/drive/MyDrive/anomaly/elliptic_txs_edgelist.csv",
    "/content/drive/MyDrive/anomaly/elliptic_txs_classes.csv",
)

# The loader is called with keyword arguments and hands back four objects.
dataset_paths = {
    "features_path": features_path,
    "edges_path": edges_path,
    "classes_path": classes_path,
}
features, edges, classes, merged_df = load_and_explore_data(**dataset_paths)

# --- 5. Exploratory data analysis ---
# EDA runs on the merged table only.
run_eda(merged_df)

# --- 6. Train models ---
# NOTE(review): every training call's return value is discarded here; if the trainers
# return fitted models or metrics, later sections cannot see them — confirm.

# Classical models: each takes the merged table as its only argument.
for fit_classical in (train_random_forest, train_svm):
    fit_classical(merged_df)

# Graph-based models: each takes the three CSV paths, in this order.
csv_paths = (features_path, edges_path, classes_path)
for fit_graph_model in (train_node2vec_rf, train_graphsage):
    fit_graph_model(*csv_paths)

# --- 7. Compare results visually ---
import pandas as pd
from src.visualization.evaluation import plot_model_comparison, plot_bar_comparison

# NOTE(review): the metrics below are literal values typed into the notebook; nothing
# here reads them from the training calls in section 6 — confirm they are current.
metric_columns = ["Model", "Accuracy", "Precision (Illicit)", "Recall (Illicit)", "F1 (Illicit)"]
metric_rows = [
    ("RandomForest", 0.988, 0.95, 0.92, 0.94),
    ("SVM", 0.983, 0.93, 0.89, 0.91),
    ("Node2Vec+RF", 0.979, 0.91, 0.88, 0.89),
    ("GraphSAGE", 0.981, 0.92, 0.90, 0.91),
]

# One dict per model, keyed by column name, in the column order listed above.
results = [dict(zip(metric_columns, row)) for row in metric_rows]
df_results = pd.DataFrame(results)

# Both plotting helpers receive the same comparison table.
for draw_comparison in (plot_model_comparison, plot_bar_comparison):
    draw_comparison(df_results)

# --- 8. Feature importance ---
from src.models.feature_importance import (
    permutation_importance, plot_feature_importances,
    create_top_feature_graph, retrain_with_top_features
)

# `model`, `graph_data` and `test` are not assigned anywhere in the visible notebook
# code (the training calls in section 6 discard their return values), so the call
# below would stop with a bare NameError. Check up front and report exactly which
# inputs are missing; the exception type is unchanged.
# NOTE(review): presumably these should be captured from the GraphSAGE training
# step — confirm what train_graphsage returns and assign it there.
_missing_inputs = [
    name for name in ("model", "graph_data", "test") if name not in globals()
]
if _missing_inputs:
    raise NameError(
        "Feature importance cannot run: "
        + ", ".join(_missing_inputs)
        + " must be defined first (trained model, its graph data object and the "
        "test split); none of them is assigned earlier in this notebook."
    )

# Permutation importance scored with the 'f1_score' metric, computed on CPU.
importances = permutation_importance(model, graph_data, test, metric='f1_score', device='cpu')

# NOTE(review): assumes features.columns lines up one-to-one with the returned
# importance scores — TODO confirm against the feature matrix the model was trained on.
imp_df = plot_feature_importances(importances, features.columns)