In [2]:
# ---
# 📘 1_setup_and_configuration.ipynb
# Title: Setup & Initialization for Healthcare Fraud Detection (Google Colab)
# ---

# 🔧 INSTALL DEPENDENCIES
!pip install -q lightgbm xgboost imbalanced-learn shap optuna

# 🗂️ GOOGLE DRIVE
# Attach Google Drive to the Colab filesystem (only needed when the data is
# stored on Drive).
from google.colab import drive

drive.mount(mountpoint='/content/drive')

# 📚 IMPORT LIBRARIES
import os
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split, StratifiedKFold, GridSearchCV
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    roc_auc_score, confusion_matrix, classification_report
)

from imblearn.over_sampling import SMOTE
from imblearn.pipeline import Pipeline as ImbPipeline

import lightgbm as lgb
import xgboost as xgb
import shap

import warnings
warnings.filterwarnings("ignore")

# 🔁 REPRODUCIBILITY SETTINGS
# Single seed shared by every stochastic component configured in this cell.
RANDOM_STATE = 42

# PYTHONHASHSEED is consulted when an interpreter starts, so this assignment is
# picked up by processes launched from here on, not by the running kernel.
os.environ['PYTHONHASHSEED'] = f"{RANDOM_STATE}"

# Seed the global generators of the standard library and NumPy.
random.seed(RANDOM_STATE)
np.random.seed(RANDOM_STATE)

# 💻 DEVICE CONFIGURATION (Torch GPU Check)
import torch

# Use the CUDA device when PyTorch reports one as available; otherwise use CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"✅ Running on: {device}")

# ✅ ENVIRONMENT CHECK
import sklearn
import imblearn

# Print the version of each core modelling library, one per line, framed by a
# leading and a trailing blank line.
print(
    "",
    "📦 Installed Versions:",
    f"- scikit-learn: {sklearn.__version__}",
    f"- imbalanced-learn: {imblearn.__version__}",
    f"- lightgbm: {lgb.__version__}",
    f"- xgboost: {xgb.__version__}",
    "",
    sep="\n",
)

# 📝 EXPORT ENVIRONMENT CONFIG (optional)
# Snapshot the installed packages into requirements.txt in the current working
# directory. pip is invoked through `sys.executable` so the snapshot describes
# the interpreter running this notebook, and `check=True` raises
# CalledProcessError if pip fails, so the success message below is never
# printed for a failed export.
import subprocess
import sys

with open("requirements.txt", "w") as requirements_file:
    subprocess.run(
        [sys.executable, "-m", "pip", "freeze"],
        stdout=requirements_file,
        check=True,
    )
print("📄 requirements.txt generated.")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
✅ Running on: cpu

📦 Installed Versions:
- scikit-learn: 1.6.1
- imbalanced-learn: 0.13.0
- lightgbm: 4.5.0
- xgboost: 2.1.4

📄 requirements.txt generated.
