# In [None]:
import streamlit as st
import pandas as pd
import numpy as np
import xgboost as xgb
import pickle
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, roc_curve, precision_recall_curve, auc
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from torch.utils.data import DataLoader, TensorDataset
import torch
import torch.nn as nn
import torch.optim as optim

# Pick the compute device: Apple's Metal (MPS) backend when PyTorch reports it
# as available, otherwise fall back to the CPU.
if torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

# Display label -> checkpoint path for each pre-trained model. The keys are the
# labels offered in the model-selection dropdowns. Every checkpoint is loaded
# into an LSTMWithAttention network by load_model, so the labels say "LSTM"
# (they previously said "XGBoost", which mislabelled the models in the UI).
# NOTE(review): the paths are absolute and machine-specific, and "lst_pgd.pth"
# breaks the "lstm*" naming of the other two files - confirm it is not a typo.
model_paths = {
    "Normal LSTM": "/Users/aryankargwal/adverscredit/models/weights/lstm.pth",
    "LSTM with FGSA Training": "/Users/aryankargwal/adverscredit/models/weights/lstm_fgsa.pth",
    "LSTM with PGD Training": "/Users/aryankargwal/adverscredit/models/weights/lst_pgd.pth",
}

class Attention(nn.Module):
    """Attention pooling over the time axis of a sequence of hidden states.

    Each time step is scored as ``v . tanh(W h_t)``; the scores are softmaxed
    across the sequence dimension and used to form a weighted sum of the
    hidden states.
    """

    def __init__(self, hidden_dim):
        super().__init__()
        # The attribute names "W" and "v" become state-dict keys, so they must
        # stay as they are for existing checkpoints to load.
        self.W = nn.Linear(hidden_dim, hidden_dim)
        self.v = nn.Parameter(torch.randn(hidden_dim))

    def forward(self, hidden_states):
        """Reduce ``(batch, seq_len, hidden_dim)`` states to ``(batch, hidden_dim)``."""
        scores = torch.tanh(self.W(hidden_states))
        # (batch, seq, hidden) @ (1, hidden, 1) -> (batch, seq, 1) -> (batch, seq)
        scores = torch.matmul(scores, self.v.unsqueeze(0).unsqueeze(-1)).squeeze(-1)
        attn_weights = torch.softmax(scores, dim=1)
        # (batch, 1, seq) @ (batch, seq, hidden) -> (batch, 1, hidden) -> (batch, hidden)
        context = torch.bmm(attn_weights.unsqueeze(1), hidden_states).squeeze(1)
        return context


class LSTMWithAttention(nn.Module):
    """LSTM encoder followed by attention pooling and a linear output layer."""

    def __init__(self, input_dim, hidden_dim, output_dim, num_layers=1):
        super().__init__()
        # batch_first=True: inputs are laid out as (batch, seq_len, input_dim).
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True)
        self.attention = Attention(hidden_dim)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Return the raw (pre-sigmoid) outputs, shape ``(batch, output_dim)``."""
        lstm_out, _ = self.lstm(x)
        attn_out = self.attention(lstm_out)
        out = self.fc(attn_out)
        return out


# Function to load a model from a PyTorch state-dict (.pth) file
def load_model(model_path, input_dim=None, hidden_dim=None, output_dim=None, num_layers=1):
    """Build an ``LSTMWithAttention`` and load trained weights into it.

    Args:
        model_path: Path to a file that ``torch.load`` turns into a state dict
            for ``LSTMWithAttention``.
        input_dim: Number of input features; read from the checkpoint if None.
        hidden_dim: LSTM hidden size; read from the checkpoint if None.
        output_dim: Size of the output layer; read from the checkpoint if None.
        num_layers: Number of stacked LSTM layers in the checkpoint.

    Returns:
        The model in eval mode, moved to the module-level ``device``.

    Raises:
        KeyError: If a size has to be inferred and the checkpoint lacks the
            corresponding tensor.
    """
    # map_location lets a checkpoint saved on another accelerator load here.
    # NOTE(review): torch.load unpickles the file - only load trusted checkpoints.
    state_dict = torch.load(model_path, map_location=device)

    # The stored tensor shapes fully determine the layer sizes:
    #   lstm.weight_ih_l0 is (4 * hidden_dim, input_dim)
    #   lstm.weight_hh_l0 is (4 * hidden_dim, hidden_dim)
    #   fc.weight         is (output_dim, hidden_dim)
    if input_dim is None:
        input_dim = state_dict["lstm.weight_ih_l0"].shape[1]
    if hidden_dim is None:
        hidden_dim = state_dict["lstm.weight_hh_l0"].shape[1]
    if output_dim is None:
        output_dim = state_dict["fc.weight"].shape[0]

    model = LSTMWithAttention(input_dim, hidden_dim, output_dim, num_layers)
    model.load_state_dict(state_dict)
    # Keep the weights on the same device the input tensors are sent to.
    model.to(device)
    model.eval()
    return model


# Preload the models. This has to run after the class definitions above,
# because load_model instantiates LSTMWithAttention when it is called.
models = {name: load_model(path) for name, path in model_paths.items()}

def evaluate_model(model, test_loader):
    """Evaluate a binary classifier and print/plot its test metrics.

    Prints the confusion matrix and classification report (predictions are the
    sigmoid of the model output thresholded at 0.5) and shows a ROC curve
    computed from the predicted probabilities.

    Args:
        model: ``nn.Module`` called as ``model(inputs)``.
            NOTE(review): assumes one output value per sample - confirm.
        test_loader: Iterable yielding ``(inputs, labels)`` batches.

    Returns:
        Tuple ``(labels, preds, probs)`` of 1-D NumPy arrays.

    Raises:
        ValueError: If ``test_loader`` yields no batches.
    """
    model.eval()

    # Run each batch on the device that holds the weights, so tensors created
    # on a different device cannot cause a device-mismatch error.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else None

    label_chunks = []
    prob_chunks = []
    with torch.no_grad():
        for inputs, labels in test_loader:
            if model_device is not None:
                inputs = inputs.to(model_device)
            outputs = model(inputs)
            # reshape(-1) stays 1-D for a batch of one, where squeeze() would
            # yield a 0-d tensor that cannot be accumulated.
            prob_chunks.append(torch.sigmoid(outputs.reshape(-1)).cpu().numpy())
            label_chunks.append(labels.reshape(-1).cpu().numpy())

    if not prob_chunks:
        raise ValueError("test_loader yielded no batches to evaluate")

    all_labels = np.concatenate(label_chunks)
    all_probs = np.concatenate(prob_chunks)
    all_preds = (all_probs > 0.5).astype(np.float32)

    # Compute metrics
    conf_matrix = confusion_matrix(all_labels, all_preds)
    class_report = classification_report(all_labels, all_preds)

    # ROC Curve: use the continuous scores. Hard 0/1 predictions collapse the
    # curve to a single operating point and misreport the area under it.
    fpr, tpr, _ = roc_curve(all_labels, all_probs)
    roc_auc = auc(fpr, tpr)

    # Print confusion matrix and classification report
    print("Confusion Matrix:")
    print(conf_matrix)
    print("\nClassification Report:")
    print(class_report)

    # Plot ROC Curve
    plt.figure()
    plt.plot(fpr, tpr, color='darkorange', lw=2, label='ROC curve (area = %0.2f)' % roc_auc)
    plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver Operating Characteristic (ROC)')
    plt.legend(loc='lower right')
    plt.show()

    return all_labels, all_preds, all_probs

# Function to display performance metrics
def display_metrics(y_true, y_pred, y_prob, model_name):
    """Render a binary classifier's evaluation metrics in the Streamlit page.

    Writes, in order: a confusion matrix, the classification report, the ROC
    AUC score, a ROC curve and a precision-recall curve.

    Args:
        y_true: Ground-truth labels (0 = negative, 1 = positive).
        y_pred: Predicted labels, same length as ``y_true``.
        y_prob: Scores for the positive class, same length as ``y_true``.
        model_name: Label used in the section heading.
    """
    st.write(f"### {model_name} Performance Metrics")

    st.write("#### Confusion Matrix")
    # Pin the label order so the matrix is always 2x2. Without it, data or
    # predictions containing a single class produce a 1x1 matrix that does not
    # fit the fixed row/column names below.
    cm = confusion_matrix(y_true, y_pred, labels=[0, 1])
    st.write(pd.DataFrame(cm, index=['Actual Neg', 'Actual Pos'], columns=['Predicted Neg', 'Predicted Pos']))

    st.write("#### Classification Report")
    report = classification_report(y_true, y_pred, output_dict=True)
    st.write(pd.DataFrame(report).transpose())

    st.write("#### ROC AUC Score")
    if np.unique(y_true).size < 2:
        # roc_auc_score raises ValueError when only one class is present.
        st.write("ROC AUC Score: undefined (only one class present in the data)")
    else:
        roc_auc = roc_auc_score(y_true, y_prob)
        st.write(f"ROC AUC Score: {roc_auc:.4f}")

    st.write("#### ROC Curve")
    fpr, tpr, _ = roc_curve(y_true, y_prob)
    # Hand Streamlit an explicit Figure and close it afterwards, so figures do
    # not pile up in pyplot's global state on every script rerun.
    roc_fig, roc_ax = plt.subplots()
    roc_ax.plot(fpr, tpr, marker='.')
    roc_ax.set_xlabel('False Positive Rate')
    roc_ax.set_ylabel('True Positive Rate')
    roc_ax.set_title('ROC Curve')
    st.pyplot(roc_fig)
    plt.close(roc_fig)

    st.write("#### Precision-Recall Curve")
    precision, recall, _ = precision_recall_curve(y_true, y_prob)
    pr_fig, pr_ax = plt.subplots()
    pr_ax.plot(recall, precision, marker='.')
    pr_ax.set_xlabel('Recall')
    pr_ax.set_ylabel('Precision')
    pr_ax.set_title('Precision-Recall Curve')
    st.pyplot(pr_fig)
    plt.close(pr_fig)

def _predict_proba(model, loader):
    """Return the model's sigmoid scores for every sample in ``loader``.

    NOTE(review): assumes the model emits one output value per sample - confirm.
    """
    model.eval()
    # Feed each batch on the device that holds the weights, wherever the
    # loader's tensors were created.
    first_param = next(model.parameters(), None)
    chunks = []
    with torch.no_grad():
        for inputs, _ in loader:
            if first_param is not None:
                inputs = inputs.to(first_param.device)
            chunks.append(torch.sigmoid(model(inputs).reshape(-1)).cpu())
    return torch.cat(chunks).numpy()


# Streamlit application
st.title("Credit Card Fraud Detection")

st.write("""
## Upload Dataset
Upload your dataset as a CSV file. The dataset should include all necessary features and the target column.
""")

uploaded_file = st.file_uploader("Choose a CSV file", type="csv")

if uploaded_file:
    data = pd.read_csv(uploaded_file)
    st.write("### Dataset Preview")
    st.write(data.head())

    # Assuming the target column is known
    target_col = 'Class'
    if target_col not in data.columns:
        st.error(f"The uploaded file has no '{target_col}' column.")
        st.stop()
    if data.empty:
        st.error("The uploaded file contains no rows.")
        st.stop()

    feature_cols = [col for col in data.columns if col != target_col]

    X = data[feature_cols].values
    y = data[target_col].values

    # Normalize features. A StandardScaler must be fitted before it can
    # transform, so fit it on the uploaded data.
    # NOTE(review): reusing the scaler fitted on the training set would match
    # the models' training-time preprocessing more closely - confirm.
    scaler = StandardScaler()
    x_test = scaler.fit_transform(X)

    # Reshape for LSTM: (num_samples, seq_length, num_features)
    x_test = x_test.reshape((x_test.shape[0], 1, x_test.shape[1]))

    # Convert numpy arrays to PyTorch tensors
    x_test_tensor = torch.tensor(x_test, dtype=torch.float32).to(device)
    y_test_tensor = torch.tensor(y, dtype=torch.float32).to(device)

    # Wrap the dataset in a DataLoader so iteration yields batches; iterating
    # a bare TensorDataset yields single, unbatched samples.
    test_loader = DataLoader(
        TensorDataset(x_test_tensor, y_test_tensor),
        batch_size=256,
        shuffle=False,  # keep row order aligned with y
    )

    st.write("### Model Comparison")

    model_options = list(models.keys())

    selected_model_1 = st.selectbox("Select Model 1", model_options, index=0)
    selected_model_2 = st.selectbox("Select Model 2", model_options, index=1)

    if selected_model_1 and selected_model_2:
        col1, col2 = st.columns(2)

        # The models are torch modules (no predict/predict_proba), so score
        # them through a forward pass and show the results side by side.
        for column, model_name in ((col1, selected_model_1), (col2, selected_model_2)):
            y_prob = _predict_proba(models[model_name], test_loader)
            y_pred = (y_prob > 0.5).astype(int)
            with column:
                display_metrics(y, y_pred, y_prob, model_name)