In [1]:
# Question 1: Load & Explore the Credit Card Fraud Detection Dataset

# Step 1: Load the dataset from a CSV file (assumed to be named creditcard.csv).
# Step 2: Split the data.
# Step 3: Train a Logistic Regression model.
# Step 4: Evaluate using ROC AUC score.
# Note (Step 1): if 'creditcard.csv' is not available, a small synthetic DataFrame is created instead for demonstration.

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score
import numpy as np

# Step 1: Load the dataset, falling back to synthetic data when the CSV is missing.
try:
    df = pd.read_csv('creditcard.csv')
except FileNotFoundError:
    # No CSV on disk: build a small stand-in frame. The seed is fixed so every
    # run yields the same values; the draws are made in column order
    # (V1, V2, Amount, Class) because that order determines the numbers drawn.
    np.random.seed(42)
    n_rows = 100
    synthetic_columns = {}
    synthetic_columns['V1'] = np.random.randn(n_rows)
    synthetic_columns['V2'] = np.random.randn(n_rows)
    synthetic_columns['Amount'] = np.random.uniform(1, 500, n_rows)
    # Labels are drawn with a 95% / 5% split between classes 0 and 1.
    synthetic_columns['Class'] = np.random.choice(
        [0, 1],
        size=n_rows,
        p=[0.95, 0.05],
    )
    df = pd.DataFrame(synthetic_columns)

# Preview the first rows of whichever frame was loaded.
print(df.head())

# Step 2: Separate features from the label and make a stratified 80/20 split.
target_column = 'Class'
X = df.drop(columns=target_column)
y = df[target_column]
# stratify=y is passed so the split is stratified on the label;
# random_state pins the shuffle so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Step 3: Fit a Logistic Regression classifier on the training portion.
# fit() returns the estimator itself, so construction and training chain.
model = LogisticRegression(max_iter=1000).fit(X_train, y_train)

# Step 4: Score the held-out set with ROC AUC.
# Column 1 of predict_proba is taken as the score for the positive class
# (assumes the labels are 0/1, as in the 'Class' column built above).
test_probabilities = model.predict_proba(X_test)
y_pred_proba = test_probabilities[:, 1]
roc_auc = roc_auc_score(y_test, y_pred_proba)
print("ROC AUC Score:", roc_auc)

         V1        V2      Amount  Class
0  0.496714 -1.415371  438.809163      0
1 -0.138264 -0.420645  370.643540      0
2  0.647689 -0.342715  348.810855      1
3  1.523030 -0.802277  351.539558      0
4 -0.234153 -0.161286  180.386084      0
ROC AUC Score: 0.368421052631579
