In [8]:
# Question 1: Load & Explore the Credit Card Fraud Detection Dataset

# Step 1: Load the dataset from a CSV (Assume you have a file named creditcard.csv ).
# Step 2: Split the data.
# Step 3: Train a Logistic Regression model.
# Step 4: Evaluate using ROC AUC score.
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score

# Step 1: Load the dataset
data = pd.read_csv('creditcard.csv')

# Explore the dataset
print("Dataset shape:", data.shape)
print("Columns:", data.columns)
print("Class distribution:\n", data['Class'].value_counts())

# Step 2: Split the data into features and target
X = data.drop('Class', axis=1)
y = data['Class']

# Train-test split (using stratify to handle imbalance)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

# Step 3: Train Logistic Regression model
model = LogisticRegression(max_iter=1000, random_state=42)
model.fit(X_train, y_train)

# Step 4: Evaluate using ROC AUC score
y_pred_proba = model.predict_proba(X_test)[:, 1]
roc_auc = roc_auc_score(y_test, y_pred_proba)

print(f"\nROC AUC score: {roc_auc:.4f}")




Dataset shape: (10, 13)
Columns: Index(['Time', 'V1', 'V2', 'V3', 'V4', 'V5', 'V6', 'V7', 'V8', 'V9', 'V10',
       'Amount', 'Class'],
      dtype='object')
Class distribution:
 Class
0    8
1    2
Name: count, dtype: int64

ROC AUC score: 0.5000
