<a href="https://colab.research.google.com/github/Steven256-debug/Credit-card-fraud-detection/blob/app-steven/credit_card_fraud_detection_system.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Install and import the required libraries


In [None]:
# ===============================
# Credit Card Fraud Detection System
# Starter Notebook for Team Collaboration
# Team: Steven Tesla, Aaron, Kenzie, Kelvin, Fada Dem
# ===============================

# -------------------------------
# 1Ô∏è‚É£ Install Required Libraries
# -------------------------------
!pip install pandas numpy scikit-learn matplotlib seaborn imbalanced-learn xgboost lightgbm joblib kaggle

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
from imblearn.over_sampling import SMOTE
from xgboost import XGBClassifier
import joblib
import os


Kaggle API Setup and Dataset Download

In [None]:
# -------------------------------
# 2Ô∏è‚É£ Kaggle API Setup
# -------------------------------
# Step 1: Upload your kaggle.json API token
from google.colab import files
files.upload()  # Upload kaggle.json here

# Step 2: Move kaggle.json to correct location
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

# Step 3: Download and unzip dataset
!kaggle datasets download -d mlg-ulb/creditcardfraud
!unzip -o creditcardfraud.zip -d data


Load the Dataset Downloaded from Kaggle

In [None]:
# -------------------------------
# 3️⃣ Load Dataset
# -------------------------------
# Read the CSV from the data/ folder into the working frame, report its
# dimensions, and preview the first rows as the cell output.
csv_path = 'data/creditcard.csv'
df = pd.read_csv(csv_path)
print("Dataset shape:", df.shape)
df.head()


Exploratory Data Analysis (EDA)

In [None]:
# -------------------------------
# 4️⃣ Exploratory Data Analysis (EDA)
# -------------------------------
# df.info() writes its report to stdout itself and returns None, so it is
# called directly; wrapping it in print() would append a stray "None" line.
df.info()
print(df.describe())

# Check class distribution
# The raw counts are printed too: if the classes are heavily imbalanced
# (SMOTE is applied later in the notebook), the smaller bar is hard to read.
print(df['Class'].value_counts())

fig, ax = plt.subplots()
sns.countplot(x='Class', data=df, ax=ax)
ax.set_title('Fraud vs Non-Fraud Transactions')
plt.show()


Preprocessing and Feature Engineering

In [None]:
# -------------------------------
# 5️⃣ Preprocessing
# -------------------------------
# Features are built on new frames instead of overwriting `df`, so `df` keeps
# its raw columns and this cell can be re-run without a KeyError on columns
# that a previous run already dropped.
hour_of_day = (df['Time'] // 3600) % 24
raw_features = df.drop(columns=['Class', 'Time']).assign(hour=hour_of_day)

# Split features and labels
y = df['Class']

# Train-test split
# Done BEFORE scaling so that test rows never influence the scaler statistics.
raw_train, raw_test, y_train, y_test = train_test_split(
    raw_features, y, test_size=0.3, stratify=y, random_state=42
)

# Feature scaling: fit on the training rows only, then apply to both splits
scaler = StandardScaler()
scaler.fit(raw_train['Amount'].values.reshape(-1, 1))


def _with_norm_amount(frame):
    """Return a copy of `frame` with raw `Amount` replaced by scaled `norm_amount`.

    Uses the module-level `scaler`, which must already be fitted.
    The resulting column order is: every column other than Amount/hour in
    its existing order, then `norm_amount`, then `hour`.
    """
    scaled_amount = scaler.transform(frame['Amount'].values.reshape(-1, 1)).ravel()
    base_columns = [col for col in frame.columns if col not in ('Amount', 'hour')]
    return frame[base_columns].assign(norm_amount=scaled_amount, hour=frame['hour'])


X_train = _with_norm_amount(raw_train)
X_test = _with_norm_amount(raw_test)

# Full feature matrix in the original row order, kept for any code that uses `X`
X = pd.concat([X_train, X_test]).loc[df.index]

# Handle class imbalance with SMOTE (applied to the training split only)
smote = SMOTE(random_state=42)
X_res, y_res = smote.fit_resample(X_train, y_train)
print("Resampled dataset shape:", X_res.shape)


Model Training

In [None]:
# -------------------------------
# 6️⃣ Model Training
# -------------------------------
# Every stochastic estimator gets a fixed random_state so a fresh
# "Restart & Run All" reproduces the same metrics.
# The deprecated XGBoost argument `use_label_encoder` is no longer passed.
models = {
    "Logistic Regression": LogisticRegression(max_iter=1000),
    "Random Forest": RandomForestClassifier(
        n_estimators=200, class_weight='balanced', random_state=42, n_jobs=-1
    ),
    "XGBoost": XGBClassifier(eval_metric='logloss', random_state=42),
}

# Fitted estimators keyed by display name; later cells read this dict
trained_models = {}

for name, model in models.items():
    print(f"\nTraining {name}...")
    # Fit on the SMOTE-resampled training data, evaluate on the untouched test split
    model.fit(X_res, y_res)
    y_pred = model.predict(X_test)
    # ROC-AUC is a ranking metric: it needs the positive-class score,
    # not the thresholded 0/1 predictions
    y_score = model.predict_proba(X_test)[:, 1]
    print(f"{name} Classification Report:\n")
    print(classification_report(y_test, y_pred))
    print("ROC-AUC:", roc_auc_score(y_test, y_score))
    trained_models[name] = model


Confusion Matrix Visualization

In [None]:
# -------------------------------
# 7️⃣ Confusion Matrix Visualization
# -------------------------------
# sklearn.metrics.plot_confusion_matrix was removed in scikit-learn 1.2;
# ConfusionMatrixDisplay.from_estimator is its replacement.
from sklearn.metrics import ConfusionMatrixDisplay

# One figure per fitted model, evaluated on the held-out test split
for name, model in trained_models.items():
    display_obj = ConfusionMatrixDisplay.from_estimator(model, X_test, y_test, cmap='Blues')
    display_obj.ax_.set_title(f'{name} Confusion Matrix')
    plt.show()


Save Best Model

In [None]:
# -------------------------------
# 8️⃣ Save Best Model
# -------------------------------
# NOTE(review): the "best" model is selected by its hard-coded name, not
# derived from the metrics printed during training — confirm the choice.
best_model = trained_models['XGBoost']
os.makedirs('models', exist_ok=True)
joblib.dump(best_model, 'models/fraud_model.pkl')
print("Best model saved as models/fraud_model.pkl")

# Also persist the fitted Amount scaler: the model consumes `norm_amount`,
# so inference code needs the same scaler to convert a raw amount.
joblib.dump(scaler, 'models/amount_scaler.pkl')
print("Amount scaler saved as models/amount_scaler.pkl")


Deployment on Streamlit


In [None]:
from pathlib import Path

import joblib
import numpy as np
import pandas as pd
import streamlit as st

# Column order of the training features: V1..V28, then norm_amount, then hour
FEATURE_NAMES = [f"V{i}" for i in range(1, 29)] + ["norm_amount", "hour"]

# The training notebook saves to models/fraud_model.pkl relative to its own
# working directory, while this app originally looked one level up.
# Both locations are tried so the app works from either directory.
MODEL_CANDIDATES = (Path("../models/fraud_model.pkl"), Path("models/fraud_model.pkl"))


@st.cache_resource
def load_model(path):
    """Load the pickled classifier stored at `path`.

    Cached with st.cache_resource so the file is not re-read each time
    Streamlit re-runs the script after a widget interaction.
    """
    # joblib.load unpickles the file: only point this at model files you trust
    return joblib.load(path)


model_path = next((p for p in MODEL_CANDIDATES if p.exists()), None)
if model_path is None:
    raise FileNotFoundError(
        "fraud_model.pkl not found; looked in: "
        + ", ".join(str(p) for p in MODEL_CANDIDATES)
    )

model = load_model(str(model_path))

st.title("💳 Credit Card Fraud Detection System")

# Collect the inputs in the same order as FEATURE_NAMES
inputs = [st.number_input(f"V{i}", value=0.0) for i in range(1, 29)]

norm_amount = st.number_input("Normalized Amount", value=0.0)
inputs.append(norm_amount)

hour = st.number_input("Hour (0-23)", min_value=0, max_value=23)
inputs.append(hour)

if st.button("Predict"):
    # A single-row frame with named float columns, matching how the model was fitted
    x = pd.DataFrame(
        np.asarray(inputs, dtype=float).reshape(1, -1), columns=FEATURE_NAMES
    )
    pred = model.predict(x)[0]
    # Column 1 of predict_proba is the probability of class 1 (fraud)
    prob = model.predict_proba(x)[0][1]

    if pred == 1:
        st.error(f"⚠️ Fraud Detected (Fraud probability: {prob:.3f})")
    else:
        st.success(f"✅ Legitimate (Fraud probability: {prob:.3f})")
