In [2]:
!pip install numpy pandas scikit-learn imbalanced-learn flask joblib





[notice] A new release of pip is available: 24.3.1 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load dataset
df = pd.read_csv("creditcard.csv")

# Features (X) and target (y)
X = df.drop("Class", axis=1)  # All columns except "Class"
y = df["Class"]  # 1 = Fraud, 0 = Legit

# Split data into training and testing sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize data (important for models like SVM)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


In [4]:
import sys
print(sys.version)


3.7.4 (tags/v3.7.4:e09359112e, Jul  8 2019, 20:34:20) [MSC v.1916 64 bit (AMD64)]


In [5]:
!pip install --upgrade imbalanced-learn





[notice] A new release of pip is available: 24.3.1 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [7]:
from imblearn.over_sampling import SMOTE

smote = SMOTE(sampling_strategy=0.2, random_state=42)  # Make fraud 20% of data
X_train_balanced, y_train_balanced = smote.fit_resample(X_train, y_train)

print(f"Before SMOTE: {y_train.value_counts()}")
print(f"After SMOTE: {y_train_balanced.value_counts()}")

Before SMOTE: 0    227451
1       394
Name: Class, dtype: int64
After SMOTE: 0    227451
1     45490
Name: Class, dtype: int64


In [8]:
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Initialize models
models = {
    "Logistic Regression": LogisticRegression(),
    "K-Nearest Neighbors": KNeighborsClassifier(n_neighbors=5),
    "Support Vector Machine": SVC(),
    "Decision Tree": DecisionTreeClassifier(),
    "Random Forest": RandomForestClassifier(n_estimators=50, n_jobs=-1)
}

# Store accuracy results
results = {}

# Train and test models
for name, model in models.items():
    print(f"Training {name}...")
    model.fit(X_train_balanced, y_train_balanced)  # Train model
    y_pred = model.predict(X_test)  # Predict on test data
    accuracy = accuracy_score(y_test, y_pred)  # Check accuracy
    results[name] = accuracy  # Store result

# Print model performance
print("\nüîπ Model Performance:")
for model, acc in results.items():
    print(f"{model}: {acc:.4f}")


Training Logistic Regression...
Training K-Nearest Neighbors...
Training Support Vector Machine...
Training Decision Tree...
Training Random Forest...

üîπ Model Performance:
Logistic Regression: 0.9939
K-Nearest Neighbors: 0.9983
Support Vector Machine: 0.9981
Decision Tree: 0.9976
Random Forest: 0.9996


In [9]:
from sklearn.ensemble import VotingClassifier
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

# Reduce dataset size for testing (optional)
X_train_small, _, y_train_small, _ = train_test_split(X_train_balanced, y_train_balanced, test_size=0.7, random_state=42)

# Faster Voting Classifier
ensemble_model = VotingClassifier(
    estimators=[
        ('LogReg', LogisticRegression()),
        ('LinearSVC', LinearSVC()),  # Replaces slow SVC
        ('DecisionTree', DecisionTreeClassifier()),
        ('RandomForest', RandomForestClassifier(n_estimators=20, n_jobs=-1))  # Reduced estimators
    ],
    voting='hard'  # Hard voting is faster
)

# Train on reduced dataset
print("Training Voting Classifier...")
ensemble_model.fit(X_train_small, y_train_small)

# Predict and check accuracy
y_pred_ensemble = ensemble_model.predict(X_test)
ensemble_accuracy = accuracy_score(y_test, y_pred_ensemble)
print(f"\n‚úÖ Final Optimized Ensemble Model Accuracy: {ensemble_accuracy:.4f}")


Training Voting Classifier...





‚úÖ Final Optimized Ensemble Model Accuracy: 0.9992


In [12]:
import joblib

# Save the trained model
joblib.dump(ensemble_model, "fraud_detection_model.pkl")

# Save the scaler (since data needs to be standardized before prediction)
joblib.dump(scaler, "scaler.pkl")

print("‚úÖ Model and scaler saved successfully!")


‚úÖ Model and scaler saved successfully!


In [13]:
# Load the trained model and scaler

loaded_model = joblib.load("fraud_detection_model.pkl")
loaded_scaler = joblib.load("scaler.pkl")

print("‚úÖ Model and scaler loaded successfully!")

‚úÖ Model and scaler loaded successfully!


In [15]:
import joblib
import pandas as pd

# Load the trained model and scaler
loaded_model = joblib.load("fraud_detection_model.pkl")
loaded_scaler = joblib.load("scaler.pkl")

# Step 3: Function to Detect Fraud
def detect_fraud(transaction):
    # Convert dictionary to DataFrame with correct column order
    df = pd.DataFrame([transaction])  # Create DataFrame with one row

    # Standardize input data using the loaded scaler
    df_scaled = loaded_scaler.transform(df)

    # Predict fraud (1 = Fraud, 0 = Legit)
    prediction = loaded_model.predict(df_scaled)[0]
    
    return "‚ö†Ô∏è Fraudulent Transaction Detected!" if prediction == 1 else "‚úÖ Legitimate Transaction"

# Step 4: Provide an example transaction with ALL features
example_transaction = {
    "Time": 0.0, "V1": -1.3, "V2": 0.5, "V3": 2.5, "V4": 0.0, "V5": 0.0, 
    "V6": 0.0, "V7": 0.0, "V8": 0.0, "V9": 0.0, "V10": 0.0, "V11": 0.0, 
    "V12": 0.0, "V13": 0.0, "V14": 0.0, "V15": 0.0, "V16": 0.0, "V17": 0.0, 
    "V18": 0.0, "V19": 0.0, "V20": 0.0, "V21": 0.0, "V22": 0.0, "V23": 0.0, 
    "V24": 0.0, "V25": 0.0, "V26": 0.0, "V27": 0.0, "V28": 0.0, "Amount": 50
}

# Step 5: Test the fraud detection function
print(detect_fraud(example_transaction))


‚úÖ Legitimate Transaction


In [17]:
import time


transactions = [
    {"Time": 50000, "V1": -1.3, "V2": 0.5, "V3": 2.5, "V4": 0.1, "V5": -0.5, 
     "V6": 0.3, "V7": -1.2, "V8": 1.4, "V9": 0.7, "V10": -0.1, "V11": 0.5, 
     "V12": -0.4, "V13": 0.3, "V14": -0.2, "V15": 0.2, "V16": -1.1, "V17": 0.8, 
     "V18": -0.3, "V19": 1.2, "V20": 0.6, "V21": -0.7, "V22": 0.9, "V23": -0.8, 
     "V24": 0.1, "V25": -0.6, "V26": 1.5, "V27": -1.4, "V28": 0.5, "Amount": 50},  # Normal Transaction

    {"Time": 60000, "V1": 2.0, "V2": -1.5, "V3": -3.5, "V4": -0.2, "V5": 1.0, 
     "V6": -0.8, "V7": 2.5, "V8": -1.7, "V9": 3.2, "V10": -2.0, "V11": 1.5, 
     "V12": -3.1, "V13": 0.7, "V14": -0.6, "V15": 1.8, "V16": -1.9, "V17": 2.1, 
     "V18": -0.5, "V19": 3.0, "V20": -1.2, "V21": 1.7, "V22": -2.4, "V23": 1.2, 
     "V24": -3.8, "V25": 2.9, "V26": -1.6, "V27": 3.5, "V28": -2.1, "Amount": 5000},  

    {"Time": 70000, "V1": -2.0, "V2": 1.2, "V3": 4.5, "V4": 0.8, "V5": -1.2, 
     "V6": 1.0, "V7": -0.9, "V8": 2.3, "V9": -1.7, "V10": 0.6, "V11": -0.4, 
     "V12": 1.1, "V13": -0.9, "V14": 0.5, "V15": -0.3, "V16": 1.7, "V17": -2.0, 
     "V18": 0.2, "V19": -1.4, "V20": 0.9, "V21": -0.5, "V22": 1.8, "V23": -0.7, 
     "V24": 0.3, "V25": -1.1, "V26": 2.4, "V27": -1.3, "V28": 0.8, "Amount": 1000}  # Normal Transaction
]

# Loop through transactions in real-time
for transaction in transactions:
    result = detect_fraud(transaction)
    print(f"üîÑ Processing Transaction: Amount = ${transaction['Amount']}")
    print(result)
    print("-" * 50)
    time.sleep(2)  # Simulate delay between transactions


üîÑ Processing Transaction: Amount = $50
‚úÖ Legitimate Transaction
--------------------------------------------------
üîÑ Processing Transaction: Amount = $5000
‚úÖ Legitimate Transaction
--------------------------------------------------
üîÑ Processing Transaction: Amount = $1000
‚úÖ Legitimate Transaction
--------------------------------------------------
