In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import os

In [None]:
def detect_label_column(df):
    """Return the first numeric or boolean column that holds exactly two distinct values.

    Columns are scanned in DataFrame order and the first match wins, so a
    binary *feature* that precedes the real label will be returned instead of
    the label. Missing values are ignored when counting distinct values.

    Any numeric dtype qualifies (e.g. int8, uint8, int16, float32, nullable
    integers), not only 32/64-bit ints and float64. Non-numeric columns
    (strings, categoricals, datetimes) are skipped.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns are inspected.

    Returns
    -------
    The column label of the first qualifying column.

    Raises
    ------
    ValueError
        If no column qualifies. ``ValueError`` is a subclass of ``Exception``,
        so callers using ``except Exception`` keep working.
    """
    for col in df.columns:
        series = df[col]
        # Check the dtype first: it is cheap, and it avoids computing distinct
        # values for text columns that can never qualify.
        if not (
            pd.api.types.is_bool_dtype(series)
            or pd.api.types.is_numeric_dtype(series)
        ):
            continue
        # nunique() excludes NaN by default, matching dropna().unique().
        if series.nunique() == 2:
            return col
    raise ValueError("No binary label column found automatically.")

In [None]:
def preprocess(df, label_col):
    """Turn a raw frame into a standardized feature matrix plus its label series.

    Steps, in order:
      1. Drop every row that contains at least one missing value.
      2. Separate the label column from the remaining feature columns.
      3. Integer-encode each object/category feature column with its own
         LabelEncoder, after casting the values to ``str``.
      4. Standardize all feature columns with StandardScaler.

    Parameters
    ----------
    df : pandas.DataFrame
        Input data including the label column.
    label_col
        Name of the label column in ``df``.

    Returns
    -------
    tuple
        ``(X_scaled, y)`` where ``X_scaled`` is the array produced by
        ``StandardScaler.fit_transform`` and ``y`` is the label column taken
        from the NaN-free frame.

    NOTE(review): the scaler is fitted on all remaining rows here, so a caller
    that splits into train/test afterwards has test-row statistics baked into
    the scaling.
    """
    complete_rows = df.dropna()
    features = complete_rows.drop(columns=[label_col])
    target = complete_rows[label_col]

    # Each text-like column gets an independent encoder; values are stringified
    # first so mixed-type object columns encode without error.
    text_columns = features.select_dtypes(include=['object', 'category']).columns
    for name in text_columns:
        features[name] = LabelEncoder().fit_transform(features[name].astype(str))

    return StandardScaler().fit_transform(features), target

In [None]:
def train_and_predict(X, y):
    """Fit a random forest on an 80/20 split and predict the held-out part.

    The split and the forest both use ``random_state=42``, and the forest has
    100 trees.

    Parameters
    ----------
    X : array-like
        Feature matrix.
    y : array-like
        Labels aligned with the rows of ``X``.

    Returns
    -------
    tuple
        ``(model, X_test, y_test, y_pred)``: the fitted classifier, the
        held-out features and labels, and the predictions for ``X_test``.
    """
    split = train_test_split(X, y, test_size=0.2, random_state=42)
    X_train, X_test, y_train, y_test = split

    # fit() returns the estimator itself, so construction and fitting can chain.
    classifier = RandomForestClassifier(n_estimators=100, random_state=42).fit(
        X_train, y_train
    )
    predictions = classifier.predict(X_test)

    return classifier, X_test, y_test, predictions

In [None]:
def print_summary(y_test, y_pred):
    """Print how many predictions are positive and an overall status line.

    The positive count is the sum of ``y_pred``, so it is only a count when
    predictions are encoded as 0/1 (or booleans). The status is
    "FRAUD DETECTED" when the positive share is above zero, otherwise "NORMAL".

    An empty ``y_pred`` is reported as 0 detections and 0.00% instead of
    dividing by zero and printing ``nan``.

    Parameters
    ----------
    y_test : array-like
        True labels. Not used by this function; kept so existing callers keep
        working.
    y_pred : array-like
        Predicted labels.

    Returns
    -------
    None
        The summary is written to stdout.
    """
    total = len(y_pred)
    if total:
        fraud_count = np.sum(y_pred)
        fraud_percentage = (fraud_count / total) * 100
    else:
        # Nothing was analyzed: avoid 0/0 (which yields nan under numpy).
        fraud_count = 0
        fraud_percentage = 0.0

    print("\nEvaluation Summary:")
    print("Total Transactions Analyzed:", total)
    print("Fraudulent Transactions Detected:", fraud_count)
    print(f"Fraud Percentage: {fraud_percentage:.2f}%")

    if fraud_percentage > 0:
        print("Status: FRAUD DETECTED")
    else:
        print("Status: NORMAL")

In [None]:
def main(csv_path):
    """Run the whole pipeline on one CSV file and print the results.

    Steps: load the CSV, auto-detect a binary label column, preprocess the
    features, train and evaluate a classifier, then print accuracy, confusion
    matrix, classification report and the fraud summary.

    The function prints a message and returns early when ``csv_path`` is not
    an existing regular file, or when no label column can be detected.

    Parameters
    ----------
    csv_path : str or path-like
        Location of the CSV file to analyze.

    Returns
    -------
    None
        All results are written to stdout.
    """
    # isfile rather than exists: a directory path "exists" but would make
    # read_csv crash, so report it through the same not-found message.
    if not os.path.isfile(csv_path):
        print(f"File not found: {csv_path}")
        return

    df = pd.read_csv(csv_path)
    print(f"\nLoaded dataset: {csv_path}")
    print(f"Columns: {list(df.columns)}")

    # Label detection failing is an expected outcome for arbitrary CSVs, so it
    # is reported rather than propagated.
    try:
        label_col = detect_label_column(df)
    except Exception as e:
        print(f"\nError: {e}")
        return
    print(f"\nDetected label column: {label_col}")

    X, y = preprocess(df, label_col)
    model, X_test, y_test, y_pred = train_and_predict(X, y)

    print("\nModel Evaluation:")
    print("Accuracy:", accuracy_score(y_test, y_pred))
    print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
    print("Classification Report:\n", classification_report(y_test, y_pred))

    print_summary(y_test, y_pred)

In [None]:
# Entry point: run the pipeline on the dataset at the hard-coded path below.
# NOTE(review): the absolute path looks environment-specific (presumably a
# Google Colab /content upload) — adjust it when running elsewhere.
if __name__ == "__main__":
    csv_path = r"/content/creditcard.csv"
    main(csv_path)


Loaded dataset: /content/creditcard.csv
Columns: ['Time', 'V1', 'V2', 'V3', 'V4', 'V5', 'V6', 'V7', 'V8', 'V9', 'V10', 'V11', 'V12', 'V13', 'V14', 'V15', 'V16', 'V17', 'V18', 'V19', 'V20', 'V21', 'V22', 'V23', 'V24', 'V25', 'V26', 'V27', 'V28', 'Amount', 'Class']

Detected label column: Class

Model Evaluation:
Accuracy: 0.9995611109160493
Confusion Matrix:
 [[56862     2]
 [   23    75]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00     56864
           1       0.97      0.77      0.86        98

    accuracy                           1.00     56962
   macro avg       0.99      0.88      0.93     56962
weighted avg       1.00      1.00      1.00     56962


Evaluation Summary:
Total Transactions Analyzed: 56962
Fraudulent Transactions Detected: 77
Fraud Percentage: 0.14%
Status: FRAUD DETECTED
