In [40]:
# importing essential libraries

import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
from imblearn.over_sampling import SMOTE
import warnings
warnings.filterwarnings("ignore")

In [42]:
# loading dataset

df = pd.read_csv("creditcard.csv")

In [43]:
# feature engineering

scaler = StandardScaler()
df['norm_time'] = scaler.fit_transform(df['Time'].values.reshape(-1, 1))
df['norm_amount'] = scaler.fit_transform(df['Amount'].values.reshape(-1, 1))

In [44]:
# droping original values

df.drop(['Time', 'Amount'], axis=1, inplace=True)

In [45]:
features = [col for col in df.columns if col != 'Class']
df = df[features + ['Class']]


In [46]:
X = df.drop('Class', axis=1)
y = df['Class']

In [47]:
sm = SMOTE(random_state=42)

# Drop rows where the target variable 'Class' is NaN
df_cleaned = df.dropna(subset=['Class'])
X_cleaned = df_cleaned.drop('Class', axis=1)
y_cleaned = df_cleaned['Class']

X_res, y_res = sm.fit_resample(X_cleaned, y_cleaned)

In [48]:
X_train, X_test, y_train, y_test = train_test_split(
    X_res, y_res, test_size=0.2, random_state=42
)

In [49]:
models = {
    "Logistic Regression": LogisticRegression(max_iter=1000),
    "Decision Tree": DecisionTreeClassifier(random_state=42),
    "XGBoost": XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42)
}

In [50]:
for name, model in models.items():
    print(f"\n==================== {name} ====================")

    # Train the model
    model.fit(X_train, y_train)

    # Predict on test data
    y_pred = model.predict(X_test)

    # Evaluate the model
    print("Classification Report:\n", classification_report(y_test, y_pred))
    print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
    print("ROC-AUC Score:", roc_auc_score(y_test, y_pred))


Classification Report:
               precision    recall  f1-score   support

           0       0.93      0.98      0.95     56750
           1       0.97      0.92      0.95     56976

    accuracy                           0.95    113726
   macro avg       0.95      0.95      0.95    113726
weighted avg       0.95      0.95      0.95    113726

Confusion Matrix:
 [[55368  1382]
 [ 4429 52547]]
ROC-AUC Score: 0.9489565461986004

Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00     56750
           1       1.00      1.00      1.00     56976

    accuracy                           1.00    113726
   macro avg       1.00      1.00      1.00    113726
weighted avg       1.00      1.00      1.00    113726

Confusion Matrix:
 [[56606   144]
 [   53 56923]]
ROC-AUC Score: 0.9982661694173417

Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00     

In [51]:
# Step 1: Select the best trained model (e.g., XGBoost)
best_model = XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42)
best_model.fit(X_train, y_train)

# Step 2: Define a function to predict based on custom input
def predict_transaction(input_data):
    """
    input_data: List of 30 values (V1 to V28 + norm_time + norm_amount)
    """
    input_array = np.array(input_data).reshape(1, -1)
    prediction = best_model.predict(input_array)
    result = "Fraudulent" if prediction[0] == 1 else "Legitimate"
    print(f"Prediction: {result} transaction (Class = {prediction[0]})")

# Step 3: Example input (You will provide these values manually)
# Make sure these 30 features are in the correct order
sample_input = [
    -1.359807, -0.072781, 2.536346, 1.378155, -0.338321, 0.462388,
    0.239599, 0.098698, 0.363787, 0.090794, -0.551600, -0.617801,
    -0.991390, -0.311169, 1.468177, -0.470400, 0.207971, 0.025791,
    0.403993, 0.251412, -0.018307, 0.277838, -0.110474, 0.066928,
    0.128539, -0.189115, 0.133558, -0.021053, 0.004455, -0.014823  # norm_time, norm_amount
]

# Step 4: Predict
predict_transaction(sample_input)


Prediction: Legitimate transaction (Class = 0)
