In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import warnings
warnings.filterwarnings("ignore")

In [2]:
train_data = pd.read_csv("/kaggle/input/fraud-detection/fraudTrain.csv")
test_data = pd.read_csv("/kaggle/input/fraud-detection/fraudTest.csv")

In [3]:
data = pd.concat([train_data, test_data], axis=0)

In [4]:
X = data.drop(columns=["is_fraud"])
y = data["is_fraud"]

In [5]:
X = X.select_dtypes(include=[np.number])

In [6]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [7]:
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.3, random_state=42, stratify=y
)

In [8]:
log_model = LogisticRegression(max_iter=1000)
log_model.fit(X_train, y_train)
log_pred = log_model.predict(X_test)
print("Logistic Regression Results:")
print(classification_report(y_test, log_pred))

Logistic Regression Results:
              precision    recall  f1-score   support

           0       0.99      1.00      1.00    552824
           1       0.00      0.00      0.00      2895

    accuracy                           0.99    555719
   macro avg       0.50      0.50      0.50    555719
weighted avg       0.99      0.99      0.99    555719



In [9]:
tree_model = DecisionTreeClassifier(random_state=42)
tree_model.fit(X_train, y_train)
tree_pred = tree_model.predict(X_test)
print("Decision Tree Results:")
print(classification_report(y_test, tree_pred))

Decision Tree Results:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00    552824
           1       0.39      0.44      0.41      2895

    accuracy                           0.99    555719
   macro avg       0.69      0.72      0.70    555719
weighted avg       0.99      0.99      0.99    555719



In [10]:
forest_model = RandomForestClassifier(n_estimators=100, random_state=42)
forest_model.fit(X_train, y_train)
forest_pred = forest_model.predict(X_test)
print("Random Forest Results:")
print(classification_report(y_test, forest_pred))

Random Forest Results:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00    552824
           1       0.84      0.45      0.59      2895

    accuracy                           1.00    555719
   macro avg       0.92      0.73      0.79    555719
weighted avg       1.00      1.00      1.00    555719

