In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score




In [2]:
train_data = pd.read_csv("fraudTrain.csv")

test_data = pd.read_csv("fraudTest.csv")



In [3]:
X_train = train_data.drop("is_fraud", axis=1)
y_train = train_data["is_fraud"]


X_test = test_data.drop("is_fraud", axis=1)
y_test = test_data["is_fraud"]



In [4]:
numeric_cols = X_train.select_dtypes(include=['number']).columns
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train[numeric_cols])
X_test_scaled = scaler.transform(X_test[numeric_cols])


In [5]:
models = {
    "Logistic Regression": LogisticRegression(),
    "Decision Tree": DecisionTreeClassifier(),
    "Random Forest": RandomForestClassifier()
}



In [6]:
for name, model in models.items():
    print(f"Training {name}...")
    model.fit(X_train_scaled, y_train)
    y_pred = model.predict(X_test_scaled)
    
    # Model evaluation
    print(f"Evaluation results for {name}:")
    print(classification_report(y_test, y_pred))
    print("Accuracy:", accuracy_score(y_test, y_pred))

Training Logistic Regression...
Evaluation results for Logistic Regression:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00    553574
           1       0.00      0.00      0.00      2145

    accuracy                           1.00    555719
   macro avg       0.50      0.50      0.50    555719
weighted avg       0.99      1.00      0.99    555719

Accuracy: 0.9955067219224104
Training Decision Tree...
