In [1]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score, precision_score, recall_score, confusion_matrix
import pickle

# Load the dataset
data = pd.read_csv('fraudTest.csv')

# Basic preprocessing
# `is_fraud` is assumed to be the label column; the six columns named in
# `features` are the only inputs handed to the model.
features = ['amt', 'city_pop', 'lat', 'long', 'merch_lat', 'merch_long']
target = 'is_fraud'

X, y = data[features], data[target]

# Split the dataset
# 30% of the rows are held out for evaluation. `stratify=y` keeps the label
# proportions the same in both partitions, and the fixed seed makes the
# split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    stratify=y,
    random_state=42,
)

# Scale the data
# The scaler is fitted on the training rows only and then applied unchanged
# to the test rows, so no statistics from the held-out rows enter
# preprocessing.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train the model
# n_jobs=-1 asks scikit-learn to build the trees on all available cores
# instead of one. With random_state fixed the fitted forest is expected to
# be the same as a single-threaded fit; only the wall-clock time changes.
model = RandomForestClassifier(
    random_state=42,
    class_weight='balanced',  # Handle imbalance
    n_jobs=-1,
)
model.fit(X_train_scaled, y_train)

# Put n_jobs back to its default so the estimator that is used for
# prediction and pickled afterwards carries the same configuration as before.
model.set_params(n_jobs=None)

# Evaluate the model
# Class predictions for the held-out rows
y_pred = model.predict(X_test_scaled)

# Accuracy, precision, and recall
# Each scorer is called as scorer(y_test, y_pred), in this order.
accuracy, precision, recall = (
    scorer(y_test, y_pred)
    for scorer in (accuracy_score, precision_score, recall_score)
)

print(
    "Model Evaluation Metrics:",
    f"Accuracy: {accuracy:.2f}",
    f"Precision: {precision:.2f}",
    f"Recall: {recall:.2f}",
    sep="\n",
)

# Confusion matrix and classification report
# Header and body go through one print call each; sep="\n" puts the body on
# the line after its header.
print("\nConfusion Matrix:", confusion_matrix(y_test, y_pred), sep="\n")
print("\nClassification Report:", classification_report(y_test, y_pred), sep="\n")

# Save the model and scaler
# Each fitted object is pickled to its own file (paths are relative to the
# current working directory); the model is written first, then the scaler.
for _fitted_obj, _pickle_name in ((model, 'model.pkl'), (scaler, 'scaler.pkl')):
    with open(_pickle_name, 'wb') as _pickle_handle:
        pickle.dump(_fitted_obj, _pickle_handle)

print("Model and scaler saved successfully!")


Model Evaluation Metrics:
Accuracy: 1.00
Precision: 0.78
Recall: 0.34

Confusion Matrix:
[[166010     62]
 [   422    222]]

Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00    166072
           1       0.78      0.34      0.48       644

    accuracy                           1.00    166716
   macro avg       0.89      0.67      0.74    166716
weighted avg       1.00      1.00      1.00    166716

Model and scaler saved successfully!
