# In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import roc_auc_score, accuracy_score
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler

# Load the raw transaction table.
# NOTE(review): the '/content/...' path looks like a Google Colab upload
# location -- adjust it when running somewhere else.
df = pd.read_csv('/content/credit_card_fraud_dataset (1).csv')

# Balance the classes by undersampling: keep every row labelled IsFraud == 1
# and draw an equally sized, seeded random sample of IsFraud == 0 rows.
fraud = df[df['IsFraud'] == 1]
valid = df[df['IsFraud'] == 0]
valid_sample = valid.sample(n=len(fraud), random_state=42)
df_balanced = pd.concat([fraud, valid_sample])

# Feature matrix: every column except the label and three columns that are
# excluded from modelling (presumably because they are non-numeric -- confirm).
# X is left unscaled; only the split arrays below are standardized.
X = df_balanced.drop(['IsFraud', 'TransactionDate', 'TransactionType', 'Location'], axis=1)
y = df_balanced['IsFraud']

# Split BEFORE scaling so that the held-out rows never influence the scaler's
# mean/variance (fitting the scaler on the full data leaks test information).
# stratify=y keeps the 50/50 class ratio in both the train and test folds.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Learn the scaling parameters on the training fold only, then apply the same
# transformation to the test fold.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

def _fit_and_predict(model):
    """Fit ``model`` on the training split and score the test split.

    Returns a ``(model, predictions, proba)`` tuple where ``predictions`` are
    the hard class labels for ``X_test`` and ``proba`` is the second column
    of ``predict_proba`` (the probability of the IsFraud == 1 class).
    """
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)
    proba = model.predict_proba(X_test)[:, 1]
    return model, predictions, proba


# Random forest with 100 trees, seeded for reproducibility.
rf_classifier, rf_predictions, rf_proba = _fit_and_predict(
    RandomForestClassifier(n_estimators=100, random_state=42)
)

# Gradient-boosted trees with library defaults, seeded for reproducibility.
xgb_classifier, xgb_predictions, xgb_proba = _fit_and_predict(
    XGBClassifier(random_state=42)
)

# Report the same two metrics for each model: ROC-AUC is computed from the
# positive-class probabilities, accuracy from the hard label predictions.
for _heading, _proba, _predictions in (
    ("Random Forest:", rf_proba, rf_predictions),
    ("\nXGBoost:", xgb_proba, xgb_predictions),
):
    print(_heading)
    print(f"  ROC-AUC: {roc_auc_score(y_test, _proba)}")
    print(f"  Accuracy: {accuracy_score(y_test, _predictions)}")

# Names of the columns the models were actually trained on. The classifiers
# were fitted on a plain array, so the names are rebuilt here by removing the
# label and the three excluded columns from the original header. This list
# must stay in sync with the columns dropped when the feature matrix is built;
# using df.columns[:-1] instead yields more labels than there are importances
# and makes pd.Series raise a length-mismatch ValueError.
feature_names = df.columns.drop(['IsFraud', 'TransactionDate', 'TransactionType', 'Location'])

# One importance value per training feature, labelled with its column name.
feat_importances = pd.Series(rf_classifier.feature_importances_, index=feature_names)
feat_importances_xgb = pd.Series(xgb_classifier.feature_importances_, index=feature_names)

# Draw one horizontal bar chart per model showing (at most) the ten largest
# importances.
for _title, _importances in (
    ("Random Forest Feature Importance", feat_importances),
    ("XGBoost Feature Importance", feat_importances_xgb),
):
    plt.figure(figsize=(10, 6))
    _importances.nlargest(10).plot(kind='barh')
    plt.title(_title)
    plt.show()

# Closing qualitative remarks; these are fixed strings and do not depend on
# the metrics computed above.
comparison_notes = (
    "\nModel Comparison:",
    "Random Forest is generally easy to interpret and tune, making it a good starting point",
    "XGBoost is known for its high accuracy and efficiency in handling complex datasets",
)
print(*comparison_notes, sep="\n")
