In [None]:
from joblib import dump
import xgboost as xgb
# Serialize the trained model to disk. The model object is expected to
# already exist in this session under the name `best_xgb`.
dump(value=best_xgb, filename='best_xgb_model.pkl')

In [None]:
# Import necessary libraries
from joblib import load
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, roc_auc_score, roc_curve
import matplotlib.pyplot as plt
import seaborn as sns

# Restore the model previously serialized to 'best_xgb_model.pkl'.
# NOTE: joblib.load unpickles the file's contents, which can execute
# arbitrary code; only load model files that come from a trusted source.
loaded_model = load('best_xgb_model.pkl')
print("Model loaded successfully")

# Read the evaluation dataset from disk.
df = pd.read_csv("C:/Users/singh/Desktop/TIP_2/fraudTest.csv")

# Summarize the columns, dtypes and non-null counts. DataFrame.info() writes
# its report to stdout itself and returns None, so it is called directly;
# wrapping it in print() would emit a stray "None" line after the summary.
df.info()

# Preview the first few rows of the dataset.
print(df.head())

# Integer-encode the categorical text columns, one column at a time. The same
# encoder object is refit for each column, so after the loop it only holds the
# classes of the last column processed.
# NOTE(review): the encoder is fit on this dataset, so the integer codes depend
# on the values present in this file — confirm they line up with whatever
# encoding the model saw during training.
label_encoder = LabelEncoder()
for categorical_column in ('merchant', 'category', 'city', 'state', 'pop_cat'):
    df[categorical_column] = label_encoder.fit_transform(df[categorical_column])

# Remove columns that are irrelevant or carry personally identifiable
# information (PII).
pii_or_unused_columns = [
    "first",
    "last",
    "street",
    "trans_num",
    "dob",
    "trans_date",
    "trans_time",
    "job",
]
df = df.drop(columns=pii_or_unused_columns)

# Turn 'gender' into a numeric flag: 'M' -> 1, 'F' -> 0. Values outside this
# mapping become NaN.
gender_codes = {'M': 1, 'F': 0}
df['gender'] = df['gender'].map(gender_codes)

# Separate the target column (y) from the feature matrix (X).
y_new = df['is_fraud']
X_new = df.drop(columns=['is_fraud'])

# Predict hard class labels for every row of the feature matrix.
y_pred_new = loaded_model.predict(X_new)

# Compute the class probabilities once and keep column 1 (presumably the
# positive / fraud class — confirm against the model's class order). The same
# scores feed both the ROC AUC and the ROC curve, so there is no need to run
# predict_proba over the whole dataset twice.
y_score_new = loaded_model.predict_proba(X_new)[:, 1]

# Evaluate the model: label-based metrics use the hard predictions,
# ranking-based metrics use the probability scores.
accuracy = accuracy_score(y_new, y_pred_new)
conf_matrix = confusion_matrix(y_new, y_pred_new)
class_report = classification_report(y_new, y_pred_new)
roc_auc = roc_auc_score(y_new, y_score_new)

print("Accuracy:", accuracy)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", class_report)
print("ROC AUC Score:", roc_auc)

# Plot the ROC curve together with the diagonal of a random classifier.
fpr, tpr, thresholds = roc_curve(y_new, y_score_new)
plt.figure(figsize=(10, 7))
plt.plot(fpr, tpr, label=f'ROC curve (area = {roc_auc:.2f})')
plt.plot([0, 1], [0, 1], 'k--')  # chance-level reference line
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve')
plt.legend(loc='lower right')
plt.show()

# Render the confusion matrix as an annotated heatmap, with actual classes on
# the rows and predicted classes on the columns.
class_names = ['Not Fraud', 'Fraud']
fig, ax = plt.subplots(figsize=(10, 7))
sns.heatmap(
    conf_matrix,
    annot=True,
    fmt='d',  # cell counts are shown as integers
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
    ax=ax,
)
ax.set_xlabel('Predicted')
ax.set_ylabel('Actual')
ax.set_title('Confusion Matrix')
plt.show()