# XGBClassifier

## Importing the libraries

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Importing the dataset

In [None]:
# Read the dataset
dataset = pd.read_csv('fraudtest_mod.csv')

In [None]:
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

## Splitting the dataset into the Training set and Test set

In [None]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 0)

## Encode Categorical data

In [None]:
from sklearn.preprocessing import OneHotEncoder

# Indices of the non-binary features
non_binary_feature_indices = [0,6]

# One-hot encode the categorical features
encoder = OneHotEncoder()
encoder.fit(np.concatenate((X_train[:, non_binary_feature_indices], X_test[:, non_binary_feature_indices])))


X_train_encoded = encoder.transform(X_train[:, non_binary_feature_indices])
X_test_encoded = encoder.transform(X_test[:, non_binary_feature_indices])

## Feature Scaling

In [None]:
from sklearn.preprocessing import StandardScaler

# Create a StandardScaler object
sc = StandardScaler(with_mean=False)

# Fit and transform the training data
X_train_scaled = sc.fit_transform(X_train_encoded)

# Transform the test data
X_test_scaled = sc.transform(X_test_encoded)

## Training the XGBClassifier model on the Training set

In [None]:
import xgboost as xgb
xgb_classifier = xgb.XGBClassifier()
xgb_classifier.fit(X_train_scaled, y_train)

## Making the Confusion Matrix

In [None]:
from sklearn.metrics import confusion_matrix, accuracy_score
y_pred = xgb_classifier.predict(X_test_scaled)
cm = confusion_matrix(y_test, y_pred)
print(cm)
accuracy_score(y_test, y_pred)

[[6225    0]
 [  25    0]]


0.996

In [None]:
from sklearn.metrics import roc_auc_score

# Calculate the AUC
y_pred_prob = xgb_classifier.predict_proba(X_test_scaled)[:, 1]
auc = roc_auc_score(y_test, y_pred_prob)

# Print the AUC
print("AUC:", auc)

AUC: 0.7548530120481927
