In [None]:
import pandas as pd

In [None]:
# Load the transactions CSV from the working directory and preview its first rows
data = pd.read_csv("creditcard.csv")
data.head()

In [None]:
# Setting display.max_columns to None lifts the column limit so head() shows every column
pd.options.display.max_columns = None
data.head()

In [None]:
# .tail() shows the last five rows by default
data.tail()

In [None]:
# Report the dataset dimensions; data.shape is (rows, columns)
row_count, column_count = data.shape
print(f"Number of columns: {column_count}")
print(f"Number of rows: {row_count}")

In [None]:
# Check for missing values: data.info() reports the non-null count and dtype of each column
data.info()

In [None]:
# Count the null values in each column
data.isnull().sum()

In [None]:
import matplotlib.pyplot as plt
import pandas as pd

# 'data' is the DataFrame loaded earlier in the notebook.
# Print the class counts ordered by frequency (the value_counts default).
print(data['Class'].value_counts())

# Separate the dataset into fraud (Class == 1) and normal (Class == 0) transactions
Fraud = data[data['Class'] == 1]
Normal = data[data['Class'] == 0]

# Bar labels and colours, listed in class-id order: position 0 -> class 0, position 1 -> class 1
LABELS = ["Normal", "Fraud"]
colors = ['blue', 'red']  # blue = Normal, red = Fraud

# Order the counts by class id instead of by frequency so that bar i always
# belongs to class i. With frequency ordering the hard-coded labels and colours
# would be silently swapped if class 1 ever outnumbered class 0. fill_value=0
# keeps both bars even when one class is absent.
count_classes = data['Class'].value_counts().reindex([0, 1], fill_value=0)

# Plotting
count_classes.plot(kind="bar", color=colors)
plt.xticks(range(len(LABELS)), LABELS)

plt.xlabel("Class")
plt.ylabel("Frequency")
plt.title("Transaction Class Distribution")
plt.show()

In [None]:
# Split the transactions by label and compare the sizes of the two subsets
is_fraud = data['Class'] == 1
is_normal = data['Class'] == 0
Fraud = data[is_fraud]
Normal = data[is_normal]

print(Fraud.shape, Normal.shape)

In [None]:
# Drop the Time column. errors='ignore' makes the cell safe to re-run: once Time
# has been removed, a second execution is a no-op instead of raising KeyError.
data = data.drop(columns=['Time'], errors='ignore')

In [None]:
# Preview the frame again to confirm the Time column is gone
data.head()

In [None]:
# Check whether any row is a duplicate of another row (evaluates to True or False)
data.duplicated().any()

In [None]:
# Shape (rows, columns) before the duplicated rows are dropped
data.shape

In [None]:
# Drop duplicated rows; drop_duplicates() keeps the first occurrence of each by default
data = data.drop_duplicates()

In [None]:
# Shape (rows, columns) after the duplicated rows were dropped
data.shape

In [None]:
# Count the rows per class label now that duplicates have been removed
data['Class'].value_counts()

In [None]:
# Use StandardScaler to standardize Amount, which reduces bias from its scale and improves the learning process.
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
# Double brackets select Amount as a one-column DataFrame (2-D input for the scaler)
amount_frame = data[['Amount']]
# NOTE(review): the scaler is fitted on the whole dataset before the train/test
# split further down, so held-out rows influence the scaling statistics — confirm
# this is acceptable.
data['Amount'] = sc.fit_transform(amount_frame)
data

In [None]:
# Target: the Class label; features: every remaining column
y = data['Class']
X = data.drop(columns=['Class'])

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 20% of the rows for evaluation with a fixed seed for reproducibility.
# stratify=y keeps the class proportions identical in the train and test splits;
# without it a rare class (fraud is presumably rare here — verify with
# y.value_counts()) can be under-represented in the test set by chance, which
# makes precision/recall unstable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

In [None]:
# Candidate models keyed by display name. All use library defaults, except that
# the randomized tree-based estimators get a fixed seed so that re-running the
# cell reproduces the same scores.
classifier = {
    "Logistic Regression": LogisticRegression(),
    "Decision Tree Classifier": DecisionTreeClassifier(random_state=42),
    "RandomForest": RandomForestClassifier(random_state=42),
    "KNeighbors": KNeighborsClassifier(),
    "Xgboost": XGBClassifier()
}

# Fit each model on the training split and report its metrics on the held-out split.
for name, clf in classifier.items():
    print(f"\n=========={name}===========")
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    print(f"\n Accuracy: {accuracy_score(y_test, y_pred)}")
    # zero_division=0 reports 0 (without an UndefinedMetricWarning) when a metric's
    # denominator is empty, e.g. a model that predicts no positive at all.
    print(f"\n Precision: {precision_score(y_test, y_pred, zero_division=0)}")
    print(f"\n Recall: {recall_score(y_test, y_pred, zero_division=0)}")
    print(f"\n F1 Score: {f1_score(y_test, y_pred, zero_division=0)}")

In [None]:
# Train the model that gets saved below: a default XGBClassifier fitted on all of
# X and y (not only the training split), so the held-out scores printed by the
# comparison loop were not measured on this exact model.
data_prediction = XGBClassifier()
data_prediction.fit(X,y)

In [None]:
import joblib
# Persist the fitted model to the working directory so later cells can reload it.
# NOTE(review): the file name reads 'XGboot' (presumably meant 'XGBoost'); the
# joblib.load cells use the same spelling, so rename them all together if at all.
joblib.dump(data_prediction, 'XGboot_Model.pkl')

In [None]:
# Build one boolean mask per label, then slice the frame into its fraud and normal rows
fraud_mask = data['Class'] == 1
normal_mask = data['Class'] == 0
data_fraud = data[fraud_mask]
data_normal = data[normal_mask]

# Function to print rows with commas
def print_rows_with_commas(df, n=4):
    """Print the first ``n`` rows of ``df``, one row per line, values joined by ", ".

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose leading rows are printed.
    n : int, default 4
        Number of rows taken with ``df.head(n)``.

    Returns
    -------
    None
        The rows are written to stdout only.
    """
    # iterrows() yields (index label, row Series); only the row values are printed.
    for _, record in df.head(n).iterrows():
        rendered = [str(value) for value in record.values]
        print(', '.join(rendered))

# Print the leading rows of each subset under its heading.
# print_rows_with_commas is called with its default n, which is 4 rows (not 2).
for heading, subset in (("Fraud Data:", data_fraud), ("\nNormal Data:", data_normal)):
    print(heading)
    print_rows_with_commas(subset)

In [None]:
pred_model = joblib.load("XGboot_Model.pkl")
# Make the prediction
pred = pred_model.predict([
    [-2.3122265423263, 1.95199201064158, -1.60985073229769, 3.9979055875468, -0.522187864667764, -1.42654531920595, -2.53738730624579, 1.39165724829804, -2.77008927719433, -2.77227214465915, 3.20203320709635, -2.89990738849473, -0.595221881324605, -4.28925378244217, 0.389724120274487, -1.14074717980657, -2.83005567450437, -0.0168224681808257, 0.416955705037907, 0.126910559061474, 0.517232370861764, -0.0350493686052974, -0.465211076182388, 0.320198198514526, 0.0445191674731724, 0.177839798284401, 0.261145002567677, -0.143275874698919, -0.4175162445753002]
])

# Check the prediction and print the corresponding class
if pred[0] == 0:
    print("The transaction is Normal.")
elif pred[0] == 1:
    print("The transaction is Fraud.")

In [None]:
pred_model = joblib.load("XGboot_Model.pkl")
# Make the prediction
pred = pred_model.predict([
    [1.99580500482354, -0.185640469490288, -1.63055972869549, -0.123601632903268, 0.253957698895392, -0.912552023158912, 0.320712899205896, -0.243601532270024, 0.301719410344464, 0.106709606516659, 0.776338360874582, 0.751848210917616, -0.377450224359188, 0.860129915507045, 0.100224221845141, 0.0501332969452917, -0.722079290418566, -0.0250280347528174, 0.476523948758478, -0.163539137109292, -0.100148633773135, -0.331418973307353, 0.158847276366556, -0.481294479785805, -0.0938169937195864, -0.148649095179115, -0.0652773488212963, -0.0691752385113206, -0.2685631132328121]
])

# Check the prediction and print the corresponding class
if pred[0] == 0:
    print("The transaction is Normal.")
elif pred[0] == 1:
    print("The transaction is Fraud.")