**Install Libraries**

In [1]:
# !pip install catboost

^C


In [None]:
# !pip install tensorflow==2.12.0

**Import Libraries**

In [None]:
# from google.colab import drive
# drive.mount('/content/drive')

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from catboost import CatBoostClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn import metrics


In [2]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout,BatchNormalization

**Data Description**

In [3]:
# Read the raw credit-card transactions into a DataFrame.
data = pd.read_csv("./creditcard.csv")

# Partition the rows by label so each class can be resampled on its own later:
# rows with Class == 1 go to `fraud`, rows with Class == 0 go to `non_fraud`.
is_fraud = data["Class"] == 1
fraud = data[is_fraud]
non_fraud = data[data["Class"] == 0]

In [7]:
# Count the missing values in each column.
data.isnull().sum()

Time      0
V1        0
V2        0
V3        0
V4        0
V5        0
V6        0
V7        0
V8        0
V9        0
V10       0
V11       0
V12       0
V13       0
V14       0
V15       0
V16       0
V17       0
V18       0
V19       0
V20       0
V21       0
V22       0
V23       0
V24       0
V25       0
V26       0
V27       0
V28       0
Amount    0
Class     0
dtype: int64

In [4]:
# (rows, columns) of the full dataset.
data.shape

(284807, 31)

In [5]:
# Number of rows per label; shows how imbalanced the two classes are.
data['Class'].value_counts()

0    284315
1       492
Name: Class, dtype: int64

In [6]:
# (rows, columns) of the fraud-only subset.
fraud.shape

(492, 31)

**Sampling**

In [None]:
# Build a small balanced working set: 1000 fraud rows and 1000 non-fraud rows.

from sklearn.utils import resample

# Upsample the minority (fraud) class to 1000 rows.
fraud_upsampled = resample(fraud,
                          replace=True,  # required: only 492 fraud rows exist, so rows must repeat
                          n_samples=1000,
                          random_state=123)  # reproducible results

# Downsample the majority class to the same size. The rows are drawn at random
# instead of slicing the first 1000, because a head slice only reflects the
# file's row order (the data has a Time column, so presumably chronological --
# confirm) and is not a representative sample of legitimate transactions.
non_fraud_downsampled = non_fraud.sample(n=1000, random_state=123)

# Combine the two equally sized classes into one frame.
data_upsampled = pd.concat([non_fraud_downsampled, fraud_upsampled])

# NOTE(review): the upsampled frame contains duplicated fraud rows, and the
# train/test split later in the notebook runs on this frame, so identical rows
# can end up on both sides of the split and inflate test metrics.

# Display new class counts
print(data_upsampled["Class"].value_counts())


In [None]:
# Preview the first rows of the balanced frame.
data_upsampled.head()

**Data Split**

In [None]:
# Separate the label column (y) from the predictor columns (X).
y = data_upsampled["Class"]
X = data_upsampled.drop(columns="Class")

In [None]:
# Hold out 20% of the rows for testing. Stratifying on y keeps the class
# proportions the same in both partitions; the seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.20,
    stratify=y,
    random_state=30,
)

In [None]:
# (rows, columns) of the training features.
print(X_train.shape)

In [None]:
# Preview the training features (still a DataFrame at this point).
X_train.head()

In [None]:
# (rows, columns) of the held-out features.
X_test.shape

In [None]:
# Preview the held-out features.
X_test.head()

In [None]:
# Label vector sizes; they should match the row counts of X_train and X_test.
print(y_train.shape)
print(y_test.shape)

**Value Counts**

In [None]:
# Per-class counts in each partition, to check the stratified split.
print(y_train.value_counts())
print(y_test.value_counts())

**Scaling Data (Standard Scaler)**

In [None]:
# Standardize every feature using statistics learned from the training
# partition only; the test partition reuses those same statistics.
# NOTE(review): X_train / X_test are rebound from DataFrames to NumPy arrays
# here, so this cell is not idempotent -- re-running it refits the scaler on
# already-scaled data.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
import joblib

# Save the fitted StandardScaler so the prediction section can reload it and
# apply the same transformation to new samples.
scaler_filename = "standard_scaler.joblib"
joblib.dump(scaler, scaler_filename)


In [None]:
# Show the standardized training matrix (a NumPy array after scaling).
print("Rescaled X_train Data \n==================================\n")
print(X_train)

In [None]:
# Show the first standardized training row.
print("Sample data of rescaled X_train\n==================================\n")
print(X_train[0])

In [None]:
# Length of a single feature vector.
X_train[0].shape

In [None]:
# Show the standardized held-out matrix.
print("Rescaled X_test Data \n==================================\n")
print(X_test)

In [None]:
# Show the first standardized held-out row.
print("Sample data of rescaled X_test\n==================================\n")
print(X_test[0])

**Machine Learning Model Training : Catboost**

In [None]:
# Hyper-parameters for the gradient-boosted tree model, collected in one place.
catboost_params = dict(
    iterations=100,          # number of boosting rounds
    depth=6,                 # depth of each tree
    learning_rate=0.05,
    loss_function='Logloss',
    eval_metric='Accuracy',
    verbose=10,              # log every 10th iteration
    random_seed=42,
)
catboost_model = CatBoostClassifier(**catboost_params)

# Fit on the standardized training partition; the fitted model is the cell output.
catboost_model.fit(X_train, y_train)



**Save trained CatBoost model**

In [None]:


# Write the trained CatBoost model to disk; the prediction section reloads it
# from this same file name.
model_filename = "catboost_model.cbm"
catboost_model.save_model(model_filename)

**Extract Leaf Embeddings (New Features)**

In [None]:

# For each partition, ask the trained model for the leaf indexes of every row.
# These index matrices are the inputs to the CNN stage.
X_train_leaf, X_test_leaf = (
    catboost_model.calc_leaf_indexes(partition)
    for partition in (X_train, X_test)
)

In [None]:
# Inspect the leaf-index matrix for the training rows.
X_train_leaf

In [None]:
# Inspect the leaf-index matrix for the held-out rows.
X_test_leaf

**Scaling Data : MinMaxScaler**

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Learn per-column min/max from the training leaf indexes only.
# NOTE(review): this rebinds `scaler`, which until now held the fitted
# StandardScaler; the save cell that follows depends on this name.
scaler = MinMaxScaler().fit(X_train_leaf)

# Apply the same learned range to both partitions.
X_train_scaled = scaler.transform(X_train_leaf)
X_test_scaled = scaler.transform(X_test_leaf)

In [None]:
# Persist the scaler fitted on the leaf-index features.

# At this point `scaler` refers to the MinMaxScaler fitted in the previous cell
# (not the earlier StandardScaler), so the cells must be run in order.
scaler_filename = "minmax_scaler.joblib"
joblib.dump(scaler, scaler_filename)


In [None]:
# (rows, columns) of the scaled leaf-index features used to train the CNN.
X_train_scaled.shape

In [None]:
# (rows, columns) of the scaled leaf-index features used to validate the CNN.
X_test_scaled.shape

In [None]:
# X_train = np.expand_dims(X_train_scaled, axis=-1)
# X_test = np.expand_dims(X_test_scaled, axis=-1)

In [None]:
# X_train

**Deep Learning Model Training : CNN**

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout, BatchNormalization

# The CNN is trained on the scaled leaf-index matrix (X_train_scaled), so its
# sequence length must come from that matrix. Using X_train here would size the
# network for the standardized raw features instead, and the Dense layer after
# Flatten would then not match the data passed to fit().
n_leaf_features = X_train_scaled.shape[1]

# CNN Model with Batch Normalization
cnn_model = Sequential([
    # First Convolutional Block: each row is treated as a 1-channel sequence.
    Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(n_leaf_features, 1)),
    BatchNormalization(),
    MaxPooling1D(pool_size=2),

    # Second Convolutional Block
    Conv1D(filters=128, kernel_size=3, activation='relu'),
    BatchNormalization(),
    MaxPooling1D(pool_size=2),

    # Flatten the feature map
    Flatten(),

    # Fully Connected Layers
    Dense(128, activation='relu'),
    BatchNormalization(),
    Dropout(0.5),

    Dense(64, activation='relu'),
    BatchNormalization(),

    # Output Layer for Binary Classification: one sigmoid unit
    Dense(1, activation='sigmoid')
])

# Compile Model
cnn_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Model Summary
cnn_model.summary()


In [None]:
# Fit the CNN on the scaled leaf-index features and keep the per-epoch metrics.
# NOTE(review): the held-out partition doubles as the validation set here, so
# the later "test" scores are not from unseen data in the strict sense.
history = cnn_model.fit(
    X_train_scaled,
    y_train,
    epochs=5,
    batch_size=16,
    validation_data=(X_test_scaled, y_test),
)

**Model Evaluation**

In [None]:
# Score the trained network on the held-out partition. With the metrics set at
# compile time, evaluate() returns the loss followed by the accuracy.
evaluation = cnn_model.evaluate(
    X_test_scaled,
    y_test,
    batch_size=32,
    verbose=1,
)
test_loss, test_accuracy = evaluation
print(f'Test Accuracy: {test_accuracy:.4f}')
print(f'Test Loss: {test_loss:.4f}')


**Save Model**

In [None]:
# Save the trained CNN under an .h5 file name; the prediction section loads this file.
cnn_model.save('cnn_model_99.h5')

In [None]:
# Save a second copy of the same model under a .keras file name.
cnn_model.save('cnn_model_99.keras')

**Metrics Plot**

In [None]:
import matplotlib.pyplot as plt

# One row, two panels: accuracy on the left, loss on the right.
fig, (acc_ax, loss_ax) = plt.subplots(1, 2, figsize=(10, 5))

# Training vs. validation accuracy per epoch.
acc_ax.plot(history.history['accuracy'])
acc_ax.plot(history.history['val_accuracy'])
acc_ax.set_title('Model accuracy')
acc_ax.set_ylabel('Accuracy')
acc_ax.set_xlabel('Epoch')
acc_ax.legend(['Train', 'Test'], loc='upper left')

# Training vs. validation loss per epoch.
loss_ax.plot(history.history['loss'])
loss_ax.plot(history.history['val_loss'])
loss_ax.set_title('Model loss')
loss_ax.set_ylabel('Loss')
loss_ax.set_xlabel('Epoch')
loss_ax.legend(['Train', 'Test'], loc='upper left')

plt.show()


**Classification Report**

In [None]:
from sklearn.metrics import classification_report

# Predicted probabilities for the held-out rows.
y_pred_prob = cnn_model.predict(X_test_scaled)

# Turn probabilities into hard 0/1 labels: anything above 0.5 becomes 1.
above_threshold = y_pred_prob > 0.5
y_pred = above_threshold.astype(int)

# Per-class precision / recall / F1 summary.
report = classification_report(y_test, y_pred)
print(report)


**Confusion Matrix**

In [None]:
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# Relies on y_test and y_pred from the classification-report cell.
conf_mat = confusion_matrix(y_test, y_pred)

# Draw the counts as an annotated heatmap on an explicit Axes.
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(
    conf_mat,
    annot=True,
    fmt="d",
    cmap="Blues",
    xticklabels=['Predicted 0', 'Predicted 1'],
    yticklabels=['Actual 0', 'Actual 1'],
    ax=ax,
)
ax.set_xlabel("Predicted Labels")
ax.set_ylabel("True Labels")
ax.set_title("Confusion Matrix")
plt.show()


# **Prediction**

In [None]:
# Load the sample CSV to classify. The following cells rescale it with the saved
# StandardScaler, extract CatBoost leaf indexes, and run the saved CNN on them.
sample_data = pd.read_csv("Legit.csv")

In [None]:


# Rebuild the first half of the inference pipeline from the saved artefacts.
import joblib
from catboost import CatBoostClassifier

# Load the saved CatBoost model
model_filename = "catboost_model.cbm"
catboost_model = CatBoostClassifier()
catboost_model.load_model(model_filename)

# Load the saved StandardScaler (fitted on the training features)
scaler_filename = "standard_scaler.joblib"
standar_scaler = joblib.load(scaler_filename)

# The scaler was fitted on the feature columns only. If the sample file still
# carries the "Class" label column, drop it so the column count matches;
# errors="ignore" makes this a no-op when the column is absent.
sample_features = sample_data.drop(columns="Class", errors="ignore")

# Apply the training-time standardization to the sample
X_sample_scaled = standar_scaler.transform(sample_features)

# Extract leaf indices for the sample data
X_sample_leaf = catboost_model.calc_leaf_indexes(X_sample_scaled)

X_sample_leaf


In [None]:

# Second half of the inference pipeline: scale the leaf indexes and run the CNN.

# Load the saved MinMaxScaler and apply it to the sample's leaf indexes
min_max_scaler = joblib.load("minmax_scaler.joblib")
sample_data_scaled = min_max_scaler.transform(X_sample_leaf)

# Load the saved Keras model; compile=False is enough because it is only used
# for prediction here.
cnn_model = tf.keras.models.load_model('cnn_model_99.h5', compile=False)

# Predicted probabilities, thresholded at 0.5 into 0/1 labels
y_pred_prob = cnn_model.predict(sample_data_scaled)
y_pred = (y_pred_prob > 0.5).astype(int)

# Report a verdict for every row of the sample file. Previously only the first
# row was reported, so any further rows were silently ignored.
for row_label in y_pred.ravel():
    if int(row_label) == 1:
        print("Fraud")
    else:
        print("Non Fraud")
