In [44]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import OneHotEncoder
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score, roc_curve, auc, roc_auc_score
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import SMOTE
from sklearn.metrics import matthews_corrcoef
import time

In [6]:
df = pd.read_csv('base.csv')

In [7]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000000 entries, 0 to 999999
Data columns (total 32 columns):
 #   Column                            Non-Null Count    Dtype  
---  ------                            --------------    -----  
 0   fraud_bool                        1000000 non-null  int64  
 1   income                            1000000 non-null  float64
 2   name_email_similarity             1000000 non-null  float64
 3   prev_address_months_count         1000000 non-null  int64  
 4   current_address_months_count      1000000 non-null  int64  
 5   customer_age                      1000000 non-null  int64  
 6   days_since_request                1000000 non-null  float64
 7   intended_balcon_amount            1000000 non-null  float64
 8   payment_type                      1000000 non-null  object 
 9   zip_count_4w                      1000000 non-null  int64  
 10  velocity_6h                       1000000 non-null  float64
 11  velocity_24h                      1000

In [8]:
print(df['device_fraud_count'].value_counts())

0    1000000
Name: device_fraud_count, dtype: int64


In [9]:
# "device_fraud_count" just has one value, so drop this column
df = df.drop(['device_fraud_count'], axis=1, errors='ignore') 

In [10]:
print(df['prev_address_months_count'].value_counts()[-1])

712920


In [11]:
# -1 means a missing value and there are about 71% missing, so I drop this feature. If amount is small, I will use impute
df = df.drop(['prev_address_months_count'], axis=1, errors='ignore')

In [12]:
# 'intended_balcon_amount', negatives are missing values
print((df['intended_balcon_amount']<0).sum())

742523


In [13]:
# because negatives are about 74%, so I drop this feature
df = df.drop(['intended_balcon_amount'], axis=1, errors='ignore')

In [14]:
# impute "current_address_months_count", -1 means a missing value, 0.4% in total, right skewed so use median
median_value = df[df['current_address_months_count'] != -1]['current_address_months_count'].median()
# Replace -1 with the median
df.loc[df['current_address_months_count'] == -1, 'current_address_months_count'] = median_value
print(df['current_address_months_count'].describe())

count    1000000.000000
mean          86.817583
std           88.248728
min            0.000000
25%           20.000000
50%           53.000000
75%          130.000000
max          428.000000
Name: current_address_months_count, dtype: float64


In [15]:
# impute "bank_months_count", -1 means a missing value, 25% in total, right skewed so use median
median_value = df[df['bank_months_count'] != -1]['bank_months_count'].median()
# Replace -1 with the median
df.loc[df['bank_months_count'] == -1, 'bank_months_count'] = median_value
print(df['bank_months_count'].describe())

count    1000000.000000
mean          14.897463
std            9.959364
min            1.000000
25%            5.000000
50%           15.000000
75%           25.000000
max           32.000000
Name: bank_months_count, dtype: float64


In [16]:
# impute "session_length_in_minutes", -1 means a missing value, 0.2% in total, right skewed so use median
median_value = df[df['session_length_in_minutes'] != -1]['session_length_in_minutes'].median()
# Replace -1 with the median
df.loc[df['session_length_in_minutes'] == -1, 'session_length_in_minutes'] = median_value
print(df['session_length_in_minutes'].describe())

count    1000000.000000
mean           7.557278
std            8.024671
min            0.000872
25%            3.122461
50%            5.122832
75%            8.866131
max           85.899143
Name: session_length_in_minutes, dtype: float64


In [17]:
# impute "device_distinct_emails_8w", -1 means a missing value, 359 in total, because of categorical, so use mode
mode_value = df['device_distinct_emails_8w'].mode()[0]
df['device_distinct_emails_8w'].replace(-1, mode_value, inplace=True)
statistics = df['device_distinct_emails_8w'].describe()
print(statistics)

count    1000000.000000
mean           1.019030
std            0.176669
min            0.000000
25%            1.000000
50%            1.000000
75%            1.000000
max            2.000000
Name: device_distinct_emails_8w, dtype: float64


In [18]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000000 entries, 0 to 999999
Data columns (total 29 columns):
 #   Column                            Non-Null Count    Dtype  
---  ------                            --------------    -----  
 0   fraud_bool                        1000000 non-null  int64  
 1   income                            1000000 non-null  float64
 2   name_email_similarity             1000000 non-null  float64
 3   current_address_months_count      1000000 non-null  int64  
 4   customer_age                      1000000 non-null  int64  
 5   days_since_request                1000000 non-null  float64
 6   payment_type                      1000000 non-null  object 
 7   zip_count_4w                      1000000 non-null  int64  
 8   velocity_6h                       1000000 non-null  float64
 9   velocity_24h                      1000000 non-null  float64
 10  velocity_4w                       1000000 non-null  float64
 11  bank_branch_count_8w              1000

In [19]:
# split data into target and attributes
y = df['fraud_bool']
X = df.drop(['fraud_bool'], axis=1)


In [20]:
# train-test split by "month", 0-5 are training, 6-7 are testing
y_train = y[X['month']<6]
y_test = y[X['month']>=6]
X_train = X[X['month']<6]
X_test = X[X['month']>=6]

In [21]:
# All following techniques apply to training set

In [22]:
# apply one-hot encoding before applying the SMOTE oversampling
# Create a new DataFrame with the encoded categorical column
X_train_encoded = pd.get_dummies(X_train)
X_train_encoded.shape

(794989, 49)

In [23]:
# Oversampling Using SMOTE
smote =SMOTE(random_state=0)
X_train_smote, y_train_smote = smote.fit_resample(X_train_encoded, y_train)
print('Feature/label dataset for training before applying SMOTE: ', X_train_encoded.shape, y_train.shape)
print('Feature/label dataset for training after applying SMOTE: ', X_train_smote.shape, y_train_smote.shape)
print('Distribution of label values after applying SMOTE:\n',pd.Series(y_train_smote).value_counts())

Feature/label dataset for training before applying SMOTE:  (794989, 49) (794989,)
Feature/label dataset for training after applying SMOTE:  (1573676, 49) (1573676,)
Distribution of label values after applying SMOTE:
 1    786838
0    786838
Name: fraud_bool, dtype: int64


In [24]:
X_train_smote.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1573676 entries, 0 to 1573675
Data columns (total 49 columns):
 #   Column                            Non-Null Count    Dtype  
---  ------                            --------------    -----  
 0   income                            1573676 non-null  float64
 1   name_email_similarity             1573676 non-null  float64
 2   current_address_months_count      1573676 non-null  int64  
 3   customer_age                      1573676 non-null  int64  
 4   days_since_request                1573676 non-null  float64
 5   zip_count_4w                      1573676 non-null  int64  
 6   velocity_6h                       1573676 non-null  float64
 7   velocity_24h                      1573676 non-null  float64
 8   velocity_4w                       1573676 non-null  float64
 9   bank_branch_count_8w              1573676 non-null  int64  
 10  date_of_birth_distinct_emails_4w  1573676 non-null  int64  
 11  credit_risk_score                 157

In [25]:
# feature selection
# domain drop attribute "email_is_free". 1. free:paid = 53:47, not very significant; 2. lots of legitimate application use free email in the real world
X_train_smote = X_train_smote.drop(['email_is_free'], axis=1, errors='ignore')

In [26]:
# feature reduction
# Select only the numerical attributes
numerical_attributes = X_train_smote.select_dtypes(include=['int64', 'float64'])
# Calculate correlation between numerical attributes
corr_matrix = numerical_attributes.corr()
corr_matrix

Unnamed: 0,income,name_email_similarity,current_address_months_count,customer_age,days_since_request,zip_count_4w,velocity_6h,velocity_24h,velocity_4w,bank_branch_count_8w,...,phone_home_valid,phone_mobile_valid,bank_months_count,has_other_cards,proposed_credit_limit,foreign_request,session_length_in_minutes,keep_alive_session,device_distinct_emails_8w,month
income,1.0,-0.070939,0.015828,0.159756,-0.011406,-0.050262,-0.089769,-0.080438,-0.077888,-0.017238,...,-0.082421,-0.031013,-0.006811234,-0.008796,0.204399,-0.012242,-0.043695,-0.140971,-0.03588528,0.063796
name_email_similarity,-0.070939,1.0,0.013343,-0.090286,-0.011653,0.012878,0.036079,0.042225,0.04448,0.019084,...,0.074006,0.042794,-0.0251377,0.069851,0.036292,0.005029,-0.004192,0.099686,-0.005568964,-0.038114
current_address_months_count,0.015828,0.013343,1.0,0.207487,-0.063973,0.026433,0.00212,0.002562,-0.009277,0.030278,...,0.041105,-0.129726,0.06184781,-0.007216,0.181469,-0.026971,-0.014354,-0.107885,-0.02010356,-0.003893
customer_age,0.159756,-0.090286,0.207487,1.0,-0.033564,-0.015024,-0.040657,-0.012347,-0.008139,0.015378,...,0.040501,-0.200623,0.02860191,-0.028938,0.22425,-0.031343,0.042059,-0.161795,-0.01686817,-0.017488
days_since_request,-0.011406,-0.011653,-0.063973,-0.033564,1.0,-0.010302,0.029076,0.006057,0.00881,-0.014489,...,-0.031328,0.007459,0.02005993,-0.037396,-0.073733,-0.002017,0.037973,-0.003132,0.01228078,-0.013651
zip_count_4w,-0.050262,0.012878,0.026433,-0.015024,-0.010302,1.0,0.065467,0.108072,0.2133,-0.00614,...,-0.031796,0.000229,0.03938613,-0.030215,-0.028215,0.008517,0.045022,0.026012,0.01325629,-0.200655
velocity_6h,-0.089769,0.036079,0.00212,-0.040657,0.029076,0.065467,1.0,0.392736,0.311228,0.01741,...,0.015278,-0.006368,-0.02768542,0.011111,-0.079998,-0.000683,0.036659,0.050334,0.03049849,-0.304766
velocity_24h,-0.080438,0.042225,0.002562,-0.012347,0.006057,0.108072,0.392736,1.0,0.410409,0.028784,...,-0.008727,-0.029369,-0.0317,-0.02465,-0.030157,0.010785,0.051612,0.041492,0.02122456,-0.412997
velocity_4w,-0.077888,0.04448,-0.009277,-0.008139,0.00881,0.2133,0.311228,0.410409,1.0,0.019373,...,-0.009044,-0.03896,-0.01423671,-0.055827,0.006779,0.015693,0.070929,0.075279,0.01858991,-0.79556
bank_branch_count_8w,-0.017238,0.019084,0.030278,0.015378,-0.014489,-0.00614,0.01741,0.028784,0.019373,1.0,...,0.06569,0.007422,0.02705482,0.047993,-0.022432,0.006324,-0.003594,0.044306,0.01825566,-0.020389


In [27]:
X_train_smote['velocity_4w'].corr(X_train_smote['month'])

-0.7955602521420382

In [28]:
# the correlation is about -0.8, so I drop attribute 'velocity_4w'
X_train_smote = X_train_smote.drop(['velocity_4w'], axis=1, errors='ignore')

In [29]:
print('Distribution of label values after applying SMOTE:\n',pd.Series(y_train_smote).value_counts())

Distribution of label values after applying SMOTE:
 1    786838
0    786838
Name: fraud_bool, dtype: int64


In [30]:
# I want to sample about 8000 for each class, so set k = 100, use 1-in-100 systematic sampling
# apply sampling before performing time-series validation
sampling_ratio = 0.01
n_samples = int(len(X_train_smote) * sampling_ratio)
indices = list(range(0, len(X_train_smote), 100))[:n_samples]
X_train_sampled = X_train_smote.iloc[indices]
y_train_sampled = y_train_smote.iloc[indices]

# Effectiveness

In [31]:
# Perform time-series validation by iterating over the desired temporal attribute range
# Define the temporal attribute column name and range
temporal_column = "month"
start_month = 0
end_month = 5

# Initialize lists to store the results
confusion_matrices = []
mcc_scores = []

# Perform time-series validation
prc = 0  # initial precision
rec = 0  # initial recall
f1 = 0  # initial f1 score
roc_auc = 0  # initial roc_auc

# Create a StandardScaler instance
scaler = StandardScaler()

for i in range(start_month, end_month):
    # Split the data into training and testing sets based on the month column
    X_train_ts = X_train_sampled[X_train_sampled[temporal_column] <= i]
    y_train_ts = y_train_sampled[X_train_sampled[temporal_column] <= i]
    X_test_ts = X_train_sampled[X_train_sampled[temporal_column] == i + 1]
    y_test_ts = y_train_sampled[X_train_sampled[temporal_column] == i + 1]

    # Scale the training and testing data
    X_train_ts_scaled = scaler.fit_transform(X_train_ts)
    X_test_ts_scaled = scaler.transform(X_test_ts)

    # Train a logistic regression classifier using the scaled training data
    clf3 = LogisticRegression(max_iter=1000, random_state=42)
    clf3.fit(X_train_ts_scaled, y_train_ts)

    # Predict class labels for the scaled testing data
    y_pred = clf3.predict(X_test_ts_scaled)

    # Calculate the confusion matrix
    confusion = confusion_matrix(y_test_ts, y_pred)
    confusion_matrices.append(confusion)

    # Calculate evaluation metrics
    prc += precision_score(y_test_ts, y_pred)
    rec += recall_score(y_test_ts, y_pred)
    f1 += f1_score(y_test_ts, y_pred)
    roc_auc += roc_auc_score(y_test_ts, y_pred)
    # Calculate the Matthews Correlation Coefficient (MCC)
    mcc = matthews_corrcoef(y_test_ts, y_pred)
    mcc_scores.append(mcc)

# Compute the average confusion matrix
average_confusion = sum(confusion_matrices) / len(confusion_matrices)
average_precision = prc / len(confusion_matrices)
average_recall = rec / len(confusion_matrices)
average_f1 = f1 / len(confusion_matrices)
average_roc_auc = roc_auc / len(confusion_matrices)
average_mcc = sum(mcc_scores) / len(mcc_scores)

print("Average Confusion Matrix:")
print(average_confusion)
print("\nAverage precision:")
print(average_precision)
print("\nAverage recall:")
print(average_recall)
print("\nAverage f1 score:")
print(average_f1)
print("\nAverage roc_auc score:")
print(average_roc_auc)
print("\nAverage MCC:")
print(average_mcc)

Average Confusion Matrix:
[[1296.8   15. ]
 [  30.  1261.6]]

Average precision:
0.9885754998346347

Average recall:
0.9759663755271613

Average f1 score:
0.9822195363817029

Average roc_auc score:
0.9823274783150768

Average MCC:
0.9652343816907953


# Efficiency

In [32]:
# time
start_time = time.time()
temporal_column = "month"
start_month = 0
end_month = 5

# Initialize lists to store the results
confusion_matrices = []
mcc_scores = []

# Perform time-series validation
prc = 0  # initial precision
rec = 0  # initial recall
f1 = 0  # initial f1 score
roc_auc = 0  # initial roc_auc

# Create a StandardScaler instance
scaler = StandardScaler()

for i in range(start_month, end_month):
    # Split the data into training and testing sets based on the month column
    X_train_ts = X_train_sampled[X_train_sampled[temporal_column] <= i]
    y_train_ts = y_train_sampled[X_train_sampled[temporal_column] <= i]
    X_test_ts = X_train_sampled[X_train_sampled[temporal_column] == i + 1]
    y_test_ts = y_train_sampled[X_train_sampled[temporal_column] == i + 1]

    # Scale the training and testing data
    X_train_ts_scaled = scaler.fit_transform(X_train_ts)
    X_test_ts_scaled = scaler.transform(X_test_ts)

    # Train a logistic regression classifier using the scaled training data
    clf3 = LogisticRegression(max_iter=1000, random_state=42)
    clf3.fit(X_train_ts_scaled, y_train_ts)

    # Predict class labels for the scaled testing data
    y_pred = clf3.predict(X_test_ts_scaled)

    # Calculate the confusion matrix
    confusion = confusion_matrix(y_test_ts, y_pred)
    confusion_matrices.append(confusion)

    # Calculate evaluation metrics
    prc += precision_score(y_test_ts, y_pred)
    rec += recall_score(y_test_ts, y_pred)
    f1 += f1_score(y_test_ts, y_pred)
    roc_auc += roc_auc_score(y_test_ts, y_pred)
    # Calculate the Matthews Correlation Coefficient (MCC)
    mcc = matthews_corrcoef(y_test_ts, y_pred)
    mcc_scores.append(mcc)

# Compute the average confusion matrix
average_confusion = sum(confusion_matrices) / len(confusion_matrices)
average_precision = prc / len(confusion_matrices)
average_recall = rec / len(confusion_matrices)
average_f1 = f1 / len(confusion_matrices)
average_roc_auc = roc_auc / len(confusion_matrices)
average_mcc = sum(mcc_scores) / len(mcc_scores)

end_time = time.time()
execution_time = end_time - start_time
print("Execution Time:", execution_time)

Execution Time: 0.4909942150115967


# Stability

In [41]:
# seed = 10
# Perform time-series validation by iterating over the desired temporal attribute range
# Define the temporal attribute column name and range
temporal_column = "month"
start_month = 0
end_month = 5

# Initialize lists to store the results
confusion_matrices = []
mcc_scores = []

# Perform time-series validation
prc = 0  # initial precision
rec = 0  # initial recall
f1 = 0  # initial f1 score
roc_auc = 0  # initial roc_auc

# Create a StandardScaler instance
scaler = StandardScaler()

for i in range(start_month, end_month):
    # Split the data into training and testing sets based on the month column
    X_train_ts = X_train_sampled[X_train_sampled[temporal_column] <= i]
    y_train_ts = y_train_sampled[X_train_sampled[temporal_column] <= i]
    X_test_ts = X_train_sampled[X_train_sampled[temporal_column] == i + 1]
    y_test_ts = y_train_sampled[X_train_sampled[temporal_column] == i + 1]

    # Scale the training and testing data
    X_train_ts_scaled = scaler.fit_transform(X_train_ts)
    X_test_ts_scaled = scaler.transform(X_test_ts)

    # Train a logistic regression classifier using the scaled training data
    clf3 = LogisticRegression(max_iter=1000, random_state=10)
    clf3.fit(X_train_ts_scaled, y_train_ts)

    # Predict class labels for the scaled testing data
    y_pred = clf3.predict(X_test_ts_scaled)

    # Calculate the confusion matrix
    confusion = confusion_matrix(y_test_ts, y_pred)
    confusion_matrices.append(confusion)

    # Calculate evaluation metrics
    prc += precision_score(y_test_ts, y_pred)
    rec += recall_score(y_test_ts, y_pred)
    f1 += f1_score(y_test_ts, y_pred)
    roc_auc += roc_auc_score(y_test_ts, y_pred)
    # Calculate the Matthews Correlation Coefficient (MCC)
    mcc = matthews_corrcoef(y_test_ts, y_pred)
    mcc_scores.append(mcc)

# Compute the average confusion matrix
average_confusion = sum(confusion_matrices) / len(confusion_matrices)
average_precision = prc / len(confusion_matrices)
average_recall = rec / len(confusion_matrices)
average_f1 = f1 / len(confusion_matrices)
average_roc_auc = roc_auc / len(confusion_matrices)
average_mcc = sum(mcc_scores) / len(mcc_scores)

print("Average Confusion Matrix:")
print(average_confusion)
print("\nAverage precision:")
print(average_precision)
print("\nAverage recall:")
print(average_recall)
print("\nAverage f1 score:")
print(average_f1)
print("\nAverage roc_auc score:")
print(average_roc_auc)
print("\nAverage MCC:")
print(average_mcc)

Average Confusion Matrix:
[[1296.8   15. ]
 [  30.  1261.6]]

Average precision:
0.9885754998346347

Average recall:
0.9759663755271613

Average f1 score:
0.9822195363817029

Average roc_auc score:
0.9823274783150768

Average MCC:
0.9652343816907953


In [42]:
# seed = 500
# Perform time-series validation by iterating over the desired temporal attribute range
# Define the temporal attribute column name and range
temporal_column = "month"
start_month = 0
end_month = 5

# Initialize lists to store the results
confusion_matrices = []
mcc_scores = []

# Perform time-series validation
prc = 0  # initial precision
rec = 0  # initial recall
f1 = 0  # initial f1 score
roc_auc = 0  # initial roc_auc

# Create a StandardScaler instance
scaler = StandardScaler()

for i in range(start_month, end_month):
    # Split the data into training and testing sets based on the month column
    X_train_ts = X_train_sampled[X_train_sampled[temporal_column] <= i]
    y_train_ts = y_train_sampled[X_train_sampled[temporal_column] <= i]
    X_test_ts = X_train_sampled[X_train_sampled[temporal_column] == i + 1]
    y_test_ts = y_train_sampled[X_train_sampled[temporal_column] == i + 1]

    # Scale the training and testing data
    X_train_ts_scaled = scaler.fit_transform(X_train_ts)
    X_test_ts_scaled = scaler.transform(X_test_ts)

    # Train a logistic regression classifier using the scaled training data
    clf3 = LogisticRegression(max_iter=1000, random_state=500)
    clf3.fit(X_train_ts_scaled, y_train_ts)

    # Predict class labels for the scaled testing data
    y_pred = clf3.predict(X_test_ts_scaled)

    # Calculate the confusion matrix
    confusion = confusion_matrix(y_test_ts, y_pred)
    confusion_matrices.append(confusion)

    # Calculate evaluation metrics
    prc += precision_score(y_test_ts, y_pred)
    rec += recall_score(y_test_ts, y_pred)
    f1 += f1_score(y_test_ts, y_pred)
    roc_auc += roc_auc_score(y_test_ts, y_pred)
    # Calculate the Matthews Correlation Coefficient (MCC)
    mcc = matthews_corrcoef(y_test_ts, y_pred)
    mcc_scores.append(mcc)

# Compute the average confusion matrix
average_confusion = sum(confusion_matrices) / len(confusion_matrices)
average_precision = prc / len(confusion_matrices)
average_recall = rec / len(confusion_matrices)
average_f1 = f1 / len(confusion_matrices)
average_roc_auc = roc_auc / len(confusion_matrices)
average_mcc = sum(mcc_scores) / len(mcc_scores)

print("Average Confusion Matrix:")
print(average_confusion)
print("\nAverage precision:")
print(average_precision)
print("\nAverage recall:")
print(average_recall)
print("\nAverage f1 score:")
print(average_f1)
print("\nAverage roc_auc score:")
print(average_roc_auc)
print("\nAverage MCC:")
print(average_mcc)

Average Confusion Matrix:
[[1296.8   15. ]
 [  30.  1261.6]]

Average precision:
0.9885754998346347

Average recall:
0.9759663755271613

Average f1 score:
0.9822195363817029

Average roc_auc score:
0.9823274783150768

Average MCC:
0.9652343816907953


In [43]:
# seed = 5000
# Perform time-series validation by iterating over the desired temporal attribute range
# Define the temporal attribute column name and range
temporal_column = "month"
start_month = 0
end_month = 5

# Initialize lists to store the results
confusion_matrices = []
mcc_scores = []

# Perform time-series validation
prc = 0  # initial precision
rec = 0  # initial recall
f1 = 0  # initial f1 score
roc_auc = 0  # initial roc_auc

# Create a StandardScaler instance
scaler = StandardScaler()

for i in range(start_month, end_month):
    # Split the data into training and testing sets based on the month column
    X_train_ts = X_train_sampled[X_train_sampled[temporal_column] <= i]
    y_train_ts = y_train_sampled[X_train_sampled[temporal_column] <= i]
    X_test_ts = X_train_sampled[X_train_sampled[temporal_column] == i + 1]
    y_test_ts = y_train_sampled[X_train_sampled[temporal_column] == i + 1]

    # Scale the training and testing data
    X_train_ts_scaled = scaler.fit_transform(X_train_ts)
    X_test_ts_scaled = scaler.transform(X_test_ts)

    # Train a logistic regression classifier using the scaled training data
    clf3 = LogisticRegression(max_iter=1000, random_state=5000)
    clf3.fit(X_train_ts_scaled, y_train_ts)

    # Predict class labels for the scaled testing data
    y_pred = clf3.predict(X_test_ts_scaled)

    # Calculate the confusion matrix
    confusion = confusion_matrix(y_test_ts, y_pred)
    confusion_matrices.append(confusion)

    # Calculate evaluation metrics
    prc += precision_score(y_test_ts, y_pred)
    rec += recall_score(y_test_ts, y_pred)
    f1 += f1_score(y_test_ts, y_pred)
    roc_auc += roc_auc_score(y_test_ts, y_pred)
    # Calculate the Matthews Correlation Coefficient (MCC)
    mcc = matthews_corrcoef(y_test_ts, y_pred)
    mcc_scores.append(mcc)

# Compute the average confusion matrix
average_confusion = sum(confusion_matrices) / len(confusion_matrices)
average_precision = prc / len(confusion_matrices)
average_recall = rec / len(confusion_matrices)
average_f1 = f1 / len(confusion_matrices)
average_roc_auc = roc_auc / len(confusion_matrices)
average_mcc = sum(mcc_scores) / len(mcc_scores)

print("Average Confusion Matrix:")
print(average_confusion)
print("\nAverage precision:")
print(average_precision)
print("\nAverage recall:")
print(average_recall)
print("\nAverage f1 score:")
print(average_f1)
print("\nAverage roc_auc score:")
print(average_roc_auc)
print("\nAverage MCC:")
print(average_mcc)

Average Confusion Matrix:
[[1296.8   15. ]
 [  30.  1261.6]]

Average precision:
0.9885754998346347

Average recall:
0.9759663755271613

Average f1 score:
0.9822195363817029

Average roc_auc score:
0.9823274783150768

Average MCC:
0.9652343816907953
