In [1]:
import numpy as np
from tensorflow.keras.models import load_model
import joblib
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
import pandas as pd

# ---------------------------------------------------------------------------
# Stage 1: rebuild the train/test features and collect base-model predictions
# ---------------------------------------------------------------------------

# Read the dataset from disk.
data = pd.read_csv('dataset.csv')

# Drop the 'nameOrig' / 'nameDest' columns and label-encode 'type' so that
# every remaining column can be fed to the scaler below.
label_encoder = LabelEncoder()
data = data.drop(columns=['nameOrig', 'nameDest'])
data = data.assign(type=label_encoder.fit_transform(data['type']))

# Target is 'isFraud'; every other remaining column is a feature.
y = data['isFraud']
X = data.drop(columns=['isFraud'])

# 80/20 hold-out split with a fixed seed so the partition is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Standardize the features: statistics are learned from the training part
# only and then applied to both parts.
# NOTE(review): the encoder and scaler are re-fit here instead of being
# loaded from disk; presumably this reproduces the preprocessing the saved
# models were trained with -- confirm.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Load the previously trained base models from disk.
dt_model = joblib.load('dt_model_best.joblib')
ann_model = load_model('ann_model_best.h5')

# Base-model predictions on the training part. The ANN output is turned into
# hard 0/1 labels by thresholding at 0.5.
dt_pred_train = dt_model.predict(X_train)
ann_pred_train_proba = ann_model.predict(X_train)
ann_pred_train = (ann_pred_train_proba > 0.5).astype(np.int32)

# Same predictions on the held-out part.
dt_pred_test = dt_model.predict(X_test)
ann_pred_test_proba = ann_model.predict(X_test)
ann_pred_test = (ann_pred_test_proba > 0.5).astype(np.int32)

# Meta-features: one column per base model (decision tree first, ANN second).
X_meta_train = np.column_stack([dt_pred_train, ann_pred_train])
X_meta_test = np.column_stack([dt_pred_test, ann_pred_test])

from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score

# ---------------------------------------------------------------------------
# Stage 2: tune, persist and evaluate the logistic-regression meta-model
# ---------------------------------------------------------------------------

# The meta-model is a logistic regression fitted on the stacked base-model
# predictions (X_meta_train).
# NOTE(review): X_meta_train holds base-model predictions for the training
# rows; if the base models were fit on those same rows, the meta-model learns
# from in-sample predictions -- confirm whether out-of-fold predictions are
# needed.
meta_model = LogisticRegression()

# Search space: 5 regularisation strengths x 2 solvers = 10 candidates.
param_grid = dict(
    C=[0.01, 0.1, 1, 10, 100],
    solver=['liblinear', 'lbfgs'],
)

# 5-fold cross-validated grid search; candidates are ranked by precision.
grid_search_meta = GridSearchCV(
    meta_model,
    param_grid,
    scoring='precision',
    cv=5,
    n_jobs=-1,
    verbose=1,
)
grid_search_meta.fit(X_meta_train, y_train)

# Keep the winning hyperparameters and the estimator refit with them.
best_meta_params, best_meta_model = (
    grid_search_meta.best_params_,
    grid_search_meta.best_estimator_,
)

# Persist the tuned meta-model to disk.
joblib.dump(best_meta_model, 'best_meta_model.joblib')

# Score the held-out meta-features.
y_meta_pred = best_meta_model.predict(X_meta_test)

# Evaluation metrics on the held-out part.
conf_matrix = confusion_matrix(y_test, y_meta_pred)
accuracy, precision, recall, f1 = (
    _metric(y_test, y_meta_pred)
    for _metric in (accuracy_score, precision_score, recall_score, f1_score)
)

# Report: confusion matrix first, then the scalar metrics, then the chosen
# hyperparameters.
print("Confusion Matrix (Meta-Model):")
print(conf_matrix)
print("\nMetrics (Meta-Model):")
for _label, _value in (
    ("Accuracy:", accuracy),
    ("Precision:", precision),
    ("Recall:", recall),
    ("F1-score:", f1),
):
    print(_label, _value)

print("Best Parameters found by Grid Search:")
print(best_meta_params)




159066/159066 ━━━━━━━━━━━━━━━━━━━━ 125s 783us/step
39767/39767 ━━━━━━━━━━━━━━━━━━━━ 36s 897us/step
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Confusion Matrix (Meta-Model):
[[1270448     456]
 [     59    1561]]

Metrics (Meta-Model):
Accuracy: 0.9995952925052887
Precision: 0.7739216658403569
Recall: 0.9635802469135802
F1-score: 0.8583997800384933
Best Parameters found by Grid Search:
{'C': 0.01, 'solver': 'liblinear'}
