In [1]:
import joblib
from sklearn.metrics import roc_auc_score, precision_score, recall_score, f1_score
import pandas as pd

# Load the transaction dataset from the CSV file in the working directory.
data = pd.read_csv('BalanceData.csv')

from sklearn.preprocessing import LabelEncoder

# Drop both raw timestamp columns ('trans_date_trans_time' and 'transaction_time').
data.drop(['trans_date_trans_time', 'transaction_time'], axis=1, inplace=True)

# Label-encode each categorical column into a new '<column>_encoded' column.
# A single encoder instance is refit for every column, so once the loop ends
# it only holds the classes of the last column in the list.
categorical_cols = [
    'merchant', 'category', 'first', 'last', 'gender',
    'street', 'city', 'state', 'job',
]
label_encoder = LabelEncoder()
for col in categorical_cols:
    data[f'{col}_encoded'] = label_encoder.fit_transform(data[col])

# Drop the original (non-numeric) categorical columns together with
# 'dob' and 'trans_num', which are not used as features.
data.drop(categorical_cols + ['dob', 'trans_num'], axis=1, inplace=True)

from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer 

# Hold out 20% of the rows as the test split, then carve 10% of the remaining
# rows out as the dev (validation) split. Both splits use random_state=42.
X_train, X_test, y_train, y_test = train_test_split(
    data.drop(columns='is_fraud'),
    data['is_fraud'],
    test_size=0.2,
    random_state=42,
)
X_train, X_dev, y_train, y_dev = train_test_split(
    X_train,
    y_train,
    test_size=0.1,
    random_state=42,
)

# Fill missing values with the per-column mean. The imputer is fit on the
# training split only and then applied to the train and dev splits.
# X_test is not imputed here.
imputer = SimpleImputer(strategy='mean')
imputer.fit(X_train)
X_train_imputed = imputer.transform(X_train)
X_dev_imputed = imputer.transform(X_dev)

In [4]:


def evaluate_model(model, X, y):
    """Compute ROC-AUC, precision, recall and F1 for a fitted classifier.

    Args:
        model: Fitted estimator exposing ``predict`` and either
            ``predict_proba`` or ``decision_function``.
        X: Feature matrix to score.
        y: True binary labels aligned with the rows of ``X``.

    Returns:
        Tuple ``(roc_auc, precision, recall, f1)``.
    """
    # Continuous scores for ROC-AUC: the second predict_proba column when the
    # model provides it, otherwise the raw decision-function output.
    if hasattr(model, 'predict_proba'):
        y_score = model.predict_proba(X)[:, 1]
    else:
        y_score = model.decision_function(X)
    roc_auc = roc_auc_score(y, y_score)

    # Hard labels are computed once and shared by the three label-based
    # metrics instead of re-running inference for each of them.
    y_pred = model.predict(X)
    precision = precision_score(y, y_pred)
    recall = recall_score(y, y_pred)
    f1 = f1_score(y, y_pred)

    return roc_auc, precision, recall, f1

# Evaluate the initial and then the incrementally-trained XGBoost model on the
# dev split, printing one metrics line per model (initial first).
# joblib.load unpickles the file, so only trusted model files should be loaded.
for model_path in ('XGBoost_initial_model.pkl', 'XGBoost_incremental_model.pkl'):
    model = joblib.load(model_path)

    roc_auc_before, precision_before, recall_before, f1_before = evaluate_model(
        model, X_dev_imputed, y_dev
    )

    print(f"ROC-AUC: {roc_auc_before}, Precisión: {precision_before}, Recall: {recall_before}, F1-score: {f1_before}")

ROC-AUC: 0.999940938211968, Precisión: 0.9970716435232042, Recall: 1.0, F1-score: 0.9985336748001541
ROC-AUC: 0.9999737131241117, Precisión: 0.9978930877069772, Recall: 1.0, F1-score: 0.9989454329133092


In [5]:
# Evaluate the initial and then the incrementally-trained ANN model on the
# dev split, printing one metrics line per model (initial first).
# joblib.load unpickles the file, so only trusted model files should be loaded.
for model_path in ('ANN_initial_model.pkl', 'ANN_incremental_model.pkl'):
    model = joblib.load(model_path)

    roc_auc_before, precision_before, recall_before, f1_before = evaluate_model(
        model, X_dev_imputed, y_dev
    )

    print(f"ROC-AUC: {roc_auc_before}, Precisión: {precision_before}, Recall: {recall_before}, F1-score: {f1_before}")

ROC-AUC: 0.5, Precisión: 0.5011955677505351, Recall: 1.0, F1-score: 0.6677285471892926


  _warn_prf(average, modifier, msg_start, len(result))


ROC-AUC: 0.5, Precisión: 0.0, Recall: 0.0, F1-score: 0.0


In [6]:
# Evaluate the initial and then the incrementally-trained Random Forest model
# on the dev split, printing one metrics line per model (initial first).
# joblib.load unpickles the file, so only trusted model files should be loaded.
for model_path in ('Random Forest_initial_model.pkl', 'Random Forest_incremental_model.pkl'):
    model = joblib.load(model_path)

    roc_auc_before, precision_before, recall_before, f1_before = evaluate_model(
        model, X_dev_imputed, y_dev
    )

    print(f"ROC-AUC: {roc_auc_before}, Precisión: {precision_before}, Recall: {recall_before}, F1-score: {f1_before}")

ROC-AUC: 1.0, Precisión: 0.9995400432900433, Recall: 1.0, F1-score: 0.9997699687428115
ROC-AUC: 1.0, Precisión: 1.0, Recall: 1.0, F1-score: 1.0


In [7]:
# Evaluate the initial and then the incrementally-trained LightGBM model on
# the dev split, printing one metrics line per model (initial first).
# joblib.load unpickles the file, so only trusted model files should be loaded.
for model_path in ('LightGBM_initial_model.pkl', 'LightGBM_incremental_model.pkl'):
    model = joblib.load(model_path)

    roc_auc_before, precision_before, recall_before, f1_before = evaluate_model(
        model, X_dev_imputed, y_dev
    )

    print(f"ROC-AUC: {roc_auc_before}, Precisión: {precision_before}, Recall: {recall_before}, F1-score: {f1_before}")

found 0 physical cores < 1
  File "c:\Users\diego\AppData\Local\Programs\Python\Python311\Lib\site-packages\joblib\externals\loky\backend\context.py", line 245, in _count_physical_cores
    raise ValueError(


ROC-AUC: 0.9996665752013862, Precisión: 0.991479504481821, Recall: 0.9992826787212733, F1-score: 0.99536579858244
ROC-AUC: 0.9996801911944654, Precisión: 0.9915070461305028, Recall: 0.9993909536312698, F1-score: 0.9954333898402192


In [14]:
import numpy as np
from sklearn.metrics import roc_auc_score, precision_score, recall_score, f1_score
import joblib

import tensorflow as tf

# Linear SVM expressed as a Keras model: one L2-regularised dense unit with
# no activation.
class LinearSVM(tf.keras.Model):
    """Keras model consisting of a single linear output unit."""

    def __init__(self):
        super().__init__()
        # L2 penalty (factor 0.01) on the layer's kernel weights.
        weight_penalty = tf.keras.regularizers.l2(0.01)
        self.dense = tf.keras.layers.Dense(
            1,
            activation=None,
            kernel_regularizer=weight_penalty,
        )

    def call(self, inputs):
        """Return the dense layer's output for ``inputs``."""
        return self.dense(inputs)

# File names of the pickled initial and incrementally-trained SVM models.
initial_model_filename = "SVM_initial_model.pkl"
incremental_model_filename = "SVM_incremental_model.pkl"

# Load both models, initial first. joblib.load unpickles the files, so only
# trusted model files should be loaded.
initial_model, incremental_model = [
    joblib.load(path)
    for path in (initial_model_filename, incremental_model_filename)
]

# TODO: load the held-out test data (X_test_imputed, y_test) here and preprocess it if needed.
# Until that is done, the evaluation below runs on the dev split (X_dev_imputed, y_dev).

# Threshold-based evaluate_model for models whose predict() returns scores.
# NOTE: this rebinds the name evaluate_model, replacing the definition from
# the earlier cell for everything executed after this point.
def evaluate_model(model, X, y, threshold=0.5):
    """Compute ROC-AUC, precision, recall and F1 from ``model.predict`` scores.

    Args:
        model: Object whose ``predict(X)`` returns numeric scores supporting
            ``>=`` comparison and ``.astype`` (e.g. a NumPy array).
        X: Feature matrix to score.
        y: True binary labels aligned with the rows of ``X``.
        threshold: Scores greater than or equal to this value are labelled 1,
            all others 0.

    Returns:
        Tuple ``(roc_auc, precision, recall, f1)``.
    """
    # NOTE(review): the scores are treated as probabilities by the default
    # 0.5 cut-off; confirm that matches what the model's predict() emits.
    scores = model.predict(X)
    labels = (scores >= threshold).astype(int)

    # ROC-AUC uses the continuous scores; the other metrics use hard labels.
    roc_auc = roc_auc_score(y, scores)
    precision, recall, f1 = [
        metric(y, labels) for metric in (precision_score, recall_score, f1_score)
    ]

    return roc_auc, precision, recall, f1

# Score the initial SVM model on the dev split and report its metrics.
roc_auc_initial, precision_initial, recall_initial, f1_initial = evaluate_model(
    initial_model, X_dev_imputed, y_dev
)
print(
    "Métricas del modelo inicial:",
    f"ROC-AUC: {roc_auc_initial}, Precisión: {precision_initial}, Recall: {recall_initial}, F1-score: {f1_initial}",
    sep="\n",
)

# Score the incrementally-trained SVM model on the same dev split.
roc_auc_incremental, precision_incremental, recall_incremental, f1_incremental = evaluate_model(
    incremental_model, X_dev_imputed, y_dev
)
print(
    "Métricas del modelo de entrenamiento incremental:",
    f"ROC-AUC: {roc_auc_incremental}, Precisión: {precision_incremental}, Recall: {recall_incremental}, F1-score: {f1_incremental}",
    sep="\n",
)


Métricas del modelo inicial:
ROC-AUC: 0.49299079804473644, Precisión: 0.5011955677505351, Recall: 1.0, F1-score: 0.6677285471892926
Métricas del modelo de entrenamiento incremental:
ROC-AUC: 0.5069611277327026, Precisión: 0.0, Recall: 0.0, F1-score: 0.0


  _warn_prf(average, modifier, msg_start, len(result))
