# Entrenamiento del Modelo de Detección de Fraude
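Este notebook carga el dataset de transacciones (o genera datos sintéticos si la descarga falla), entrena un modelo de regresión logística y guarda el modelo y el scaler en disco para su uso en el backend.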

In [None]:
import pandas as pd
import numpy as np
import pickle
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

# 1. Load data
# Try to download the CSV from the URL below. If that fails for any ordinary
# reason (no network, HTTP error, unparsable file), fall back to a synthetic
# frame that contains the columns the later steps read: V1..V28, Time,
# Amount and a binary Class label with 1% positives.
url = "https://raw.githubusercontent.com/nsethi31/Kaggle-Data-Credit-Card-Fraud-Detection/master/creditcard.csv"
try:
    df = pd.read_csv(url)
except Exception as exc:
    # `Exception` instead of a bare `except:` so KeyboardInterrupt/SystemExit
    # still interrupt the cell, and the actual cause is shown to the user
    # rather than silently discarded.
    print("Error cargando URL, generando datos sintéticos...")
    print(f"Detalle del error: {exc!r}")
    # Generate dummy data; the fixed seed keeps the fallback reproducible.
    np.random.seed(42)
    n_samples = 10000
    data = {f'V{i}': np.random.normal(0, 1, n_samples) for i in range(1, 29)}
    data['Time'] = np.random.normal(0, 1, n_samples)
    data['Amount'] = np.random.exponential(100, n_samples)
    df = pd.DataFrame(data)
    # Mark a random 1% of the rows as fraud (Class == 1); the rest stay 0.
    df['Class'] = 0
    fraud_idx = np.random.choice(n_samples, int(n_samples*0.01), replace=False)
    df.loc[fraud_idx, 'Class'] = 1
else:
    print("Dataset cargado remotamente.")

# 2. Preprocessing
# Choose the train/test rows BEFORE fitting the scaler so that its mean and
# variance come from training rows only. Fitting on the whole column would
# leak test-set statistics into the model inputs, the evaluation and the
# pickled scaler. Splitting row positions with the same test_size and
# random_state selects the same rows as splitting X and y directly.
row_positions = np.arange(len(df))
train_pos, test_pos = train_test_split(row_positions, test_size=0.2, random_state=42)

scaler = StandardScaler()
scaler.fit(df[['Amount']].iloc[train_pos])
# Apply the train-fitted parameters to the full column so df, X and both
# splits all carry the same scaled 'Amount'.
df['Amount'] = scaler.transform(df[['Amount']])
if 'Time' in df.columns:
    df.drop('Time', axis=1, inplace=True)

# Features are every remaining column except the label.
X = df.drop('Class', axis=1)
y = df['Class']

# 3. Split
# 80/20 split using the row positions chosen above.
X_train, X_test = X.iloc[train_pos], X.iloc[test_pos]
y_train, y_test = y.iloc[train_pos], y.iloc[test_pos]

# 4. Train
# Logistic regression with the iteration cap raised to 1000, fitted on the
# training split.
classifier_options = {"max_iter": 1000}
model = LogisticRegression(**classifier_options)
model.fit(X=X_train, y=y_train)

# 5. Evaluate
# Predict on the held-out split, then print the overall accuracy followed by
# scikit-learn's classification report.
y_pred = model.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", test_accuracy)
report_text = classification_report(y_test, y_pred)
print(report_text)

# 6. Save model
# Pickle the fitted model and the fitted scaler into the current working
# directory, model first, then scaler.
for artifact_path, artifact in (('model.pkl', model), ('scaler.pkl', scaler)):
    with open(artifact_path, 'wb') as f:
        pickle.dump(artifact, f)
print("Modelo y scaler guardados como model.pkl y scaler.pkl")