# Entrenamiento de los modelos

## Librerías utilizadas

In [None]:
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, f1_score
import numpy as np
from sklearn.ensemble import RandomForestClassifier
import joblib
from sklearn.linear_model import LogisticRegression


In [21]:
# Load every dimensionality-reduced feature set from the dim_reduction/ folder.
path = "dim_reduction/"

# PCA features. The labels are read only from this archive and reused for
# every other representation below.
# NOTE(review): assumes all archives share the same row order - TODO confirm.
data = np.load(path + 'pca_reduced.npz')
X_train_pca = data['X_train']
y_train = data['y_train']
X_test_pca = data['X_test']
y_test = data['y_test']

# SVD features
data_svd = np.load(path + 'svd_reduced.npz')
X_train_svd = data_svd['X_train']
X_test_svd = data_svd['X_test']

# FA features
data_fa = np.load(path + 'fa_reduced.npz')
X_train_fa = data_fa['X_train']
X_test_fa = data_fa['X_test']

# LDA features. The "_rp" names are kept so existing cells keep working;
# the "_lda" aliases match the archive that is actually loaded.
data_rp = np.load(path + 'lda_reduced.npz')
X_train_rp = data_rp['X_train']
X_test_rp = data_rp['X_test']
X_train_lda, X_test_lda = X_train_rp, X_test_rp

# JL features
data_jl = np.load(path + 'jl_reduced.npz')
X_train_jl = data_jl['X_train']
X_test_jl = data_jl['X_test']

## Entrenamiento del modelo

### Función de resultados

In [20]:
def showResults(y_test, y_pred):
    """Print test-set metrics for a binary classifier.

    Writes the accuracy, the macro-averaged F1 score, the per-class
    classification report and the confusion matrix (with a TN/FP/FN/TP
    breakdown) to stdout.

    Args:
        y_test: Ground-truth labels of the test set.
        y_pred: Predicted labels, in the same order as ``y_test``.

    Returns:
        None.

    Note:
        The display names 'IA' and 'Humano' are applied in sorted label
        order, and the breakdown indexes a 2x2 confusion matrix, so exactly
        two classes are expected.
        NOTE(review): presumably the lower label is IA - confirm the encoding.
    """
    separator = '=' * 60
    print(f"\n{separator}")
    print("RESULTADOS EN TEST SET (Split por Documento)")
    print(separator)
    print(f"\nAccuracy: {accuracy_score(y_test, y_pred):.4f}")
    # The recorded notebook output includes this line; it also puts the
    # otherwise unused f1_score import to work.
    print(f"Macro F1: {f1_score(y_test, y_pred, average='macro'):.4f}")

    print("\n--- Classification Report ---")
    print(classification_report(y_test, y_pred, target_names=['IA', 'Humano'], digits=4))

    print("\n--- Confusion Matrix ---")
    cm = confusion_matrix(y_test, y_pred)
    print(cm)
    # Rows are true classes and columns are predicted classes.
    print("\nInterpretación:")
    print(f"  TN (IA correctamente clasificada): {cm[0,0]}")
    print(f"  FP (IA clasificada como Humano): {cm[0,1]}")
    print(f"  FN (Humano clasificado como IA): {cm[1,0]}")
    print(f"  TP (Humano correctamente clasificado): {cm[1,1]}")

### SVM

In [None]:
# Feature representation for this run; uncomment exactly one alternative to
# compare another reduction.
trainX, testX = X_train_pca, X_test_pca
# trainX, testX = X_train_rp, X_test_rp    # LDA (loaded under the "_rp" names)
# trainX, testX = X_train_fa, X_test_fa
# trainX, testX = X_train_svd, X_test_svd
# trainX, testX = X_train_jl, X_test_jl

print("="*60)

# Regularization strength, held in a variable so it can be reused (for
# example in the saved model's filename), mirroring n_estimators in the
# Random Forest cell.
C = 10

svm_model = SVC(kernel='rbf', C=C, gamma='scale', random_state=42)
svm_model.fit(trainX, y_train)

# Predictions on the held-out split, consumed by the results cell.
y_pred = svm_model.predict(testX)

print("Modelo entrenado exitosamente")



In [None]:
# Report the SVM test-set metrics, then persist the fitted model.
showResults(y_test, y_pred)
# Read C from the fitted estimator so the filename always matches the model
# being saved and does not depend on a separate `C` variable.
# NOTE: the "_pca" suffix is hard-coded; adjust it if trainX was switched.
model_filename = f'svm_model_C{svm_model.C}_pca.pkl'
joblib.dump(svm_model, model_filename)
print(f"Modelo guardado como {model_filename}")

### Random Forest

In [None]:
# Pick the feature representation for the Random Forest run; uncomment one
# alternative to try a different reduction.
trainX = X_train_pca
testX = X_test_pca
# trainX, testX = X_train_rp, X_test_rp    # LDA (loaded under the "_rp" names)
# trainX, testX = X_train_fa, X_test_fa
# trainX, testX = X_train_svd, X_test_svd
# trainX, testX = X_train_jl, X_test_jl

# Number of trees; also embedded in the saved model's filename.
n_estimators = 100

# fit() returns the estimator itself, so construction and training chain.
random_forest_model = RandomForestClassifier(
    n_estimators=n_estimators,
    random_state=42,
).fit(trainX, y_train)
y_pred = random_forest_model.predict(testX)

In [None]:
# Report the Random Forest test-set metrics, then persist the fitted model.
showResults(y_test, y_pred)
# joblib is already imported in the notebook's import cell, so it is not
# re-imported here. The filename records the number of trees.
# NOTE: the "_pca" suffix is hard-coded; adjust it if trainX was switched.
model_filename = f'rf_model_n{n_estimators}_pca.pkl'
joblib.dump(random_forest_model, model_filename)
print(f"Modelo guardado como {model_filename}")


RESULTADOS EN TEST SET (Split por Documento)

Accuracy: 0.6418

Macro F1: 0.6146

--- Classification Report ---
              precision    recall  f1-score   support

          IA     0.6368    0.4283    0.5122      1130
      Humano     0.6439    0.8089    0.7170      1444

    accuracy                         0.6418      2574
   macro avg     0.6404    0.6186    0.6146      2574
weighted avg     0.6408    0.6418    0.6271      2574


--- Confusion Matrix ---
[[ 484  646]
 [ 276 1168]]

Interpretación:
  TN (IA correctamente clasificada): 484
  FP (IA clasificada como Humano): 646
  FN (Humano clasificado como IA): 276
  TP (Humano correctamente clasificado): 1168


### Regresión logística

In [None]:
# Pick the feature representation for the logistic-regression run; uncomment
# one alternative to try a different reduction.
trainX = X_train_pca
testX = X_test_pca
# trainX, testX = X_train_rp, X_test_rp    # LDA (loaded under the "_rp" names)
# trainX, testX = X_train_fa, X_test_fa
# trainX, testX = X_train_svd, X_test_svd
# trainX, testX = X_train_jl, X_test_jl

# fit() returns the estimator itself, so construction and training chain.
logistic_model = LogisticRegression(
    max_iter=1000,
    random_state=42,
).fit(trainX, y_train)
y_pred = logistic_model.predict(testX)

In [None]:
# Report the logistic-regression test-set metrics, then persist the model.
showResults(y_test, y_pred)
# Save the logistic model under its own name; dumping random_forest_model to
# the rf_model file here would leave the logistic model unsaved.
# joblib is already imported in the notebook's import cell.
# NOTE: the "_pca" suffix is hard-coded; adjust it if trainX was switched.
model_filename = 'logistic_model_pca.pkl'
joblib.dump(logistic_model, model_filename)
print(f"Modelo guardado como {model_filename}")

In [None]:
# Disabled cell: alternative copy of the SVM save step, kept for reference.
# The filename reads C from the fitted estimator so it does not depend on a
# separate `C` variable.
# import joblib
# model_filename = f'svm_model_C{svm_model.C}_pca.pkl'
# joblib.dump(svm_model, model_filename)
# print(f"Modelo guardado como {model_filename}")

In [None]:
# Disabled example: reload a previously saved model and re-evaluate it.
# `model_filename` is whatever the most recently executed save cell assigned.
# NOTE: joblib.load unpickles the file, so only load files you trust.
# loaded_model = joblib.load(model_filename)
# y_pred_loaded = loaded_model.predict(testX)

# Example of using the loaded model
# showResults(y_test, y_pred_loaded)