### Bibliotecas

In [24]:
import pandas as pd
import json
import logging
import os
from cryptography.fernet import Fernet
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier  # Red Neuronal en Scikit-learn
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import streamlit as st
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
import umap.umap_ as umap

### Auditorias

In [25]:
logging.basicConfig(filename="audit_log.txt", level=logging.INFO, format="%(asctime)s - %(message)s")

### Roles

In [26]:
config = {
    "admin": {"access_level": "full"},
    "user": {"access_level": "restricted"}
}

with open("config.json", "w") as f:
    json.dump(config, f)

# Cargar configuración de roles
with open("config.json", "r") as f:
    roles = json.load(f)

### Claves cifrado

In [27]:
key = Fernet.generate_key()
cipher_suite = Fernet(key)

### Verificacion

In [28]:
def check_access(role):
    """Verifica los permisos de los roles."""
    if role not in roles:
        logging.warning(f"Acceso denegado: Rol {role} no registrado")
        raise PermissionError("Acceso denegado: Rol no registrado")
    return roles[role]["access_level"]

def encrypt_data(data):
    """Cifra datos sensibles."""
    return cipher_suite.encrypt(data.encode()).decode()

def decrypt_data(encrypted_data):
    """Descifra datos sensibles."""
    return cipher_suite.decrypt(encrypted_data.encode()).decode()

### Backup

In [29]:
def backup_data(df, filename="backup_data.csv"):
    """Realiza un backup de los datos."""
    df.to_csv(filename, index=False)
    logging.info(f"Backup realizado con éxito en {filename}")

### Extraccion y limpieza

In [30]:
url = "https://raw.githubusercontent.com/Gabriellogan11/test/main/emp_attrition.csv"
df = pd.read_csv(url)

logging.info("Datos extraidos con éxito.")

# Ejecutamos el backup
backup_data(df)

### Cifrado

In [31]:
df["MonthlyIncome"] = df["MonthlyIncome"].astype(str).apply(encrypt_data)
logging.info("Datos cifrados correctamente.")

### Guardar datos procesados

In [32]:
df.to_csv("datos_procesados.csv", index=False)
logging.info("Datos procesados guardados en datos_procesados.csv")

### Streamlit

In [33]:
# Crear una aplicación Streamlit
st.title("Data Pipeline con Random Forest, SVM y Red Neuronal")

# Selección de la variable objetivo
target_column = st.selectbox("Selecciona la variable objetivo", df.columns)

# Actualizar los datos para el modelado
if df[target_column].dtype == 'object':
    unique_values = df[target_column].unique()
    if len(unique_values) == 2:
        df[target_column] = df[target_column].map({unique_values[0]: 0, unique_values[1]: 1})
    else:
        st.error("La columna seleccionada no es binaria (debe tener exactamente 2 valores únicos).")
        st.stop()

numeric_columns = df.select_dtypes(include=['number']).columns
X = df[numeric_columns].drop(columns=[target_column])  # Excluir la columna seleccionada como target
y = df[target_column]

# Dividir los datos en conjuntos de entrenamiento y prueba
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=42)

# Escalar características
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

logging.info("Datos escalados correctamente.")



### Entrenamiento Random Forest

In [34]:
# Crear y entrenar el modelo
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

# Realizar predicciones
y_pred_rf = rf_model.predict(X_test)

# Evaluar el modelo
accuracy_rf = accuracy_score(y_test, y_pred_rf)
report_rf = classification_report(y_test, y_pred_rf)

# Matriz de confusión para Random Forest
cm_rf = confusion_matrix(y_test, y_pred_rf)


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.



### Entrenamiento SVM

In [35]:
# Crear y entrenar el modelo SVM
svm_model = SVC(kernel='linear', random_state=42)
svm_model.fit(X_train, y_train)

# Realizar predicciones
y_pred_svm = svm_model.predict(X_test)

# Evaluar el modelo
accuracy_svm = accuracy_score(y_test, y_pred_svm)
report_svm = classification_report(y_test, y_pred_svm)

# Matriz de confusión para SVM
cm_svm = confusion_matrix(y_test, y_pred_svm)


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.



### Entrenamiento de la Red Neuronal (MLPClassifier)

In [36]:
# Crear y entrenar el modelo
nn_model = MLPClassifier(hidden_layer_sizes=(64, 32), activation='relu', solver='adam', max_iter=1000, random_state=42)
nn_model.fit(X_train, y_train)

# Realizar predicciones
y_pred_nn = nn_model.predict(X_test)

# Evaluar el modelo
accuracy_nn = accuracy_score(y_test, y_pred_nn)
report_nn = classification_report(y_test, y_pred_nn)

# Matriz de confusión para la Red Neuronal
cm_nn = confusion_matrix(y_test, y_pred_nn)


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.



### Funciones para gráficos

In [37]:
def plot_decision_boundary_2d(model, X, y, title):
    """Grafica la frontera de decisión en 2D usando PCA."""
    pca = PCA(n_components=2)
    X_pca = pca.fit_transform(X)
    
    # Crear un meshgrid para la frontera de decisión
    x_min, x_max = X_pca[:, 0].min() - 1, X_pca[:, 0].max() + 1
    y_min, y_max = X_pca[:, 1].min() - 1, X_pca[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.1),
                         np.arange(y_min, y_max, 0.1))
    
    # Predecir la clase para cada punto en el meshgrid
    mesh_points = np.c_[xx.ravel(), yy.ravel()]
    Z = model.predict(pca.inverse_transform(mesh_points))
    Z = Z.reshape(xx.shape)
    
    # Crear el gráfico 2D
    fig = go.Figure(data=[
        go.Scatter(x=X_pca[:, 0], y=X_pca[:, 1], mode='markers',
                  marker=dict(size=8, color=y, colorscale='Viridis', opacity=0.8)),
        go.Contour(x=np.arange(x_min, x_max, 0.1), y=np.arange(y_min, y_max, 0.1), z=Z,
                  colorscale='Blues', opacity=0.3, showscale=False)
    ])
    fig.update_layout(title=title, xaxis_title="PC1", yaxis_title="PC2")
    return fig

def plot_decision_boundary_3d(model, X, y, title):
    """Grafica la frontera de decisión en 3D usando PCA."""
    pca = PCA(n_components=3)
    X_pca = pca.fit_transform(X)
    
    # Crear un meshgrid en 3D
    x_min, x_max = X_pca[:, 0].min() - 1, X_pca[:, 0].max() + 1
    y_min, y_max = X_pca[:, 1].min() - 1, X_pca[:, 1].max() + 1
    z_min, z_max = X_pca[:, 2].min() - 1, X_pca[:, 2].max() + 1
    xx, yy, zz = np.meshgrid(np.arange(x_min, x_max, 0.1),
                             np.arange(y_min, y_max, 0.1),
                             np.arange(z_min, z_max, 0.1))
    
    # Predecir la clase para cada punto en el meshgrid
    mesh_points = np.c_[xx.ravel(), yy.ravel(), zz.ravel()]
    Z = model.predict(pca.inverse_transform(mesh_points))
    Z = Z.reshape(xx.shape)
    
    # Crear el gráfico 3D
    fig = go.Figure(data=[
        go.Scatter3d(x=X_pca[:, 0], y=X_pca[:, 1], z=X_pca[:, 2], mode='markers',
                     marker=dict(size=5, color=y, colorscale='Viridis', opacity=0.8)),
        go.Isosurface(x=xx.flatten(), y=yy.flatten(), z=zz.flatten(), value=Z.flatten(),
                      isomin=0.5, isomax=1.5, surface_count=1, opacity=0.3, colorscale='Blues')
    ])
    fig.update_layout(scene=dict(xaxis_title='PC1', yaxis_title='PC2', zaxis_title='PC3'),
                      title=title)
    return fig

def plot_scatter_2d(X, y, title):
    """Gráfico de dispersión en 2D."""
    pca = PCA(n_components=2)
    X_pca = pca.fit_transform(X)
    fig = px.scatter(x=X_pca[:, 0], y=X_pca[:, 1], color=y, title=title,
                     labels={"x": "Componente 1", "y": "Componente 2"})
    return fig

def plot_scatter_3d(X, y, title):
    """Gráfico de dispersión en 3D."""
    pca = PCA(n_components=3)
    X_pca = pca.fit_transform(X)
    fig = px.scatter_3d(x=X_pca[:, 0], y=X_pca[:, 1], z=X_pca[:, 2], color=y, title=title,
                        labels={"x": "Componente 1", "y": "Componente 2", "z": "Componente 3"})
    return fig

def plot_tsne(X, y, title):
    """Gráfico de t-SNE para reducción de dimensionalidad."""
    tsne = TSNE(n_components=2, random_state=42)
    X_tsne = tsne.fit_transform(X)
    fig = px.scatter(x=X_tsne[:, 0], y=X_tsne[:, 1], color=y, title=title,
                     labels={"x": "t-SNE 1", "y": "t-SNE 2"})
    return fig

def plot_umap(X, y, title):
    """Gráfico de UMAP para reducción de dimensionalidad."""
    reducer = umap.UMAP(random_state=42)  # Configura UMAP
    X_umap = reducer.fit_transform(X)  # Aplica UMAP a los datos
    fig = px.scatter(x=X_umap[:, 0], y=X_umap[:, 1], color=y, title=title,
                     labels={"x": "UMAP 1", "y": "UMAP 2"})
    return fig

### Streamlit settings

In [38]:
tab1, tab2, tab3 = st.tabs(["Random Forest", "SVM", "Red Neuronal"])

# Función para mostrar gráficos en cada pestaña
def display_model_tab(model, X_train, y_train, X_test, y_test, accuracy, report, cm, model_name):
    st.header(model_name)
    st.write(f"Precisión del modelo {model_name}:", accuracy)
    st.write(f"Reporte de clasificación {model_name}:")
    st.text(report)

    # Mostrar matriz de confusión
    st.subheader(f"Matriz de Confusión - {model_name}")
    fig_cm = px.imshow(cm, text_auto=True, labels=dict(x="Predicción", y="Real", color="Cantidad"),
                       x=model.classes_, y=model.classes_)
    st.plotly_chart(fig_cm)

    # Selección de gráfico
    st.subheader(f"Selecciona el tipo de gráfico para {model_name}")
    graph_type = st.selectbox("Tipo de gráfico", 
                              ["2D - PCA", "3D - PCA", "t-SNE", "UMAP", "Frontera de Decisión 2D", "Frontera de Decisión 3D"],
                              key=f"{model_name}_graph")

    if graph_type == "2D - PCA":
        st.subheader(f"Gráfico de Dispersión 2D (PCA) - {model_name}")
        fig = plot_scatter_2d(X_train, y_train, f"Dispersión 2D (PCA) - {model_name}")
        st.plotly_chart(fig)
    elif graph_type == "3D - PCA":
        st.subheader(f"Gráfico de Dispersión 3D (PCA) - {model_name}")
        fig = plot_scatter_3d(X_train, y_train, f"Dispersión 3D (PCA) - {model_name}")
        st.plotly_chart(fig)
    elif graph_type == "t-SNE":
        st.subheader(f"Gráfico de t-SNE - {model_name}")
        fig = plot_tsne(X_train, y_train, f"t-SNE - {model_name}")
        st.plotly_chart(fig)
    elif graph_type == "UMAP":
        st.subheader(f"Gráfico de UMAP - {model_name}")
        fig = plot_umap(X_train, y_train, f"UMAP - {model_name}")
        st.plotly_chart(fig)
    elif graph_type == "Frontera de Decisión 2D":
        st.subheader(f"Frontera de Decisión 2D - {model_name}")
        fig = plot_decision_boundary_2d(model, X_train, y_train, f"Frontera de Decisión 2D - {model_name}")
        st.plotly_chart(fig)
    elif graph_type == "Frontera de Decisión 3D":
        st.subheader(f"Frontera de Decisión 3D - {model_name}")
        fig = plot_decision_boundary_3d(model, X_train, y_train, f"Frontera de Decisión 3D - {model_name}")
        st.plotly_chart(fig)

# Contenido de la pestaña de Random Forest
with tab1:
    display_model_tab(rf_model, X_train, y_train, X_test, y_test, accuracy_rf, report_rf, cm_rf, "Random Forest")

# Contenido de la pestaña de SVM
with tab2:
    display_model_tab(svm_model, X_train, y_train, X_test, y_test, accuracy_svm, report_svm, cm_svm, "SVM")

# Contenido de la pestaña de Red Neuronal
with tab3:
    display_model_tab(nn_model, X_train, y_train, X_test, y_test, accuracy_nn, report_nn, cm_nn, "Red Neuronal")



### Convertir Notebook a .py

In [1]:
!jupyter nbconvert --to script modeloST2.ipynb

[NbConvertApp] Converting notebook modeloST2.ipynb to script
[NbConvertApp] Writing 11884 bytes to modeloST2.py


### Correr el streamlit

In [2]:
!streamlit run modeloST2.py

^C
