<a href="https://colab.research.google.com/github/Maynex69/Analisis-de-datos-hospitalarios-y-prediccion-de-medicamentos/blob/main/Modelo_streamlit.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import streamlit as st
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelEncoder
from sklearn.ensemble import RandomForestClassifier, StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.utils import to_categorical
from sklearn.base import BaseEstimator, ClassifierMixin

# Page heading rendered at the top of the Streamlit app.
st.title(body="Predicción de Medicamentos Basada en Condición Médica y Edad")

# Upload widget restricted to CSV files; the rest of the script only runs
# once this value is no longer None.
uploaded_file = st.file_uploader(label="Sube tu archivo CSV", type="csv")

if uploaded_file is not None:
    df = pd.read_csv(uploaded_file)

    # Show the first rows so the user can confirm the right file was uploaded.
    st.write("Vista previa de los datos:")
    st.write(df.head())

    # The pipeline only uses these three columns. Report a readable error
    # (instead of a raw KeyError traceback) when the CSV does not have them.
    required_columns = ['Age', 'Medication', 'Medical Condition']
    missing_columns = [col for col in required_columns if col not in df.columns]
    if missing_columns:
        st.error(f"Faltan columnas requeridas en el CSV: {', '.join(missing_columns)}")
        st.stop()

    # Keep only the needed columns and drop incomplete rows, so that no NaN
    # reaches the scaler, the encoders or the neural network.
    df = df[required_columns].dropna()

    # Restrict the problem to the most frequent medications.
    # NOTE(review): the variable name says "top 5" but the slice keeps the 10
    # most frequent values; the runtime behaviour (10) is preserved here --
    # confirm which number is intended.
    top_5_medications = df['Medication'].value_counts().index[:10]

    # Keep only the rows whose medication is among the selected ones.
    df_reduced = df[df['Medication'].isin(top_5_medications)]

    # Nothing can be trained on an empty table; stop with a clear message
    # rather than failing inside the encoders.
    if df_reduced.empty:
        st.error("No hay filas válidas para entrenar el modelo después de filtrar los datos.")
        st.stop()

    # Features (age + medical condition) and target (medication).
    X = df_reduced[['Age', 'Medical Condition']]
    y = df_reduced['Medication']

    # Integer-encode the target; these integer labels are what gets split
    # into y_train / y_test below.
    label_encoder = LabelEncoder()
    y_encoded = label_encoder.fit_transform(y)
    # One-hot version of the labels (not used by the split below).
    y_encoded_cat = to_categorical(y_encoded)

    # Separate the numeric and the categorical feature as 2-D arrays.
    X_numeric = X[['Age']].values
    X_categorical = X[['Medical Condition']].values

    # Standardize the numeric feature.
    scaler = StandardScaler()
    X_numeric_scaled = scaler.fit_transform(X_numeric)

    # One-hot encode the categorical feature and densify the result so it can
    # be concatenated with the scaled numeric column.
    encoder = OneHotEncoder()
    X_categorical_encoded = encoder.fit_transform(X_categorical).toarray()

    # Final design matrix: [scaled age | one-hot medical condition].
    X_prepared = np.concatenate([X_numeric_scaled, X_categorical_encoded], axis=1)

    # 60 % train / 40 % test, with a fixed seed for reproducibility.
    X_train, X_test, y_train, y_test = train_test_split(X_prepared, y_encoded, test_size=0.4, random_state=42)

    def create_nn_model(input_dim):
        """Build and compile the feed-forward network used as a base learner.

        Args:
            input_dim: Number of input features; forwarded to the first
                Dense layer.

        Returns:
            A compiled ``Sequential`` model: Dense(128, relu) -> Dropout(0.3)
            -> Dense(64, relu) -> softmax layer with one unit per class in
            ``label_encoder.classes_``.
        """
        # The output width follows the label encoder from the enclosing scope.
        n_classes = len(label_encoder.classes_)

        layer_stack = [
            Dense(128, input_dim=input_dim, activation='relu'),
            Dropout(0.3),
            Dense(64, activation='relu'),
            Dense(n_classes, activation='softmax'),
        ]
        network = Sequential(layer_stack)

        # Sparse loss: targets are integer class ids, not one-hot vectors.
        network.compile(
            loss='sparse_categorical_crossentropy',
            optimizer='adam',
            metrics=['accuracy'],
        )
        return network

    class KerasModelWrapper(ClassifierMixin, BaseEstimator):
        """scikit-learn style adapter around the network from ``create_nn_model``.

        Exposes ``fit`` / ``predict`` / ``predict_proba`` so the Keras model
        can be used as a base estimator inside scikit-learn ensembles.

        The mixin is listed before ``BaseEstimator`` on purpose: scikit-learn
        expects mixins on the left of ``BaseEstimator`` so that the classifier
        tag contributed by ``ClassifierMixin`` is not shadowed.

        Args:
            epochs: Number of training epochs passed to ``model.fit``.
            batch_size: Mini-batch size passed to ``model.fit``.
        """

        def __init__(self, epochs=50, batch_size=32):
            self.epochs = epochs
            self.batch_size = batch_size
            # Populated by fit(); kept as None before training.
            self.model = None
            self.classes_ = None

        def fit(self, X, y):
            """Train a fresh network on ``X`` / ``y`` and return ``self``.

            ``y`` is passed to Keras unchanged; the network is compiled with
            a sparse categorical loss in ``create_nn_model``.
            """
            self.classes_ = np.unique(y)
            # A new model is built on every call, sized to the feature count.
            self.model = create_nn_model(X.shape[1])
            self.model.fit(X, y, epochs=self.epochs, batch_size=self.batch_size, verbose=0)
            return self

        def predict(self, X):
            """Return, for each row, the index of the highest-scoring output unit."""
            return np.argmax(self.predict_proba(X), axis=1)

        def predict_proba(self, X):
            """Return the network's softmax outputs for ``X``."""
            # verbose=0 keeps Keras progress bars out of the app's console.
            return self.model.predict(X, verbose=0)

    # --- Base learner 1: random forest tuned with a grid search -----------
    param_grid = {
        'n_estimators': [50, 100, 200],
        'max_depth': [None, 10, 20, 30],
        'min_samples_split': [2, 5, 10],
    }
    rf = RandomForestClassifier(random_state=42)

    # 3-fold cross-validated search over the grid, scored by accuracy.
    grid_search = GridSearchCV(
        estimator=rf,
        param_grid=param_grid,
        scoring='accuracy',
        cv=3,
        n_jobs=-1,
    )
    grid_search.fit(X_train, y_train)
    best_rf = grid_search.best_estimator_

    # --- Base learner 2: the wrapped Keras network ------------------------
    nn_model = KerasModelWrapper(epochs=50, batch_size=32)

    # --- Stacking ensemble: logistic regression on top of both learners ---
    estimators = [('rf', best_rf), ('nn', nn_model)]
    stacking_clf = StackingClassifier(
        estimators=estimators,
        final_estimator=LogisticRegression(),
    )
    stacking_clf.fit(X_train, y_train)

    # Evaluate on the held-out split and report the result in the app.
    stacking_accuracy = stacking_clf.score(X_test, y_test)
    st.write("Precisión del modelo de Stacking en el conjunto de prueba:", stacking_accuracy)
