In [None]:
# 1. Load the data
import pandas as pd

# Relative file names, so both CSVs are looked up in the current working
# directory.
hospital_csv_path = 'hospital_data.csv'
students_csv_path = 'students_data.csv'

hospitaldf = pd.read_csv(hospital_csv_path)
studentsdf = pd.read_csv(students_csv_path)

# 2. Initial exploration
# For each dataset: a heading, the first rows, then summary statistics over
# all columns (include='all' covers non-numeric columns too).
# NOTE(review): `display` is not imported here; presumably supplied by the
# IPython/Jupyter environment -- confirm before running as a plain script.
for heading, frame in (
    ('Hospital DataFrame:', hospitaldf),
    ('Students DataFrame:', studentsdf),
):
    print(heading)
    display(frame.head())
    display(frame.describe(include='all'))

# 3. Target variable analysis
import matplotlib.pyplot as plt

# Hospital target: class counts as text, then as a bar chart.
print('Distribución Risk_Level (hospitaldf):')
risk_counts = hospitaldf['Risk_Level'].value_counts()
print(risk_counts)
risk_counts.plot(kind='bar', title='Distribución Risk_Level')
plt.show()

# Students target: same report, but only when the column is present.
if 'Result' not in studentsdf.columns:
    print('No se encontró la columna Result en studentsdf.')
else:
    print('Distribución Result (studentsdf):')
    result_counts = studentsdf['Result'].value_counts()
    print(result_counts)
    result_counts.plot(kind='bar', title='Distribución Result')
    plt.show()

# 4. Preprocessing
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Hospital data: work on a copy so the raw frame stays untouched, and
# integer-encode each object-dtype feature with its own throwaway encoder.
df_hosp = hospitaldf.copy()
hosp_feature_cols = [
    name
    for name in df_hosp.select_dtypes(include='object').columns
    if name != 'Risk_Level'
]
for col in hosp_feature_cols:
    df_hosp[col] = LabelEncoder().fit_transform(df_hosp[col])
# The target encoder is bound to a name (unlike the feature encoders), so it
# stays available after this step.
le_hosp = LabelEncoder()
df_hosp['Risk_Level'] = le_hosp.fit_transform(df_hosp['Risk_Level'])

# Students data: same treatment for the feature columns.
df_stud = studentsdf.copy()
stud_feature_cols = [
    name
    for name in df_stud.select_dtypes(include='object').columns
    if name != 'Result'
]
for col in stud_feature_cols:
    df_stud[col] = LabelEncoder().fit_transform(df_stud[col])
# The students target is optional; encode it only when the column exists.
if 'Result' in df_stud.columns:
    le_stud = LabelEncoder()
    df_stud['Result'] = le_stud.fit_transform(df_stud['Result'])
# Normalizar numéricas
def normalize(df, exclude):
    """Standardize the numeric columns of ``df`` in place.

    Every numeric column except ``exclude`` is replaced by the output of a
    ``StandardScaler`` fitted on that same frame. Non-numeric columns and
    the ``exclude`` column are left as they are.

    Args:
        df: DataFrame to scale. It is modified in place.
        exclude: Name of a column to leave unscaled (e.g. the target). The
            column does not have to exist in ``df``.

    Returns:
        The same ``df`` object that was passed in.
    """
    # 'number' matches every numeric width (int32, float32, ...), not only
    # int64/float64, so the result does not depend on the integer width of
    # the encoded columns.
    num_cols = df.select_dtypes(include='number').columns.difference([exclude])
    if num_cols.empty:
        # StandardScaler rejects input with zero features; with nothing to
        # scale the frame is returned untouched.
        return df
    scaler = StandardScaler()
    df[num_cols] = scaler.fit_transform(df[num_cols])
    return df

# Scale the feature columns of both datasets; each target column is excluded.
# NOTE(review): this runs before the train/test split below, so the scaler
# statistics are computed over rows that later end up in the test sets.
df_hosp = normalize(df=df_hosp, exclude='Risk_Level')
df_stud = normalize(df=df_stud, exclude='Result')

# 5. Train/test split
from sklearn.model_selection import train_test_split

# Hospital: hold out 20% of the rows, with a fixed seed for the shuffle.
y_hosp = df_hosp['Risk_Level']
X_hosp = df_hosp.drop(columns='Risk_Level')
Xh_train, Xh_test, yh_train, yh_test = train_test_split(
    X_hosp, y_hosp, test_size=0.2, random_state=42
)

# Students: same split when the target exists; otherwise every piece is
# None, which the training and evaluation steps check for.
if 'Result' not in df_stud.columns:
    Xs_train, Xs_test, ys_train, ys_test = None, None, None, None
else:
    y_stud = df_stud['Result']
    X_stud = df_stud.drop(columns='Result')
    Xs_train, Xs_test, ys_train, ys_test = train_test_split(
        X_stud, y_stud, test_size=0.2, random_state=42
    )

# 6. Model training
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier

# Hospital: one logistic regression and one decision tree on the same
# training split (`fit` returns the estimator itself, so it can be chained).
log_hosp = LogisticRegression(max_iter=1000).fit(Xh_train, yh_train)
dt_hosp = DecisionTreeClassifier(random_state=42).fit(Xh_train, yh_train)

# Students: same pair of models, or None placeholders when there was no
# target column to split on.
if Xs_train is None:
    log_stud = dt_stud = None
else:
    log_stud = LogisticRegression(max_iter=1000).fit(Xs_train, ys_train)
    dt_stud = DecisionTreeClassifier(random_state=42).fit(Xs_train, ys_train)

# 7. Evaluación de modelos
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report

def eval_model(model, X, y, target_name):
    """Print a classification report for ``model`` and return its metrics.

    Args:
        model: Estimator exposing ``predict(X)``.
        X: Features passed to ``model.predict``.
        y: True labels the predictions are compared against.
        target_name: Text shown in the report heading.

    Returns:
        dict with the keys ``'accuracy'``, ``'precision'``, ``'recall'``,
        ``'f1'`` (the last three are weighted averages) and
        ``'confusion_matrix'``, so callers can compare models
        programmatically instead of reading the printed output.
    """
    y_pred = model.predict(X)
    print(f"\nEvaluación para {target_name}:")

    # zero_division=0 gives the same scores as the default setting but
    # without emitting an UndefinedMetricWarning on every call when some
    # class is never predicted.
    metrics = {
        'accuracy': accuracy_score(y, y_pred),
        'precision': precision_score(
            y, y_pred, average='weighted', zero_division=0
        ),
        'recall': recall_score(
            y, y_pred, average='weighted', zero_division=0
        ),
        'f1': f1_score(y, y_pred, average='weighted', zero_division=0),
        'confusion_matrix': confusion_matrix(y, y_pred),
    }

    print("Accuracy:", metrics['accuracy'])
    print("Precision:", metrics['precision'])
    print("Recall:", metrics['recall'])
    print("F1-score:", metrics['f1'])
    print("Matriz de confusión:\n", metrics['confusion_matrix'])
    print(classification_report(y, y_pred, zero_division=0))
    return metrics

# Evaluate both hospital models on the held-out hospital split.
print("\n--- Modelos Hospital Data ---")
for fitted, label in (
    (log_hosp, 'Logistic Regression (Hospital)'),
    (dt_hosp, 'Decision Tree (Hospital)'),
):
    eval_model(fitted, Xh_test, yh_test, label)

# The students models exist only when the students split does.
if Xs_test is None:
    print('No se puede evaluar modelos para studentsdf: falta columna Result.')
else:
    print("\n--- Modelos Students Data ---")
    for fitted, label in (
        (log_stud, 'Logistic Regression (Students)'),
        (dt_stud, 'Decision Tree (Students)'),
    ):
        eval_model(fitted, Xs_test, ys_test, label)

# 8. Model comparison
# This step only prints a heading and a written prompt for the reader; it
# does not compute anything.
comparison_heading = "\n--- Comparación de modelos ---"
comparison_prompt = "Analiza las métricas de cada modelo (accuracy, precision, recall, F1-score) y justifica tu elección según el mejor desempeño y la interpretación de los resultados para cada conjunto de datos."
for message in (comparison_heading, comparison_prompt):
    print(message)