# 🧠 Análisis de la Readmisión a la UCI en Pacientes con Hemorragia Intracerebral (MIMIC-IV)

## 1. 🔧 Configuración Inicial

In [3]:
from google.cloud import bigquery
from google.oauth2 import service_account

# Path to your service account key file
SERVICE_ACCOUNT_FILE = "mimic-sergi.json"
PROJECT_ID = "ogi-uci-i61"

# Create credentials and client
credentials = service_account.Credentials.from_service_account_file(
    SERVICE_ACCOUNT_FILE)

client = bigquery.Client(credentials=credentials, project=PROJECT_ID)

# Example query
query = "SELECT 'Hello, World!' AS greeting"
query_job = client.query(query)

# Fetch results
results = query_job.result()

for row in results:
    print(row.greeting)



Hello, World!


## 2. 📦 Extracción de Datos

In [4]:
query = """
SELECT
  a.subject_id,
  a.hadm_id,
  a.stay_id,
  p.gender,
  a.intime,
  a.outtime,
  i.icd_code,
  d.icd_version,
  d.long_title,
  s.gcs_verbal,
  s.gcs_motor,
  s.gcs_eyes,
  vitals.heart_rate,
  vitals.mbp,
  vitals.resp_rate,
  vitals.temperature,
  vitals.spo2,
  readmit.stay_id AS readmitted_stay
FROM
  `ogi-uci-i61.mimiciv_icu.icustays` a
JOIN
  `ogi-uci-i61.mimiciv_hosp.patients` p
  ON a.subject_id = p.subject_id
JOIN
  `ogi-uci-i61.mimiciv_hosp.diagnoses_icd` i
  ON a.hadm_id = i.hadm_id
JOIN
  `ogi-uci-i61.mimiciv_hosp.d_icd_diagnoses` d
  ON i.icd_code = d.icd_code AND i.icd_version = d.icd_version
LEFT JOIN (
  -- Select first GCS per stay
  SELECT
    stay_id,
    gcs_verbal,
    gcs_motor,
    gcs_eyes,
    ROW_NUMBER() OVER (PARTITION BY stay_id ORDER BY charttime) AS rn
  FROM
    `ogi-uci-i61.mimiciv_derived.gcs`
) s
  ON a.stay_id = s.stay_id AND s.rn = 1
LEFT JOIN (
  -- Select first vitals per stay
  SELECT
    stay_id,
    heart_rate,
    mbp,
    resp_rate,
    temperature,
    spo2,
    ROW_NUMBER() OVER (PARTITION BY stay_id ORDER BY charttime) AS rn
  FROM
    `ogi-uci-i61.mimiciv_derived.vitalsign`
) vitals
  ON a.stay_id = vitals.stay_id AND vitals.rn = 1
LEFT JOIN (
  -- Identify next stay as readmission
  SELECT
    a1.subject_id,
    a1.stay_id,
    MIN(a2.stay_id) AS readmitted_stay
  FROM
    `ogi-uci-i61.mimiciv_icu.icustays` a1
  JOIN
    `ogi-uci-i61.mimiciv_icu.icustays` a2
    ON a1.subject_id = a2.subject_id
    AND a2.intime > a1.outtime
  GROUP BY
    a1.subject_id, a1.stay_id
) readmit
  ON a.subject_id = readmit.subject_id AND a.stay_id = readmit.stay_id
WHERE
  LOWER(d.long_title) LIKE '%intracerebral hemorrhage%'
"""
df = client.query(query).to_dataframe()



## 3. 🧽 Preprocesamiento

In [5]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# Inspect readmitted_stay
print("Nulls in readmitted_stay before fillna:", df['readmitted_stay'].isnull().sum())
print("readmitted_stay summary:", df['readmitted_stay'].describe())

# Create readmitted (adjust based on readmitted_stay's meaning)
df['readmitted'] = df['readmitted_stay'].notnull().astype(int)  # Or (df['readmitted_stay'] > 0).astype(int)

# Verify readmitted
print("Class distribution in readmitted:", df['readmitted'].value_counts())
if len(df['readmitted'].unique()) < 2:
    raise ValueError("readmitted has only one class. Redefine the target.")

# Identify integer columns
int_columns = df.select_dtypes(include=['Int64', 'int64']).columns

# Fill NaNs with rounded medians for integer columns
for col in int_columns:
    median_val = df[col].median()
    if not pd.isna(median_val):
        df[col] = df[col].fillna(int(median_val))

# Fill NaNs for other numeric columns, excluding readmitted_stay
numeric_cols = df.select_dtypes(include='number').columns.difference(['readmitted_stay'])
df[numeric_cols] = df[numeric_cols].fillna(df[numeric_cols].median())

# Handle readmitted_stay separately
df['readmitted_stay'] = df['readmitted_stay'].fillna(0)

# Scale features

scaler = StandardScaler()
# Select numeric columns, excluding readmitted_stay and readmitted
numeric_cols = df.select_dtypes(include='number').columns.difference(['readmitted_stay', 'readmitted'])
features_scaled = scaler.fit_transform(df[numeric_cols])

Nulls in readmitted_stay before fillna: 3222
readmitted_stay summary: count              938.0
mean     35030430.127932
std       2878265.870109
min           30024491.0
25%           32506322.0
50%           34949251.5
75%          37500054.75
max           39979862.0
Name: readmitted_stay, dtype: Float64
Class distribution in readmitted: readmitted
0    3222
1     938
Name: count, dtype: int64


## 4. 📊 Modelado Predictivo

In [6]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, roc_auc_score

X = features_scaled
y = df['readmitted']

X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y)

model = RandomForestClassifier(n_estimators=100)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
y_proba = model.predict_proba(X_test)[:,1]

print(classification_report(y_test, y_pred))
print("AUC:", roc_auc_score(y_test, y_proba))

              precision    recall  f1-score   support

           0       0.79      0.98      0.88       806
           1       0.69      0.12      0.21       234

    accuracy                           0.79      1040
   macro avg       0.74      0.55      0.54      1040
weighted avg       0.77      0.79      0.73      1040

AUC: 0.6065168289113699


## 5. 📈 Visualización de Resultados

In [None]:
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve

fpr, tpr, _ = roc_curve(y_test, y_proba)
plt.plot(fpr, tpr)
plt.xlabel("FPR")
plt.ylabel("TPR")
plt.title("Curva ROC")
plt.show()

## 6. 🚀 Despliegue (Futuro)
Se puede utilizar Hugging Face Spaces o Streamlit Cloud para desplegar un frontend que permita:
- Cargar variables clínicas
- Obtener un score de riesgo
- Visualizar el gráfico ROC o una matriz de confusión