## **Limpieza de los archivos Excel de "Emergencias UNGRD"**
----

#### ***Importación de paquetes***

In [73]:
import pandas as pd

### ***Cargar los datasets mediante Pandas***

In [74]:
df_2004 = pd.read_excel('../Excel (2003 - 2018, 2023)/EMERGENCIAS-2004.xls', sheet_name='REPORTE DE EMERGENCIAS', skiprows=1)
df_2004

Unnamed: 0,FECHA,DEPTO,MUNICIPIO,EVENTO,MUERTOS,HERIDOS,DESAPA.,PERSONAS,FAMILIAS,VIV.DESTRU.,...,VALOR.29,VALOR TOTAL.1,CANTIDAD.29,VALOR.30,CANTIDAD.30,VALOR.31,CANTIDAD.31,VALOR.32,Unnamed: 99,Unnamed: 100
0,2004-01-01 00:00:00,CAUCA,CAJIBIO,VENDAVAL,,,,75.0,15.0,,...,,0.0,,,,,226.0,2592220.0,,
1,2004-01-01 00:00:00,ATLANTICO,BARRANQUILLA,VENDAVAL,,,,,,,...,,0.0,,,,,3000.0,65016840.0,,
2,2004-01-01 00:00:00,CAUCA,PIENDAMO,VENDAVAL,,,,1900.0,380.0,,...,,0.0,,,,,2000.0,26400000.0,,
3,2004-01-07 00:00:00,CAUCA,MORALES,VENDAVAL,,,,410.0,82.0,,...,,0.0,,,83.0,2697500.0,1100.0,12211100.0,,
4,2004-01-07 00:00:00,BOLIVAR,SAN CRISTOBAL,INUNDACION,,,,1000.0,200.0,,...,,11000000.0,,,200.0,6700000.0,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8094,,,,,,,,,,,...,,,,,,,,,,
8095,,,,,,,,,,,...,,,,,,,,,,
8096,,,,,,,,,,,...,,,,,,,,,,
8097,,,,,,,,,,,...,,,,,,,,,,


## **Procesamiento de los datos**
---


### ***Declarar función para realizar la limpieza***

In [75]:
def limpiar_datos_emergencias(filepath, end_row, sheet_name='REPORTE DE EMERGENCIAS', skip_rows=1):
    # Leer el archivo Excel
    df = pd.read_excel(filepath, sheet_name=sheet_name, skiprows=skip_rows)
    
    # Seleccionar filas y columnas relevantes
    df = df[0:end_row]  # Restar 1 y el valor de skip_rows para que incluya la fila deseada
    df = df[["FECHA", "DEPTO", "MUNICIPIO", "EVENTO", "MUERTOS", "HERIDOS", "DESAPA.", "PERSONAS", "FAMILIAS", "VIV.DESTRU.", "VIAS", "HECTAREAS"]]
    
    # Renombrar columnas
    df = df.rename(columns={
        'DEPTO': 'DEPARTAMENTO',
        'MUERTOS': 'FALLECIDOS',
        'DESAPA.': 'DESAPARECIDOS',
        'VIV.DESTRU.': 'VIVIENDAS'
    })
    
    # Columnas a limpiar y convertir
    columnas_a_convertir = ["FALLECIDOS", "HERIDOS", "DESAPARECIDOS", "PERSONAS", "FAMILIAS", "VIVIENDAS", "VIAS", "HECTAREAS"]

    # Rellenar valores faltantes con cero y reemplazar '-' por '0' solo en las columnas especificadas
    df[columnas_a_convertir] = df[columnas_a_convertir].fillna(0).replace('-', '0').replace(' ', '0').replace('                    ', '0')
    
    # Limpiar y convertir columnas a int o float según corresponda
    for col in columnas_a_convertir:
        try:
            df[col] = df[col].astype(str).str.replace(',', '').astype(int)
        except ValueError:
            df[col] = df[col].astype(str).str.replace(',', '').astype(float)    
    
    # Convertir la columna de fechas y extraer el año y el mes
    df['FECHA'] = pd.to_datetime(df['FECHA'])
    df['AÑO'] = df['FECHA'].dt.year
    df['MES'] = df['FECHA'].dt.month
    
    # Reorganizar las columnas
    cols = ['AÑO', 'MES'] + [col for col in df.columns if col not in ['AÑO', 'MES']]
    df = df[cols]

    
    return df

### ***Procesar CSVs***
---

#### ***2003***

In [76]:
df_2003 = limpiar_datos_emergencias('../Excel (2003 - 2018, 2023)/EMERGENCIAS-2003.xls', 561)

In [77]:
# df_2003.head()
# df_2003.tail()
df_2003["EVENTO"].unique()

array(['INUNDACION', 'VENDAVAL', 'INCENDIO FORESTAL',
       'INCENDIO ESTRUCTURAL', 'CONTAMINACION', 'SISMO', 'DESLIZAMIENTO',
       'AVALANCHA', 'ACCIDENTE AEREO', 'ACCIDENTE', 'SEQUIA', 'VARIOS',
       'GRANIZADA', 'ANTROPICO', 'COLAPSO ESTRUCTURAL', 'ERUPCION ',
       'EXPLOSION'], dtype=object)

#### ***2004***

In [78]:
df_2004 = limpiar_datos_emergencias('../Excel (2003 - 2018, 2023)/EMERGENCIAS-2004.xls', 774)

In [79]:
# df_2004.head()
df_2004.tail()
# df_2004["EVENTO"].unique()

Unnamed: 0,AÑO,MES,FECHA,DEPARTAMENTO,MUNICIPIO,EVENTO,FALLECIDOS,HERIDOS,DESAPARECIDOS,PERSONAS,FAMILIAS,VIVIENDAS,VIAS,HECTAREAS
769,2004,12,2004-12-26,QUINDIO,FILANDIA,VENDAVAL,0.0,0.0,0.0,137.0,26.0,0.0,0.0,0.0
770,2004,12,2004-12-26,QUINDIO,CALARCA,VENDAVAL,0.0,0.0,0.0,60.0,11.0,0.0,0.0,0.0
771,2004,12,2004-12-29,HUILA,LA PLATA,INUNDACION,1.0,1.0,0.0,329.0,80.0,10.0,0.0,0.0
772,2004,12,2004-12-29,NARIÑO,LEIVA,INUNDACION,0.0,0.0,0.0,2208.0,450.0,0.0,0.0,0.0
773,2004,12,2004-12-29,HUILA,TESALIA,INUNDACION,0.0,0.0,0.0,180.0,36.0,0.0,0.0,0.0


#### ***2005***

In [80]:
df_2005 = limpiar_datos_emergencias('../Excel (2003 - 2018, 2023)/EMERGENCIAS-2005.xls', 990)

In [81]:
# df_2005.head()
df_2005.tail()
# df_2005["EVENTO"].unique()

Unnamed: 0,AÑO,MES,FECHA,DEPARTAMENTO,MUNICIPIO,EVENTO,FALLECIDOS,HERIDOS,DESAPARECIDOS,PERSONAS,FAMILIAS,VIVIENDAS,VIAS,HECTAREAS
985,2005,12,2005-12-23,HUILA,NEIVA,INUNDACION,0.0,0.0,0.0,100.0,20.0,20.0,0,0.0
986,2005,12,2005-12-26,CAUCA,BALBOA,INUNDACION,0.0,0.0,0.0,1592.0,491.0,0.0,0,203.5
987,2005,12,2005-12-26,CAUCA,FLORENCIA,DESLIZAMIENTO,0.0,0.0,0.0,0.0,0.0,0.0,1,0.0
988,2005,12,2005-12-27,CHOCO,ATRATO,VENDAVAL,0.0,0.0,0.0,4330.0,866.0,0.0,0,0.0
989,2005,12,2005-12-28,CUNDINAMARCA,UTICA,INUNDACION,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0


#### ***2006***

In [82]:
df_2006 = limpiar_datos_emergencias('../Excel (2003 - 2018, 2023)/EMERGENCIAS-2006.xls', 1053)

In [83]:
# df_2006.head()
# df_2006.tail()
df_2006["EVENTO"].unique()

array(['INUNDACION', 'DESLIZAMIENTO', 'VENDAVAL', 'AVALANCHA',
       'INCENDIO ESTRUCTURAL', 'EXPLOSION', 'INCENDIO FORESTAL',
       'EVENTO VOLCANICO', 'ACCIDENTE', 'COLAPSO ESTRUCTURAL',
       'TORMENTA ELECTRICA', 'EROSION', 'CONTAMINACION', 'MAREJADAS',
       'VARIOS', 'GRANIZADA', 'ERUPCION', 'TORNADO', 'SEQUIA'],
      dtype=object)

#### ***2007***

In [84]:
df_2007 = limpiar_datos_emergencias('../Excel (2003 - 2018, 2023)/EMERGENCIAS-2007.xls', 1011)

In [85]:
# df_2007.head()
# df_2007.tail()
df_2007["EVENTO"].unique()

array(['INUNDACION', 'VENDAVAL', 'MAR DE LEVA', 'DESLIZAMIENTO',
       'ACCIDENTE', 'INCENDIO ESTRUCTURAL', 'EROSION',
       'COLAPSO ESTRUCTURAL', 'INCENDIO FORESTAL', 'EXPLOSION', 'SEQUIA',
       'CONTAMINACION', 'SISMO', 'ERUPCION', 'TORMENTA ELECTRICA',
       'AVALANCHA', 'VARIOS', 'GRANIZADA', 'HURACAN'], dtype=object)