In [None]:
from pathlib import Path

In [None]:
from itables import init_notebook_mode
import pandas as pd

In [None]:
# Switch on itables' notebook mode with all_interactive=True for the rest of the session.
init_notebook_mode(all_interactive=True)

In [None]:
# The data directory is a sibling of the directory the notebook is run from.
data_path = Path.cwd().parent.joinpath('data')

In [None]:
# Location of the raw ENAHO 2023 SPSS file under the data directory.
data_filepath = data_path.joinpath('raw', 'enaho', '2023', 'data.sav')

# Read Data

In [None]:
# Load the SPSS (.sav) file into a DataFrame.
df = pd.read_spss(path=data_filepath)

In [None]:
# Summarise the freshly loaded frame (columns, dtypes, non-null counts).
df.info()

In [None]:
# Preview the first rows of the raw data.
df.head()

# Clean Column Names

In [None]:
# Characters to swap in the column names. Only the exact characters listed
# here are replaced, and the swap happens before lower-casing, so the
# upper-case 'Ñ' is what gets matched.
replacements = {'$': '_', 'Ñ': 'N'}
translation_table = str.maketrans(replacements)

cleaned_columns = df.columns.str.translate(translation_table)
df.columns = cleaned_columns.str.lower()

In [None]:
# Preview the frame to confirm the cleaned column names.
df.head()

# Variable Selection

In [None]:
# List the available columns and dtypes before choosing which variables to keep.
df.info()

In [None]:
# Columns that identify and locate each household record, grouped by role.
survey_period_vars = ['ano', 'mes']
household_key_vars = ['conglome', 'vivienda', 'hogar']
coordinate_vars = ['longitud', 'latitud']

household_info_vars = [
    *survey_period_vars,
    *household_key_vars,
    *coordinate_vars,
]

In [None]:
# Household characteristic questionnaire columns (lower-cased names).
# Each name appears exactly once so that selecting with this list cannot
# produce duplicated columns.
# NOTE(review): the original list repeated 'p110' between 'p110c3' and
# 'p110e'; possibly 'p110d' was intended — TODO confirm against the survey
# data dictionary.
household_char_vars = [
    'p24a', 'p24b',
    'p25_1', 'p25_2', 'p25_3', 'p25_4', 'p25_5',
    'p101', 'p102', 'p103', 'p103a',
    'p104', 'p104a', 'p104b1', 'p104b2',
    'p110', 'p110a1', 'p110a', 'p110a_modificada',
    'p110c', 'p110c1', 'p110c2', 'p110c3', 'p110e',
    'p111a', 'p1121', 'p1123', 'p1124', 'p1125', 'p1126', 'p1127', 'p112a',
    'p1141', 'p1142', 'p1143', 'p1144', 'p114b1', 'p114b2', 'p114b3', 'p1145',
]

In [None]:
# Keep only the household identification columns plus the 'nbi1' indicator.
# The list is called `selected_vars` so the built-in `vars()` is not shadowed
# for the rest of the kernel session.
selected_vars = [*household_info_vars, 'nbi1']
df = df[selected_vars]

df.head()

# Drop Missing Values

In [None]:
# Discard every row that has a missing value in any of the selected columns.
df = df.dropna(axis=0, how='any')

In [None]:
# Check the remaining row count and dtypes after dropping rows with missing values.
df.info()

# Change Data Types

In [None]:
# Inspect the current dtypes before casting.
df.dtypes

In [None]:
# Target dtype for each retained column: integers for the survey period,
# strings for the identifier codes, floats for the coordinates and a
# categorical for the 'nbi1' indicator.
dtype_mapping = dict(
    ano='int',
    mes='int',
    conglome='string',
    vivienda='string',
    hogar='string',
    latitud='float64',
    longitud='float64',
    nbi1='category',
)

df = df.astype(dtype=dtype_mapping)

In [None]:
# Confirm the dtypes after the cast.
df.dtypes

In [None]:
# Show the Python type of `df`.
# NOTE(review): looks like leftover debugging — consider removing this cell.
type(df)

# Transform Adequate Household

In [None]:
# Translate the 'nbi1' labels into a boolean-style flag.
# Labels that are not keys of this mapping come out as missing values.
adequacy_mapping = {
    'Vivienda adecuada': True,
    'Vivienda inadecuada': False
}

adequacy_flags = df['nbi1'].map(adequacy_mapping)
df = df.assign(nbi1=adequacy_flags)

# Aggregate Values

In [None]:
# Collapse the household rows to one row per
# (year, month, conglomerate, longitude, latitude) combination. A group is
# flagged adequate only when every one of its rows is adequate (`.all()`).
#
# The grouping uses the English column names, so the Spanish survey names
# are translated here first. The rename is label-based: labels that are
# absent (for example because the frame was already renamed) are ignored,
# so this cell works on a fresh top-to-bottom run and is safe to re-run.
column_translation = {
    'ano': 'year',
    'mes': 'month',
    'conglome': 'conglomerate',
    'vivienda': 'house',
    'hogar': 'household',
    'longitud': 'longitude',
    'latitud': 'latitude',
    'nbi1': 'adequate',
}

df = (
    df.rename(columns=column_translation)
    .groupby(['year', 'month', 'conglomerate', 'longitude', 'latitude'])
    ['adequate'].all().reset_index(name='adequate')
)

In [None]:
# Preview the last rows of the aggregated frame.
df.tail()

# Rename Column Names

In [None]:
# Translate the Spanish survey column names to English by label rather than
# by position. A positional `df.columns = [...]` assignment needs exactly
# eight columns in a fixed order and raises a length-mismatch error on any
# other frame; a label-based rename ignores labels that are absent, so it
# works regardless of how many columns are left and is safe to re-run.
df = df.rename(columns={
    'ano': 'year',
    'mes': 'month',
    'conglome': 'conglomerate',
    'vivienda': 'house',
    'hogar': 'household',
    'longitud': 'longitude',
    'latitud': 'latitude',
    'nbi1': 'adequate',
})

df.head()

# Export

In [None]:
# Final summary of the frame.
# NOTE(review): no write/export call is visible in this section — TODO add the
# export step or confirm it lives in a later cell.
df.info()