In [None]:
#import the packages that needed
import pandas as pd
import numpy as np
import re
import warnings
warnings.filterwarnings('ignore')

In [None]:
#' fillDownHouseNum
#' 
#' Fill down house numbers within the same ED and cleaned street name.
#' This function is to be run on output from `04_____`.
#' @param df A dataframe with `ED`, `street_name_clean` and `house_num` columns. `house_num` 
#' column must be cleaned. 
#' @return A dataframe with filled down house numbers (group_by(`ED`, `street_name_clean`))
#' and `flg_filled_hn` column. The value of `flg_filled_hn` is 1 if `house_num` of
#' a record is filled down, 0 otherwise. 

def fillDownHouseNum(df):
    """
    Fill missing house numbers within each ('ED', 'street_name_clean') group.

    Only household records (rectype == "H") are kept. Empty strings are
    treated as missing. Within each group the house-number columns are
    filled downwards first and then upwards, so leading gaps are filled too.

    inputs:
    -------
    'df': A dataframe with 'rectype', 'ED', 'street_name_clean', 'house_num',
    'modifier.number', 'modifier.word', 'hn_1', 'hn_2' and 'hn_3' columns.
    The 'house_num' column must already be cleaned. The input is not modified.

    outputs:
    -------
    A new dataframe (fresh 0..n-1 index) with filled house numbers
    (grouped by 'ED', 'street_name_clean') and a 'flg_filled_hn' column.
    The value of 'flg_filled_hn' is 1 if 'house_num' of a record was missing
    and has been filled, 0 otherwise.
    """
    fill_cols = ['modifier.number', 'modifier.word', 'house_num', 'hn_1', 'hn_2', 'hn_3']
    group_keys = ['ED', 'street_name_clean']

    # Keep household records only; copy so the caller's frame is never mutated.
    households = df[df['rectype'] == "H"].copy()

    # Empty strings count as missing values, so they are filled like NaN.
    households = households.replace('', np.nan)

    # Remember which house numbers were missing before any filling happens.
    was_missing = households['house_num'].isna()

    # dropna=False keeps rows with a missing ED / street name in the grouping
    # instead of silently excluding them from the fill.
    households[fill_cols] = households.groupby(group_keys, dropna=False)[fill_cols].ffill()
    households[fill_cols] = households.groupby(group_keys, dropna=False)[fill_cols].bfill()

    # Flag only the records whose house number was actually filled in.
    households['flg_filled_hn'] = np.where(
        was_missing & households['house_num'].notna(), 1, 0
    )

    # drop=True: do not leak the pre-filter row labels as an extra 'index' column.
    return households.reset_index(drop=True)

In [None]:

#' fillDownHouseNum
#'
#' Fill missing house numbers within the same ED and cleaned street name.
#' @param df A dataframe with `rectype`, `ED`, `street_name_clean`, `house_num`,
#' `modifier.number`, `modifier.word`, `hn_1`, `hn_2` and `hn_3` columns.
#' @return An ungrouped dataframe holding only `rectype == "H"` records, with the
#' house number columns filled down and then up within each
#' (`ED`, `street_name_clean`) group, plus a `flg_filled_hn` column that is 1
#' when `house_num` was missing and has been filled, 0 otherwise.
fillDownHouseNum <- function(df){

  df <- df %>%
    # Empty strings in character columns count as missing values.
    mutate(across(where(is.character), ~na_if(., ""))) %>%
    filter(rectype == "H") %>%
    # Keep the pre-fill values so filled records can be flagged afterwards.
    mutate(house_num_temp = house_num) %>%
    group_by(ED, street_name_clean) %>%
    # "downup" fills downwards first, then upwards, in a single pass.
    fill(modifier.number, modifier.word, house_num, hn_1, hn_2, hn_3, .direction = "downup") %>%
    # Vectorised `&` flags all rows at once; no rowwise() evaluation is needed.
    mutate(flg_filled_hn = ifelse(!is.na(house_num) & is.na(house_num_temp), 1, 0)) %>%
    select(-house_num_temp) %>%
    ungroup()
  return(df)
}

In [None]:

#' fillDownHouseNum
#' 
#' Fill down house numbers within the same ED and cleaned street name.
#' This function is to be run on output from `04_____`.
#' @param df A dataframe with `rectype`, `ED`, `street_name_clean` and
#' `house_num` columns (plus `modifier.number`, `modifier.word`, `hn_1`, `hn_2`,
#' `hn_3`). `house_num` column must be cleaned. 
#' @return A dataframe of `rectype == "H"` records with filled down house numbers
#' (group_by(`ED`, `street_name_clean`)) and `flg_filled_hn` column. The value of
#' `flg_filled_hn` is 1 if `house_num` of a record is filled down, 0 otherwise. 
fillDownHouseNum <- function(df){
  
  df <- df %>% 
    # Treat empty strings as NA so they are filled like any other gap.
    mutate(across(where(is.character), ~na_if(., ""))) %>% 
    filter(rectype == "H") %>%
    # Snapshot of house_num before filling; used only to build the flag.
    mutate(house_num_temp = house_num) %>% 
    group_by(ED, street_name_clean) %>%
    # Fill down first, then up, within each group.
    fill(modifier.number, modifier.word, house_num, hn_1, hn_2, hn_3, .direction = "downup") %>%
    # Element-wise `&` works on whole columns, so rowwise() is unnecessary.
    mutate(flg_filled_hn = ifelse(!is.na(house_num) & is.na(house_num_temp), 1, 0)) %>% 
    select(-house_num_temp) %>%
    ungroup()
  return(df)
}

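#' ############ IMPORTANT !! Change the input to this function call to the output from 04_.R
#' NOTE: the two calls below are left over from an unresolved merge conflict;
#' keep whichever one matches the object produced by 04_.R.
# HEAD version:
#sample_hn_filled <- fillDownHouseNum(data)
# c66c29fac27c8010eae343f5786cbf25be3bb492 version:
#sample_hn_filled <- fillDownHouseNum(sample_st_filled)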

#' One test for fillDownHouseNum(). Running this line should not raise an
#' error if the function works properly. It checks that no record is
#' flagged as filled down while its house_num is still missing.
#assertthat::assert_that(nrow(sample_hn_filled %>% filter(is.na(house_num) & flg_filled_hn==1)) == 0)