In [3]:
# Move into the house_prices project directory located under $R_SOURCES.
# Sys.getenv() returns "" for an unset variable, which would silently turn the
# target into "/house_prices"; fail early with a clear message instead.
if (!nzchar(Sys.getenv('R_SOURCES'))) {
    stop('Environment variable R_SOURCES is not set', call. = FALSE)
}
setwd(file.path(Sys.getenv('R_SOURCES'), 'house_prices'))
getwd()

In [9]:
# Load the project code (presumably main.R defines the `house_prices`
# namespace used below -- confirm) and attach the libraries it needs.
source('main.R')
house_prices$helpers$import_libs()

# Build the modelling dataset. Each project helper is wrapped in parentheses
# so that magrittr applies it as a function to the piped data frame.
# What each helper does is inferred from its name only; the implementations
# are not visible in this notebook.
combined_dataset <-
    house_prices$helpers$load_data() %>%
    (house_prices$outliers$remove_outliers) %>%
    (house_prices$missing$categ$replace_with_most_common) %>%
    (house_prices$missing$categ$fix_valid) %>%
    (house_prices$missing$numeric$replace_with_zero) %>%
    mutate(
        # Model target: natural log of the sale price.
        price_log = log(SalePrice)
    ) %>%
    # The raw target and the row identifier are dropped once price_log exists.
    select(-SalePrice, -Id)

# Rows flagged as coming from the training source.
training_dataset <-
    combined_dataset %>%
    filter(dataSource == 'train')


# price_log is excluded from this check: SalePrice was already dropped above,
# so price_log is the only column allowed to hold NA (presumably for the
# non-train rows, which have no sale price -- confirm against load_data).
test_that("should be no NA values except price_log column", {
    expect_equal(sum(is.na(combined_dataset %>% select(-price_log))), 0)
})

# Within the training rows every column, price_log included, must be complete.
test_that("should be no NA values in training_dataset", {
    expect_equal(sum(is.na(training_dataset)), 0)
})

In [13]:
# Show the character columns of the combined dataset.
house_prices$helpers$get_character_colnames(combined_dataset)

# Ratings computed for an explicitly supplied list of columns
# (here: every character column), with price_log as the target.
rating_for_selected <- house_prices$trans$categ$calc_rating_for_selected(
    df = combined_dataset,
    categ_vars_for_fix = house_prices$helpers$get_character_colnames(
        combined_dataset
    ),
    target_var = price_log
)

# Ratings computed by the "for all" variant of the same calculation.
ratings_for_all <- house_prices$trans$categ$calc_rating_for_all(
    combined_dataset,
    price_log
)

# Both code paths are expected to produce the same rating table.
test_that("should be equal ratings", {
    expect_equal(ratings_for_all, rating_for_selected)
})

# Preview the first rows of the rating table.
head(ratings_for_all)

var,value,rating
Alley,_none_,2.531822
Alley,Grvl,1.5
Alley,Pave,2.536585
BldgType,1Fam,2.564039
BldgType,2fmCon,1.580645
BldgType,Duplex,1.692308


In [56]:
# Re-source the project code so the latest definitions are used in this cell.
source('main.R')

# Rating table for the combined dataset, with price_log as the target.
ratings <- house_prices$trans$categ$calc_rating_for_all(
    combined_dataset,
    price_log
)

# Apply the rating transform to every character column of the dataset.
df1 <- house_prices$trans$categ$rating_transform_for_selected(
    data = combined_dataset,
    columns = house_prices$helpers$get_character_colnames(combined_dataset),
    ratings = ratings
)

# Disabled comparison against the rating_transform variant, kept for reference.
# df2 <-
#     combined_dataset %>%
#     house_prices$trans$categ$rating_transform(price_log)


# test_that("should be equal", {
#     expect_equal(df1, df2)
# })

# Preview the first rows of the transformed dataset.
head(df1)

Alley,BldgType,BsmtCond,BsmtExposure,BsmtFinType1,BsmtFinType2,BsmtQual,CentralAir,Condition1,Condition2,⋯,OpenPorchSF,EnclosedPorch,X3SsnPorch,ScreenPorch,PoolArea,MiscVal,MoSold,YrSold,dataSource,price_log
2.531822,2.564039,2.540871,2.320042,3.262019,2.553429,3.066343,2.57887,2.547619,2.50277,⋯,61,0,0,0,0,0,2,2008,train,12.24769
2.531822,2.564039,2.540871,3.295455,2.231818,2.553429,3.066343,2.57887,1.9875,2.50277,⋯,0,0,0,0,0,0,5,2007,train,12.10901
2.531822,2.564039,2.540871,2.675439,3.262019,2.553429,3.066343,2.57887,2.547619,2.50277,⋯,42,0,0,0,0,0,9,2008,train,12.31717
2.531822,2.564039,3.046154,2.320042,2.231818,2.553429,1.847458,2.57887,2.547619,2.50277,⋯,35,272,0,0,0,0,2,2006,train,11.8494
2.531822,2.564039,2.540871,2.900452,3.262019,2.553429,3.066343,2.57887,2.547619,2.50277,⋯,84,0,0,0,0,0,12,2008,train,12.42922
2.531822,2.564039,2.540871,2.320042,3.262019,2.553429,3.066343,2.57887,2.547619,2.50277,⋯,30,0,320,0,0,700,10,2009,train,11.8706
