Skip to content

Commit

Permalink
gm
Browse files Browse the repository at this point in the history
  • Loading branch information
ShichenXie committed Nov 25, 2019
1 parent 35afbe6 commit 16867d0
Show file tree
Hide file tree
Showing 4 changed files with 46 additions and 3 deletions.
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

* fixed a bug in woebin using chimerge method
* gains_table supports setting of break width
* fixed a bug in germancredit data set
* scorecard2 supports adjusting sampling and returning the predicted probability

# scorecard 0.2.7

Expand Down
41 changes: 41 additions & 0 deletions R/germancredit.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,47 @@ NULL



# download data from website
# dt = setDT(read.table('https://archive.ics.uci.edu/ml/machine-learning-databases/statlog/german/german.data'))
#
# library(rvest)
# attrs = read_html('https://archive.ics.uci.edu/ml/datasets/Statlog+(German+Credit+Data)') %>%
# html_nodes('p.normal') %>%
# html_text() %>%
# .[22] %>%
# strsplit(' *\r(\t)* *') %>%
# .[[1]] %>%
# sub('Attibute', 'Attribute', .)
#
# attr_no = which(grepl('^Attribute', attrs))
#
# attrdt = data.table(atr = attrs)[
# grepl('^Attribute', atr), var := atr
# # ][which(grepl('^Attribute', atr))+1, varNam := atr
# ][, var := var[1], by = cumsum(!is.na(var))
# # ][, varNam := varNam[1], by = cumsum(!is.na(varNam))
# ][, `:=`(
# var = paste0('V', sub('Attribute (\\d+):\\s+\\((.+)\\)', '\\1', var)),
# typ = sub('Attribute (\\d+):\\s+\\((.+)\\)', '\\2', var)
# )][typ == 'qualitative']
#
# atrlst = lapply(split(attrdt, by = 'var'), function(x) {
# x[-c(1:2), .(atr)
# ][, `:=`(
# val1 = sub('^(A\\d+).+?$', '\\1', atr),
# val2 = sub('.*?(A\\d+) : (.+)$', '\\2', atr)
# )]
# })
#
# varNam = attrs[attr_no+1] %>%
# tolower() %>%
# gsub('[^(a-z)]+', '.', .) %>%
# c(., 'creditability')
# setnames(dt, varNam)




# library(scorecard)
# library(data.table)
# data("germancredit")
Expand Down
6 changes: 3 additions & 3 deletions R/perf.R
Original file line number Diff line number Diff line change
Expand Up @@ -905,7 +905,7 @@ psi_plot = function(dt_psi, psi_sn, title, sn, line_color = 'blue', bar_color =


gains_table_format = function(dt_distr) {
. = good = bad = bin = count = datset = NULL
. = good = bad = bin = count = datset = bin_avg = NULL

dt_distr = dt_distr[, .(
bin,
Expand All @@ -916,7 +916,7 @@ gains_table_format = function(dt_distr) {
badprob=bad/count,
approval_rate = cumsum(count)/sum(count),
cum_badprob = cumsum(bad)/cumsum(count),
mean_score
bin_avg
), by = datset]

return(dt_distr)
Expand Down Expand Up @@ -1088,7 +1088,7 @@ gains_table = function(score, label, bin_num=10, method='freq', width_by=NULL, p
if (return_dt_psi) return(dt_psi) # inner result used in perf_psi function

# distribution table
dt_distr = dt_psi[, .(count=.N, good = sum(label==0), bad = sum(label==1), mean_score = mean(score)), keyby = .(datset,bin)
dt_distr = dt_psi[, .(count=.N, good = sum(label==0), bad = sum(label==1), bin_avg = mean(score)), keyby = .(datset,bin)
][order(datset, -bin)]
if (!is_score) dt_distr = dt_distr[order(datset, bin)] #is predicted probability
# gains table
Expand Down
Binary file modified data/germancredit.RData
100755 → 100644
Binary file not shown.

0 comments on commit 16867d0

Please sign in to comment.