In [1]:
# This R environment comes with many helpful analytics packages installed
# It is defined by the kaggle/rstats Docker image: https://github.com/kaggle/docker-rstats
# For example, here's a helpful package to load

library(tidyverse) # metapackage of all tidyverse packages

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

list.files(path = "../input")

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

── [1mAttaching packages[22m ─────────────────────────────────────── tidyverse 1.3.0 ──

[32m✔[39m [34mggplot2[39m 3.3.0.[31m9000[39m     [32m✔[39m [34mpurrr  [39m 0.3.4     
[32m✔[39m [34mtibble [39m 3.0.1          [32m✔[39m [34mdplyr  [39m 0.8.5     
[32m✔[39m [34mtidyr  [39m 1.0.3          [32m✔[39m [34mstringr[39m 1.4.0     
[32m✔[39m [34mreadr  [39m 1.3.1          [32m✔[39m [34mforcats[39m 0.5.0     

── [1mConflicts[22m ────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()



In [2]:
data <- read.csv("../input/a6-essemble/Earthquate_Damage.csv")

In [3]:
cat_cols <- c("land_surface_condition","foundation_type",
              "roof_type","ground_floor_type",
             "other_floor_type","position",
              "plan_configuration","legal_ownership_status")

In [4]:
install.packages("nnet")
library(nnet)

Installing package into ‘/usr/local/lib/R/site-library’
(as ‘lib’ is unspecified)



In [5]:
j <- 1
for(i in 1:length(cat_cols)){
    tmp <- class.ind(data[,cat_cols[i]])
    colnames(tmp) <- (j:(j+length(colnames(tmp)) - 1))
    data <- cbind(data,tmp)
    j <- (j+length(colnames(tmp)))
}

In [6]:
data <- data[,!(names(data)%in%cat_cols)]

In [7]:
scaled_data <- scale(data[,1:31],center=TRUE,scale=TRUE)
other_data <- data[,32:70]
data <- cbind(scaled_data,other_data)

In [8]:
set.seed(42)
trn_idx <- sample(1:nrow(data), 150000)
train <- data[trn_idx,]
tmp <- data[-trn_idx,]
val_idx <- sample(1:nrow(tmp),50000)
val <- tmp[val_idx,]
test <- tmp[-val_idx,]

In [9]:
perf_eval_multi <- function(cm){
  
  # Simple Accuracy
  ACC = sum(diag(cm))/sum(cm)
  
  # Balanced Correction Rate
  BCR = 1
  for (i in 1:dim(cm)[1]){
    BCR = BCR*(cm[i,i]/sum(cm[i,])) 
  }
  
  BCR = BCR^(1/dim(cm)[1])
  
  return(c(ACC, BCR))
}

In [32]:
prediction_one <- function(pred){
    tmp <- apply(pred,1,which.max)
    for (i in 1:nrow(pred)){
        pred[i,tmp[i]] <- 1
        pred[i,-tmp[i]] <- 0
    }
    return(pred)
}

In [10]:
x_train <- train[,!(names(train)%in%"damage_grade")]
y_train <- train[,(names(train)%in%"damage_grade")]
x_val <- val[,!(names(val)%in%"damage_grade")]
y_val <- val[,(names(val)%in%"damage_grade")]
x_test <- test[,!(names(train)%in%"damage_grade")]
y_test <- test[,(names(train)%in%"damage_grade")]

In [11]:
install.packages("moments") 
library(moments)

Installing package into ‘/usr/local/lib/R/site-library’
(as ‘lib’ is unspecified)



In [12]:
logistic_train <- rbind(train,val)

In [13]:
set.seed(43)
mult_lr <- multinom(damage_grade ~ ., data = logistic_train)

# weights:  213 (140 variable)
initial  value 219722.457735 
iter  10 value 175362.184661
iter  20 value 174883.832267
iter  30 value 174773.947634
iter  40 value 174508.487264
iter  50 value 173756.524829
iter  60 value 171179.027412
iter  70 value 166951.456076
iter  80 value 165174.847066
iter  90 value 163300.694940
iter 100 value 162033.070788
final  value 162033.070788 
stopped after 100 iterations


In [14]:
mult_lr_pred <- predict(mult_lr, newdata = test)
cfmatrix <- table(test$damage_grade, mult_lr_pred)
cfmatrix
perf_eval_multi(cfmatrix)

   mult_lr_pred
        1     2     3
  1  1263  4243   377
  2   998 28072  5395
  3   107 14070  6076

In [15]:
install.packages("rpart")
library(rpart)

Installing package into ‘/usr/local/lib/R/site-library’
(as ‘lib’ is unspecified)



In [25]:
max_depth <- c(5,10,15)
min_split <- c(10,100,1000)
rpart_pre_search_result = matrix(0,length(min_split)*length(max_depth),4)
colnames(rpart_pre_search_result) <- c("min_split", "max_depth","ACC", "BCR")

In [26]:
rpart_trn <- train
rpart_val <- val

In [27]:
iter_cnt = 1
for ( i in 1:length(min_split)){
    for ( j in 1:length(max_depth)){
        tmp_control <- rpart.control(maxdepth = max_depth[j], minsplit = min_split[i])
        tmp_rpart_model <- rpart( damage_grade~ ., data = rpart_trn, method = "class",control = tmp_control)
        tmp_pred <- predict(tmp_rpart_model, rpart_val, type='class')
        tmp_rpart_val_cm <- table(rpart_val$damage_grade, tmp_pred)
        rpart_pre_search_result[iter_cnt,3:4] = perf_eval_multi(tmp_rpart_val_cm)
        rpart_pre_search_result[iter_cnt,1] = min_split[i]
        rpart_pre_search_result[iter_cnt,2] = max_depth[j]
        iter_cnt = iter_cnt+1

    }
}


In [28]:
rpart_pre_search_result

min_split,max_depth,ACC,BCR
10,5,0.6288,0
10,10,0.63836,0
10,15,0.63836,0
100,5,0.6288,0
100,10,0.63836,0
100,15,0.63836,0
1000,5,0.6288,0
1000,10,0.63836,0
1000,15,0.63836,0


In [29]:
library(party)

Loading required package: grid

Loading required package: mvtnorm

Loading required package: modeltools

Loading required package: stats4

Loading required package: strucchange

Loading required package: zoo


Attaching package: ‘zoo’


The following objects are masked from ‘package:base’:

    as.Date, as.Date.numeric


Loading required package: sandwich


Attaching package: ‘strucchange’


The following object is masked from ‘package:stringr’:

    boundary




In [33]:
max_depth <- c(5,10,15)
min_split <- c(10,100,1000)
cart_pre_search_result = matrix(0,length(min_split)*length(max_depth),4)
colnames(cart_pre_search_result) <- c("min_split", "max_depth","ACC", "BCR")

In [34]:
cart_trn <- train
cart_val <- val

In [41]:
iter_cnt = 1
for ( i in 1:length(min_split)){
    for ( j in 1:length(max_depth)){
        tmp_control <- ctree_control(maxdepth = max_depth[j], minsplit = min_split[i])
        tmp_cart_model <- ctree( damage_grade~ ., data = cart_trn,controls = tmp_control)
        tmp_pred <- predict(tmp_cart_model, cart_val)
        tmp_pred <- round(tmp_pred)
        tmp_cart_val_cm <- table(cart_val$damage_grade, tmp_pred)
        cart_pre_search_result[iter_cnt,3:4] = perf_eval_multi(tmp_cart_val_cm)
        cart_pre_search_result[iter_cnt,1] = min_split[i]
        cart_pre_search_result[iter_cnt,2] = max_depth[j]
        iter_cnt = iter_cnt+1

    }
}


   tmp_pred
        1     2     3
  1  1101  3684     1
  2   737 27427   273
  3    24 15300  1453
   tmp_pred
        1     2     3
  1  1393  3336    57
  2   782 25064  2591
  3    37 11874  4866
   tmp_pred
        1     2     3
  1  1554  3161    71
  2   894 24619  2924
  3    45 10924  5808
   tmp_pred
        1     2     3
  1  1101  3684     1
  2   737 27427   273
  3    24 15300  1453
   tmp_pred
        1     2     3
  1  1390  3338    58
  2   781 25061  2595
  3    37 11870  4870
   tmp_pred
        1     2     3
  1  1555  3159    72
  2   899 24608  2930
  3    45 10916  5816
   tmp_pred
        1     2     3
  1  1101  3685     0
  2   737 27462   238
  3    24 15365  1388
   tmp_pred
        1     2     3
  1  1427  3304    55
  2   885 25046  2506
  3    35 11998  4744
   tmp_pred
        1     2     3
  1  1427  3292    67
  2   885 24680  2872
  3    35 11271  5471


In [42]:
cart_pre_search_result

min_split,max_depth,ACC,BCR
10,5,0.59962,0.267847
10,10,0.62646,0.4205984
10,15,0.63962,0.459966
100,5,0.59962,0.267847
100,10,0.62642,0.4203946
100,15,0.63958,0.4602072
1000,5,0.59902,0.2639041
1000,10,0.62434,0.4203187
1000,15,0.63156,0.4386199
