In [16]:
require(xgboost)
library(randomForest)
library(caTools)
library(caret) 
library(e1071)
library(ROSE)

In [17]:
# install the required packages first
require(jsonlite)
require(httr)
require(data.table)

get_token <- function(username, password, url_site){
    
    post_body = list(username=username,password=password)
    post_url_string = paste0(url_site,'/token/')
    result = POST(post_url_string, body = post_body)

    # error handling (wrong credentials)
    if(result$status_code==400){
        print('Check your credentials')
        return(0)
    }
    else if (result$status_code==201){
        output = content(result)
        token = output$key
    }

    return(token)
}



send_submission <- function(predictions, token, url_site, submit_now=F){
    
    format_check=check_format(predictions)
    if(!format_check){
        return(FALSE)
    }
    
    post_string="list("
    for(i in 1:length(predictions)){
        if(i<length(predictions)){
            post_string=sprintf("%s%s,",post_string,predictions[i])
        } else {
            post_string=sprintf("%s%s)",post_string,predictions[i])
        }
    }
    
    submission = eval(parse(text=post_string))
    json_body = jsonlite::toJSON(submission, auto_unbox = TRUE)
    submission=list(submission=json_body)
    print(submission)

    if(!submit_now){
        print("You did not submit.")
        return(FALSE)      
    }
    

    header = add_headers(c(Authorization=paste('Token',token,sep=' ')))
    post_url_string = paste0(url_site,'/submission/')
    result = POST(post_url_string, header, body=submission)
    
    if (result$status_code==201){
        print("Successfully submitted. Below you can see the details of your submission")
    } else {
        print("Could not submit. Please check the error message below, contact the assistant if needed.")
    }
    
    print(content(result))
    
}

check_format <- function(predictions){
    
    if(all(is.numeric(predictions)) & all(predictions<=1)){
        print("Format OK")
        return(TRUE)
    } else {
        print("Wrong format")
        return(FALSE)
    }
    
}

# this part is main code
subm_url = 'http://46.101.121.83'

u_name = "HNY"
p_word = "9IhQrOl8qYGJqf8A"
submit_now = FALSE

username = u_name
password = p_word

token = get_token(username=u_name, password=p_word, url=subm_url)

data_train=read.csv("IE582_Fall20_ProjectTrain.csv")
data_test=read.csv("IE582_Fall20_ProjectTest.csv")
data_train$y = as.numeric(data_train$y)-1
set.seed(582)
data_balanced_over <- ovun.sample(y ~ ., data = data_train, method = "over",N = 3130)$data
split=sample.split(data_balanced_over$y, SplitRatio=0.8)
train=subset(data_balanced_over,split==TRUE)
validation=subset(data_balanced_over,split==FALSE)

#data_train_mat = data.matrix(data_train)
data_train_mat_x = data.matrix(train[,-61])
data_train_mat_y = data.matrix(train$y)
data_val_mat_x = data.matrix(validation[,-61])
data_val_mat_y = data.matrix(validation$y)
data_test_x = data.matrix(data_test[,-61])
data_test_y = data.matrix(data_test$y)
str(data_train_mat_x)
str(data_train_mat_y)
#train <- train
#test <- validation
dtrain <- xgb.DMatrix(data = data_train_mat_x,label = data_train_mat_y)
#dtrain <- xgb.DMatrix(data = new_tr,label = labels) 
dval = xgb.DMatrix(data = data_val_mat_x,label = data_val_mat_y)

watchlist <- list(train=dtrain, test=dval)
bst <- xgb.train(set.seed=50,data = dtrain, max.depth = 8, eta = 0.9, nthread = 2, nrounds = 40 , watchlist=watchlist, objective = "binary:logistic")
val_pred = predict(bst, data_val_mat_x)

str(val_pred)
pred <- as.numeric(val_pred > 0.5)
err <- mean(as.numeric(pred > 0.5) != data_val_mat_y)
print(paste("val-error=", err))

test_pred = predict(bst, data_test_x)
predictions= test_pred


send_submission(predictions, token, url=subm_url, submit_now= submit_now)

 num [1:2504, 1:60] 30 29 33 28 27 29 27 29 37 27 ...
 - attr(*, "dimnames")=List of 2
  ..$ : chr [1:2504] "2" "3" "4" "6" ...
  ..$ : chr [1:60] "x1" "x2" "x3" "x4" ...
 num [1:2504, 1] 0 0 0 0 0 0 0 0 0 0 ...
Parameters: { set_seed } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[1]	train-logloss:0.370955	test-logloss:0.416422 
[2]	train-logloss:0.255496	test-logloss:0.326626 
[3]	train-logloss:0.209340	test-logloss:0.294631 
[4]	train-logloss:0.183288	test-logloss:0.281295 
[5]	train-logloss:0.167505	test-logloss:0.269096 
[6]	train-logloss:0.128667	test-logloss:0.248816 
[7]	train-logloss:0.091027	test-logloss:0.243090 
[8]	train-logloss:0.067333	test-logloss:0.239123 
[9]	train-logloss:0.057974	test-logloss:0.226656 
[10]	train-logloss:0.052056	test-logloss:0.219381 
[11

In [12]:
#max.depth = 2, eta = 1, nthread = 2, nrounds = 150
#test-logloss:0.229614
#max.depth = 4, eta = 1, nthread = 2, nrounds = 30
#test-logloss:0.264505 
#max.depth = 4, eta = 0.9, nthread = 2, nrounds = 30
#test-logloss:0.264700
#max.depth = 4, eta = 0.9, nthread = 2, nrounds = 45
#test-logloss:0.23


In [20]:
length(predictions)