In [3]:
require(xgboost)
library(randomForest)
library(caTools)
library(caret) 
library(e1071)
library(ROSE)

In [15]:
# install the required packages first
require(jsonlite)
require(httr)
require(data.table)

# Request an authentication token from the submission server.
#
# Args:
#   username: account name, sent in the POST body.
#   password: account password, sent in the POST body.
#   url_site: base URL of the server; "/token/" is appended to it.
#
# Returns:
#   On HTTP 201, the `key` field of the parsed response body.
#   On HTTP 400, prints a hint and returns 0 (callers must check for this).
#   Any other status code raises an error.
get_token <- function(username, password, url_site){

    post_body <- list(username = username, password = password)
    post_url_string <- paste0(url_site, '/token/')
    result <- POST(post_url_string, body = post_body)
    status <- result$status_code

    # error handling (wrong credentials)
    if (status == 400) {
        print('Check your credentials')
        return(0)
    }

    # Previously any other status fell through to an unassigned `token`,
    # which either failed with an obscure "object not found" error or picked
    # up a stale global of the same name. Fail loudly and explicitly instead.
    if (status != 201) {
        stop(
            sprintf("Token request to %s failed with unexpected HTTP status %s",
                    post_url_string, status),
            call. = FALSE
        )
    }

    output <- content(result)
    output$key
}



# Validate a vector of predictions and optionally post it to the server.
#
# Args:
#   predictions: numeric vector of predictions; validated by check_format().
#   token: authentication token, sent as "Authorization: Token <token>".
#   url_site: base URL of the server; "/submission/" is appended to it.
#   submit_now: when FALSE (the default) the payload is only printed and
#     nothing is sent.
#
# Returns:
#   FALSE if the format check fails or submit_now is FALSE; otherwise the
#   parsed server response (invisibly, after printing it).
send_submission <- function(predictions, token, url_site, submit_now=FALSE){

    if (!check_format(predictions)) {
        return(FALSE)
    }

    # Build an unnamed list with one element per prediction. Names are dropped
    # because a named list would be serialised as a JSON object rather than an
    # array. This replaces building and eval()-ing an R source string.
    submission <- as.list(unname(predictions))
    json_body <- jsonlite::toJSON(submission, auto_unbox = TRUE)
    submission <- list(submission = json_body)
    print(submission)

    if (!submit_now) {
        print("You did not submit.")
        return(FALSE)
    }

    header <- add_headers(c(Authorization = paste('Token', token, sep = ' ')))
    post_url_string <- paste0(url_site, '/submission/')
    result <- POST(post_url_string, header, body = submission)

    if (result$status_code == 201) {
        print("Successfully submitted. Below you can see the details of your submission")
    } else {
        print("Could not submit. Please check the error message below, contact the assistant if needed.")
    }

    print(content(result))

}

# Check that `predictions` is acceptable for submission.
#
# Args:
#   predictions: object to validate.
#
# Returns:
#   TRUE (after printing "Format OK") when `predictions` is numeric, has no
#   missing values and every element is <= 1; otherwise FALSE (after printing
#   "Wrong format").
check_format <- function(predictions){

    # `&&` short-circuits, so the comparison is only evaluated on numeric
    # input. The anyNA() guard makes missing values a format failure instead
    # of an NA condition that would make `if` raise an error.
    format_ok <- is.numeric(predictions) &&
        !anyNA(predictions) &&
        all(predictions <= 1)

    if (format_ok) {
        print("Format OK")
        return(TRUE)
    }

    print("Wrong format")
    FALSE

}

# Main script: obtain a token, fit a random forest on a class-balanced
# (down-sampled) training set, and submit predicted probabilities for the
# test set.
subm_url = 'http://46.101.121.83'

# NOTE(review): the credentials below are hard-coded in the source and are
# sent over plain HTTP. Consider loading them from the environment (e.g.
# Sys.getenv()) and rotating this password.
u_name = "HNY"
p_word = "9IhQrOl8qYGJqf8A"
submit_now = TRUE

username = u_name
password = p_word

# Spell out `url_site` instead of relying on partial argument matching.
token = get_token(username=u_name, password=p_word, url_site=subm_url)

# get_token() returns 0 when the server rejects the credentials; stop here
# rather than submitting with an invalid token.
if (identical(token, 0)) {
    stop("Could not obtain a token; submission aborted.", call. = FALSE)
}

data_train=read.csv("IE582_Fall20_ProjectTrain.csv")
data_test=read.csv("IE582_Fall20_ProjectTest.csv")

# Balance the classes by down-sampling. The response is taken from column
# `y`, and the last column is dropped from the predictors (assumes `y` is the
# last column -- TODO confirm). downSample() expects a factor response, and
# read.csv() no longer creates factors by default in R >= 4.0, so coerce
# explicitly (a no-op when `y` is already a factor).
down_train = downSample(x = data_train[, -ncol(data_train)],
                        y = as.factor(data_train$y))

# downSample() names the response column `Class`.
fit = randomForest(Class ~ ., data = down_train,mtry=16, nodesize=14,ntree=100)

# Predict class probabilities for the test set and keep the second column
# (the probability of the second class level).
pred= predict(fit, newdata = data_test, type = "prob")
predictions=pred[,2]


send_submission(predictions, token, url_site=subm_url, submit_now= submit_now)

[1] "Format OK"
$submission
[0.09,0.36,0.52,0.09,0.75,0.45,0.6,0.63,0.12,0,0.28,0.35,0.01,0.87,0.39,0.05,0.04,0.23,0.2,0.01,0.15,0.64,0.95,0.03,0.74,0.13,0.69,0.69,0.62,0.45,0.45,0.81,0.02,0.67,0.74,0.48,0.86,0.79,0.96,0.02,0.35,0.29,0.17,0.82,0.38,0.93,0.45,0.05,0.31,0.17,0.06,0.01,0.31,0.51,0.78,0.46,0.74,0.75,0.14,0.66,0.87,0.08,0.18,0.27,0.1,0.1,0.02,0.06,0.73,0.22,0.62,0.04,0.01,0.76,0.2,0.41,0.94,0.1,0.51,0.19,1,0.19,0,0.69,0.36,0.47,0,0.09,0.94,0.45,0.07,0.09,0.19,0.86,0.14,0.55,0.26,0.14,0.44,0.63,0.7,0.05,0.78,0.45,0.47,0.63,0.03,0.37,0.26,0.11,0.05,0.95,0.21,0.96,0.38,0.04,0.05,0.64,0.11,0.02,0.03,0.09,0.05,0.16,0.22,0.06,0.03,0.51,0.91,0.07,0.69,0.62,0.53,0.52,0.6,0.74,0.09,0.88,0.12,0.9,0.46,0.4,0.49,0.61,0.49,0.38,0.93,0.33,0.42,0.16,0.1,0.61,0.33,0.32,0.01,0.26,0.02,0.67,0.38,0.95,0.1,0.22,0.11,0.15,0.02,0.78,0.12,0.55,0.02,0.45,0.9,0.53,0.51,0.07,0.6,0.01,0.41,0.41,0.65,0.86,0.96,0.9,0.59,0.01,0.94,0.06,0,0.29,0.25,0.38,0.01,0.64,0.83,0.25,0.02,0,0.06,0.02,0.05,0.89,0.58

In [12]:
# Tuning notes (presumably for xgboost, which is loaded above -- confirm):
# each parameter setting is followed by the test log-loss it produced.
#max.depth = 2, eta = 1, nthread = 2, nrounds = 150
#test-logloss:0.229614
#max.depth = 4, eta = 1, nthread = 2, nrounds = 30
#test-logloss:0.264505 
#max.depth = 4, eta = 0.9, nthread = 2, nrounds = 30
#test-logloss:0.264700
#max.depth = 4, eta = 0.9, nthread = 2, nrounds = 45
#test-logloss:0.23


In [20]:
# Show how many predictions are in the `predictions` vector.
length(predictions)