In [1]:
# Remove every object from the current workspace so the notebook starts clean.
# NOTE(review): this also wipes anything the user had defined beforehand.
rm(list=ls())

# data.table / tidyverse provide the table handling and the pipe used below,
# RNetLogo (with rJava loaded ahead of it) drives NetLogo, and lhs supplies
# the Latin hypercube sampler.
library(data.table)
library(tidyverse)
library(rJava)
library(RNetLogo)
library(lhs)

# A negative `warn` value makes R ignore all warnings from here on.
# NOTE(review): this hides warnings from every later cell as well.
options(warn = -1)

Registered S3 methods overwritten by 'ggplot2':
  method         from 
  [.quosures     rlang
  c.quosures     rlang
  print.quosures rlang
Registered S3 method overwritten by 'rvest':
  method            from
  read_xml.response xml2
── Attaching packages ─────────────────────────────────────── tidyverse 1.2.1 ──
✔ ggplot2 3.1.1     ✔ purrr   0.3.2
✔ tibble  3.1.0     ✔ dplyr   1.0.5
✔ tidyr   1.1.3     ✔ stringr 1.4.0
✔ readr   1.3.1     ✔ forcats 0.4.0
“package ‘dplyr’ was built under R version 3.6.3”── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::between()   masks data.table::between()
✖ dplyr::filter()    masks stats::filter()
✖ dplyr::first()     masks data.table::first()
✖ dplyr::lag()       masks stats::lag()
✖ dplyr::last()      masks data.table::last()
✖ purrr::transpose() masks data.table::transpose()
Loading required package: igraph

Attaching package: ‘igraph’

The following objects are masked from ‘package:dplyr’:

    as_data_fra

In [2]:
# Model file name (without extension); the same string is used as the handle
# (`nl.obj`) of the NetLogo instance started below.
nl.model <- "info_cascade_update_TDP_JPF_2020"

# Directory passed to NLStart() as the NetLogo location.
nl.path <- "/Users/ecemnaz.yildiz/Documents/NetLogo 6.0.4/Java"

# Directory holding the .nlogo file.
# NOTE(review): this variable has the same name as base R's file.path().
file.path <- "/Users/ecemnaz.yildiz/Documents/Personal/Thesis/"
model.path <- paste0(file.path, nl.model, ".nlogo")

# Output directory for generated data sets and the run log.
data.path <- "/Users/ecemnaz.yildiz/Documents/Personal/Thesis/Data/"
logFile <- paste0(data.path, "data_log.txt")

# Start a NetLogo instance without GUI and load the model into it.
NLStart(
  nl.path,
  gui = FALSE,
  nl.jarname = "netlogo-6.0.4.jar",
  nl.obj = nl.model
)
NLLoadModel(model.path, nl.obj = nl.model)

In [3]:
# Short label used in log entries and output file names.
# Earlier variant, kept for reference:
#   model = ifelse(nl.model == "Segregation","basic","dummy")
model <- "info_cascade_update"

In [4]:
## Model parameters

# Number of replications for each instance
nofrep <- 30 # 100

# Names of the model inputs; run_model() sets each one with a NetLogo
# "set <name> <value>" command, in this order.
feature_names <- c(
  "max_links",
  "evidence",
  "sc-bel-prop",
  "prop-likelihood",
  "n_init_believers",
  "prior-mean",
  "prior-sd",
  "expertise_influence"
)

# Name of the reporter read back after each run.
output_name <- "cl-prop-same"

# Number of input parameters of the agent-based model
nofparams <- length(feature_names)

In [5]:
# Number of instances in the test set and in the training sets.
test_ins <- 30
train_ins_oneshot <- 30
train_ins_Ad <- 30

# Base value subtracted from the set size to form each set.seed() argument.
my.seed <- 8

## Functions

In [6]:
#' Run one replication of the NetLogo model and read its output.
#'
#' Sets every parameter named in the global `feature_names` to the value at
#' the same position of `feature_values`, then issues "setup", executes "go"
#' `n_ticks` times and reads the reporter named by the global `output_name`.
#' All commands are sent to the NetLogo instance identified by the global
#' `nl.model`.
#'
#' @param feature_values Vector (or one-row matrix) with at least
#'   `length(feature_names)` non-missing values ordered like `feature_names`.
#'   Elements beyond that length are ignored.
#' @param n_ticks Number of times the "go" command is executed (default 30).
#' @return The value returned by `NLReport()` for `output_name`.
run_model <- function(feature_values, n_ticks = 30) {
  n_features <- length(feature_names)

  # Without this check a short or NA input would send "set <name> NA" to
  # NetLogo instead of failing with a clear message here.
  if (length(feature_values) < n_features) {
    stop(
      "`feature_values` has ", length(feature_values),
      " element(s) but ", n_features, " are required",
      call. = FALSE
    )
  }
  if (anyNA(feature_values[seq_len(n_features)])) {
    stop("`feature_values` contains missing values", call. = FALSE)
  }

  # seq_len() yields an empty sequence when there are no features,
  # whereas 1:0 would iterate over c(1, 0).
  for (idx in seq_len(n_features)) {
    NLCommand(
      paste0("set ", feature_names[idx], " ", feature_values[idx]),
      nl.obj = nl.model
    )
  }

  NLCommand("setup", nl.obj = nl.model)
  NLDoCommand(n_ticks, "go", nl.obj = nl.model)
  NLReport(output_name, nl.obj = nl.model)
}

#' Average the model output over several replications.
#'
#' Calls `run_model(feature_values)` `nofrep` times and returns the mean of
#' the results.
#'
#' @param nofrep Number of replications; a single number of at least 1.
#' @param feature_values Parameter values forwarded unchanged to `run_model()`.
#' @return The mean of the `nofrep` replication results (a single number).
run_replicas <- function(nofrep, feature_values) {
  # 1:nofrep would iterate over c(1, 0) for nofrep = 0 and run the model
  # twice, so reject a non-positive count up front.
  if (!is.numeric(nofrep) || length(nofrep) != 1L ||
      is.na(nofrep) || nofrep < 1) {
    stop("`nofrep` must be a single number >= 1", call. = FALSE)
  }

  # vapply() fails if a replication does not return exactly one number,
  # rather than silently storing something else.
  replicas <- vapply(
    seq_len(nofrep),
    function(rep_idx) run_model(feature_values),
    numeric(1)
  )

  mean(replicas)
}

#' Label the first `nofinstances` rows of a data.table with the model output.
#'
#' For each row the feature columns are passed to `run_replicas()` and the
#' averaged result is stored in the `output` column. NetLogo is shut down and
#' restarted after every instance, including the last one, so an instance
#' with the model loaded is available again when the function returns.
#'
#' @param nofrep Number of replications per instance.
#' @param nofinstances Number of leading rows to evaluate; must not exceed
#'   `nrow(unlabeledset)`.
#' @param unlabeledset data.table containing one column per entry of the
#'   global `feature_names`. Its columns are reordered by reference.
#' @return `unlabeledset` with the `output` column filled for the evaluated
#'   rows.
run_ABM <- function(nofrep, nofinstances, unlabeledset) {
  # Fail before any model run if the request does not fit the table.
  stopifnot(
    is.numeric(nofinstances),
    length(nofinstances) == 1L,
    !is.na(nofinstances),
    nofinstances >= 0,
    nofinstances <= nrow(unlabeledset)
  )

  # Put the feature columns first, in the order of `feature_names`.
  unlabeledset <- setcolorder(unlabeledset, feature_names)

  # seq_len() gives an empty loop for nofinstances = 0 (1:0 would not).
  for (i in seq_len(nofinstances)) {
    # Pass only the feature columns, so `output` or any other extra column
    # can never be mistaken for a parameter value.
    feature_values <- as.matrix(unlabeledset[i, feature_names, with = FALSE])
    instance_mean <- run_replicas(nofrep, feature_values)
    unlabeledset[i, output := instance_mean]

    # Restart NetLogo between instances.
    # NOTE(review): presumably done to release memory -- confirm.
    NLQuit(all = TRUE)
    NLStart(
      nl.path,
      gui = FALSE,
      nl.jarname = "netlogo-6.0.4.jar",
      nl.obj = nl.model
    )
    NLLoadModel(model.path, nl.obj = nl.model)
  }

  unlabeledset
}

## Unlabeled Pool

In [7]:
# Same model name and label as defined earlier, restated for this section.
nl.model <- "info_cascade_update_TDP_JPF_2020"
model <- "info_cascade_update"

# Sizes of the unlabeled pools to generate.
unlabeled_ins <- c(100, 200, 300, 400, 500, 600, 700, 800, 900, 1000)

In [8]:
# Generate one unlabeled pool per requested size and write each to CSV.
for (n in unlabeled_ins) {
  # The seed depends on the pool size.
  set.seed(n - my.seed)

  # Sample on the unit cube: n rows, one column (V1, V2, ...) per parameter.
  unlabeled_pool <- as.data.table(maximinLHS(n = n, k = nofparams, dup = 5))

  # Map every unit-interval column onto the range of its parameter.
  unlabeled_pool[, `:=`(
    V1 = qunif(V1, 2, 500), # max_links
    V2 = qunif(V2, 0, 100), # evidence
    V3 = qunif(V3, 0, 5),   # sc-bel-prop
    V4 = qunif(V4, 0, 1),   # prop-likelihood
    V5 = qunif(V5, 0, 100), # n_init_believers
    V6 = qunif(V6, 0, 1),   # prior-mean
    V7 = qunif(V7, 0, 1),   # prior-sd
    V8 = qunif(V8, 0, 1)    # expertise_influence
  )]

  setnames(unlabeled_pool, paste0("V", 1:nofparams), feature_names)

  # Row identifier within the pool.
  unlabeled_pool[, idx := 1:.N]

  fwrite(
    unlabeled_pool,
    paste0(
      data.path, "unlabeled_pool", "_", model, "_", n, "_", Sys.Date(), ".csv"
    )
  )
}

## Test Data

In [9]:
# Build the test set(s): uniform random inputs, labelled by running the model.
for (k in test_ins) {
  set.seed(k - my.seed)

  # The columns are drawn in this exact order; changing the order would
  # change the random stream and therefore the generated values.
  test_set <- data.table(
    `max_links`           = runif(k, 2, 500),
    `evidence`            = runif(k, 0, 100),
    `sc-bel-prop`         = runif(k, 0, 5),
    `prop-likelihood`     = runif(k, 0, 1),
    `n_init_believers`    = runif(k, 0, 100),
    `prior-mean`          = runif(k, 0, 1),
    `prior-sd`            = runif(k, 0, 1),
    `expertise_influence` = runif(k, 0, 1)
  )

  # Record the start time on the console and in the log file.
  test_start <- paste0("ABM run start time : ", Sys.time())
  print(test_start)
  cat(
    paste0("test_set", "_", model, "_", k, "_seed", my.seed, "   ", test_start),
    file = logFile, sep = "\n", append = TRUE
  )

  test_set <- as.data.table(run_ABM(nofrep, k, test_set))

  # Record the end time the same way.
  test_end <- paste0("ABM run end time : ", Sys.time())
  print(test_end)
  cat(
    paste0("test_set", "_", model, "_", k, "_seed", my.seed, "   ", test_end),
    file = logFile, sep = "\n", append = TRUE
  )

  fwrite(
    test_set,
    paste0(
      data.path, "test_set", "_", model, "_", k, "_seed", my.seed, "_",
      Sys.Date(), ".csv"
    )
  )
}

[1] "ABM run start time : 2022-01-05 22:19:00"
[1] "ABM run end time : 2022-01-05 22:38:58"


## Train Data

In [10]:
# Build the one-shot training set(s): Latin hypercube inputs, labelled by
# running the model.
for (m in train_ins_oneshot) {
  set.seed(m - my.seed)

  # Sample on the unit cube: m rows, one column (V1, V2, ...) per parameter.
  training_set <- as.data.table(maximinLHS(n = m, k = nofparams, dup = 5))

  # Map every unit-interval column onto the range of its parameter.
  training_set[, `:=`(
    V1 = qunif(V1, 2, 500), # max_links
    V2 = qunif(V2, 0, 100), # evidence
    V3 = qunif(V3, 0, 5),   # sc-bel-prop
    V4 = qunif(V4, 0, 1),   # prop-likelihood
    V5 = qunif(V5, 0, 100), # n_init_believers
    V6 = qunif(V6, 0, 1),   # prior-mean
    V7 = qunif(V7, 0, 1),   # prior-sd
    V8 = qunif(V8, 0, 1)    # expertise_influence
  )]

  setnames(training_set, paste0("V", 1:nofparams), feature_names)

  # Placeholder label, overwritten row by row in run_ABM().
  training_set[, output := 0.00]

  # Record the start time on the console and in the log file.
  training_start <- paste0("ABM run start time : ", Sys.time())
  print(training_start)
  cat(
    paste0(
      "training_set", "_", model, "_", m, "_seed", my.seed, "   ",
      training_start
    ),
    file = logFile, sep = "\n", append = TRUE
  )

  training_set <- as.data.table(run_ABM(nofrep, m, training_set))

  # Record the end time the same way.
  training_end <- paste0("ABM run end time : ", Sys.time())
  print(training_end)
  cat(
    paste0(
      "training_set", "_", model, "_", m, "_seed", my.seed, "   ",
      training_end
    ),
    file = logFile, sep = "\n", append = TRUE
  )

  fwrite(
    training_set,
    paste0(
      data.path, "training_set", "_", model, "_", m, "_seed", my.seed, "_",
      Sys.Date(), ".csv"
    )
  )
}

[1] "ABM run start time : 2022-01-05 22:41:04"
[1] "ABM run end time : 2022-01-05 23:02:02"


In [11]:
# Close all NetLogo instances now that data generation has finished.
NLQuit(all = TRUE)