 # Challenge generation

## 1. Import required libraries

In [1]:
if(!is.element("devtools", installed.packages()[,1])){
  install.packages("devtools")
}

if(!is.element("mongolite", installed.packages()[,1]) ){
  install.packages("mongolite", dependencies = T)
}

if(!is.element("uuid", installed.packages()[,1])){
  install.packages("uuid")
}


library(mongolite)
library(uuid)

## 2. Define constants

In [2]:
# Mean response delay per challenge type. generate_response_time() uses the
# value both as the mean (in seconds) and, divided by 5, as the standard
# deviation of the simulated delay.
avg_response_time <- list(SMS = 30, EMAIL = 120, OTP = 180, ID = 240)

# Transport protocols an EMAIL challenge can be served over.
protocols <- c("HTTP", "HTTPS")

# Phone prefixes an SMS challenge can be sent to.
number_prefixes <- c("+32", "+33", "+34", "+212", "+213")

# Age group labels, ordered from youngest to oldest.
age_groups <- c(
  "<20", "20-24", "25-29", "30-34", "35-39",
  "40-44", "45-49", "50-54", "55-59", ">=60"
)

# Possible numbers of attempts for one challenge.
attempt_counts <- c(1, 2, 3, 4)

# For each age group, the probability of needing 1, 2, 3 or 4 attempts
# (same order as `attempt_counts`; every row sums to 1). The rows are listed
# in the same order as `age_groups`, which supplies the names.
attempt_count_probs <- setNames(
  list(
    c(0.9, 0.05, 0.03, 0.02),
    c(0.85, 0.1, 0.03, 0.02),
    c(0.75, 0.2, 0.03, 0.02),
    c(0.6, 0.3, 0.05, 0.05),
    c(0.5, 0.4, 0.05, 0.05),
    c(0.4, 0.3, 0.2, 0.1),
    c(0.3, 0.4, 0.2, 0.1),
    c(0.2, 0.4, 0.3, 0.1),
    c(0.1, 0.3, 0.4, 0.2),
    c(0.05, 0.2, 0.5, 0.25)
  ),
  age_groups
)

## 3. Generate challenges

In [3]:
# Generate `n` random birth timestamps.
#
# Each component is drawn independently and uniformly: year in 1930-2005,
# month in 1-12, day in 1-28 (28 is the largest day that exists in every
# month), hour in 0-23, minute and second in 0-59.
#
# The timestamps are built in UTC. Building them in the session's local
# timezone can hit wall-clock times that do not exist on a daylight-saving
# transition day, and converting those to POSIXct is platform-dependent
# (it may yield NA).
#
# Args:
#   n: number of birth timestamps to generate.
#
# Returns:
#   A data frame with `n` rows and a single POSIXct column named
#   `ClientBirthDate`.
generate_birth_date <- function(n) {
  years <- sample(1930:2005, n, replace = TRUE)
  months <- sample(1:12, n, replace = TRUE)
  days <- sample(1:28, n, replace = TRUE)
  hours <- sample(0:23, n, replace = TRUE)
  minutes <- sample(0:59, n, replace = TRUE)
  seconds <- sample(0:59, n, replace = TRUE)

  # ISOdatetime() assembles the POSIXct values directly from the numeric
  # components, so no hand-written format string is needed.
  data.frame(
    ClientBirthDate = ISOdatetime(
      years, months, days, hours, minutes, seconds,
      tz = "UTC"
    )
  )
}

In [4]:
# Draw `n` request timestamps uniformly between 2023-01-01 00:00:00 (in the
# session's local timezone) and the moment the function is called.
#
# Args:
#   n: number of timestamps to generate.
#
# Returns:
#   A POSIXct vector of length `n`.
generate_request_time <- function(n) {
  window_start <- as.POSIXct("2023-01-01 00:00:00")
  window_end <- Sys.time()

  # Width of the sampling window, expressed in seconds.
  window_secs <- difftime(window_end, window_start, units = "secs")
  offsets <- runif(n, 0, window_secs)

  as.POSIXct(window_start + offsets, origin = "1970-01-01")
}

In [5]:
# Simulate when the client answers a challenge.
#
# The delay is a single normal draw whose mean is the `avg_response_time`
# entry for `challenge` and whose standard deviation is one fifth of that
# mean; it is added to `request_time` as a number of seconds.
#
# Args:
#   request_time: POSIXct time at which the challenge was requested.
#   challenge: challenge type, used as a name into `avg_response_time`.
#
# Returns:
#   `request_time` shifted by the simulated delay.
generate_response_time <- function(request_time, challenge) {
  mean_secs <- as.numeric(avg_response_time[challenge])
  delay_secs <- rnorm(n = 1, mean = mean_secs, sd = mean_secs / 5)
  request_time + as.difftime(delay_secs, units = "secs")
}

In [6]:
# Pick the phone prefix an SMS challenge is sent to.
#
# Args:
#   challenge: challenge type (a single string).
#
# Returns:
#   One element of `number_prefixes`, drawn uniformly, when `challenge` is
#   "SMS"; NA for every other challenge type.
generate_phone_prefix <- function(challenge) {
  # Only SMS challenges carry a phone prefix.
  if (challenge != "SMS") {
    return(NA)
  }
  sample(number_prefixes, size = 1, replace = TRUE)
}

In [7]:
# Pick the protocol an EMAIL challenge is served over.
#
# The protocol is drawn from `protocols` with probability 0.1 for the first
# entry and 0.9 for the second. A single draw is enough: the previous
# implementation wrapped it in a second sample(), which only shuffled a
# length-1 vector (a no-op on the value that still consumed a random draw).
#
# Args:
#   challenge: challenge type (a single string).
#
# Returns:
#   One element of `protocols` when `challenge` is "EMAIL"; NA for every
#   other challenge type.
generate_protocol <- function(challenge) {
  if (challenge == "EMAIL") {
    return(sample(protocols, size = 1, replace = TRUE, prob = c(0.1, 0.9)))
  }
  NA
}

In [8]:
# Decide whether a challenge is accepted.
#
# A challenge is refused when any of the following holds:
#   - more than 3 attempts were needed;
#   - it is an SMS challenge sent to prefix "+212" or "+213";
#   - it is an EMAIL challenge served over "HTTP".
# Everything else is accepted.
#
# Args:
#   challenge: challenge type (a single string).
#   phone_prefix: phone prefix; only consulted for SMS challenges.
#   protocol: protocol; only consulted for EMAIL challenges.
#   attempt_count: number of attempts the client needed.
#
# Returns:
#   "Refused" or "Accepted".
generate_decision <- function(challenge, phone_prefix, protocol, attempt_count) {
  if (attempt_count > 3) {
    return("Refused")
  }

  if (challenge == "SMS") {
    if (phone_prefix == "+212" || phone_prefix == "+213") {
      return("Refused")
    }
  } else if (challenge == "EMAIL") {
    if (protocol == "HTTP") {
      return("Refused")
    }
  }

  "Accepted"
}

In [9]:
# Lower bound of each age group, plus Inf to close the last one. Used with
# left-closed intervals, i.e. [0, 20) -> "<20", [20, 25) -> "20-24", ...,
# [60, Inf) -> ">=60", so that every boundary age lands in the group whose
# label names it (20 belongs to "20-24", not "<20") and age 0 is covered.
bounds <- c(0, 20, 25, 30, 35, 40, 45, 50, 55, 60, Inf)

# Draw the number of attempts a client of a given age needs.
#
# The age is mapped to its age group and one value of `attempt_counts` is
# sampled using that group's probabilities from `attempt_count_probs`.
#
# Args:
#   age: age of the client (a single number).
#
# Returns:
#   One element of `attempt_counts`. When `age` falls in no group (NA or
#   negative) no weights are available and the draw is uniform.
generate_attempt_count <- function(age) {
  age_group <- cut(age, breaks = bounds, labels = age_groups, right = FALSE)

  # Look the weights up by label: indexing the list with the factor itself
  # would use its integer code and only works while the list order happens
  # to match the factor levels.
  weights <- if (is.na(age_group)) {
    NULL
  } else {
    attempt_count_probs[[as.character(age_group)]]
  }

  sample(attempt_counts, size = 1, replace = TRUE, prob = weights)
}

In [10]:
# Build one synthetic challenge record per row of the base population file.
# Each column of the final data set is prepared here as its own variable.
temp_data <- read.csv(
  "../workingsets/temp_age_gender_status_salary_bank_activity_amount_challenge.csv",
  header = TRUE,
  # Keep text columns as character on every R version. If `challenge` were a
  # factor, using it to index `avg_response_time` would select by level code
  # instead of by name.
  stringsAsFactors = FALSE
)
n_rows <- nrow(temp_data)
row_ids <- seq_len(n_rows)

# One UUID per call, returned as a character vector of length `n`.
generate_uuids <- function(n) {
  vapply(seq_len(n), function(i) UUIDgenerate(), character(1))
}

Id <- generate_uuids(n_rows)
ChallengeId <- generate_uuids(n_rows)
BankId <- temp_data$bank
ClientId <- generate_uuids(n_rows)
# NOTE(review): birth dates are drawn independently of `Age`, so the two
# columns are not consistent with each other - confirm this is acceptable.
ClientBirthDate <- generate_birth_date(n_rows)
Age <- temp_data$age
ChallengeType <- temp_data$challenge
RequestTime <- generate_request_time(n_rows)
# Collect the response times as plain seconds since the epoch and rebuild the
# POSIXct vector with an explicit origin: combining POSIXct values with
# unlist() drops their class, and as.POSIXct() on bare numbers without
# `origin` is an error before R 4.3.
ResponseTime <- as.POSIXct(
  vapply(
    row_ids,
    function(i) as.numeric(generate_response_time(RequestTime[i], ChallengeType[i])),
    numeric(1)
  ),
  origin = "1970-01-01"
)
# The decision follows the response by a normally distributed delay
# (mean 3 s, sd 1 s).
DecisionTime <- ResponseTime +
  as.difftime(rnorm(n = n_rows, mean = 3, sd = 1), units = "secs")
RequestTime <- as.data.frame(RequestTime)
ResponseTime <- as.data.frame(ResponseTime)
DecisionTime <- as.data.frame(DecisionTime)
Gender <- temp_data$gender
# The generators return a logical NA when the field does not apply;
# as.character() keeps these columns character whatever the mix of rows.
PhonePrefix <- vapply(
  row_ids,
  function(i) as.character(generate_phone_prefix(ChallengeType[i])),
  character(1)
)
Protocol <- vapply(
  row_ids,
  function(i) as.character(generate_protocol(ChallengeType[i])),
  character(1)
)
AttemptCount <- vapply(
  row_ids,
  function(i) generate_attempt_count(Age[i]),
  numeric(1)
)
# Stored as the strings "TRUE"/"FALSE" (not logicals) to keep the exported
# column type unchanged.
MaxAttemptsReached <- ifelse(AttemptCount > 3, "TRUE", "FALSE")
Decision <- vapply(
  row_ids,
  function(i) {
    generate_decision(ChallengeType[i], PhonePrefix[i], Protocol[i], AttemptCount[i])
  },
  character(1)
)

## 4. Export challenges

In [11]:
# Assemble the generated columns into one data frame and export it twice:
# as a CSV file and as documents in a MongoDB collection.
challenges <- data.frame(
  Id, ChallengeId, BankId, ClientId, ClientBirthDate, Age, ChallengeType,
  RequestTime, ResponseTime, DecisionTime, Gender, Decision,
  PhonePrefix, Protocol, AttemptCount, MaxAttemptsReached
)
write.csv(challenges, file = "../workingsets/challenges.csv")

# The connection string carries the database user and password, so it is read
# from the MONGO_URL environment variable instead of being committed with the
# code, e.g. MONGO_URL="mongodb://<user>:<password>@localhost:27018".
mongo_url <- Sys.getenv("MONGO_URL")
if (!nzchar(mongo_url)) {
  stop(
    "Set the MONGO_URL environment variable to the MongoDB connection string.",
    call. = FALSE
  )
}

con <- mongo(collection = "BD_OPER_PROC_AUTH", db = "bandit-nbs", url = mongo_url)
# insert() returns a summary of the write (inserted count, write errors).
con$insert(challenges)

List of 5
 $ nInserted  : num 7980
 $ nMatched   : num 0
 $ nRemoved   : num 0
 $ nUpserted  : num 0
 $ writeErrors: list()