In [9]:
# Read the inference log of one day and summarise its approval rate.
#
# date: a Date (or a "YYYY-MM-DD" string) selecting the file
#       log/log_<date>.json, resolved relative to the working directory.
#
# Returns a list with
#   inferences   - the parsed JSON, unchanged
#   n_inferences - number of logged outputs
#   percentage   - share of outputs equal to 2 (approved), truncated to a
#                  whole percent; NA when the log holds no outputs
#
# Stops with an error when the log file does not exist.
read_log <- function(date) {
  # Load required packages
  library(jsonlite)

  # Accept plain date strings as well as Date objects
  if (is.character(date)) {
    date <- as.Date(date)
  }
  date <- format(date, "%Y-%m-%d")

  file_path <- paste0('log/log_', date, '.json')

  # fromJSON() treats a string that is not an existing file as JSON text,
  # which turns a missing log into a confusing parse error; fail clearly.
  if (!file.exists(file_path)) {
    stop("Log file not found: ", file_path, call. = FALSE)
  }
  data <- fromJSON(file_path)

  # Count the logged outputs themselves: length(data) would only give the
  # number of top-level fields of the log, not the number of inferences.
  outputs <- unlist(data$output)
  n_inferences <- length(outputs)
  approved <- sum(outputs == 2)
  percentage <- as.integer(approved / n_inferences * 100)

  list(inferences = data, n_inferences = n_inferences, percentage = percentage)
}


In [10]:
# Draw side-by-side pie charts of the credit decisions for male and female
# applicants and save them as "gender.png" in the working directory.
#
# data: parsed log with `input` (one record per inference, each holding a
#       `Gender` field) and `output` (one decision per inference).
#       Output 1 is shown as "Denied" and output 2 as "Accepted".
#
# Returns the value of dev.off().
#
# NOTE: a later cell defines gender_credits() again; when the cells are run in
# order, that later definition replaces this one.
gender_credits <- function(data) {
  outcome_labels <- c("Denied", "Accepted")

  # Extract Gender information from data (NA when a record has no Gender)
  genders <- vapply(
    data$input,
    function(record) {
      value <- record[["Gender"]]
      if (is.null(value)) NA_character_ else as.character(value)
    },
    character(1)
  )
  outputs <- unlist(data$output)

  # Always count both outcomes, in a fixed order, so that labels and colours
  # stay attached to the right outcome even when one of them never occurs.
  count_outcomes <- function(gender) {
    tabulate(match(outputs[genders %in% gender], c(1, 2)), nbins = 2L)
  }
  male_counts <- count_outcomes("male")
  female_counts <- count_outcomes("female")

  # Draw one panel: pie (or an empty panel), legend and total caption
  draw_panel <- function(counts, heading, palette, caption) {
    present <- counts > 0
    if (any(present)) {
      pie(counts[present], main = heading, col = palette[present],
          labels = outcome_labels[present])
    } else {
      # pie() cannot draw an all-zero vector; keep an empty panel that uses
      # the same coordinate range so legend and caption land as usual.
      plot.new()
      plot.window(xlim = c(-1, 1), ylim = c(-1, 1), asp = 1)
      title(main = heading)
    }
    legend("topright", legend = outcome_labels, fill = palette)
    text(x = 0, y = -0.9, labels = caption, cex = 0.8)
  }

  # Set up the plot
  png("gender.png", width = 800, height = 400)
  # Close the device even if plotting fails, so no device is left open
  device_open <- TRUE
  on.exit(if (device_open) dev.off(), add = TRUE)
  par(mfrow = c(1, 2), mar = c(1, 1, 3, 3))

  draw_panel(male_counts, "Male", c("#9966ff", "#99ccff"),
             paste("Total Males:", sum(male_counts)))
  draw_panel(female_counts, "Female", c("#ff6666", "#ff99cc"),
             paste("Total Females:", sum(female_counts)))

  # Save the plot
  device_open <- FALSE
  dev.off()
}


In [11]:
# Draw side-by-side pie charts of the credit decisions for male and female
# applicants, with totals and percentages, and save them as
# "tex/media/gender.png" (the directory is created when missing).
# The female panel is tinted and marked as the protected attribute.
#
# data: parsed log with `input` (one record per inference, each holding a
#       `Gender` field) and `output` (one decision per inference).
#       Output 1 is shown as denied and output 2 as accepted.
#
# Returns the value of dev.off().
gender_credits <- function(data) {
  outcome_labels <- c("Denied", "Accepted")
  male_palette <- c("#9966ff", "#99ccff")
  female_palette <- c("#ff6666", "#ff99cc")

  # Extract Gender information from data (NA when a record has no Gender)
  genders <- vapply(
    data$input,
    function(record) {
      value <- record[["Gender"]]
      if (is.null(value)) NA_character_ else as.character(value)
    },
    character(1)
  )
  outputs <- unlist(data$output)

  # Always count both outcomes, in a fixed order, so that labels, colours and
  # percentages stay attached to the right outcome even when one is absent.
  count_outcomes <- function(gender) {
    tabulate(match(outputs[genders %in% gender], c(1, 2)), nbins = 2L)
  }
  # Percentages per outcome; zeros instead of NaN for an empty group
  percent_of <- function(counts) {
    if (sum(counts) > 0) counts / sum(counts) * 100 else c(0, 0)
  }
  # Caption text below a pie: total plus denied/accepted percentages
  caption_for <- function(prefix, counts) {
    shares <- percent_of(counts)
    paste(prefix, sum(counts), "(", round(shares[1], 2), "% Denied, ", round(shares[2], 2), "% Accepted)")
  }
  # Pie of the outcomes that occur, or an empty panel when there are none
  draw_pie <- function(counts, heading, palette) {
    present <- counts > 0
    if (any(present)) {
      pie(counts[present], main = heading, col = palette[present],
          labels = outcome_labels[present])
    } else {
      # pie() cannot draw an all-zero vector; keep the coordinate range
      plot.new()
      plot.window(xlim = c(-1, 1), ylim = c(-1, 1), asp = 1)
      title(main = heading)
    }
  }

  male_counts <- count_outcomes("male")
  female_counts <- count_outcomes("female")

  # Set up the plot
  out_file <- "tex/media/gender.png"
  dir.create(dirname(out_file), recursive = TRUE, showWarnings = FALSE)
  png(out_file, width = 800, height = 400)
  # Close the device even if plotting fails, so no device is left open
  device_open <- TRUE
  on.exit(if (device_open) dev.off(), add = TRUE)
  par(mfrow = c(1, 2), mar = c(1, 1, 3, 3))

  # Plot for males
  draw_pie(male_counts, "Male", male_palette)
  legend("topright", legend = outcome_labels, fill = male_palette)
  text(x = 0, y = -0.9, labels = caption_for("Total Males:", male_counts), cex = 0.8)

  # Plot for females
  draw_pie(female_counts, "Female", female_palette)

  # Add rectangle to indicate protected attribute
  rect(xleft = -3.5, xright = 3.5, ybottom = -3.5, ytop = 3.5, col = rgb(0, 1, 0, 0.1), border = NA)

  # The legend swatches use the pie's own colours so they match the slices
  legend("topright", legend = c("Denied", "Approved", "Protected Attribute"), fill = c(female_palette, rgb(0, 1, 0, 0.2)), title = "Legend")
  text(x = 0, y = -0.9, labels = caption_for("Total Females:", female_counts), cex = 0.8)

  # Save the plot
  device_open <- FALSE
  dev.off()
}


In [12]:
# Bar chart of credit decisions grouped by the `Foreign.worker` field, saved
# as "tex/media/foreign.png" (the directory is created when missing).
#
# data: parsed log with `input` (one record per inference, each holding a
#       `Foreign.worker` field) and `output` (one decision per inference;
#       1 is drawn as denied, 2 as approved).
#
# Percentages are taken per outcome (margin = 1): the bars of one outcome sum
# to 100 across the categories.
# NOTE(review): marital_credits() normalises per category (margin = 2)
# instead; confirm which of the two is intended here.
#
# Returns the value of dev.off().
foreign_credits <- function(data) {
  outcome_colors <- c("#ff6666", "#99ff99")
  protected_tint <- rgb(0, 1, 0, 0.2)

  # Extract foreign-worker status (NA when a record lacks it) and outputs
  categories <- vapply(
    data$input,
    function(record) {
      value <- record[["Foreign.worker"]]
      if (is.null(value)) NA_character_ else as.character(value)
    },
    character(1)
  )
  outputs <- unlist(data$output)

  # Replace '/' with newline character
  categories <- gsub("/", "\n", categories)

  # Outcome (rows) by category (columns). Fixing the outcome levels keeps two
  # bars per category, so colours stay attached to the right outcome even
  # when one outcome never occurs.
  count_data <- table(
    Output = factor(outputs, levels = c(1, 2)),
    Category = categories
  )

  # Calculate percentages; an outcome with no cases gives 0/0, shown as 0
  count_data_percent <- prop.table(count_data, margin = 1) * 100
  count_data_percent[is.nan(count_data_percent)] <- 0

  out_file <- "tex/media/foreign.png"
  dir.create(dirname(out_file), recursive = TRUE, showWarnings = FALSE)
  png(out_file, width = 400, height = 400)
  # Close the device even if plotting fails, so no device is left open
  device_open <- TRUE
  on.exit(if (device_open) dev.off(), add = TRUE)

  # Plot the barplot; the legend is drawn once, below, with all entries
  barplot(as.matrix(count_data_percent), beside = TRUE,
          col = outcome_colors,
          main = "Percentage of outputs by category",
          xlab = "Category", ylab = "Percentage")

  # Add rectangle to indicate protected attribute
  rect(xleft = 3.75, xright = 8.75, ybottom = -5, ytop = 100, col = protected_tint, border = NA)

  # Legend including the protected attribute
  legend("topright", legend = c("Denied", "Approved", "Protected Attribute"), fill = c(outcome_colors, protected_tint), title = "Legend")

  device_open <- FALSE
  dev.off()
}


In [13]:
# Bar chart of credit decisions grouped by the `Marital.Status` field, saved
# as "tex/media/marital.png" (the directory is created when missing).
#
# data: parsed log with `input` (one record per inference, each holding a
#       `Marital.Status` field) and `output` (one decision per inference;
#       1 is drawn as denied, 2 as approved).
#
# Percentages are taken per category (margin = 2): the denied and approved
# bars of one marital status sum to 100.
#
# Returns the value of dev.off().
marital_credits <- function(data) {
  outcome_colors <- c("#ff6666", "#99ff99")
  protected_tint <- rgb(0, 1, 0, 0.2)

  # Extract marital status (NA when a record lacks it) and outputs
  categories <- vapply(
    data$input,
    function(record) {
      value <- record[["Marital.Status"]]
      if (is.null(value)) NA_character_ else as.character(value)
    },
    character(1)
  )
  outputs <- unlist(data$output)

  # Replace '/' with newline character
  categories <- gsub("/", "\n", categories)

  # Outcome (rows) by category (columns). Fixing the outcome levels keeps two
  # bars per category, so colours stay attached to the right outcome even
  # when one outcome never occurs.
  count_data <- table(
    Output = factor(outputs, levels = c(1, 2)),
    Category = categories
  )

  # Calculate column-wise percentages; a category without usable outputs
  # gives 0/0, shown as 0
  col_percentages <- prop.table(count_data, margin = 2) * 100
  col_percentages[is.nan(col_percentages)] <- 0

  out_file <- "tex/media/marital.png"
  dir.create(dirname(out_file), recursive = TRUE, showWarnings = FALSE)
  png(out_file, width = 600, height = 400)
  # Close the device even if plotting fails, so no device is left open
  device_open <- TRUE
  on.exit(if (device_open) dev.off(), add = TRUE)
  par(mar = c(5, 4, 4, 6))  # Adjust the margin to accommodate the legend

  # Plot the barplot; the legend is drawn once, below, with all entries
  barplot(as.matrix(col_percentages), beside = TRUE,
          col = outcome_colors,
          main = "Percentage of outputs by marital status",
          xlab = "Marital Status", ylab = "Percentage")

  # Add rectangles to indicate protected attribute
  rect(xleft = 0, xright = 3.5, ybottom = 0, ytop = 100, col = protected_tint, border = NA)
  rect(xleft = 6.5, xright = 10, ybottom = 0, ytop = 100, col = protected_tint, border = NA)

  # Legend including the protected attribute
  legend("topright", legend = c("Denied", "Approved", "Protected Attribute"), fill = c(outcome_colors, protected_tint), title = "Legend")

  device_open <- FALSE
  dev.off()
}

In [14]:
# Bar chart of credit decisions grouped by the `Job` field, saved as
# "tex/media/jobs.png" (the directory is created when missing).
#
# data: parsed log with `input` (one record per inference, each holding a
#       `Job` field) and `output` (one decision per inference; 1 is drawn as
#       denied, 2 as approved).
#
# Percentages are taken per outcome (margin = 1): the bars of one outcome sum
# to 100 across the job categories.
# NOTE(review): marital_credits() normalises per category (margin = 2)
# instead; confirm which of the two is intended here.
#
# Returns the value of dev.off().
job_credits <- function(data) {
  outcome_colors <- c("#ff6666", "#99ff99")
  protected_tint <- rgb(0, 1, 0, 0.2)

  # Extract job categories (NA when a record lacks one) and outputs
  categories <- vapply(
    data$input,
    function(record) {
      value <- record[["Job"]]
      if (is.null(value)) NA_character_ else as.character(value)
    },
    character(1)
  )
  outputs <- unlist(data$output)

  # Replace spaces with newline character
  categories <- gsub(" ", "\n", categories)

  # Outcome (rows) by category (columns). Fixing the outcome levels keeps two
  # bars per category, so colours stay attached to the right outcome even
  # when one outcome never occurs.
  count_data <- table(
    Output = factor(outputs, levels = c(1, 2)),
    Category = categories
  )

  # Calculate row-wise percentages; an outcome with no cases gives 0/0,
  # shown as 0
  row_percentages <- prop.table(count_data, margin = 1) * 100
  row_percentages[is.nan(row_percentages)] <- 0

  out_file <- "tex/media/jobs.png"
  dir.create(dirname(out_file), recursive = TRUE, showWarnings = FALSE)
  png(out_file, width = 600, height = 400)
  # Close the device even if plotting fails, so no device is left open
  device_open <- TRUE
  on.exit(if (device_open) dev.off(), add = TRUE)
  par(mar = c(5, 4, 4, 6))  # Adjust the margin to accommodate the legend

  # Plot the barplot with percentages
  barplot(as.matrix(row_percentages), beside = TRUE,
          col = outcome_colors,
          main = "Percentage of outputs by job category",
          xlab = "Job Category", ylab = "Percentage")

  # Add rectangles to indicate protected attribute
  rect(xleft = 6.5, xright = 9.5, ybottom = 0, ytop = 100, col = protected_tint, border = NA)
  rect(xleft = 9.5, xright = 15, ybottom = 0, ytop = 100, col = protected_tint, border = NA)

  # Legend including the protected attribute; xpd lets it extend past the
  # plot region
  legend("topright", legend = c("Denied", "Approved", "Protected Attribute"), fill = c(outcome_colors, protected_tint), title = "Legend", xpd = TRUE)

  device_open <- FALSE
  dev.off()
}


In [15]:
# Bar chart of every logged decision in sequence, one bar per inference,
# saved as "tex/media/sequence.png" (the directory is created when missing).
#
# data: parsed log with `time` (one timestamp per inference, used as bar
#       label) and `output` (one decision per inference, used as bar height).
#       Bars with output 1 are red ("Denied"); all others are green.
#
# Returns the value of dev.off().
credit_inference <- function(data) {
  denied_color <- "#ff6666"
  approved_color <- "#99ff99"

  dates <- unlist(data$time)
  outputs <- as.numeric(unlist(data$output))

  # Replace ' ' with newline character in dates
  dates <- gsub(" ", "\n", dates)

  # Define colors based on output values
  bar_colors <- ifelse(outputs == 1, denied_color, approved_color)

  # Create the bar chart
  out_file <- "tex/media/sequence.png"
  dir.create(dirname(out_file), recursive = TRUE, showWarnings = FALSE)
  png(out_file, width = 800, height = 400)
  # Close the device even if plotting fails (e.g. an empty log), so no
  # device is left open
  device_open <- TRUE
  on.exit(if (device_open) dev.off(), add = TRUE)

  barplot(outputs, names.arg = dates, col = bar_colors,
          xlab = 'Time', ylab = 'Denied or Approved',
          main = 'Inferences over time')

  # Update legend
  legend("topright", legend = c("Denied", "Approved"), fill = c(denied_color, approved_color))

  device_open <- FALSE
  dev.off()
}


In [16]:
# Build every report figure from the log of a single, hard-coded day.
date <- as.Date("2024-03-20")
# read_log() returns the parsed log together with summary counts; only the
# parsed inferences are handed to the plotting functions below.
read_data <- read_log(date)
# Each call writes one PNG file; the output path is set inside each function.
gender_credits(read_data$inferences)
foreign_credits(read_data$inferences)
marital_credits(read_data$inferences)
job_credits(read_data$inferences)
credit_inference(read_data$inferences)