## Question 2.a

In [None]:
estimate_test_size <- function(m = 95, n = 100, p = 0.5, q = 0.5, alpha = 0.1, num_simulations = 10000) {
  # estimate the empircial size of the chi-squared test for independence
  rejections <- 0
  
  for (i in 1:num_simulations) {
    # Simulate data
    S_x <- rbinom(1, m, p)
    S_y <- rbinom(1, n, q)
    F_x <- m - S_x
    F_y <- n - S_y
    
    # Contingency table
    table <- matrix(c(S_x, F_x, S_y, F_y), nrow = 2, byrow = TRUE)
    
    # Perform chi-squared test
    test <- suppressWarnings(chisq.test(table, correct = FALSE))
    
    # Reject H0 if p-value < alpha
    if (test$p.value < alpha) {
      rejections <- rejections + 1
    }
  }
  
  empirical_size <- rejections / num_simulations
  return(empirical_size)
}

In [12]:
m <- 95
n <- 100
pi_vals <- seq(0.1, 0.9, by = 0.1)
alpha <- 0.1
num_simulations <- 10000

empirical_sizes <- numeric(length(pi_vals))
for (i in seq_along(pi_vals)) {
  empirical_sizes[i] <- estimate_test_size(m, n, pi_vals[i], pi_vals[i], alpha, num_simulations)
  cat(sprintf("True probability (pi): %.2f, Empirical size: %.4f\n", pi_vals[i], empirical_sizes[i]))
}

True probability (pi): 0.10, Empirical size: 0.1018
True probability (pi): 0.20, Empirical size: 0.1009
True probability (pi): 0.30, Empirical size: 0.1059
True probability (pi): 0.40, Empirical size: 0.0942
True probability (pi): 0.50, Empirical size: 0.1033
True probability (pi): 0.60, Empirical size: 0.0941
True probability (pi): 0.70, Empirical size: 0.0980
True probability (pi): 0.80, Empirical size: 0.1067
True probability (pi): 0.90, Empirical size: 0.1043


## Question 2.b

Here I will find the m that minimize the function estimate_test_size with 10 000 simulations.

In [None]:
f_m <- function(m) {
  abs(estimate_test_size(m = round(m), n = round(m), p = 0.525, q = 0.475, alpha = 0.1, num_simulations = 50000) - 0.85)
}

result <- optimize(f_m, interval = c(100, 2000), maximum = FALSE)
cat(sprintf("Optimal m: %.2f\n", ceiling(result$minimum)))

Optimal m: 1448.00


## Question 2.c

In [20]:
f_c <- function(c) {
  # estimate the empircial size of the chi-squared test for independence
  rejections <- 0
  num_simulations <- 50000
  m <- 95
  n <- 100

  pi_vals <- seq(0.1, 0.9, by = 0.1)
  empirical_sizes <- numeric(length(pi_vals))

  for (j in seq_along(pi_vals)) {
    # initialize the probability values
    p <- pi_vals[j]
    q <- pi_vals[j]

    # reset the rejection count for each probability value
    rejections <- 0

    for (i in 1:num_simulations) {
      # Simulate data
      S_x <- rbinom(1, m, p)
      S_y <- rbinom(1, n, q)
      F_x <- m - S_x
      F_y <- n - S_y
      
      # Contingency table
      table <- matrix(c(S_x, F_x, S_y, F_y), nrow = 2, byrow = TRUE)
      
      # Perform chi-squared test
      test <- suppressWarnings(chisq.test(table, correct = FALSE))
      
      # Reject H0 if test statistic is greater than c
      if (test$statistic >= c) {
        rejections <- rejections + 1
      }
    }
    empirical_sizes[j] <- rejections / num_simulations
  }

  # Calculate the empirical size (we need it to be as close as possible to 0.1)
  to_optimize <- abs(max(empirical_sizes) - 0.1)
  return(to_optimize)
}

# optimize the critical value c such that the empirical size is 0.1
c_hat = optimize(f_c, interval = c(2.5, 2.9), maximum = FALSE)

In [21]:
cat(sprintf("Optimal critical value c: %.4f\n", c_hat$minimum))
cat(f_c(c_hat$minimum))

Optimal critical value c: 2.7452


0.00334

For 50 000 simulations:

- Optimal critical value is $c = 2.7452$.
- The size of the test is $0.1 \pm 0.0033$.