##Question 1 - 2##

In [30]:
# Imports
import numpy as np

# Prepare to record the result:
min_arr = []
rand_arr = []
fst_arr = []

# Run experiments
for _ in range(100000):
  # Create Matrix to store coin flip results
  coin_mat = np.random.choice([0, 1], (1000,10))

  coin_frac = np.sum(coin_mat, axis = 1)/10

  v_min = np.min(coin_frac)
  v_rand = coin_frac[np.random.randint(0, 1000)]
  v_1 = coin_frac[0]

  min_arr.append(v_min)
  rand_arr.append(v_rand)
  fst_arr.append(v_1)

print("Average values for v_1, v_rand, v_min are:",
      np.mean(fst_arr), np.mean(rand_arr), np.mean(min_arr))

Average values for v_1, v_rand, v_min are: 0.49985700000000005 0.4993109999999999 0.03776


As we can see from the simulation above, the average value of $\nu_{min}$ is cloest to $0.01$.   
Hence, I choose $[b]$ for Question $1$.  
$c_1, c_{rand}$ satisfies the Hoeffding Inequality because both of them can be considered as an experiment of flipping a single coin (as each coin flip is independent), which is performed $100000$ times. Hence, for them, $\mathbb{P}(|\nu-μ|>ϵ)$ can be effectively bounded by the Hoeffding Inequality.  
However, $c_{min}$ in this case, represents the worst case senario. Since we are considering the minumum of $1000$ coins, we have to consider the union of all of their probabilities. Hence, we can only use the "multiple bin" version of the Hoeffding Inequality to bound the probability of $|\nu - \mu|>ϵ$ in this case.
Hence, I choose $[d]$ for Question $2$.

##Question 3 - 4##
For question $3$, $h$ is going to make an error in the following situation:
  - $A$: $h(\mathrm{x})$ approximates $f(\mathrm{x})$ correctly, but $y\neq f(\mathrm{x})$
  - $B$: $h(\mathrm{x})$ approximates $f(\mathrm{x})$ incorrectly, but $y = f(\mathrm{x})$

Here, we have:
$$\mathbb{P}(A) = (1-\mu)(1-\lambda), \mathbb{P}(B) = \mu \lambda$$
Here $\mathbb{P}(A\cap B) = 0$ because the approximation of $h$ can't be both correct and incorrect. Hence, the probability of making an error is as follows:
$$\mathbb{P}(Error) = \mathbb{P}(A) + \mathbb{P}(B) + 0 = (1-\mu)(1-\lambda) + \mu \lambda$$
Therefore, I choose $[e]$ for Question $3$.  
For question $4$, consider the previous expression of $\mathbb{P}(Error)$,when $\mu = \frac{1}{2}$, we will have the following expression:
$$\mathbb{P}(Error) = (1-\frac{1}{2})(1-\lambda) + \frac{1}{2} \lambda = \frac{1}{2}(1 - λ) + \frac{1}{2}λ = \frac{1}{2}$$
In this case, the probability for $h$ to make an error is $\frac{1}{2}$, regardless of what $\lambda$ is.  
Hence, I choose $[b]$ for Question $4$.


##Question 5 - 6##

In [31]:
# Store E_in and E_out in different experiments
in_arr = []
out_arr = []
N = 100

# Experiment (1000 runs)
for _ in range(1000):
  # Generate lines and samples
  point_head = np.random.uniform(-1, 1, 2)
  point_tail = np.random.uniform(-1, 1, 2)
  a = point_head[1] - point_tail[1]
  b = point_tail[0] - point_head[0]
  c = a * point_head[0] + b * point_head[1]
  sample_points = np.random.uniform(-1, 1, (N, 2))
  dummy_points = np.c_[sample_points[:, 0], sample_points[:, 1], np.ones(N)]
  coeffs = np.array([a, b, c])

  # Create correct classification
  correct_classification = np.sign(np.dot(dummy_points, coeffs))

  # Linear regression
  g_coeff = np.linalg.lstsq(dummy_points, correct_classification, rcond = None)[0]

  # Compute in-sample performance
  approx_result = np.sign(np.dot(dummy_points, g_coeff))
  E_in = np.sum(approx_result != correct_classification)/N
  in_arr.append(E_in)

  # Compute Out-of-sample performance
  new_points = np.random.uniform(-1, 1, (1000, 2))
  new_dummy = np.c_[new_points[:, 0], new_points[:, 1], np.ones(1000)]
  actual_result = np.sign(np.dot(new_dummy, coeffs))
  g_result = np.sign(np.dot(new_dummy, g_coeff))
  E_out = np.sum(g_result != actual_result)/1000
  out_arr.append(E_out)

print("Average E_in when N = ", N, ":",np.mean(in_arr))
print("Average E_out when N = ", N, ":",np.mean(out_arr))


Average E_in when N =  100 : 0.03937
Average E_out when N =  100 : 0.048192000000000006


Based on the simulation result, I choose $[c]$ for Question $5$, and $[c]$ for Question $6$.

##Question 7##

In [32]:
# Set N = 10
N = 10
num_iter = []
for _ in range(1000):
  # Generate lines and samples
  point_head = np.random.uniform(-1, 1, 2)
  point_tail = np.random.uniform(-1, 1, 2)
  a = point_head[1] - point_tail[1]
  b = point_tail[0] - point_head[0]
  c = a * point_head[0] + b * point_head[1]
  sample_points = np.random.uniform(-1, 1, (N, 2))
  dummy_points = np.c_[sample_points[:, 0], sample_points[:, 1], np.ones(N)]
  coeffs = np.array([a, b, c])

  # Create correct classification
  correct_classification = np.sign(np.dot(dummy_points, coeffs))

  # Linear regression
  g_coeff = np.linalg.lstsq(dummy_points, correct_classification, rcond = None)[0]

  # Start PLA
  w = g_coeff
  iter = 0
  flag = False
  while flag == False:
    iter += 1
    # Perceptron
    h_x = np.sign(np.dot(dummy_points, w))

    # Check for misclassification
    mis_idx = []
    for i in range(N):
      if h_x[i] != correct_classification[i]:
        mis_idx.append(i)
    # Exit the run if all points are correctly classified
    if len(mis_idx) == 0:
      flag = True
    else:
      idx = np.random.choice(mis_idx)
      w += correct_classification[idx] * dummy_points[idx]
  num_iter.append(iter)

print("The average number of iterations it takes to converge is:", np.mean(num_iter))

The average number of iterations it takes to converge is: 6.679


Based on the simulation result, I choose $[a]$ for Question $7$.

##Question 8 - 9##


In [33]:
# Define the target function
target_f = lambda x: np.sign(x[0] ** 2 + x[1] ** 2 - 0.6)

# Stores E_in in different experiments
in_arr = []

# Stores the coefficients of candidate functions in the question
prob_arr = np.zeros(5)
coeff_arr = np.array([[-1, -0.05, 0.08, 0.13, 1.5, 1.5],
                      [-1, -0.05, 0.08, 0.13, 1.5, 15],
                      [-1, -0.05, 0.08, 0.13, 15, 1.5],
                      [-1, -1.5, 0.08, 0.13, 0.05, 0.05],
                      [-1, -0.05, 0.08, 1.5, 0.15, 0.15]])
# Perform experiments
for _ in range(1000):
  sample_points = np.random.uniform(-1, 1, (1000, 2))
  correct_output = np.array([target_f(sample_points[i]) for i in range(1000)])
  noised_index = np.random.choice(range(1000), 100, replace = False)
  noised_output = np.array([(-correct_output[i] if i in noised_index else correct_output[i]) for i in range(1000)])

  training_set = np.c_[np.ones(1000), sample_points[:,0], sample_points[:,1]]
  reg_w = np.linalg.lstsq(training_set, noised_output, rcond = None)[0]
  reg_output = np.sign(np.dot(training_set, reg_w))

  E_in = np.sum(reg_output != noised_output)/1000
  in_arr.append(E_in)

  # Non-linear transform
  transferred_set = np.c_[np.ones(1000), sample_points[:,0],
                          sample_points[:,1], sample_points[:,0] * sample_points[:,1],
                          sample_points[:,0] ** 2, sample_points[:,1] ** 2]

  reg_w_tilde = np.linalg.lstsq(transferred_set, noised_output, rcond = None)[0]
  nonlinear_result = np.sign(np.dot(transferred_set, reg_w_tilde))

  # Compare results
  for j in range(0, 5):
    result = np.sign(np.dot(transferred_set, coeff_arr[j]))
    prob_arr[j] += np.mean(nonlinear_result == result)



print("Average E_in:", np.mean(in_arr))
print("Average Probability for each set of coefficients:", prob_arr/1000)


Average E_in: 0.5046430000000001
Average Probability for each set of coefficients: [0.961738 0.663712 0.663259 0.630723 0.559973]


Based on the simulation result, I choose $[d]$ for Question $8$, $[a]$ for Question $9$.

##Question 10##

In [34]:
# Prepare to store E_out
out_arr = []

# Perform 1000 experiments
for _ in range(1000):
  sample_points = np.random.uniform(-1, 1, (1000, 2))
  correct_output = np.array([target_f(sample_points[i]) for i in range(1000)])
  noised_index = np.random.choice(range(1000), 100, replace = False)
  noised_output = np.array([(-correct_output[i] if i in noised_index else correct_output[i]) for i in range(1000)])
  testing_set = np.c_[np.ones(1000), sample_points[:,0],
                          sample_points[:,1], sample_points[:,0] * sample_points[:,1],
                          sample_points[:,0] ** 2, sample_points[:,1] ** 2]
  reg_out = np.sign(np.dot(testing_set, reg_w_tilde))
  E_out = np.sum(reg_out != noised_output)/1000
  out_arr.append(E_out)

print("Average out-of-sample performance:", np.mean(out_arr))

Average out-of-sample performance: 0.120999


Based on the simulation result, I choose $[b]$ for Question $10$.