In [1]:
import numpy as np

# --- Inputs read off the probability tree (prob_tree_diagram.png) ---

# Priors: probability that a card comes from manufacturing process A, B or C.
P_A, P_B, P_C = 0.5, 0.3, 0.2

# Likelihoods for each process, written as (defective, not defective) pairs.
# For example, P_D_given_A is P(D|A): the probability that a card is
# defective given that it came from process A.
P_D_given_A, P_NotD_given_A = 0.03, 0.97
P_D_given_B, P_NotD_given_B = 0.02, 0.98
P_D_given_C, P_NotD_given_C = 0.04, 0.96

print("Probabilities defined and ready for Bayes' Theorem calculations.")

Probabilities defined and ready for Bayes' Theorem calculations.


In [2]:
# --- Calculation for 1(a) ---

# Joint probability of "made by process A and defective": P(D|A) * P(A).
# It is both one term of P(D) and the numerator of Bayes' Theorem.
P_A_and_D = P_D_given_A * P_A

# Law of total probability: add up the defective share of every process.
P_D = P_A_and_D + (P_D_given_B * P_B) + (P_D_given_C * P_C)

# Bayes' Theorem: P(A|D) = P(D|A) * P(A) / P(D)
P_A_given_D = P_A_and_D / P_D

# One print call, one line per argument (same text as printing them one by one).
print(
    "--- Answer 1(a) ---",
    "Question: If a randomly chosen graphics card is defective, what is the probability it was manufactured using Process A?",
    f"1. Total Probability of Defective, P(D): {P_D:.4f}",
    f"2. Probability P(A|D): {P_A_given_D:.4f}",
    f"\nAnswer: The probability that a defective card was manufactured using Process A is approximately {P_A_given_D:.4f}.",
    sep="\n",
)

--- Answer 1(a) ---
Question: If a randomly chosen graphics card is defective, what is the probability it was manufactured using Process A?
1. Total Probability of Defective, P(D): 0.0290
2. Probability P(A|D): 0.5172

Answer: The probability that a defective card was manufactured using Process A is approximately 0.5172.


In [3]:
# --- Calculation for 1(b) ---

# P_D is reused exactly as computed in 1(a). It is deliberately NOT re-assigned
# to a hand-typed, rounded literal here: doing so would throw away precision and
# would silently go stale if any of the input probabilities were changed.

# Step 1: Total Probability of Not Defective via the complement rule,
# P(Not D) = 1 - P(D)
P_NotD = 1 - P_D

# Step 2: Calculate P(C|Not D) using Bayes' Theorem:
# P(C|Not D) = P(Not D|C) * P(C) / P(Not D)
P_C_given_NotD = (P_NotD_given_C * P_C) / P_NotD

print("--- Answer 1(b) ---")
print("Question: If a randomly chosen graphics card is not defective, what is the probability it was manufactured using Process C?")
print(f"1. Total Probability of Not Defective, P(Not D): {P_NotD:.4f}")
print(f"2. Probability P(C|Not D): {P_C_given_NotD:.4f}")
print(f"\nAnswer: The probability that a non-defective card was manufactured using Process C is approximately {P_C_given_NotD:.4f}.")

--- Answer 1(b) ---
Question: If a randomly chosen graphics card is not defective, what is the probability it was manufactured using Process C?
1. Total Probability of Not Defective, P(Not D): 0.9710
2. Probability P(C|Not D): 0.1977

Answer: The probability that a non-defective card was manufactured using Process C is approximately 0.1977.


In [4]:
# --- Function for Problem 2 ---

def entropy_function(probabilities):
    """
    Calculates the Shannon entropy (in bits) of a discrete probability distribution.

    H(X) = - sum(p_i * log2(p_i)), where 0 * log2(0) is treated as 0.

    Parameters
    ----------
    probabilities : array-like of float
        Probabilities of the individual outcomes. Zero entries are allowed and
        contribute nothing to the result. The values are not checked to sum to 1.

    Returns
    -------
    numpy.float64
        The entropy in bits. It is 0.0 when a single outcome has probability 1.

    Raises
    ------
    ValueError
        If any entry is negative or NaN.
    """
    P = np.asarray(probabilities, dtype=float)

    # Reject invalid entries explicitly. The "P > 0" mask below would otherwise
    # drop negative and NaN values silently and return a misleading number.
    if np.any(np.isnan(P)) or np.any(P < 0):
        raise ValueError("probabilities must be non-negative numbers")

    # Filter out probabilities that are zero (0 * log(0) is treated as 0)
    P = P[P > 0]

    # np.log2() is used for the log base 2. Adding 0.0 turns the negative zero
    # produced by negating a sum of exactly 0 into a plain 0.0.
    entropy = -np.sum(P * np.log2(P)) + 0.0

    return entropy

# Report for Problem 2: one print call, one output line per argument.
print(
    "--- Answer 2 ---",
    "Question: Create a Python function that takes in an array of probabilities and returns the entropy.",
    "Answer: The Python function 'entropy_function(probabilities)' has been successfully defined, using numpy to handle the entropy calculation with log base 2.",
    sep="\n",
)

--- Answer 2 ---
Question: Create a Python function that takes in an array of probabilities and returns the entropy.
Answer: The Python function 'entropy_function(probabilities)' has been successfully defined, using numpy to handle the entropy calculation with log base 2.


In [5]:
# --- Calculation for 3(a) ---

# X spreads its probability evenly over five outcomes;
# Y distributes it unevenly over five outcomes.
P_X = [0.2] * 5
P_Y = [0.1, 0.4, 0.1, 0.3, 0.1]

# Feed both distributions through the entropy function defined for Problem 2
H_X, H_Y = (entropy_function(dist) for dist in (P_X, P_Y))

# One print call, one line per argument (same text as printing them one by one).
print(
    "--- Answer 3(a) ---",
    "Question: Use the function created in the previous problem to calculate the entropies of X and Y.",
    f"1. Entropy of X, H(X): {H_X:.4f} bits",
    f"2. Entropy of Y, H(Y): {H_Y:.4f} bits",
    f"\nAnswer: The entropy of X is approximately {H_X:.4f} bits, and the entropy of Y is approximately {H_Y:.4f} bits.",
    sep="\n",
)

--- Answer 3(a) ---
Question: Use the function created in the previous problem to calculate the entropies of X and Y.
1. Entropy of X, H(X): 2.3219 bits
2. Entropy of Y, H(Y): 2.0464 bits

Answer: The entropy of X is approximately 2.3219 bits, and the entropy of Y is approximately 2.0464 bits.


In [6]:
# --- Answer 3(b) ---

# H_X and H_Y are used exactly as computed in 3(a). They are deliberately NOT
# re-typed as rounded literals, so the comparison below always reflects the
# distributions that were actually evaluated.

# The text printed below states that H(X) is the larger value; stop here with a
# clear message if the computed entropies ever disagree with that statement.
assert H_X > H_Y, "H(X) is expected to exceed H(Y) for the distributions in 3(a)"

print("--- Answer 3(b) ---")
print("Question: Compare the two values found in part (a). Which one is bigger? Explain intuitively why this is the case.")
print(f"1. Comparison: The entropy of X, H(X) = {H_X:.4f} bits, is **bigger** than the entropy of Y, H(Y) = {H_Y:.4f} bits.")
print("\nAnswer: H(X) is bigger because the uniform distribution of X represents the maximum possible uncertainty (all outcomes equally likely), while the non-uniform distribution of Y is more predictable and therefore has lower entropy.")

--- Answer 3(b) ---
Question: Compare the two values found in part (a). Which one is bigger? Explain intuitively why this is the case.
1. Comparison: The entropy of X, H(X) = 2.3219 bits, is **bigger** than the entropy of Y, H(Y) = 2.0464 bits.

Answer: H(X) is bigger because the uniform distribution of X represents the maximum possible uncertainty (all outcomes equally likely), while the non-uniform distribution of Y is more predictable and therefore has lower entropy.


I've completed the exercise for the Math in Data Science course, applying Bayes' Theorem to calculate conditional probabilities and implementing an entropy function to measure a distribution's uncertainty. This work confirms my understanding that, among distributions over the same number of outcomes, the uniform distribution has the highest entropy because every outcome is equally likely, while non-uniform distributions are more predictable and therefore have lower entropy.