# Solutions

In [3]:
# Activation (sigmoid) function
import numpy as np 
def sigmoid(x):
    """Return the logistic sigmoid 1 / (1 + e^-x), applied element-wise.

    `x` may be a scalar or any array-like; it is converted to a float
    NumPy array before the exponential is evaluated.
    """
    z = np.array(x, dtype=float)
    denominator = 1 + np.exp(-z)
    return 1 / denominator

# Output (prediction) formula
def output_formula(features, weights, bias):
    """Return the model's prediction for `features`.

    The prediction is the sigmoid of the linear score
    ``dot(features, weights) + bias``.
    """
    linear_score = np.dot(features, weights) + bias
    return sigmoid(linear_score)

# Error (log-loss) formula
def error_formula(y, output):
    """Return the log-loss (binary cross-entropy) of a prediction.

    Computes ``-y*log(p) - (1 - y)*log(1 - p)`` where ``p`` is `output`
    clipped to ``[eps, 1 - eps]`` (``eps`` = float64 machine epsilon).

    Args:
        y: True label(s), 0 or 1 (scalar or array-like).
        output: Predicted probability/probabilities in [0, 1].

    Returns:
        The loss, with the broadcast shape of `y` and `output`.
    """
    # The sigmoid saturates to exactly 0.0 or 1.0 for large |x|. Without
    # clipping, log(0) = -inf turns a perfect prediction into nan
    # (0 * -inf) and a fully wrong one into inf.
    eps = np.finfo(float).eps
    prob = np.clip(output, eps, 1 - eps)
    return -y * np.log(prob) - (1 - y) * np.log(1 - prob)

# Gradient descent step
def update_weights(x, y, weights, bias, learnrate):
    """Perform one gradient-descent step for a single training sample.

    Args:
        x: Feature vector of the sample.
        y: True label of the sample (0 or 1).
        weights: Current weight vector.
        bias: Current bias term.
        learnrate: Step size applied to the gradient.

    Returns:
        A ``(weights, bias)`` tuple holding the updated parameters. The
        arguments passed in are left unmodified.
    """
    output = output_formula(x, weights, bias)
    d_error = y - output
    step = learnrate * d_error
    # Build new objects instead of using `+=`: in-place addition would
    # mutate the caller's array, fail on integer-dtype weights, and extend
    # (rather than add to) a plain Python list.
    new_weights = np.asarray(weights) + step * np.asarray(x)
    new_bias = bias + step
    return new_weights, new_bias

In [5]:
import numpy as np

# --- Function Definitions ---
def sigmoid(x):
    """Activation (sigmoid) function.

    Converts `x` (scalar or array-like) to a float NumPy array and returns
    1 / (1 + e^-x) element-wise.
    """
    values = np.array(x, dtype=float)
    exp_neg = np.exp(-values)
    return 1 / (1 + exp_neg)

def output_formula(features, weights, bias):
    """Output (prediction) formula.

    Applies the sigmoid to the linear score ``dot(features, weights) + bias``.
    """
    score = np.dot(features, weights) + bias
    return sigmoid(score)

def error_formula(y, output):
    """Error (log-loss) formula.

    Computes ``-y*log(p) - (1 - y)*log(1 - p)`` where ``p`` is `output`
    clipped to ``[eps, 1 - eps]`` (``eps`` = float64 machine epsilon).

    Args:
        y: True label(s), 0 or 1 (scalar or array-like).
        output: Predicted probability/probabilities in [0, 1].

    Returns:
        The loss, with the broadcast shape of `y` and `output`.
    """
    # A saturated sigmoid returns exactly 0.0 or 1.0. Clipping keeps log()
    # finite so a perfect prediction yields ~0 instead of nan (0 * -inf)
    # and a fully wrong one yields a large finite loss instead of inf.
    eps = np.finfo(float).eps
    prob = np.clip(output, eps, 1 - eps)
    return -y * np.log(prob) - (1 - y) * np.log(1 - prob)

# --- Initial Setup ---
# Walkthrough of two single-sample gradient-descent steps on a one-feature
# logistic model. Each section computes its values first and then prints the
# whole report in one call; `sep="\n"` yields the same lines as one print()
# per line would.
RULE = "-" * 50  # horizontal rule printed between sections

# Training Data: hours studied (feature) and pass/fail label per student.
student_1_x, student_1_y = np.array([2.0]), 0
student_2_x, student_2_y = np.array([8.0]), 1

# Initial Parameters
weights = np.array([0.5])
bias = 0.1
learnrate = 0.1

print(
    "###### Numerical Example: Binary Classification Learning ######\n",
    "Problem Setup:",
    "Classify whether a student passes (1) or fails (0) based on study hours.\n",
    f"Student 1: x = {student_1_x}, y = {student_1_y} (2 hours study, failed)",
    f"Student 2: x = {student_2_x}, y = {student_2_y} (8 hours study, passed)\n",
    "Initial Parameters:",
    f"weights = {weights}, bias = {bias}, learning_rate = {learnrate}\n",
    RULE,
    sep="\n",
)


# --- Training Step 1: Process Student 1 ---
# Forward pass: linear score, then sigmoid.
linear_combination_1 = np.dot(student_1_x, weights) + bias
output_1 = sigmoid(linear_combination_1)

# Log-loss of the prediction against the true label.
error_1 = error_formula(student_1_y, output_1)

# Gradient step: the (label - prediction) residual scaled by the learning
# rate moves the bias directly and the weights in proportion to the input.
d_error_1 = student_1_y - output_1
scaled_residual_1 = learnrate * d_error_1
updated_weights = weights + scaled_residual_1 * student_1_x
updated_bias = bias + scaled_residual_1

# The report must be printed BEFORE the parameters are reassigned below,
# because it shows the pre-update weights and bias.
print(
    "###### Training Step 1: Process Student 1 ######",
    f"Processing: features = {student_1_x}, y = {student_1_y}\n",
    "1. Forward Pass:",
    f"   linear_combination = ({student_1_x[0]} * {weights[0]}) + {bias} = {linear_combination_1:.3f}",
    f"   output = sigmoid({linear_combination_1:.3f}) = {output_1:.3f}\n",
    "2. Calculate Error:",
    f"   error = -{student_1_y}*log({output_1:.3f}) - (1-{student_1_y})*log(1-{output_1:.3f}) = {error_1:.3f}\n",
    "3. Weight Updates:",
    f"   d_error = {student_1_y} - {output_1:.3f} = {d_error_1:.3f}",
    f"   new_weights = {weights[0]:.3f} + {learnrate} * {d_error_1:.3f} * {student_1_x[0]} = {updated_weights[0]:.3f}",
    f"   new_bias = {bias:.3f} + {learnrate} * {d_error_1:.3f} = {updated_bias:.3f}\n",
    sep="\n",
)

# Update parameters for the next step
weights, bias = updated_weights, updated_bias

print(
    "Results After Step 1:",
    f"weights = [{weights[0]:.3f}], bias = {bias:.3f}",
    RULE,
    sep="\n",
)


# --- Training Step 2: Process Student 2 ---
# Same procedure as step 1, starting from the parameters produced by step 1.
linear_combination_2 = np.dot(student_2_x, weights) + bias
output_2 = sigmoid(linear_combination_2)

error_2 = error_formula(student_2_y, output_2)

d_error_2 = student_2_y - output_2
scaled_residual_2 = learnrate * d_error_2
updated_weights_2 = weights + scaled_residual_2 * student_2_x
updated_bias_2 = bias + scaled_residual_2

print(
    "###### Training Step 2: Process Student 2 ######",
    f"Processing: features = {student_2_x}, y = {student_2_y}\n",
    "1. Forward Pass:",
    f"   linear_combination = ({student_2_x[0]} * {weights[0]:.3f}) + {bias:.3f} = {linear_combination_2:.3f}",
    f"   output = sigmoid({linear_combination_2:.3f}) = {output_2:.3f}\n",
    "2. Calculate Error:",
    f"   error = -{student_2_y}*log({output_2:.3f}) - (1-{student_2_y})*log(1-{output_2:.3f}) = {error_2:.3f}\n",
    "3. Weight Updates:",
    f"   d_error = {student_2_y} - {output_2:.3f} = {d_error_2:.3f}",
    f"   new_weights = {weights[0]:.3f} + {learnrate} * {d_error_2:.3f} * {student_2_x[0]} = {updated_weights_2[0]:.3f}",
    f"   new_bias = {bias:.3f} + {learnrate} * {d_error_2:.3f} = {updated_bias_2:.3f}\n",
    sep="\n",
)

# Final update
weights, bias = updated_weights_2, updated_bias_2

print(
    "Final Results After Step 2:",
    f"weights = [{weights[0]:.3f}], bias = {bias:.3f}",
    RULE,
    sep="\n",
)

###### Numerical Example: Binary Classification Learning ######

Problem Setup:
Classify whether a student passes (1) or fails (0) based on study hours.

Student 1: x = [2.], y = 0 (2 hours study, failed)
Student 2: x = [8.], y = 1 (8 hours study, passed)

Initial Parameters:
weights = [0.5], bias = 0.1, learning_rate = 0.1

--------------------------------------------------
###### Training Step 1: Process Student 1 ######
Processing: features = [2.], y = 0

1. Forward Pass:
   linear_combination = (2.0 * 0.5) + 0.1 = 1.100
   output = sigmoid(1.100) = 0.750

2. Calculate Error:
   error = -0*log(0.750) - (1-0)*log(1-0.750) = 1.387

3. Weight Updates:
   d_error = 0 - 0.750 = -0.750
   new_weights = 0.500 + 0.1 * -0.750 * 2.0 = 0.350
   new_bias = 0.100 + 0.1 * -0.750 = 0.025

Results After Step 1:
weights = [0.350], bias = 0.025
--------------------------------------------------
###### Training Step 2: Process Student 2 ######
Processing: features = [8.], y = 1

1. Forward Pass:
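   linear_combination = (8.0 * 0.350) + 0.025 = 2.825
   output = sigmoid(2.825) = 0.944

2. Calculate Error:
   error = -1*log(0.944) - (1-1)*log(1-0.944) = 0.058

3. Weight Updates:
   d_error = 1 - 0.944 = 0.056
   new_weights = 0.350 + 0.1 * 0.056 * 8.0 = 0.395
   new_bias = 0.025 + 0.1 * 0.056 = 0.031

Final Results After Step 2:
weights = [0.395], bias = 0.031
--------------------------------------------------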

###### Analysis of Learning Progress

**Error Reduction:**
- Student 1 error: 1.387 (high error: the student failed, but the model predicted a 75% chance of passing)
- Student 2 error: 0.058 (low error for passed student predicted correctly)

**Weight Evolution:**
- Initial: weight = 0.5, bias = 0.1
- After training: weight = 0.395, bias = 0.031
- **Interpretation**: The weight stays positive, so more study hours still mean a higher predicted pass probability, but the two updates together shrink the initially overconfident weight and bias

**Prediction Improvement:**
- **Before training**: 2 hours → sigmoid(2.0 * 0.5 + 0.1) = 75% pass probability
- **After training**: 2 hours → sigmoid(2.0 * 0.395 + 0.031) = 69.4% pass probability

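The model is learning to reduce overconfident predictions by adjusting its weights in proportion to the error between predicted and actual outcomes. Each training step demonstrates how gradient descent iteratively improves the model's ability to distinguish between passing and failing students. Note that after only two steps the 2-hour student is still predicted to pass (69.4% > 50%); many more passes over the data would be needed before that prediction drops below 0.5.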