# Neural Networks and Deep Learning 

## Chapter 1: Using neural nets to recognize handwritten digits

### Explanation of the Sigmoid Function:

The sigmoid function, also known as the logistic function, is a mathematical function that takes any real-valued number and maps it to a value between 0 and 1. It's defined as:

\begin{equation}
\sigma (x)=\dfrac{1}{1+e^{-x}}
\end{equation}

Where:
- $\sigma (x)$ is the sigmoid function
- $e$ is the base of the natural logarithm (Euler's number, approximately 2.71828)
- $x$ is the input value

The sigmoid function is often used in machine learning, particularly in neural networks, because of its useful properties:
- It's smooth and continuous
- It's bounded between 0 and 1
- It has a characteristic S-shaped curve
- Its derivative is easy to calculate, since $\sigma'(x) = \sigma(x)\,(1 - \sigma(x))$, which is useful for gradient-based training of neural networks

## Python Function for Sigmoid:

Let's write a Python function to calculate the sigmoid:

In [None]:
import math
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

In [None]:
def sigmoid(z):
    """
    Calculates the sigmoid (logistic) function 1 / (1 + e^(-z)) element-wise.

    The result is built from exp(-|z|), which is never larger than 1, so very
    negative inputs do not overflow inside np.exp.

    Args:
    z (float or array-like): Input value(s). Lists and tuples are accepted
        and converted to arrays.

    Returns:
    float or ndarray: Sigmoid of z. A NumPy scalar when z is a scalar,
    otherwise an array with the same shape as z.
    """
    z = np.asarray(z)
    decay = np.exp(-np.abs(z))  # exponent is never positive, so no overflow
    # Two algebraically equivalent forms of 1 / (1 + e^(-z)), picked by sign:
    #   z >= 0: 1 / (1 + e^(-z))        z < 0: e^z / (1 + e^z)
    result = np.where(z >= 0, 1 / (1 + decay), decay / (1 + decay))
    return result[()]  # unwrap a 0-d array to a scalar; n-d arrays pass through

In [None]:
def plot_sigmoid():
    """
    Creates and displays a plot of the sigmoid function over [-10, 10].

    The figure shows the curve sampled at 1000 evenly spaced points, dashed
    red guide lines at y = 0 and y = 1, a dashed green line at x = 0, and a
    text label '0.5' placed at the point (0, 0.5).
    """
    x = np.linspace(-10, 10, 1000)
    # sigmoid is written with NumPy operations, so it can take the whole grid
    # in one call instead of being invoked once per sample.
    y = sigmoid(x)

    plt.figure(figsize=(10, 6))
    plt.plot(x, y)
    plt.title('Sigmoid Function')
    plt.xlabel('x')
    plt.ylabel('σ(x)')
    plt.grid(True)
    # Horizontal guides at the two values the curve approaches
    plt.axhline(y=0, color='r', linestyle='--')
    plt.axhline(y=1, color='r', linestyle='--')
    # Vertical guide at the midpoint, labelled with the value there
    plt.axvline(x=0, color='g', linestyle='--')
    plt.text(0, 0.5, '0.5', ha='left', va='center')
    plt.show()

In [None]:
# Tabulate the sigmoid at a handful of sample inputs
test_values = [-5, -2, -1, 0, 1, 2, 5]

print("x\t\tSigmoid(x)")
print("-----------------------")
# One tab-separated row per input, sigmoid shown to six decimal places
print("\n".join(f"{x}\t\t{sigmoid(x):.6f}" for x in test_values))

# Draw the sigmoid curve
plot_sigmoid()

In [None]:
def linear_cost_one_var(X, y, w, b):
    """
    Compute the cost function for linear regression with one variable.

    The cost function is defined as:
    J(w,b) = (1/2m) * Σ(h(x) - y)^2
    where h(x) = wx + b, and m is the number of training examples.

    Args:
    X (array-like): Shape (m,) input feature values, where m is the number of examples.
        Lists and tuples are accepted and converted to arrays.
    y (array-like): Shape (m,) target values.
    w (float): Weight parameter.
    b (float): Bias parameter.

    Returns:
    float: The cost value.

    Raises:
    ZeroDivisionError: If X is empty (m == 0).
    """
    # Coerce up front: with a plain list, `w * X` would repeat the list (int w)
    # or raise (float w) instead of scaling each element.
    X = np.asarray(X)
    y = np.asarray(y)

    m = len(X)
    h = w * X + b  # Predictions for every example
    cost = (1 / (2 * m)) * np.sum((h - y) ** 2)  # Half the mean squared error
    return cost

def linear_cost_two_var(X, y, w, b):
    """
    Compute the cost function for linear regression with two variables.

    The cost function is defined as:
    J(w,b) = (1/2m) * Σ(h(x) - y)^2
    where h(x) = w1*x1 + w2*x2 + b, and m is the number of training examples.

    Args:
    X (array-like): Shape (m, 2) input feature values, where m is the number of examples.
        Nested lists are accepted and converted to arrays.
    y (array-like): Shape (m,) target values.
    w (array-like): Shape (2,) weight parameters.
    b (float): Bias parameter.

    Returns:
    float: The cost value.

    Raises:
    ZeroDivisionError: If X has zero rows (m == 0).
    """
    # Coerce up front so that `.shape` is available even for nested lists.
    X = np.asarray(X)
    y = np.asarray(y)

    m = X.shape[0]
    h = np.dot(X, w) + b  # Predictions: one dot product per row of X
    cost = (1 / (2 * m)) * np.sum((h - y) ** 2)  # Half the mean squared error
    return cost

In [None]:
# Example usage of the two linear cost helpers.

# Single-feature case: every target is exactly twice its input.
X_one, y_one = np.array([1, 2, 3, 4, 5]), np.array([2, 4, 6, 8, 10])
w_one, b_one = 2, 0
cost_one = linear_cost_one_var(X=X_one, y=y_one, w=w_one, b=b_one)
print(f"Cost for one variable: {cost_one}")

# Two-feature case: both columns of each row hold the same value.
X_two = np.array([[1, 1], [2, 2], [3, 3], [4, 4], [5, 5]])
y_two = np.array([3, 6, 9, 12, 15])
w_two, b_two = np.array([1, 1]), 1
cost_two = linear_cost_two_var(X=X_two, y=y_two, w=w_two, b=b_two)
print(f"Cost for two variables: {cost_two}")

In [None]:
def logistic_cost_one_var(X, y, w, b):
    """
    Compute the squared error cost function for logistic regression with one variable.

    The cost function is defined as:
    J(w,b) = (1/2m) * Σ(h(x) - y)^2
    where h(x) = sigmoid(wx + b), and m is the number of training examples.

    Args:
    X (array-like): Shape (m,) input feature values, where m is the number of examples.
        Lists and tuples are accepted and converted to arrays.
    y (array-like): Shape (m,) target values (0 or 1).
    w (float): Weight parameter.
    b (float): Bias parameter.

    Returns:
    float: The cost value.

    Raises:
    ZeroDivisionError: If X is empty (m == 0).
    """
    # Coerce up front: with a plain list, `w * X` would repeat the list (int w)
    # or raise (float w) instead of scaling each element.
    X = np.asarray(X)
    y = np.asarray(y)

    m = len(X)
    z = w * X + b   # Linear score for every example
    h = sigmoid(z)  # Squash scores into predicted probabilities

    # Compute the cost using squared error
    cost = (1 / (2 * m)) * np.sum((h - y) ** 2)

    return cost

def logistic_cost_two_var(X, y, w, b):
    """
    Compute the squared error cost function for logistic regression with two variables.

    The cost function is defined as:
    J(w,b) = (1/2m) * Σ(h(x) - y)^2
    where h(x) = sigmoid(w1*x1 + w2*x2 + b), and m is the number of training examples.

    Args:
    X (array-like): Shape (m, 2) input feature values, where m is the number of examples.
        Nested lists are accepted and converted to arrays.
    y (array-like): Shape (m,) target values (0 or 1).
    w (array-like): Shape (2,) weight parameters.
    b (float): Bias parameter.

    Returns:
    float: The cost value.

    Raises:
    ZeroDivisionError: If X has zero rows (m == 0).
    """
    # Coerce up front so that `.shape` is available even for nested lists.
    X = np.asarray(X)
    y = np.asarray(y)

    m = X.shape[0]
    z = np.dot(X, w) + b  # Linear score: one dot product per row of X
    h = sigmoid(z)        # Squash scores into predicted probabilities

    # Compute the cost using squared error
    cost = (1 / (2 * m)) * np.sum((h - y) ** 2)

    return cost

In [None]:
# Example usage of the two squared-error logistic cost helpers.

# Single-feature case with binary labels.
X_one, y_one = np.array([1, 2, 3, 4, 5]), np.array([0, 0, 1, 1, 1])
w_one, b_one = 0.5, -1
cost_one = logistic_cost_one_var(X=X_one, y=y_one, w=w_one, b=b_one)
print(f"Logistic Regression Cost (Squared Error) for one variable: {cost_one}")

# Two-feature case: both columns of each row hold the same value.
X_two = np.array([[1, 1], [2, 2], [3, 3], [4, 4], [5, 5]])
y_two = np.array([0, 0, 0, 1, 1])
w_two, b_two = np.array([0.5, 0.5]), -3
cost_two = logistic_cost_two_var(X=X_two, y=y_two, w=w_two, b=b_two)
print(f"Logistic Regression Cost (Squared Error) for two variables: {cost_two}")

In [None]:
# Compare how the squared-error cost varies with the weight w (bias held
# fixed) for a linear model and for a sigmoid (logistic) model.

# Dataset
X_linear = np.array([1.0, 1.7, 2.0, 2.5, 3.0, 3.2])
y_linear = np.array([250, 300, 480, 430, 630, 730])

X_logistic = np.array([0, 1, 2, 3, 4, 5])
y_logistic = np.array([0, 0, 0, 1, 1, 1])

# Set b and w range (each model gets its own sweep of candidate weights)
b_linear = 100
b_logistic = 0
w_range_linear = np.linspace(-100, 500, 1000)
w_range_logistic = np.linspace(-5, 5, 1000)

# Calculate costs for different w values
linear_costs = [linear_cost_one_var(X_linear, y_linear, w, b_linear) for w in w_range_linear]
logistic_costs = [logistic_cost_one_var(X_logistic, y_logistic, w, b_logistic) for w in w_range_logistic]

# Plotting
plt.figure(figsize=(12, 5))

# Linear Regression Cost Plot
# Each cost curve is plotted against the weight sweep it was computed from.
plt.subplot(1, 2, 1)
plt.plot(w_range_linear, linear_costs)
plt.title('Linear Regression Cost Function')
plt.xlabel('Weight (w)')
plt.ylabel('Cost')
plt.grid(True)

# Logistic Regression Cost Plot
plt.subplot(1, 2, 2)
plt.plot(w_range_logistic, logistic_costs)
plt.title('Logistic Regression Cost Function')
plt.xlabel('Weight (w)')
plt.ylabel('Cost')
plt.grid(True)

plt.tight_layout()
plt.show()

# Print minimum cost and corresponding w for each regression
# (the best w is looked up in the same sweep that produced the costs)
linear_min_cost = min(linear_costs)
linear_best_w = w_range_linear[np.argmin(linear_costs)]
print(f"Linear Regression - Minimum Cost: {linear_min_cost:.2f}, Best w: {linear_best_w:.2f}")

logistic_min_cost = min(logistic_costs)
logistic_best_w = w_range_logistic[np.argmin(logistic_costs)]
print(f"Logistic Regression - Minimum Cost: {logistic_min_cost:.2f}, Best w: {logistic_best_w:.2f}")