#### Foundations of Machine Learning Aug-Nov 2024
##### Sakshi Badole CS24MTECH11008
##### Assignment 4
Question 5: __Logistic Regression__ 

In [20]:
import numpy as np

#### Sigmoid Function

In [21]:
def sigmoid(z):
    """Logistic sigmoid 1 / (1 + exp(-z)), evaluated element-wise.

    The exponential is only ever taken of a non-positive number, so very
    negative inputs do not overflow ``np.exp`` (the naive formula emits a
    RuntimeWarning there). For ``z >= 0`` the arithmetic is the same as the
    naive formula.

    Parameters
    ----------
    z : scalar or array-like
        Input value(s).

    Returns
    -------
    A NumPy scalar for scalar input, otherwise an ndarray shaped like ``z``.
    """
    z = np.asarray(z)
    # exp(-|z|) lies in [0, 1], so it can underflow to 0 but never overflow.
    decay = np.exp(-np.abs(z))
    # z >= 0: 1 / (1 + exp(-z));  z < 0: exp(z) / (1 + exp(z)).
    # The two forms are algebraically equal; each is used where it is safe.
    result = np.where(z >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with () turns a 0-d result back into a scalar and returns
    # higher-dimensional arrays unchanged.
    return result[()]

#### Cross-entropy error as the error function

In [22]:
def compute_cost(y, y_hat, eps=1e-15):
    """Mean binary cross-entropy between labels and predicted probabilities.

    Parameters
    ----------
    y : array-like
        True labels (0 or 1), one per sample.
    y_hat : array-like
        Predicted probabilities, same length as ``y``.
    eps : float, optional
        Probabilities are clipped to ``[eps, 1 - eps]`` before the logarithm
        so that a saturated prediction (exactly 0 or 1) yields a large finite
        cost instead of ``inf``/``nan``.

    Returns
    -------
    The average cross-entropy over all samples.

    Raises
    ------
    ValueError
        If ``y`` is empty (the mean would be undefined).
    """
    y = np.asarray(y, dtype=float)
    m = y.size
    if m == 0:
        raise ValueError("compute_cost requires at least one sample")

    # Without clipping, log(0) = -inf and 0 * -inf = nan would poison the sum.
    y_hat = np.clip(np.asarray(y_hat, dtype=float), eps, 1 - eps)
    return -1/m * (np.dot(y, np.log(y_hat)) + np.dot(1 - y, np.log(1 - y_hat)))


#### Gradient Descent

In [23]:
def gradient_descent(X, y, theta, learning_rate, num_iterations, tolerance):
    """Fit logistic-regression weights by batch gradient descent.

    Each iteration computes predictions with the current weights, takes one
    gradient step, and then evaluates the cross-entropy cost of the
    predictions made *before* that step. Training stops early once the cost
    changes by less than ``tolerance`` between consecutive iterations.

    Parameters
    ----------
    X : array-like
        Design matrix, one row per sample (include the intercept column
        yourself if one is wanted).
    y : array-like
        Binary labels, one per row of ``X``.
    theta : array-like
        Initial weights. The caller's array is NOT modified; a float copy is
        updated and returned.
    learning_rate : float
        Step size applied to the gradient.
    num_iterations : int
        Maximum number of iterations.
    tolerance : float
        Early-stopping threshold on the absolute change in cost.

    Returns
    -------
    The weight vector after the final update.

    Notes
    -----
    Progress is reported with ``print``: the weights and model equation after
    the first iteration, a message when the tolerance check stops training,
    and the cost/weights when the last permitted iteration completes without
    triggering the tolerance check.
    """
    X = np.asarray(X)
    y = np.asarray(y)
    # Private float copy: the in-place update below must neither alter the
    # caller's array nor fail when the initial weights are integers.
    theta = np.array(theta, dtype=float)

    m = y.size
    previous_cost = float('inf')

    for i in range(num_iterations):
        z = np.dot(X, theta)
        y_hat = sigmoid(z)
        error = y_hat - y
        gradient = np.dot(X.T, error) / m
        theta -= learning_rate * gradient

        # Cost of the predictions computed above, i.e. for the weights as
        # they were before this iteration's update.
        cost = compute_cost(y, y_hat)

        # Check for convergence
        if abs(previous_cost - cost) < tolerance:
            print(f"\nConvergence reached at iteration {i + 1}")
            break

        previous_cost = cost

        if i == 0:
            # Print the model equation after one iteration.
            # NOTE: this report is written for exactly three weights
            # (intercept plus two features).
            print("\nWeights after one iteration (θ₀, θ₁, θ₂):", theta)
            print("\nUpdated logistic regression model after one iteration:")
            print(f"P(y=1|x₁,x₂) = 1 / (1 + exp(-({theta[0]:.3f} + {theta[1]:.3f}x₁ + {theta[2]:.3f}x₂)))")

        if i == num_iterations-1 :
            print(f"Iteration {i + 1}: Cost = {cost}, Weights = {theta}")

    return theta


#### Prediction function

In [24]:
def predict(X, theta):
    """Return hard 0/1 class labels for the rows of ``X``.

    The linear score ``X · theta`` is passed through ``sigmoid``; a sample is
    labelled 1 when the resulting probability is at least 0.5, otherwise 0.
    """
    probabilities = sigmoid(np.dot(X, theta))
    is_positive = probabilities >= 0.5
    return is_positive.astype(int)

#### Evaluating the predictions

In [25]:
def evaluate_model(y_true, y_pred):
    """Compute accuracy, precision and recall for binary 0/1 labels.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels (0 or 1).
    y_pred : array-like
        Predicted labels (0 or 1), same number of elements as ``y_true``.

    Returns
    -------
    (accuracy, precision, recall) as Python floats. Precision is 0.0 when
    nothing was predicted positive, recall is 0.0 when there are no actual
    positives, and all three are 0.0 for empty input.

    Raises
    ------
    ValueError
        If the two inputs do not contain the same number of labels.
    """
    # Coerce to flat arrays: comparing a plain list with ``== 1`` yields a
    # single False rather than an element-wise mask, which would silently
    # report every metric as zero.
    y_true = np.asarray(y_true).ravel()
    y_pred = np.asarray(y_pred).ravel()
    if y_true.shape != y_pred.shape:
        raise ValueError(
            f"y_true and y_pred must have the same length, got {y_true.size} and {y_pred.size}"
        )

    total = y_true.size
    if total == 0:
        return 0.0, 0.0, 0.0

    true_positive = int(np.sum((y_pred == 1) & (y_true == 1)))
    true_negative = int(np.sum((y_pred == 0) & (y_true == 0)))
    false_positive = int(np.sum((y_pred == 1) & (y_true == 0)))
    false_negative = int(np.sum((y_pred == 0) & (y_true == 1)))

    predicted_positive = true_positive + false_positive
    actual_positive = true_positive + false_negative

    accuracy = (true_positive + true_negative) / total
    precision = true_positive / predicted_positive if predicted_positive > 0 else 0.0
    recall = true_positive / actual_positive if actual_positive > 0 else 0.0

    return accuracy, precision, recall

#### Train data

In [26]:
# Training set: one row per sample, two feature columns (x1, x2).
X_train = np.array(
    [
        [0.346, 0.780],
        [0.303, 0.439],
        [0.358, 0.729],
        [0.602, 0.863],
        [0.790, 0.753],
        [0.611, 0.965],
    ]
)

# Binary class label for each training row, in the same order.
y_train = np.array([0, 0, 0, 1, 1, 1])

#### Training the model with learning rate 0.1 and initial weights $\theta_{0} = -1$, $\theta_{1} = 1.5$, $\theta_{2} = 0.5$

In [27]:
# Add intercept term (a leading column of ones). The design matrix gets its
# own name instead of overwriting X_train, so re-running this cell cannot
# stack a second intercept column onto data defined in an earlier cell.
X_train_design = np.c_[np.ones(X_train.shape[0]), X_train]

# Initial weights (θ₀, θ₁, θ₂)
theta_init = np.array([-1, 1.5, 0.5])

# Hyperparameters
learning_rate = 0.1
num_iterations = 1500

# Train the model; `theta` holds the fitted weights used by later cells.
theta = gradient_descent(X_train_design, y_train, theta_init, learning_rate, num_iterations, tolerance=1e-3)


Weights after one iteration (θ₀, θ₁, θ₂): [-1.00316626  1.50535086  0.50196867]

Updated logistic regression model after one iteration:
P(y=1|x₁,x₂) = 1 / (1 + exp(-(-1.003 + 1.505x₁ + 0.502x₂)))

Convergence reached at iteration 2


### Weights after one iteration 
***(θ₀, θ₁, θ₂): [-1.00316626  1.50535086  0.50196867]***

### Updated logistic regression model after one iteration:
***P(y=1|x₁,x₂) = 1 / (1 + exp(-(-1.003 + 1.505x₁ + 0.502x₂)))***

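Convergence was reached at iteration 2: the change in cross-entropy cost between consecutive iterations fell below the tolerance of 1e-3, so training stopped early.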

___

#### Testing the logistic regression model on the test set

In [28]:
# Held-out samples: one row per sample, two feature columns (x1, x2).
X_test = np.array(
    [
        [0.959, 0.382],
        [0.750, 0.306],
        [0.395, 0.760],
        [0.823, 0.764],
        [0.761, 0.874],
        [0.844, 0.435],
    ]
)
# Ground-truth label for each test row.
y_test = np.array([0, 0, 0, 1, 1, 1])

# Prepend the column of ones that pairs with the intercept weight.
X_test = np.column_stack((np.ones(X_test.shape[0]), X_test))

# Classify the test rows with the trained weights.
predictions = predict(X_test, theta)

# Score the predictions against the true labels.
accuracy, precision, recall = evaluate_model(y_test, predictions)



#### Predictions on the test data

In [29]:
# Show predicted and true labels one above the other for easy comparison.
print("\nPredictions for test set:")
print("Predicted labels:", predictions)
print("True labels:     ", y_test)

# Summary metrics, one per line, rounded to three decimals.
print(
    f"\nAccuracy: {accuracy:.3f}",
    f"Precision: {precision:.3f}",
    f"Recall: {recall:.3f}",
    sep="\n",
)


Predictions for test set:
Predicted labels: [1 1 0 1 1 1]
True labels:      [0 0 0 1 1 1]

Accuracy: 0.667
Precision: 0.600
Recall: 1.000
