In [None]:
#Problem 1

import numpy as np

class ScratchLogisticRegression():
    """
    Scratch implementation of binary logistic regression, trained with
    full-batch gradient descent on the mean binary cross-entropy loss.

    Parameters
    ----------
    num_iter : int
      Number of iterations
    lr : float
      Learning rate
    bias : bool
      False if no bias term is included
    verbose : bool
      True to output the learning process

    Attributes
    ----------
    self.coef_ : ndarray, shape (n_features,), or (n_features + 1,) when bias=True
      Learned parameters. With bias=True the first element is the intercept.
      None until ``fit`` has been called.
    self.loss : ndarray, shape (self.iter,)
      Record losses on training data
    self.val_loss : ndarray, shape (self.iter,)
      Record loss on validation data (stays all zeros when no validation
      data is passed to ``fit``)
    """

    def __init__(self, num_iter=1000, lr=0.01, bias=True, verbose=False):
        # Hyperparameters stored as attributes.
        self.iter = num_iter
        self.lr = lr
        self.bias = bias
        self.verbose = verbose
        # Per-iteration loss histories (re-created on every call to fit).
        self.loss = np.zeros(self.iter)
        self.val_loss = np.zeros(self.iter)
        self.coef_ = None

    def _sigmoid(self, z):
        """
        Sigmoid function 1 / (1 + exp(-z)), evaluated without overflow.

        Uses the identity sigmoid(z) = exp(-log(1 + exp(-z))), where
        log(1 + exp(-z)) is computed by np.logaddexp(0, -z). The naive
        form overflows in exp(-z) for large negative z.
        """
        z = np.asarray(z, dtype=float)
        return np.exp(-np.logaddexp(0.0, -z))

    def _add_bias(self, X):
        """Prepend a column of ones to X so the first weight acts as the bias."""
        return np.c_[np.ones(X.shape[0]), X]

    def _initialize_weights(self, n_features):
        """Initialize weights with small random values (normal, scaled by 0.01)."""
        return np.random.randn(n_features) * 0.01

    def _compute_loss(self, y, y_pred):
        """Compute the mean binary cross-entropy loss between y and y_pred."""
        epsilon = 1e-15  # keeps predictions away from 0 and 1 to avoid log(0)
        y_pred = np.clip(y_pred, epsilon, 1 - epsilon)
        return -np.mean(y * np.log(y_pred) + (1 - y) * np.log(1 - y_pred))

    def fit(self, X, y, X_val=None, y_val=None):
        """
        Learn logistic regression. If validation data is entered, the loss
        for it is also recorded for each iteration.

        Training and validation loss for iteration ``i`` are both evaluated
        with the weights as they are at the start of that iteration, so the
        two histories are directly comparable.

        Parameters
        ----------
        X : ndarray, shape (n_samples, n_features)
            Features of training data
        y : ndarray, shape (n_samples,) or (n_samples, 1)
            Correct answer value of training data (0 or 1)
        X_val : ndarray, shape (n_val_samples, n_features), optional
            Features of verification data
        y_val : ndarray, shape (n_val_samples,) or (n_val_samples, 1), optional
            Correct value of verification data

        Raises
        ------
        ValueError
            If the number of samples in the features and the labels differ.
        """
        X = np.asarray(X, dtype=float)
        # Flatten labels: a column vector (n, 1) would otherwise broadcast
        # against the (n,) predictions into an (n, n) error matrix.
        y = np.asarray(y, dtype=float).ravel()
        if X.shape[0] != y.shape[0]:
            raise ValueError(
                f"X and y have inconsistent sample counts: "
                f"{X.shape[0]} != {y.shape[0]}"
            )

        # Validation is used only when both parts are supplied.
        has_val = X_val is not None and y_val is not None
        if has_val:
            X_val = np.asarray(X_val, dtype=float)
            y_val = np.asarray(y_val, dtype=float).ravel()
            if X_val.shape[0] != y_val.shape[0]:
                raise ValueError(
                    f"X_val and y_val have inconsistent sample counts: "
                    f"{X_val.shape[0]} != {y_val.shape[0]}"
                )

        if self.bias:
            X = self._add_bias(X)
            if has_val:
                X_val = self._add_bias(X_val)

        n_samples, n_features = X.shape
        self.coef_ = self._initialize_weights(n_features)

        # Fresh histories so a refit (or a changed self.iter) never reuses
        # stale or mis-sized buffers.
        self.loss = np.zeros(self.iter)
        self.val_loss = np.zeros(self.iter)

        for i in range(self.iter):
            # Forward pass: linear combination followed by the sigmoid.
            y_pred = self._sigmoid(np.dot(X, self.coef_))

            # Record both losses BEFORE the update so they refer to the
            # same weights.
            self.loss[i] = self._compute_loss(y, y_pred)
            if has_val:
                val_pred = self._sigmoid(np.dot(X_val, self.coef_))
                self.val_loss[i] = self._compute_loss(y_val, val_pred)

            # Gradient of the mean cross-entropy w.r.t. the weights.
            gradient = np.dot(X.T, y_pred - y) / n_samples

            # Gradient-descent step.
            self.coef_ -= self.lr * gradient

            if self.verbose and i % 100 == 0:
                print(f"Iteration {i}: Training Loss = {self.loss[i]:.4f}", end="")
                if has_val:
                    print(f", Validation Loss = {self.val_loss[i]:.4f}")
                else:
                    print()

    def predict_proba(self, X):
        """
        Estimate the probability using logistic regression.

        Parameters
        ----------
        X : ndarray, shape (n_samples, n_features)
            sample

        Returns
        -------
            ndarray, shape (n_samples,)
            Estimated probability of the positive class

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        """
        if self.coef_ is None:
            raise RuntimeError(
                "This ScratchLogisticRegression instance is not fitted yet; "
                "call fit() before predicting."
            )
        X = np.asarray(X, dtype=float)
        if self.bias:
            X = self._add_bias(X)
        return self._sigmoid(np.dot(X, self.coef_))

    def predict(self, X, threshold=0.5):
        """
        Estimate the label using logistic regression.

        Parameters
        ----------
        X : ndarray, shape (n_samples, n_features)
            sample
        threshold : float
            Threshold for classification (default 0.5); probabilities
            greater than or equal to it are labelled 1.

        Returns
        -------
            ndarray of int, shape (n_samples,)
            Estimated labels (0 or 1)
        """
        return (self.predict_proba(X) >= threshold).astype(int)

In [None]:
#Problem 2

