### Defining the sigmoid function
$\mathrm{sigmoid}(w^T x + b) = \frac{1}{1 + e^{-(w^T x + b)}}$

In [11]:
# Logistic (sigmoid) function, applied elementwise: 1 / (1 + exp(-x)).
# Accepts a numeric scalar, vector or matrix and returns a value of the
# same shape with every entry in [0, 1].
sigmoid <- function(x) {
    denominator <- 1 + exp(-x)
    1 / denominator
}

#### Forward propagation

$A = \sigma(w^T X + b) = (a^{(1)}, a^{(2)}, \ldots, a^{(m)})$

Cost function: $J = -\frac{1}{m}\sum_{i=1}^{m}\left[y^{(i)}\log(a^{(i)})+(1-y^{(i)})\log(1-a^{(i)})\right]$

#### Backward propagation

$$ \frac{\partial J}{\partial w} = \frac{1}{m}X(A-Y)^T\tag{1}$$

$$ \frac{\partial J}{\partial b} = \frac{1}{m} \sum_{i=1}^m (a^{(i)}-y^{(i)})\tag{2}$$

(1) is denoted by `dw` and (2) by `db` in the code.

In [38]:
# One forward and backward pass of logistic regression.
#
# Args:
#   x: predictors. A numeric vector (one feature) or a matrix / data frame
#      with one row per observation and one column per feature.
#   y: 0/1 labels, one per observation.
#   w: weights, one per feature.
#   b: intercept (length 1).
#
# Returns a list whose elements are, in order:
#   cost: mean cross-entropy loss at (w, b)
#   dw:   gradient of the cost with respect to w (n_features x 1 matrix)
#   db:   gradient of the cost with respect to b (scalar)
propagate <- function(x, y, w, b) {
    w <- as.numeric(w)
    x_mat <- as.matrix(x)
    if (length(w) == 1L) {
        # Single-feature case: every value of x is one observation,
        # whatever shape x arrived in.
        x_mat <- matrix(x_mat, ncol = 1L)
    }
    m <- nrow(x_mat)
    stopifnot(
        ncol(x_mat) == length(w),
        length(y) == m,
        length(b) == 1L
    )
    y <- as.numeric(y)

    # Fold the intercept into the design matrix: a = sigmoid(b + x %*% w).
    design <- cbind(1, x_mat)
    theta <- c(b, w)
    a <- as.numeric(sigmoid(design %*% theta))

    # Keep activations strictly inside (0, 1) for the logs only, so a
    # saturated prediction cannot produce 0 * log(0) = NaN in the cost.
    eps <- .Machine$double.eps
    a_safe <- pmin(pmax(a, eps), 1 - eps)
    cost <- -mean(y * log(a_safe) + (1 - y) * log(1 - a_safe))

    resid <- a - y
    dw <- crossprod(x_mat, resid) / m
    db <- mean(resid)
    list(cost = cost, dw = dw, db = db)
}

#### Optimization Step

The update rule is $\theta = \theta - \alpha \, d\theta$, where $\alpha$ is the learning rate.

In [39]:
# Fit logistic-regression parameters by batch gradient descent.
#
# Note: defining this at top level masks stats::optimize() for the session.
#
# Args:
#   x, y:       data, passed unchanged to propagate().
#   w, b:       starting values for the weight(s) and the intercept.
#   alpha:      learning rate.
#   maxit:      number of gradient steps (0 performs no update).
#   print_cost: if TRUE, print the cost after every 100th step.
#
# Prints the final b and w, and invisibly returns list(w = , b = ) so the
# fitted parameters can be used by the caller.
optimize <- function(x, y, w, b, alpha, maxit, print_cost = FALSE) {
    for (i in seq_len(maxit)) {
        # Evaluate both gradients at the same (w, b) so the step is the
        # simultaneous update theta <- theta - alpha * d(theta).
        # propagate() returns list(cost, dw, db); elements are read by position.
        grads <- propagate(x, y, w, b)
        w <- w - alpha * grads[[2]]
        b <- b - alpha * grads[[3]]
        if (isTRUE(print_cost) && i %% 100 == 0) {
            cat('Cost at step',i,'is',propagate(x, y, w, b)[[1]],'\n')
        }
    }
    cat('b is',b,'\n')
    cat('w is',w,'\n')
    invisible(list(w = as.numeric(w), b = as.numeric(b)))
}

#### Randomly generate data

In [None]:
# Simulate a one-feature binary classification problem.
#   x: n_obs draws from Uniform(0, 1).
#   y: 1 when the noisy score x + N(0, 1) exceeds the mean score, else 0.
n_obs <- 1000
x <- runif(n_obs)
# Draw the noise once: the labels and the threshold they are compared
# against must come from the same scores.
score <- x + rnorm(n_obs)
y <- as.numeric(score > mean(score))

#### Run model

In [None]:
# Fit the model from w = 1, b = 1 with learning rate 0.01 for 50000 steps,
# printing the cost every 100 steps.
optimize(
    x, y,
    w = 1, b = 1,
    alpha = 0.01, maxit = 50000,
    print_cost = TRUE
)