In [1]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import mglearn

In [2]:
class SingleLayer:
    """Single-output logistic classifier trained with full-batch gradient descent.

    The model computes ``z = X @ w + b``, applies a sigmoid, and minimises the
    binary cross-entropy plus optional L1/L2 penalties on ``w``.

    Per-epoch training losses are appended to ``losses``, validation losses
    (when validation data is supplied) to ``val_losses``, and a copy of the
    weights to ``w_history``.
    """

    def __init__(self, learning_rate=0.1, l1=0, l2=0) -> None:
        """Store hyper-parameters and create empty training histories.

        Args:
            learning_rate: Step size applied to every gradient update.
            l1: Coefficient of the L1 penalty on the weights.
            l2: Coefficient of the L2 penalty on the weights.
        """
        self.w = None  # weight column vector, created in fit()
        self.b = None  # bias, created in fit()
        self.losses = []
        self.w_history = []
        self.lr = learning_rate
        # storing validation losses
        self.val_losses = []
        self.l1 = l1
        self.l2 = l2

    def forpass(self, X):
        """Return the linear output ``X @ w + b`` (before the sigmoid)."""
        z = np.dot(X, self.w) + self.b
        return z

    def backprop(self, X, err):
        """Return ``(w_grad, b_grad)`` averaged over the ``len(X)`` samples.

        Args:
            X: Input samples used in the forward pass.
            err: Output error ``-(y - a)`` for those samples.
        """
        m = len(X)
        w_grad = np.dot(X.T, err) / m
        b_grad = np.sum(err) / m
        return w_grad, b_grad

    def activation(self, z):
        """Apply the logistic sigmoid element-wise."""
        a = 1 / (1 + np.exp(-z))
        return a

    def fit(self, X, y, epochs=100, X_val=None, y_val=None):
        """Train the model for ``epochs`` full-batch gradient-descent steps.

        Weights are re-initialised to ones and the bias to zero on every call;
        the history lists are appended to, not cleared.

        Args:
            X: Training samples, one row per sample.
            y: Training targets; reshaped to a column vector.
            epochs: Number of passes over the full training set.
            X_val: Optional validation samples.
            y_val: Optional validation targets. The validation loss is only
                tracked when both ``X_val`` and ``y_val`` are given.
        """
        y = y.reshape(-1, 1)
        # Validation is optional: only touch y_val when it was actually passed,
        # otherwise the documented None defaults would crash on .reshape().
        has_validation = X_val is not None and y_val is not None
        if has_validation:
            y_val = y_val.reshape(-1, 1)
        m = len(X)
        self.w = np.ones((X.shape[1], 1))
        self.b = 0
        self.w_history.append(self.w.copy())
        for _ in range(epochs):
            z = self.forpass(X)
            a = self.activation(z)
            err = -(y - a)
            w_grad, b_grad = self.backprop(X, err)
            # Gradient of the L1/L2 penalty, scaled like the data gradient.
            w_grad += (self.l1 * np.sign(self.w) + self.l2 * self.w) / m
            self.w -= self.lr * w_grad
            self.b -= self.lr * b_grad
            self.w_history.append(self.w.copy())
            # Clip so that log() never receives exactly 0 or 1.
            a = np.clip(a, 1e-10, 1-1e-10)
            loss = np.sum(-(y*np.log(a) + (1-y)*np.log(1-a)))
            self.losses.append((loss + self.reg_loss()) / m)
            if has_validation:
                self.update_val_loss(X_val, y_val)

    def reg_loss(self):
        """Return the penalty ``l1 * sum(|w|) + l2 / 2 * sum(w ** 2)``."""
        return self.l1 * np.sum(np.abs(self.w)) + self.l2 / 2 * np.sum(self.w**2)

    def update_val_loss(self, X_val, y_val):
        """Append the current validation loss to ``val_losses``.

        Does nothing when either argument is ``None`` so that callers without
        validation data can invoke it unconditionally.

        Args:
            X_val: Validation samples.
            y_val: Validation targets, already shaped to match the model
                output (``fit`` passes a column vector).
        """
        if X_val is None or y_val is None:
            return
        z = self.forpass(X_val)
        a = self.activation(z)
        # Clip so that log() never receives exactly 0 or 1.
        a = np.clip(a, 1e-10, 1-1e-10)
        val_loss = np.sum(-(y_val*np.log(a) + (1-y_val)*np.log(1-a)))
        self.val_losses.append((val_loss + self.reg_loss()) / len(y_val))

    def predict(self, X):
        """Return a boolean array that is True where the linear output is positive.

        ``z > 0`` is equivalent to a sigmoid activation above 0.5, so the
        sigmoid itself does not need to be evaluated.
        """
        z = self.forpass(X)
        return z > 0

    def score(self, X, y):
        """Return the fraction of samples in ``X`` whose prediction equals ``y``."""
        return np.mean(self.predict(X) == y.reshape(-1, 1))

In [None]:
class DualLayer(SingleLayer):
    
    def __init__(self, unit, learning_rate=0.1, l1=0, l2=0) -> None:
        self.unit = unit
        self.w1 = None
        self.b1 = None
        self.w2 = None
        self.b2 = None
        self.a1 = None
        self.losses = []
        self.val_losses = []
        self.lr = learning_rate
        self.l1 = l1
        self.l2 = l2
        
    def forpass(self, X):
        z1 = np.dot(X, self.w1) + self.b1
        self.a1 = self.activation(z1)
        z2 = np.dot(self.a1, self.w2) + self.b2
        return z2

    def backprop(self, X, err):
        m = len(X)
        w2_grad = np.dot(self.a1.T, err) / m
        b1_grad = np.dot(np.ones(m), err) / m
        err_to_hidden = np.dot(err, self.w2.T) * self.a1 * (1 - self.a1)
        w1_grad = np.dot(X.T, err_to_hidden) / m
        b2_grad = np.dot(np.ones(m), err_to_hidden) / m
        return w1_grad, b1_grad, w2_grad, b2_grad
    
    def init_weight(self, n_features):
        self.w1 = np.ones((n_features, self.unit))
        self.b1 = np.zeros(self.unit)
        self.w2 = np.ones((self.unit, 1))
        self.b2 = 0
    
    def fit(self, X, y, epochs=100, X_val=None, y_val=None):
        y = y.reshape(-1, 1)
        y_val = y_val.reshape(-1, 1)
        m = len(X)
        self.init_weight(X.shape[1])