In [None]:
import matplotlib.pyplot as plt
import numpy as np
import torch
import pandas as pd
import checker
import utils

In [None]:
# Preparing datasets
# Fixed seed so every randomly drawn tensor below is reproducible. The order of
# the torch.randn calls matters: changing it changes the values that are drawn.
torch.manual_seed(5)

# Regression dataset - Boston housing (https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_boston.html)

data_url = "http://lib.stat.cmu.edu/datasets/boston"
# Raw string for the regex separator: "\s" inside a normal string literal is an
# invalid escape sequence and triggers a warning on recent Python versions.
raw_df = pd.read_csv(data_url, sep=r"\s+", skiprows=22, header=None)
# Every even row is joined with the first two columns of the following odd row
# to form one feature row; column 2 of the odd rows is used as the target.
boston_features = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
boston_target = raw_df.values[1::2, 2]

boston_X = torch.tensor(boston_features, dtype=torch.float32)
boston_y = torch.tensor(boston_target, dtype=torch.float32)
# One randomly initialised, trainable weight per feature column.
boston_w = torch.randn(boston_X.shape[1], dtype=torch.float32, requires_grad=True)


# (features, targets, weights) triple consumed by the regression checks.
boston_data = (boston_X, boston_y, boston_w)

# Multidimensional datasets: (points, trainable parameter) pairs.
dataset_5d = torch.randn([1000, 5], dtype=torch.float32)
param_5d = torch.randn(5, requires_grad=True)

dataset_20d = torch.randn([325, 20], dtype=torch.float32)
param_20d = torch.randn(20, requires_grad=True)

multi_datasets = [(dataset_5d, param_5d), (dataset_20d, param_20d)]

In [None]:
def mean_squared_error(X: torch.Tensor, theta: torch.Tensor) -> torch.Tensor:
    """Average squared Euclidean distance between the rows of ``X`` and ``theta``.

    The squared differences are summed over the last axis (one value per
    sample) and those per-sample values are then averaged.
    """
    deviations = X - theta
    per_sample = deviations.pow(2).sum(dim=-1)
    return per_sample.mean()

# Hand the implementation and the multidimensional datasets to the external
# `checker` module (its validation logic is not visible in this notebook).
checker.check_4_1_mse(mean_squared_error, multi_datasets)

In [None]:
def mean_error(X: torch.Tensor, theta: torch.Tensor) -> torch.Tensor:
    """Average Euclidean distance between the points in ``X`` and ``theta``.

    The norm is taken over the last axis, matching ``mean_squared_error``.
    For the usual ``(n_samples, n_features)`` input this is the same as
    reducing over axis 1, but a single point or extra leading batch axes
    are accepted as well.

    Returns:
        A scalar tensor with the mean of the per-point distances.
    """
    distances = torch.linalg.norm(X - theta, dim=-1)
    return torch.mean(distances)

# Hand the implementation and the multidimensional datasets to the external
# `checker` module (its validation logic is not visible in this notebook).
checker.check_4_1_me(mean_error, multi_datasets)

In [None]:
def max_error(X: torch.Tensor, theta: torch.Tensor) -> torch.Tensor:
    """Largest Euclidean distance between any point in ``X`` and ``theta``.

    The norm is taken over the last axis, matching ``mean_squared_error``.
    For the usual ``(n_samples, n_features)`` input this is the same as
    reducing over axis 1, but a single point or extra leading batch axes
    are accepted as well.

    Returns:
        A scalar tensor with the maximum of the per-point distances.
    """
    distances = torch.linalg.norm(X - theta, dim=-1)
    return torch.max(distances)

# Hand the implementation and the multidimensional datasets to the external
# `checker` module (its validation logic is not visible in this notebook).
checker.check_4_1_max(max_error, multi_datasets)

In [None]:
def linear_regression_loss(X: torch.Tensor, w: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
    """Mean squared error of the linear model ``X @ w`` against targets ``y``."""
    residuals = torch.matmul(X, w) - y
    return residuals.pow(2).mean()

# Hand the implementation and the Boston (X, y, w) triple to the external
# `checker` module (its validation logic is not visible in this notebook).
checker.check_4_1_lin_reg(linear_regression_loss, boston_data)

In [None]:
def regularized_regression_loss(
    X: torch.Tensor, w: torch.Tensor, y: torch.Tensor, alpha: float = 0.2
) -> torch.Tensor:
    """Mean squared error of ``X @ w`` plus an L2 penalty on the weights.

    Args:
        X: Feature matrix.
        w: 1-D weight vector (``torch.dot`` requires a 1-D tensor).
        y: Regression targets.
        alpha: Strength of the L2 penalty ``alpha * (w . w)``. Defaults to 0.2,
            the value that used to be hard-coded in the function body.

    Returns:
        A scalar tensor: ``mean((X @ w - y) ** 2) + alpha * dot(w, w)``.
    """
    squared_distances = (X @ w - y) ** 2
    l2_penalty = alpha * torch.dot(w, w)
    return torch.mean(squared_distances) + l2_penalty

# Hand the implementation and the Boston (X, y, w) triple to the external
# `checker` module (its validation logic is not visible in this notebook).
checker.check_4_1_reg_reg(regularized_regression_loss, boston_data)

In [None]:
# Classification datasets supplied by the local `utils` module. Their structure
# is not visible here; the plotting helpers below read a `.data` attribute
# from the dataset objects they are given.
dataset_1d = utils.get_classification_dataset_1d()
dataset_2d = utils.get_classification_dataset_2d()

def calculate_accuracy(logistic_reg, X, y):
    """Fraction of samples for which ``logistic_reg.predict(X)`` equals ``y``.

    The number of matches is counted as a Python float and divided by
    ``len(y)``, so the result is a plain Python float.
    """
    predictions = logistic_reg.predict(X)
    matches = (predictions == y).float()
    n_correct = matches.sum().item()
    return n_correct / len(y)

def plot_dataset_1d(logistic_reg, dataset_1d):
    """Plot the 1-D dataset together with the model's predicted probability.

    The first 10 entries of ``dataset_1d.data`` are drawn in purple with label
    "0" and the remaining entries in yellow with label "1", all at height 0.5.
    The curve ``logistic_reg.predict_proba`` is evaluated on 100 evenly spaced
    points in [-7.5, 15] and drawn on top.
    """
    y_level = [0.5] * 10  # both scatter calls expect exactly 10 points each
    first_group = dataset_1d.data[:10]
    second_group = dataset_1d.data[10:]
    plt.scatter(first_group, y_level, c="purple", label="0")
    plt.scatter(second_group, y_level, c="yellow", label="1")

    # Column vector of inputs, i.e. one feature per row, for predict_proba.
    grid = torch.linspace(-7.5, 15, steps=100).view(-1, 1)
    probabilities = logistic_reg.predict_proba(grid).detach().numpy()
    plt.plot(grid.numpy().ravel(), probabilities, label="p(y=1 | x)")

    plt.legend()
    plt.show()

def plot_dataset_2d(logistic_reg, dataset_2d):
    """Plot the 2-D dataset and the model's linear decision boundary.

    The first 50 rows of ``dataset_2d.data`` are drawn in purple with label
    "0" and the remaining rows in yellow with label "1". The boundary is the
    line ``weight[0] * x + weight[1] * y + bias = 0`` for x in [-4, 7].
    The figure is not shown explicitly; no ``plt.show()`` is called here.
    """
    points = dataset_2d.data
    plt.scatter(points[:50, 0], points[:50, 1], c="purple", label="0")
    plt.scatter(points[50:, 0], points[50:, 1], c="yellow", label="1")

    weight = logistic_reg.weight
    bias = logistic_reg.bias
    xs = torch.linspace(-4, 7, steps=100)
    # Solve weight[0] * x + weight[1] * y + bias = 0 for y.
    ys = (-bias - weight[0] * xs) / weight[1]

    # The legend label is Polish for "decision boundary".
    plt.plot(xs.detach().numpy(), ys.detach().numpy(), label="Granica decyzyjna")
    plt.legend()

In [None]:
class LogisticRegression:
    """Binary logistic regression trained with full-batch gradient descent.

    ``weight`` and ``bias`` are ``None`` until ``fit`` is called (or until they
    are assigned manually).
    """

    def __init__(self, input_dim):
        """Store the number of input features; parameters are created in ``fit``."""
        self.weight = None
        self.bias = None
        self.input_dim = input_dim

    def _sigmoid(self, x):
        """Element-wise logistic function ``1 / (1 + exp(-x))``."""
        return torch.sigmoid(x)

    def fit(self, X, y, lr=1e-6, num_steps=int(1e4)):
        """Randomly initialise the parameters and run ``num_steps`` gradient steps.

        Args:
            X: Feature matrix, multiplied as ``X @ weight``.
            y: Binary targets (0 or 1) matching the rows of ``X``.
            lr: Gradient-descent step size.
            num_steps: Number of full-batch updates.
        """
        self.weight = torch.randn(self.input_dim, requires_grad=True)
        self.bias = torch.randn((), requires_grad=True)
        for _ in range(num_steps):
            # Each update below creates fresh tensors that are detached from
            # autograd, so gradient tracking is re-enabled at the start of
            # every step. The fresh tensors also start with an empty `.grad`,
            # which is why gradients never accumulate across steps.
            self.weight.requires_grad_(True)
            self.bias.requires_grad_(True)

            loss_val = self.loss(X, y)
            loss_val.backward()

            w_grad = self.weight.grad
            b_grad = self.bias.grad
            with torch.no_grad():
                self.weight = self.weight - lr * w_grad
                self.bias = self.bias - lr * b_grad

    def predict_proba(self, X: torch.Tensor) -> torch.Tensor:
        """Return ``sigmoid(X @ weight + bias)`` without tracking gradients."""
        with torch.no_grad():
            return self._sigmoid(X @ self.weight + self.bias)

    def greater_than_half(self, x):
        """Threshold a single probability: ``0.0`` below 0.5, otherwise ``1.0``."""
        if x < 0.5:
            return float(0)
        return float(1)

    def predict(self, X: torch.Tensor) -> torch.FloatTensor:
        """Return hard 0/1 predictions for ``X``.

        Uses the same rule as ``greater_than_half`` (strictly below 0.5 maps to
        0, everything else to 1) but applies it as one vectorised operation
        instead of a per-element Python callback, which is slow and only works
        on CPU tensors.
        """
        proba = self.predict_proba(X)
        return torch.where(proba < 0.5, torch.zeros_like(proba), torch.ones_like(proba))

    def loss(self, X: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
        """Mean binary cross-entropy of the model on ``(X, y)``.

        Computed from the logits with ``logsigmoid`` rather than
        ``log(sigmoid(z))``: once the sigmoid saturates to exactly 0 or 1 the
        naive form evaluates ``0 * log(0)`` and the loss becomes NaN.
        """
        z = X @ self.weight + self.bias
        log_p = torch.nn.functional.logsigmoid(z)        # log(sigmoid(z))
        log_not_p = torch.nn.functional.logsigmoid(-z)   # log(1 - sigmoid(z))
        return torch.mean((y - 1) * log_not_p - y * log_p)


# Hand the class to the external `checker` module (its validation logic is not
# visible in this notebook).
checker.check_04_logistic_reg(LogisticRegression)

In [None]:
# Double-precision tensors fed to the gradcheck calls for the custom autograd
# functions defined in the following cells.
input = 3 * torch.randn(30, 20, dtype=torch.double, requires_grad=True)
a = torch.randn(20, 30, requires_grad=True).to(torch.double) * 2 - 5
b = 6 + torch.randn(20, 30, requires_grad=True).to(torch.double)


# Predictions are drawn uniformly from [0, 1); labels are Bernoulli(0.7) samples.
preds = torch.rand(30, requires_grad=True).to(torch.double)
labels_dist = torch.distributions.Bernoulli(probs=0.7)
labels = labels_dist.sample(torch.Size([30])).to(torch.double)

In [None]:
class MyAdd(torch.autograd.Function):
    """Custom autograd implementation of element-wise addition ``a + b``."""

    @staticmethod
    def forward(ctx, a, b):
        """Return ``a + b``.

        Only the operand shapes are remembered: the derivative of a sum does
        not depend on the operand values, so no tensors need to be saved.
        """
        ctx.a_shape = a.shape
        ctx.b_shape = b.shape
        return a + b

    @staticmethod
    def backward(ctx, grad_output):
        """Pass the incoming gradient through to both operands.

        d(a + b)/da = d(a + b)/db = 1. If an operand was broadcast in the
        forward pass, its gradient is summed back to the operand's own shape.
        """
        grad_a = grad_output.sum_to_size(ctx.a_shape)
        grad_b = grad_output.sum_to_size(ctx.b_shape)
        return grad_a, grad_b

# Expose the custom op as a plain callable and verify its backward pass:
# gradcheck compares the analytical gradients against finite-difference
# estimates, here with loosened step size and tolerances.
add_fn = MyAdd.apply
torch.autograd.gradcheck(add_fn, (a, b), eps=1e-3, atol=1e-2, rtol=1e-2)

In [None]:
class MyDiv(torch.autograd.Function):
    """Custom autograd implementation of element-wise division ``a / b``."""

    @staticmethod
    def forward(ctx, a, b):
        """Return ``a / b`` and keep both operands for the backward pass."""
        ctx.save_for_backward(a, b)
        return a / b

    @staticmethod
    def backward(ctx, grad_output):
        """Apply the quotient rule.

        d(a / b)/da = 1 / b and d(a / b)/db = -a / b**2. If an operand was
        broadcast in the forward pass, its gradient is summed back to the
        operand's own shape.
        """
        a, b = ctx.saved_tensors
        grad_a = grad_output * (1 / b)
        grad_b = grad_output * (-a / (b ** 2))
        return grad_a.sum_to_size(a.shape), grad_b.sum_to_size(b.shape)

# Expose the custom op as a plain callable and verify its backward pass:
# gradcheck compares the analytical gradients against finite-difference
# estimates, here with loosened step size and tolerances.
div_fn = MyDiv.apply
torch.autograd.gradcheck(div_fn, (a, b), eps=1e-3, atol=1e-2, rtol=1e-2)

In [None]:
class MySigmoid(torch.autograd.Function):
    """Custom autograd implementation of the logistic sigmoid."""

    @staticmethod
    def forward(ctx, input_):
        """Return ``1 / (1 + exp(-input_))``.

        The output, not the input, is saved: the derivative can be written
        purely in terms of the output, so the backward pass does not have to
        evaluate the exponential again.
        """
        output = 1 / (1 + torch.exp(-input_))
        ctx.save_for_backward(output)
        return output

    @staticmethod
    def backward(ctx, grad_output):
        """Chain rule with ``sigmoid'(x) = sigmoid(x) * (1 - sigmoid(x))``."""
        output, = ctx.saved_tensors
        return grad_output * output * (1 - output)


# Expose the custom op as a plain callable and verify its backward pass:
# gradcheck compares the analytical gradient against a finite-difference
# estimate, using its default step size and tolerances.
sigmoid_fn = MySigmoid.apply
torch.autograd.gradcheck(sigmoid_fn, input)

In [None]:
class MyBinaryCrossEntropy(torch.autograd.Function):
    """Custom autograd implementation of mean binary cross-entropy."""

    @staticmethod
    def forward(ctx, preds, labels, bias=None):
        """Return ``mean(-(labels * log(preds) + (1 - labels) * log(1 - preds)))``.

        ``preds`` and ``labels`` are expected to have the same shape.
        ``bias`` is accepted for interface compatibility and is not used.
        """
        ctx.save_for_backward(preds, labels)
        return torch.mean((labels - 1) * torch.log(1 - preds) - labels * torch.log(preds))

    @staticmethod
    def backward(ctx, grad_output):
        """Gradient with respect to ``preds``; no gradient for ``labels`` or ``bias``.

        For one element, d/dp [-(l * log(p) + (1 - l) * log(1 - p))]
        = (1 - l) / (1 - p) - l / p. The forward pass averages over all
        elements, so each element's derivative is additionally scaled by
        1 / (number of elements) - that is where the division comes from.
        """
        preds, labels = ctx.saved_tensors
        per_element = (1 - labels) / (1 - preds) - labels / preds
        grad_preds = grad_output * per_element / preds.numel()
        # One entry per forward input (preds, labels, bias). Trailing None
        # entries are dropped by autograd when `bias` was not passed to apply.
        return grad_preds, None, None

# Expose the custom loss as a plain callable and verify its backward pass:
# gradcheck compares the analytical gradient against a finite-difference
# estimate, here with loosened step size and tolerances.
bce_fn = MyBinaryCrossEntropy.apply
torch.autograd.gradcheck(bce_fn, (preds, labels), eps=1e-3, atol=1e-2, rtol=1e-1)