# Mathematics of Machine Learning

## Programming tasks: Examples for Logistic Regression

### (0) Preparation

Import required modules

In [None]:
import torch
import torch.nn as nn

import numpy as np
import matplotlib.pyplot as plt
import scipy.optimize as opt 

Generate the training data

In [None]:
# Seed NumPy's global generator so that Restart & Run All reproduces the same
# dataset; every later np.random call in the notebook draws from this generator.
SEED = 0
np.random.seed(SEED)

# Size of the dataset
m = 50
# Inputs: one column per sample, both coordinates drawn uniformly from [-3, 3)
x = np.random.uniform(low=-3, high=3, size=(2, m))

In [None]:
# Ground-truth weight vector of the data-generating model, stored as a 2x1 column
w_true = np.array([1, 2]).reshape(2, 1)
print(w_true)

In [None]:
# P(y = +1 | x) under the Bernoulli model with h_{w_true,0}:
# the sigmoid of <w_true, x_i>, one entry per sample (shape (1, m))
p = 1.0 / (1.0 + np.exp(-(w_true.T @ x)))

In [None]:
# Draw the random labels: +1 with probability p, otherwise -1
y = np.where(np.random.uniform(low=0, high=1, size=(1, m)) <= p, 1, -1)

### (1) Graphical visualization of the training data

First plot the true hyperplane for x in [-3,3].

Then add the labeled training points to the plot.

In [None]:
# Indices of the samples labeled +1
inds = np.flatnonzero(y[0] == 1).tolist()
print(inds)

In [None]:
# Indices of the samples labeled -1
indm = np.flatnonzero(y[0] == -1).tolist()
print(indm)

In [None]:
# Training data together with the true separating line for x1 in [-3, 3]
fig, ax = plt.subplots()
# Decision boundary of the true model: w1*x1 + w2*x2 = 0  <=>  x2 = -(w1/w2)*x1
ax.plot([-3, 3], -w_true[0] / w_true[1] * np.array([-3, 3]), "--", label="true hyperplane")
# Label +1: blue plus signs, label -1: red diamonds
ax.scatter(x[0, inds], x[1, inds], c="b", marker="+", linewidths=2)
ax.scatter(x[0, indm], x[1, indm], c="r", marker="d", linewidths=2)
ax.legend()
ax.set(xlabel="x1", ylabel="x2", xlim=(-3, 3), ylim=(-3, 3))
ax.axis('equal')
fig.tight_layout()

### (2) Logistic regression

Empirical risk function

In [None]:
def RS(w, data=None, labels=None):
    """Empirical logistic risk R_S(h_w) = mean_i log(1 + exp(-y_i * <w, x_i>)).

    Python port of the MATLAB expression
    ``RS = @(w) mean(log(1 + exp(-y .* (w' * x))), 2)``.

    Parameters
    ----------
    w : ndarray
        Weight vector(s) with one weight per feature. A matrix with one
        weight vector per column is evaluated for all columns at once.
    data : ndarray, optional
        Inputs with one sample per column. Defaults to the notebook's ``x``.
    labels : ndarray, optional
        Labels in {-1, +1} as a row vector. Defaults to the notebook's ``y``.

    Returns
    -------
    ndarray
        The empirical risk of each weight vector (mean taken along axis 1).
    """
    if data is None:
        data = x
    if labels is None:
        labels = y
    margins = np.multiply(labels, np.dot(np.asarray(w).T, data))
    # log(1 + exp(-t)) == logaddexp(0, -t); logaddexp does not overflow for
    # large negative margins, where exp(-t) alone would turn into inf.
    return np.mean(np.logaddexp(0, -margins), axis=1)

NOTES:
<br> 1) w is either a column vector with two rows or a matrix with two rows and several columns, one column per weight vector w.
<br> 2) Taking the mean along the second axis (NumPy: `axis=1`, MATLAB: `mean(., 2)`) averages over the training samples, i.e. it yields one mean value per row.
<br> 3) The function should accept several weight vectors at once, so that the result is a vector containing the empirical risk of each individual w.

Plot this function

In [None]:
# Discretization of the w-values per axis: 1501 equally spaced points on
# [-5, 10] (step 0.01), stored as a column vector.
# linspace fixes the number of points and hits both end points exactly, which
# arange with a fractional step and a padded stop value does not guarantee.
ws = np.linspace(-5, 10, 1501).reshape(-1, 1)

In [None]:
# Create discretization grid
WW1, WW2 = np.meshgrid(ws, ws)
# Flatten in row-major (C) order. The contour cells fold the risk values back
# onto the grid with np.reshape in its default (row-major) order, so the
# flattening has to use the same order; flattening column-major (order='F')
# would transpose the plotted surface relative to WW1/WW2.
WW1_ravel = WW1.ravel()
WW2_ravel = WW2.ravel()
print(WW1_ravel)
print(WW2_ravel)
print(len(WW2_ravel))

In [None]:
# Evaluate RS at the grid points
# Stack the flattened coordinates into a 2 x N matrix (one weight vector per
# column). It is built once and reused for the evaluation and the printout
# instead of being concatenated twice.
W_grid = np.vstack((WW1_ravel, WW2_ravel))
RS_Ws = RS(W_grid)
print(W_grid)
print(RS_Ws)

In [None]:
# Contour plot of the log empirical risk over the weight grid (25 levels)
fig, ax = plt.subplots()

# Fold the flat risk vector back onto the len(ws) x len(ws) grid. reshape
# fills row by row here, so this must match the order used to flatten the grid.
CS = ax.contour(WW1, WW2, np.log(RS_Ws).reshape(len(ws), len(ws)), 25)
ax.clabel(CS, inline=True, fontsize=10)

ax.set(title='log R_S(h_w)', xlabel='w_1', ylabel='w_2')

fig.colorbar(CS)
plt.show()

In [None]:
# Numerical calculation of the ERM parameters with scipy's downhill simplex
# search, started at w = 0.
# With full_output=True, fmin returns (xopt, fopt, iter, funcalls, warnflag).
# The iteration count is stored as n_iter so that Python's builtin iter() is
# not shadowed for the rest of the notebook session.
w, RS_min, n_iter, funcalls, warnflag = opt.fmin(
    RS, np.zeros((2, 1)), maxiter=1000, full_output=True, disp=False
)

print(w)
print(RS_min)
print(n_iter)
print(funcalls)
print(warnflag)

In [None]:
# Contour plot of the log empirical risk (10 levels) with the learned weights
fig, ax = plt.subplots()

# Fold the flat risk vector back onto the len(ws) x len(ws) grid. reshape
# fills row by row here, so this must match the order used to flatten the grid.
CS = ax.contour(WW1, WW2, np.log(RS_Ws).reshape(len(ws), len(ws)), 10)
ax.clabel(CS, inline=True, fontsize=10)

# Learned ERM parameters as a red dot
ax.scatter(w[0], w[1], c="r")
# To also mark the true parameters, uncomment:
# ax.scatter(w_true[0], w_true[1], marker="+")

ax.set(title='log R_S(h_w)', xlabel='w_1', ylabel='w_2')

fig.colorbar(CS)
plt.show()

### Plot hypotheses and data

In [None]:
# True and learned decision boundaries (x2 = -(w1/w2) * x1) over the training data
fig, ax = plt.subplots()
ax.plot([-3, 3], -w_true[0] / w_true[1] * np.array([-3, 3]), "--", label="True hypothesis")
ax.plot([-3, 3], -w[0] / w[1] * np.array([-3, 3]), c="g", label="learned hypothesis")
# Label +1: blue plus signs, label -1: red diamonds
ax.scatter(x[0, inds], x[1, inds], c="b", marker="+", linewidths=2)
ax.scatter(x[0, indm], x[1, indm], c="r", marker="d", linewidths=2)
ax.legend(loc="upper right")
ax.set(xlabel="x1", ylabel="x2", xlim=(-3, 3), ylim=(-3, 3))
ax.axis('equal')
fig.tight_layout()

### (3) Logistic regression in Pytorch

In [None]:
class LogisticRegression(nn.Module):
    """Logistic regression model: a single linear layer followed by a sigmoid."""

    def __init__(self, input_dim, output_dim):
        """Create the linear layer mapping input_dim features to output_dim outputs."""
        super().__init__()
        self.linear = nn.Linear(input_dim, output_dim)

    def forward(self, x):
        """Return sigmoid(linear(x)), i.e. values in (0, 1) for every input row."""
        return torch.sigmoid(self.linear(x))

In [None]:
# Convert the NumPy data to float32 tensors for PyTorch.
# NOTE: x and y are rebound to tensors here, so the NumPy cells above (RS,
# plots) need the data-generation cells re-run before they can be used again.
# The conversion itself is safe to re-run: a second execution neither
# transposes x again nor fails on inputs that are already tensors.
if not torch.is_tensor(x):
    # (2, m) array -> (m, 2) tensor with one sample per row
    x = torch.from_numpy(x.T.astype(np.float32))
print(x.size())

# The binary cross entropy loss expects targets in {0, 1}:
# label +1 -> 1, every other label (-1) -> 0. The mapping is vectorized and
# leaves already converted 0/1 targets unchanged.
y = torch.from_numpy((np.asarray(y) == 1).astype(np.float32))

In [None]:
# assigning some hyper-parameters:
epochs = 100  # number of optimizer passes over the entire training dataset
input_dim = 2  # two features per sample
output_dim = 1  # single output

# Seed PyTorch so that the random initialization of the linear layer, and with
# it the training result, is reproducible across kernel restarts.
torch.manual_seed(0)
model = LogisticRegression(input_dim, output_dim)

In [None]:
# Loss: binary cross entropy between predicted probabilities and 0/1 targets
criterion = nn.BCELoss()

In [None]:
# Quasi-Newton optimizer (L-BFGS) over all model parameters, default settings
optimizer = torch.optim.LBFGS(params=model.parameters())

In [None]:
def closure():
    """Recompute the full-batch loss and its gradients for the optimizer."""
    optimizer.zero_grad()  # clear the gradients stored from the previous evaluation
    outputs = model(x)
    loss = criterion(outputs.squeeze(), y[0])
    loss.backward()
    return loss


# optimizer.step receives the closure so that it can re-evaluate the loss itself
for epoch in range(epochs):
    optimizer.step(closure)  # updates weights and bias

In [None]:
# Full-batch gradient descent with learning rate 0.01.
# The model is not re-created in this cell, so the updates start from whatever
# parameters the model currently holds.
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
for epoch in range(epochs):
    optimizer.zero_grad()  # clear the gradients stored from the previous step
    outputs = model(x)
    loss = criterion(outputs.squeeze(), y[0])
    loss.backward()
    optimizer.step()  # updates weights and bias

# Final loss and learned parameters
print(loss.item())
print(list(model.parameters()))