In [None]:
%matplotlib inline
from IPython.display import clear_output
import matplotlib.pyplot as plt
import numpy as np
import time
from sklearn.datasets import make_blobs, make_moons

In [None]:
def add_intercept(X):
    """Return a copy of ``X`` with a leading column of ones.

    The extra column lets the intercept be handled as an ordinary weight:
    multiplying the result by ``theta`` adds ``theta[0]`` to every prediction.

    Parameters
    ----------
    X : numpy.ndarray of shape (n_samples, n_features)

    Returns
    -------
    numpy.ndarray of shape (n_samples, n_features + 1)
    """
    n_samples = X.shape[0]
    ones_column = np.ones((n_samples, 1))
    return np.hstack((ones_column, X))

def vizualize_linear_regression(X, y, theta):
    """Scatter the samples and overlay the line predicted by ``theta``.

    Parameters
    ----------
    X : numpy.ndarray
        Design matrix whose column 0 is the intercept column and whose
        column 1 is the single feature being plotted (this is what
        ``fit_linear`` passes after calling ``add_intercept``).
    y : numpy.ndarray
        Target values, one per row of ``X``.
    theta : numpy.ndarray
        Weights handed to ``linear_predict`` together with an
        intercept-augmented grid of feature values.
    """
    feature = X[:, 1]
    plt.scatter(feature, y, alpha=0.5)
    # Build the grid from the feature column only. Taking min/max over the
    # whole matrix would mix in the intercept column of ones and stretch (or
    # truncate) the line to a range unrelated to the plotted data.
    x_axis = np.arange(np.min(feature), np.max(feature), 0.01).reshape(-1, 1)
    y_axis = linear_predict(add_intercept(x_axis), theta)
    plt.plot(x_axis, y_axis, lw=2, c="r")
    plt.show()

def vizualize_logistic_regression(X, y, theta, treshold=0.5):
    """Scatter two-feature data and draw the decision boundary for ``theta``.

    Parameters
    ----------
    X : numpy.ndarray
        Design matrix with the intercept in column 0; column 1 is drawn on the
        horizontal axis and column 2 on the vertical axis.
    y : numpy.ndarray
        Class labels, used only to colour the points.
    theta : numpy.ndarray
        Three weights ``(theta0, theta1, theta2)`` matching the columns of ``X``.
    treshold : float, default 0.5
        Probability level whose contour is drawn; must lie strictly
        between 0 and 1.
    """
    plt.scatter(X[:, 1], X[:, 2], c=y, alpha=0.5)
    # Small offset that keeps the division finite when theta[1] is exactly 0.
    eps = 0.0001
    # sigmoid(z) == treshold  <=>  z == log(treshold / (1 - treshold))
    boundary_z = -np.log((1 - treshold) / treshold)
    # Sample the vertical axis (feature 2) and solve the boundary equation
    #   theta0 + theta1 * x1 + theta2 * x2 = boundary_z
    # for the horizontal coordinate x1. The vertical samples must be weighted
    # by theta[2] and the result divided by theta[1]; using the weights the
    # other way round draws a line that does not match the scatter axes.
    y_axis = np.arange(np.min(X[:, 2]), np.max(X[:, 2]), 0.01).reshape(-1, 1)
    x_axis = (boundary_z - theta[0] - theta[2] * y_axis) / (theta[1] + eps)
    plt.plot(x_axis, y_axis, lw=2, c="r")
    plt.show()
    
def data_generator_linear_regression(n=100, intercept=0, coef=np.array([1]), var=0.1):
    """Generate noisy samples of a linear model with uniform random features.

    Parameters
    ----------
    n : int
        Number of samples.
    intercept : float
        Constant term of the generating model.
    coef : array-like
        One coefficient per feature; its length sets the number of features.
    var : float
        Passed as the ``scale`` of ``np.random.normal``, i.e. it is the
        standard deviation of the additive noise despite its name.

    Returns
    -------
    X : numpy.ndarray of shape (n, n_features), values drawn from [0, 1)
    y : numpy.ndarray of shape (n,)
    """
    # Work in floating point. The previous np.insert(coef, 0, intercept) cast
    # the intercept to coef's dtype, so with the default integer coef a
    # fractional intercept such as 0.5 was silently truncated to 0.
    coef = np.asarray(coef, dtype=float).ravel()
    X = np.random.rand(n, coef.shape[0])
    noise = np.random.normal(0, var, n)
    y = intercept + X.dot(coef) + noise
    return X, y

def data_generator_logistic_regression(n=100, data="blobs"):
    """Generate a two-class toy dataset with scikit-learn.

    Parameters
    ----------
    n : int
        Number of samples to generate.
    data : {"blobs", "moons"}
        ``"blobs"`` uses ``make_blobs`` with two centers, ``"moons"`` uses
        ``make_moons``.

    Returns
    -------
    X, y
        Feature matrix and labels as returned by the scikit-learn generator.

    Raises
    ------
    ValueError
        If ``data`` is not one of the supported dataset names.
    """
    if data == "blobs":
        X, y = make_blobs(n, centers=2)
    elif data == "moons":
        X, y = make_moons(n)
    else:
        # Fail with a clear message instead of falling through to an
        # UnboundLocalError on the return statement.
        raise ValueError(
            "unknown data %r: expected 'blobs' or 'moons'" % (data,)
        )
    return X, y

# Linear regression

(2 points) Implement predictor for linear regression:
$$ h_\theta (X) = \theta^TX $$

In [None]:
def linear_predict(X, theta):
    # Exercise stub: return the linear hypothesis h_theta(X) = theta^T X.
    # The plotting helper in this notebook calls it with a matrix that already
    # has the intercept column prepended by add_intercept.
    # your code here

(3 points) Implement linear cost function:
$$ J(\theta) = \dfrac{1}{2n}\sum_{i=1}^{n}(h_\theta (X^{(i)})-y^{(i)})^2 $$

In [None]:
def linear_cost(X, y, theta):
    # Exercise stub: return the linear-regression cost J(theta) for data X and
    # targets y. fit_linear calls it with the intercept-augmented X, stores the
    # results in a list and compares consecutive values against epsilon, so a
    # single number per call is presumably expected.
    # your code here

(3 points) Implement gradient function for linear regression:
$$\frac{\partial}{\partial \theta_j} J(\theta) = \dfrac{1}{n}\sum_{i=1}^{n}\left( h_\theta(x^{(i)}) - y^{(i)} \right) x_j^{(i)} $$

In [None]:
def linear_gradient(X, y, theta):
    # Exercise stub: return the gradient of the linear cost with respect to
    # theta. Presumably this feeds the weight update inside fit_linear.
    # your code here

(2 points) Implement weights initialization and weights update:
$$ \theta = \theta - \alpha \nabla_\theta J(\theta) $$

In [None]:
def fit_linear(X, y, lr=2, max_iter=10, epsilon=0.01, visualize=False):
    # Training-loop template for linear regression. Runs at most max_iter
    # weight updates, stops early once the cost changes by less than epsilon
    # between two consecutive iterations, prints progress and returns theta.
    # NOTE(review): lr is presumably the step size alpha of the update rule.

    # Prepend the column of ones so the intercept is learned as theta[0].
    X = add_intercept(X)
    # Initialize the weights vector (randomly or with ones), one weight per
    # column of X. Use a float array: the in-place `-=` below cannot store a
    # float update in an integer array.
    theta = # your code here
    cost = linear_cost(X, y, theta)
    # History of costs; the last two entries drive the stopping test.
    cost_list = [cost]
    for i in range(max_iter):
        # update the weights based on the gradient
        theta -= # your code here
        
        cost = linear_cost(X, y, theta)
        cost_list.append(cost)
        print(cost)
        if visualize == True:
            # Short pause and output reset so successive plots replace each
            # other in the cell output instead of stacking up.
            time.sleep(0.1)
            clear_output(wait=True)
            # X already contains the intercept column at this point.
            vizualize_linear_regression(X, y, theta)
        
        # Stop when the cost changed by less than epsilon in this iteration.
        if np.abs(cost_list[-1] - cost_list[-2]) < epsilon:
            break 
    print("theta", theta)
    print("cost", cost)
    return theta

(2 points) Try different values of lr and max_iter. Which values work best?  
Experiment with different data generators.  
Write a short summary of your experiments.  
(For one-dimensional data you can use the visualization.)

In [None]:
X, y = data_generator_linear_regression()
# The trainer defined in this notebook is fit_linear; there is no `fit`.
fit_linear(X, y)

(4 points) Try to find the coefficients using only linear algebra (a closed-form solution) instead of iterative optimization:

In [None]:
def least_squere(X, y):
    # Exercise stub: compute the regression coefficients for X and y directly
    # with linear algebra rather than with the iterative fit_linear loop.
    # your code here

(2 points) Compare the precision of the results and the execution time of the least-squares and optimization solutions.  
Tip: Try to use *%timeit* from the IPython magics

# Logistic regression 

(1 point) Implement sigmoid function:
$$ \sigma (z) =  \frac{\mathrm{1} }{\mathrm{1} + e^{-z}}  $$ 


In [None]:
def sigmoid(z):
    # Exercise stub: return the logistic function sigma(z) = 1 / (1 + e^(-z)).
    # The plotting helper in this notebook is meant to call it on a NumPy
    # array, so the implementation should work element-wise.
    # your code here

Visualize the sigmoid to check your code:

In [None]:
def vizualize_sigmoid(range_min=-10, range_max=10):
    """Plot ``sigmoid`` on 100 evenly spaced points in [range_min, range_max].

    Parameters
    ----------
    range_min, range_max : float
        Left and right ends of the plotted interval.
    """
    # Use the same names for the grid and the curve that plt.plot receives;
    # the previous version assigned `x`/`y` but then read the undefined
    # `x_axis`/`y_axis`, which raised NameError.
    x_axis = np.linspace(range_min, range_max, 100)
    y_axis = sigmoid(x_axis)
    plt.plot(x_axis, y_axis)
    plt.show()

vizualize_sigmoid()

(1 point) Implement predictor for logistic regression:
$$ h_\theta (X) = \sigma (\theta^TX) $$

In [None]:
def logistic_predict(X, theta):
    # Exercise stub: return the logistic hypothesis
    # h_theta(X) = sigmoid(theta^T X) for the samples in X.
    # your code here

(2 points) Implement cross entropy cost function:
$$ J(\theta) = -\dfrac{1}{n}\sum_{i=1}^{n}\left(y^{(i)}\log(h_\theta (X^{(i)}))+(1-y^{(i)})\log(1-h_\theta (X^{(i)}))\right) $$

In [None]:
def logistic_cost(X, y, theta):
    # Exercise stub: return the cross-entropy cost J(theta) of the logistic
    # model for data X and labels y.
    # your code here

(4 points) Derive the gradient of the cross-entropy cost and implement the gradient step

In [None]:
def logistic_gradient(X, y, theta):
    # Exercise stub: return the gradient of the cross-entropy cost with
    # respect to theta (to be derived as part of the exercise).
    # your code here

(2 points) Similarly to previous example implement training procedure

In [None]:
def fit_logistic(X, y, lr=2, max_iter=10, epsilon=0.01, visualize=False):
    # Exercise stub: training procedure for logistic regression. The signature
    # mirrors fit_linear, so the same structure (initialize theta, iterate
    # gradient updates, stop on small cost change) is presumably intended.
    # your code here

(2 points) Play with different values of lr and max_iter, and try different datasets.  
Does the algorithm always find the optimal separating line?   
What is the problem? What should the optimal line look like, in your opinion?

In [None]:
# Generate the default ("blobs") two-class dataset and train on it with the
# default hyper-parameters of fit_logistic.
X, y = data_generator_logistic_regression()
fit_logistic(X, y)