# Quaternions SGD

In [None]:
import numpy as np  

import sys
sys.path.append("..") 
from cryoem.quaternions import Q, quaternion2point, d_Q

 ### Example of SGD

In [None]:
def stocashtic_gradient_descent(X, y, theta, learning_rate=0.01, iterations=10):
    '''
    Fit the weights of a linear model with stochastic gradient descent.

    X             = Matrix of X with added bias units, one sample per row
    y             = Vector of Y, indexable by row and supporting .reshape
    theta         = Initial vector of thetas, e.g. np.random.randn(j,1)
    learning_rate = Step size; it is additionally scaled by 1/m on every update
    iterations    = no of iterations (epochs); each epoch performs m updates

    Every update draws one sample index uniformly at random (with replacement),
    so a given epoch may visit some samples several times and others not at all.

    Returns the final theta vector and an array with one entry per iteration
    holding the sum of the per-sample costs measured right after each update.
    https://towardsdatascience.com/gradient-descent-in-python-a0d07285742f
    '''
    m = len(y)
    cost_history = np.zeros(iterations)

    for it in range(iterations):
        # Accumulates the post-update cost of every sample drawn in this epoch.
        # It must only be reset here, never inside the inner loop.
        cost = 0.0
        for _ in range(m):
            rand_ind = np.random.randint(0, m)
            X_i = X[rand_ind, :].reshape(1, X.shape[1])
            y_i = y[rand_ind].reshape(1, 1)
            prediction = np.dot(X_i, theta)

            # Gradient of the squared error for the single drawn sample.
            theta = theta - (1/m)*learning_rate*(X_i.T.dot(prediction - y_i))
            cost += cal_cost(theta, X_i, y_i)
        cost_history[it] = cost

    return theta, cost_history


def cal_cost(theta, X, y):
    '''
    Calculates the halved mean squared error of the linear model X.dot(theta):

        cost = 1 / (2 * m) * sum((X.dot(theta) - y) ** 2),   m = len(y)

    theta = Vector of thetas, shape (j, 1)
    X     = Rows of X's, e.g. np.zeros((2,j))
    y     = Actual y's, e.g. np.zeros((2,1))

    where:
        j is the no of features

    Returns the cost as a float; 0.0 when y is empty.
    '''
    m = len(y)
    if m == 0:
        # Nothing to average over; avoid dividing by zero.
        return 0.0

    residuals = X.dot(theta) - y
    # Parenthesise the denominator: `1/2*m` would evaluate to m/2, not 1/(2m).
    return 1 / (2 * m) * np.sum(np.square(residuals))

In [None]:
# Synthetic regression data: 100 samples with x drawn by np.random.rand and
# scaled by 2, and targets y = 4 + 3*x plus np.random.randn noise.
X = np.random.rand(100, 1) * 2
y = 3 * X + 4 + np.random.randn(100, 1)

In [None]:
# Hyper-parameters for the example run.
lr = 0.5
n_iter = 50

# Random starting weights: one for the bias column, one for the single feature.
theta = np.random.randn(2, 1)

# Prepend a column of ones so that theta[0] acts as the intercept.
X_b = np.hstack([np.ones((len(X), 1)), X])
theta, cost_history = stocashtic_gradient_descent(
    X_b, y, theta, learning_rate=lr, iterations=n_iter
)

print('Theta0:          {:0.3f},\nTheta1:          {:0.3f}'.format(theta[0, 0], theta[1, 0]))
print('Final cost/MSE:  {:0.3f}'.format(cost_history[-1]))

# Brute-Force

In [None]:
import itertools

# Reference angle triples (i and j) whose quaternion distance the search
# tries to reproduce.
alpha_i, beta_i, gamma_i, alpha_j, beta_j, gamma_j = 0,0,3,0,0,1

step = 1
min_cost = np.inf
best_solution = []

# NOTE(review): `Quaternion` is only imported (from pyquaternion) in a later
# cell of this notebook; that import has to run before this cell.
# The target distance does not depend on the candidate angles, so it is
# computed once instead of once per grid point.
target_distance = Quaternion.distance(Q([alpha_i, beta_i, gamma_i]), Q([alpha_j, beta_j, gamma_j]))

# Same grid for each of the six angles; itertools.product walks the grid in
# the same order as six nested loops (last angle varies fastest).
angle_grid = np.arange(0, 2*np.pi, step)
for _alpha_i, _beta_i, _gamma_i, _alpha_j, _beta_j, _gamma_j in itertools.product(angle_grid, repeat=6):
    candidate_distance = Quaternion.distance(Q([_alpha_i, _beta_i, _gamma_i]), Q([_alpha_j, _beta_j, _gamma_j]))
    cost = np.power(np.abs(candidate_distance - target_distance), 2)
    # Strict comparison keeps the first grid point that reaches the minimum.
    if cost < min_cost:
        min_cost = cost
        best_solution = [_alpha_i, _beta_i, _gamma_i, _alpha_j, _beta_j, _gamma_j]
        

In [None]:
# Show the lowest cost found by the brute-force search and the six angles that produced it.
min_cost, best_solution

### SGD implementation on quaternions

In [None]:
from pyquaternion import Quaternion
import sympy as sp

In [None]:
# Show which sympy version is installed in this environment.
sp.__version__

In [None]:
from sympy import Symbol, Matrix, Function, simplify
from sympy.tensor.array import derive_by_array

# With sigma = 0.5 the weight 1/(2*sigma**2) used below evaluates to 2.0.
sigma=0.5

eta = Symbol('eta')
xi = Symbol('xi')

# 2x1 column matrix holding (xi, eta).
x = Matrix([[xi],[eta]])

# Three undefined symbolic functions h_1..h_3, each applied to (xi, eta),
# and three plain symbols z_1..z_3.
h = [Function('h_'+str(i+1))(x[0],x[1]) for i in range(3)]
z = [Symbol('z_'+str(i+1)) for i in range(3)]

# lamb = sum_i (z_i - h_i)**2 / (2*sigma**2)
lamb = 0
for i in range(3):
    lamb += 1/(2*sigma**2)*(z[i]-h[i])**2
# The simplified expression is not assigned, so `lamb` itself is left unchanged.
simplify(lamb)

# Derivatives of lamb with respect to eta and xi, in that order.
derive_by_array(lamb, (eta, xi))

In [None]:
def loss_function_full(angles, angles_true):
    """Sum squared differences between estimated and true pairwise distances.

    Each entry of ``angles`` and ``angles_true`` is converted with ``Q``.
    ``d_Q`` is then evaluated for every ordered pair of estimated values and
    for every ordered pair of true values, and the squared absolute
    difference of every (estimated pair, true pair) combination is added up.

    Parameters:
        angles: sequence of estimated angle entries, one per item.
        angles_true: sequence of true angle entries; must have the same
            length as ``angles``.

    Returns:
        The accumulated loss, or ``None`` when the two inputs differ in
        length. For empty inputs the result is ``0``.

    Side effects:
        Prints ``angles`` and the converted estimated values (debug output).

    NOTE(review): every ``Q(...)`` result is stored in a single float slot of
    an ``(n, 1)`` array, which only works if ``Q`` returns something that fits
    into one float -- confirm against ``cryoem.quaternions.Q``.
    """
    print(angles)
    if len(angles) != len(angles_true):
        return None

    n = len(angles)

    Qs = np.zeros((n, 1))
    for i, a in enumerate(angles):
        Qs[i] = Q(a)

    Qs_true = np.zeros((n, 1))
    for j, b in enumerate(angles_true):
        Qs_true[j] = Q(b)

    print(Qs)

    # Each pairwise distance depends on only one pair, so compute the n*n
    # estimated and n*n true distances once instead of inside the n**4 loop.
    estimated_distances = [d_Q(q1, q2) for q1 in Qs for q2 in Qs]
    true_distances = [d_Q(q1_true, q2_true) for q1_true in Qs_true for q2_true in Qs_true]

    # Same accumulation order as four nested loops over q1, q2, q1_true, q2_true.
    loss = 0
    for estimated in estimated_distances:
        for expected in true_distances:
            loss += np.sum(np.power(np.abs(estimated - expected), 2))

    return loss

In [None]:
# Try the full loss on two hand-picked angle rows per argument.
loss_function_full(
    angles=np.array([[0, 0, 0], [0, 0, 10]]),
    angles_true=np.array([[0, 0, 5], [0, 0, 10]]),
)

In [None]:
def loss_function(angles1, angles2, angles1_true, angles2_true):
    """Squared mismatch between an estimated and a true pairwise distance.

    All four angle arguments are converted with ``Q``. ``d_Q`` is evaluated
    once on the estimated pair and once on the true pair, and the summed
    squared absolute difference of the two results is returned.
    """
    # Convert the ground-truth pair first, then the estimated pair.
    true_first = Q(angles1_true)
    true_second = Q(angles2_true)

    estimated_first = Q(angles1)
    estimated_second = Q(angles2)

    estimated_distance = d_Q(estimated_first, estimated_second)
    true_distance = d_Q(true_first, true_second)

    mismatch = np.abs(estimated_distance - true_distance)
    return np.sum(np.power(mismatch, 2))

In [None]:
# Evaluate the pairwise loss on one hand-picked example: the second angle
# triple is identical in the estimate and the truth, the first one differs.
loss_function(angles1=[0,0,0], angles2=[0,0,10], 
              angles1_true=[0,0,5], angles2_true=[0,0,10])

In [None]:
# Cell-level run of the SGD loop (the same loop is wrapped in
# stocashtic_gradient_descent). At top level there is no enclosing function,
# so the inputs are bound explicitly and the result is displayed rather than
# returned.
# NOTE(review): `n_iter`, `lr`, `X_b`, `y` and `theta` are taken from the
# linear-regression example cells above -- confirm this is the intended data.
iterations = n_iter
learning_rate = lr

m = len(y)
cost_history = np.zeros(iterations)


for it in range(iterations):
    cost = 0.0
    for i in range(m):
        rand_ind = np.random.randint(0,m)
        # Use the bias-augmented matrix so the row matches theta's (2, 1) shape.
        X_i = X_b[rand_ind,:].reshape(1,X_b.shape[1])
        y_i = y[rand_ind].reshape(1,1)
        prediction = np.dot(X_i,theta)

        theta = theta -(1/m)*learning_rate*( X_i.T.dot((prediction - y_i)))
        cost += cal_cost(theta, X_i, y_i)
    cost_history[it]  = cost

# `return` is only valid inside a function; display the result instead.
theta, cost_history

In [None]:
def stocashtic_gradient_descent(X, y, theta, learning_rate=0.01, iterations=10):
    '''
    Stochastic gradient descent for a linear model.

    X             = Matrix of X with added bias units, one sample per row
    y             = Vector of Y
    theta         = Starting vector of thetas, e.g. np.random.randn(j,1)
    learning_rate = Step size (scaled by 1/m on each update)
    iterations    = no of iterations; each one performs len(y) random updates

    Returns the final theta vector and an array holding, for every iteration,
    the sum of the per-sample costs measured after each update.
    https://towardsdatascience.com/gradient-descent-in-python-a0d07285742f
    '''
    n_samples = len(y)
    cost_history = np.zeros(iterations)

    for epoch in range(iterations):
        epoch_cost = 0.0
        for _ in range(n_samples):
            # Pick one sample uniformly at random (with replacement).
            pick = np.random.randint(n_samples)
            sample = X[pick, :].reshape(1, X.shape[1])
            target = y[pick].reshape(1, 1)

            # Gradient step on the squared error of the picked sample.
            error = sample.dot(theta) - target
            scale = (1 / n_samples) * learning_rate
            theta = theta - scale * sample.T.dot(error)

            epoch_cost += cal_cost(theta, sample, target)
        cost_history[epoch] = epoch_cost

    return theta, cost_history

