# Linear Regression, Gradient Descent, and Cross Entropy

## Linear Model

In [1]:
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.graphics.gofplots import qqplot
from numpy.random import seed

seed(1)

# MSE
def cost_function(x, y, m, b):
    N = len(x)
    total_error = 0.0
    for i in range(N):
        total_error += (y[i] - (m*x[i] + b))**2   #y[i]: true y value, m*x[i] + b: predicted y value, error = difference of the two
    return total_error / N

# Derivative of MSE
def update_weights(m, b, X, Y, learning_rate):
    m_deriv = 0
    b_deriv = 0
    N = len(X)
    for i in range(N):
        # Calculate partial derivatives
        # -2x(y - (mx + b))
        m_deriv += -2*X[i] * (Y[i] - (m*X[i] + b))
        # -2(y - (mx + b))
        b_deriv += -2*(Y[i] - (m*X[i] + b))
    
    # We subtract because the derivatives point in direction of steepest ascent
    m -= (m_deriv / float(N)) * learning_rate
    b -= (b_deriv / float(N)) * learning_rate

    return m, b  

# Generate arbitrary data to fit
def generate_data(m,b,start,end):
    xs = np.array(range(start,end))
    ys = m*(xs) + b
    return xs,ys

# Randomizes generation of parameters to generate data
def ini_param(p_range):
    m,b = np.random.randint(-p_range,p_range,2)
    return m,b

def train(m, b, xs, ys, alpha, iterations):
    for i in range(iterations):
        m, b = update_weights(m,b,xs,ys,alpha)
    return m,b

def predict_ys(m, b, xs):
    y_preds = []
    for x in xs:
        y = m*x + b
        y_preds.append(y)
    return y_preds

def mean_squared_error(y_preds, y_trues):
    return np.square(np.subtract(y_preds, y_trues)).mean()

## Define The Linear Model In a Python Class

In [3]:
# define model   
class Linear_Model:
    def __init__(self, m, b):
        self.m = m
        self.b = b

    def predict_ys(self, xs):
        y_preds = np.array([])
        for x in xs:
            y = self.m * x + self.b
            y_preds = np.append(y_preds, y)
        return y_preds

# set parameters
m_true = 2
b_true = 1
lr = 0.01
iterations = 750
p_range = 50

#initialize models
m,b = ini_param(p_range)
true_linear = Linear_Model(m_true,b_true)
linear = Linear_Model(m,b)
# true data
xs = np.array(range(0,10))
true_ys = true_linear.predict_ys(xs)

# Report Initial
print("PARAMETERS")
print(f"Actual - m,b: {true_linear.m},{true_linear.b}")
print(f"Initial - m,b: {linear.m},{linear.b} - cost: {cost_function(xs,true_ys,linear.m,linear.b)}")

# Train
linear.m,linear.b = train(linear.m,linear.b,xs,true_ys,lr,iterations)

# Prediction and accuracy
preds_ys = linear.predict_ys(xs)
MSE = mean_squared_error(preds_ys, true_ys)

# Report Final
print(f"Estimate - m,b: {linear.m},{linear.b} - cost: {cost_function(xs,true_ys,linear.m,linear.b)}")
true_xys = zip(xs,true_ys)
pred_xys = zip(xs,preds_ys)
pred_xys_str = ", ".join(map(str, pred_xys))
true_xys_str = ", ".join(map(str, true_xys))
print(f"\nY VALUES")
print(f"Actual (xs,ys) - [{true_xys_str}]")
print(f"Final Predictions (xs,ys) - [{pred_xys_str}]")
print(f"MSE: {MSE}\n")

PARAMETERS
Actual - m,b: 2,1
Initial - m,b: 22,-41 - cost: 5604.0
Estimate - m,b: 2.100501816091919,0.36979473475724733 - cost: 0.11499524190796909

Y VALUES
Actual (xs,ys) - [(0, 1.0), (1, 3.0), (2, 5.0), (3, 7.0), (4, 9.0), (5, 11.0), (6, 13.0), (7, 15.0), (8, 17.0), (9, 19.0)]
Final Predictions (xs,ys) - [(0, 0.36979473475724733), (1, 2.470296550849166), (2, 4.570798366941085), (3, 6.671300183033004), (4, 8.771801999124923), (5, 10.872303815216842), (6, 12.972805631308761), (7, 15.07330744740068), (8, 17.173809263492597), (9, 19.274311079584518)]
MSE: 0.1149952419079691



## Gradient Descent Minimizing a Quadratic Function

In [4]:
def find_min_x(starting_x, iterations, learning_rate):
    x = starting_x

    for i in range(iterations):
        x_deriv = 2 * x
        # We subtract because the derivatives point in direction of steepest ascent
        x -= x_deriv * learning_rate

    return x 

def predict_y(x):
    return x * x + 1

# min_x = find_min_x(np.random.randint(-p_range,p_range), iterations, lr)
min_x = find_min_x(8, iterations, lr)
print(f'true (x,y): (0,1)')
print(f'predicted (x,y): ({min_x},{predict_y(min_x)})\n')

true (x,y): (0,1)
predicted (x,y): (2.102067977781766e-06,1.0000000000044187)



## Cross Entropy Method Implementation

In [6]:
# Initialize parameters
μ = 10
σ2 = 100
t = 0
maxits = 1000
N = 100
Ne = 10
# While maxits not exceeded and not converged
while t < maxits and σ2 > 0.001:
    # Obtain N samples from current sampling distribution
    X = np.random.normal(μ, σ2, N)
    # Evaluate objective function at sampled points
    S = np.exp(-(X - 2)**2) + 0.8 * np.exp(-(X + 2)**2)
    # Sort X by objective function values in descending order
    zipped_lists = zip(S,X)
    sorted_zipped_lists = sorted(zipped_lists, reverse=True)
    sorted_list_X = [element for _, element in sorted_zipped_lists]
    # Update parameters of sampling distribution                  
    μ = np.mean(sorted_list_X[0:Ne])
    σ2 = np.var(sorted_list_X[0:Ne])
    t = t + 1
# Return mean of final sampling distribution as solution
print('true mean: 2')
print(f'estimate mean: {np.mean(X)}')

true mean: 2
estimate mean: 1.9849102141201271
