In [None]:
# Jadie Adams
import numpy as np
import math
import matplotlib.pyplot as plt
import pandas
from sklearn.model_selection import KFold
from sklearn.linear_model import Lasso
from sklearn.linear_model import LinearRegression

import warnings


def warn(*args, **kwargs):
    """No-op replacement for ``warnings.warn``: accepts any arguments and discards them."""
    return None


# Monkey-patch the module attribute so that any later ``warnings.warn(...)``
# call resolved through the ``warnings`` module runs the no-op above.
warnings.warn = warn

# Font settings passed to matplotlib's rc configuration
font = dict(size=16)
plt.rc('font', **font)

# Angles in steps of pi/4 over [0, 2*pi), each shifted by 0.5 rad;
# one angle per correspondence point (8 in total)
angles = 0.5 + np.arange(0, 2*np.pi, np.pi/4)

# Ellipse Data

In [None]:
# Problem dimensions used throughout the notebook
N = 30        # samples
d = 2         # coordinates per correspondence point
M = 8         # correspondence points per shape
L = 2         # latent-space dimension
T = 8         # time points per sample
period = 8    # period argument passed to the ellipse generator
P = M * d     # observation-space dimension (one flattened shape)

In [None]:
def _plot_ellipse_samples(points, max_rows=3, max_cols=16):
    """Scatter the first samples (rows) at the first time points (columns).

    points has shape (n_samples, n_times, 2, n_points): axis 2 holds x then y.
    """
    n_rows = min(max_rows, points.shape[0])
    n_cols = min(max_cols, points.shape[1])
    if n_rows == 0 or n_cols == 0:
        return  # nothing to draw

    palette = ['b', 'teal', 'g', 'y', 'orange', 'r', 'pink', 'm', 'c']
    # squeeze=False keeps `axs` two-dimensional even for a single row/column
    _, axs = plt.subplots(n_rows, n_cols, figsize=(n_cols, n_rows + 1), squeeze=False)
    for ax in axs.flat:
        ax.set_xlim([-0.5, 0.5])
        ax.set_ylim([-0.5, 0.5])
        ax.label_outer()

    for j in range(n_cols):
        axs[0][j].set_title('t =' + str(j+1))
    for i in range(n_rows):
        for j in range(n_cols):
            xs, ys = points[i, j, 0], points[i, j, 1]
            # One color per correspondence point; cycle if there are more
            # points than palette entries
            point_colors = [palette[p % len(palette)] for p in range(len(xs))]
            axs[i][j].scatter(xs, ys, c=point_colors)
    plt.show()


def generate_ellipse_data(N=3, T=16, period=8, x_radius_std=0.02, noise_std=0.02, plot=False):
    """Generate noisy 2-D ellipse correspondence points that change over time.

    Points are placed at the module-level ``angles``. For each sample the
    x-radius is drawn once as ``normal(0.6, x_radius_std) / 3`` (clipped to
    [0, 1]) and kept for all time points. The y-radius is shared by all
    samples and equals ``(0.4*sin(2*pi*i/period) + 0.6) / 2`` at time index
    ``i``, i.e. it oscillates between 0.1 and 0.5. Gaussian noise with
    standard deviation ``noise_std`` is added to every coordinate.

    Parameters
    ----------
    N : int
        Number of samples.
    T : int
        Number of time points per sample.
    period : float
        Number of time steps per cycle of the y-radius oscillation.
    x_radius_std : float
        Standard deviation of the per-sample x-radius draw (before the /3).
    noise_std : float
        Standard deviation of the additive coordinate noise.
    plot : bool
        If True, scatter-plot up to 3 samples over up to 16 time points.

    Returns
    -------
    numpy.ndarray
        Array of shape (N, T, 2*len(angles)); along the last axis all
        x-coordinates come first, followed by all y-coordinates.
    """
    n_points = len(angles)

    # y-coordinates depend only on time, so compute them once: (T, n_points)
    minor_radius = (0.4*np.sin(np.arange(T)*(2*np.pi/period)) + 0.6) / 2
    y_values = minor_radius[:, None] * np.sin(angles)

    # float32 storage of the noise-free shapes matches the original behavior
    clean = np.empty((N, T, 2, n_points), dtype=np.float32)
    clean[:, :, 1, :] = y_values
    for sample_index in range(N):
        # One scalar draw per sample keeps the random stream order unchanged
        major_radius = np.clip(np.random.normal(0.6, x_radius_std)/3, 0, 1)
        clean[sample_index, :, 0, :] = major_radius*np.cos(angles)

    # Flatten each shape to [x_0..x_k, y_0..y_k] and add noise
    data = clean.reshape(N, T, 2*n_points)
    data = data + np.random.normal(0, noise_std, data.shape)

    if plot:
        _plot_ellipse_samples(data.reshape(N, T, 2, n_points))
    return data

In [None]:
# Simulate the ellipse data set (plot=True also draws the preview grid),
# then report the shape of the returned array
Z = generate_ellipse_data(N=N, T=T, period=period, plot=True)
print(Z.shape)

# Polynomial Regression

In [None]:
# Response matrix: one row per (sample, time point), one column per coordinate
z = Z.reshape((N*T, d*M))
# Explanatory variable: time index 1..T, repeated once for each of the N samples
t = np.tile(np.arange(T) + 1, N)

In [None]:
def get_predictors(degree, t):
    """Build the polynomial design matrix (without the constant column).

    Parameters
    ----------
    degree : int
        Highest power to include.
    t : array-like of shape (n,)
        Values of the explanatory variable.

    Returns
    -------
    numpy.ndarray
        Float array of shape (n, degree) whose column ``j`` holds
        ``t ** (j + 1)``, i.e. ``[t, t**2, ..., t**degree]``.
    """
    # Work in float so large powers cannot overflow an integer dtype
    t = np.asarray(t, dtype=float).reshape(-1)
    exponents = np.arange(1, degree + 1)
    # Broadcasting (n, 1) ** (degree,) -> (n, degree), replacing the double loop
    return t[:, None] ** exponents

## K-Fold Cross Validation
Used to select the optimal polynomial degree.

In [None]:
# Number of cross-validation folds
k = 10
# Shuffled k-fold splitter. The fixed random_state makes every kf.split(...)
# call return the same folds, so cross-validation results are repeatable and
# all loops that reuse `kf` are evaluated on identical partitions.
kf = KFold(n_splits=k, shuffle=True, random_state=0)

In [None]:
# Candidate polynomial degrees: 1 .. T-1
degrees = range(1, T)

# test_mse[f][j] = held-out MSE of fold f for degrees[j]
test_mse = []
for train_index, test_index in kf.split(t, z):
    split_mse = []
    train_t, train_z = t[train_index], z[train_index]
    test_t, test_z = t[test_index], z[test_index]
    for degree in degrees:
        train_X = get_predictors(degree, train_t)
        test_X = get_predictors(degree, test_t)
        # No `normalize=True`: the argument was removed in scikit-learn 1.2,
        # and ordinary least squares with an intercept gives the same
        # predictions regardless of column scaling.
        model = LinearRegression()
        model.fit(train_X, train_z)
        pred_test_z = model.predict(test_X)
        mse = np.mean((pred_test_z - test_z)**2)
        split_mse.append(mse)
    test_mse.append(split_mse)
# Average over folds -> one cross-validated MSE per degree
test_mse = np.mean(test_mse, axis=0)

# Look the degree up in `degrees` rather than assuming the range starts at 1
optimal_degree = degrees[int(np.argmin(test_mse))]
print("Optimal degree", optimal_degree)

plt.figure(figsize=(10, 6))
plt.plot(degrees, test_mse)
plt.xlabel("Degree of the polynome")
plt.ylabel("k-fold cross-validated mse")
plt.show()

In [None]:
# Fit one polynomial regression per degree on all data and draw, for every
# output coordinate, the fitted curve over the observations.
degrees = range(1, T)
n_outputs = z.shape[1]  # one subplot column per coordinate (d*M)

# squeeze=False keeps `axs` two-dimensional even with a single degree
fig, axs = plt.subplots(len(degrees), n_outputs,
                        figsize=(2*n_outputs, len(degrees)*2), squeeze=False)
for ax in axs.flat:
    ax.set_xlim([0, T+1])
    ax.set_ylim([-0.6, 0.6])
    ax.label_outer()

# Evaluation grid for the fitted curves, spanning the visible x-range
x = np.linspace(0, T+1, 100)

mses = []  # training MSE per degree, in the order of `degrees`
for row, degree in enumerate(degrees):
    X = get_predictors(degree, t)
    # No `normalize=True`: removed in scikit-learn 1.2 and without effect on
    # the least-squares coefficients reported in the original feature units.
    model = LinearRegression()
    model.fit(X, z)
    z_hat = model.predict(X)
    mse = np.mean((z_hat - z)**2)
    mses.append(mse)
    print("Degree", degree, "MSE", mse)

    # betas[:, 0] is the intercept, betas[:, j] the coefficient of t**j
    betas = np.zeros((n_outputs, degree+1))
    betas[:,0]= model.intercept_
    betas[:,1:] = model.coef_

    axs[row][0].set_title('degree =' + str(degree))
    for index in range(n_outputs):
        # np.polyval expects the highest power first, hence the flip
        w = np.flip(betas[index,:])
        curve = np.polyval(w, x)
        axs[row][index].plot(x, curve, label=(str(degree)+"-degree polynomial"))
        axs[row][index].scatter(t, z[:,index])
plt.show()

In [None]:
# Ordinary polynomial regression of fixed degree on all data
degree = 5
X = get_predictors(degree, t)
# No `normalize=True`: the argument was removed in scikit-learn 1.2, and the
# least-squares fit (coefficients in original units, predictions) does not
# depend on column scaling.
model = LinearRegression()
model.fit(X, z)
z_hat = model.predict(X)
mse = np.mean((z_hat - z)**2)
print("Regular polynomial regression MSE:")
print(mse)

# Coefficient table: one row per output coordinate, column 0 is the intercept
# and column j the coefficient of t**j
betas = np.zeros((M*d, degree+1))
betas[:,0]= model.intercept_
betas[:,1:] = model.coef_
row_labels = ["Value "+str(i) for i in range(betas.shape[0])]
column_labels = ["Beta "+str(i) for i in range(betas.shape[1])]
df = pandas.DataFrame(betas, columns=column_labels, index=row_labels)
pandas.options.display.float_format = '{:,.2g}'.format
df

# LASSO Polynomial Regression

In [None]:
# LASSO polynomial regression on the design matrix X of the previous cell
alpha = 1e-4

# `normalize=True` was removed in scikit-learn 1.2. Its behavior is reproduced
# by dividing each column by its standard deviation and using
# alpha * sqrt(n_samples) as the penalty (Lasso centers the data itself when
# fitting the intercept).
col_scale = X.std(axis=0)
col_scale[col_scale == 0] = 1.0  # leave constant columns untouched
lassoreg = Lasso(alpha=alpha*np.sqrt(X.shape[0]), max_iter=100000)
lassoreg.fit(X / col_scale, z)
# Express the coefficients in the original units of X so that coef_,
# intercept_ and predict(X) all refer to the unscaled predictors.
lassoreg.coef_ = lassoreg.coef_ / col_scale
z_hat = lassoreg.predict(X)
# Recompute the error for the LASSO fit (previously the stale OLS value was printed)
mse = np.mean((z_hat - z)**2)

print("LASSO polynomial regression MSE:")
print(mse)

# Coefficient table: one row per output coordinate, column 0 is the intercept
# and column j the coefficient of t**j
betas = np.zeros((M*d, degree+1))
betas[:,0]= lassoreg.intercept_
betas[:,1:] = lassoreg.coef_
row_labels = ["Value "+str(i) for i in range(betas.shape[0])]
column_labels = ["Beta "+str(i) for i in range(betas.shape[1])]
df = pandas.DataFrame(betas, columns=column_labels, index=row_labels)
pandas.options.display.float_format = '{:,.2g}'.format
df

## K-Fold Cross Validation

In [None]:
# Grid of polynomial degrees and LASSO penalties to cross-validate
degrees = range(1, T)
alphas = [1e-15, 1e-10, 1e-5, 1e-4, 1e-3,1e-2,1e-1, 1, 5, 10]

# test_mse[f][i][j] = held-out MSE of fold f for degrees[i] and alphas[j]
test_mse = []
for train_index, test_index in kf.split(t, z):
    split_mse = []
    train_t, train_z = t[train_index], z[train_index]
    test_t, test_z = t[test_index], z[test_index]
    for degree in degrees:
        # The design matrices do not depend on alpha: build them once per degree
        train_X = get_predictors(degree, train_t)
        test_X = get_predictors(degree, test_t)

        # `normalize=True` was removed in scikit-learn 1.2. Equivalent setup:
        # scale columns by the training standard deviation and use
        # alpha * sqrt(n_train) as the penalty.
        col_scale = train_X.std(axis=0)
        col_scale[col_scale == 0] = 1.0  # leave constant columns untouched
        train_X_scaled = train_X / col_scale
        test_X_scaled = test_X / col_scale
        alpha_factor = np.sqrt(train_X.shape[0])

        split_degree_mse = []
        for alpha in alphas:
            k_lassoreg = Lasso(alpha=alpha*alpha_factor, max_iter=100000)
            k_lassoreg.fit(train_X_scaled, train_z)
            pred_test_z = k_lassoreg.predict(test_X_scaled)
            mse = np.mean((pred_test_z - test_z)**2)
            split_degree_mse.append(mse)
        split_mse.append(split_degree_mse)
    test_mse.append(split_mse)
# Average over folds -> matrix of shape (len(degrees), len(alphas))
test_mse = np.mean(test_mse, axis=0)

row_labels = ["Degree "+str(degrees[i]) for i in range(test_mse.shape[0])]
column_labels = ["Alpha "+str(alphas[i]) for i in range(test_mse.shape[1])]
df = pandas.DataFrame(test_mse, columns=column_labels, index=row_labels)
pandas.options.display.float_format = '{:,.4g}'.format
df