# Visualizable Illustration

In this example, we will plot the loss function of $J(\theta_1, \theta_2)$, which means our model consists of only two parameters

We use `logistic regression` model with `MSE` loss function, which is in practice not a good setting, but what we need is it's `non-convex` property.

In [35]:
import os

import numpy as np
from sklearn.datasets.samples_generator import make_classification

import torch
import torch.nn as nn
import torch.nn.functional as F

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm

In [66]:
n_samples = 100
lr = .5
n_epochs = 500000
print_every = n_epochs // 20
patience = 1000
debug=False

In [57]:
def get_loss(Y, pred):
    criterion1 = nn.MSELoss()
    criterion2 = nn.L1Loss()
    loss = criterion1(pred, Y) / criterion2(pred, Y)
    # loss = criterion1(pred, Y)
    # loss = criterion2(pred, Y.long())
    return loss

## Data generation
generate classification data with only one feature ($f(x) = \frac{1}{1 + e^{-w_1x_1 + w_2}}$) 

In [38]:
def make_data(n_samples):
    x, y = make_classification(n_samples = n_samples,
                   n_features=1, 
                   n_informative=1, 
                   n_redundant=0, 
                   n_repeated=0,
                   n_clusters_per_class=1,
                   random_state=777)


    X = torch.from_numpy(x).double().flatten()
    Y = torch.from_numpy(y).double().flatten()
    
    return X, Y

## Train

In [65]:
def train(X, Y):
    no_improve_since = 0
    min_loss = 1e5
    
    history = {
        "w1": [],
        "w2": [],
        "loss": []
    }
    
    torch.manual_seed(777)
    theta = torch.tensor([40.0, 40], requires_grad=True)
    # theta = torch.tensor([-12.0, 3], requires_grad=True)

    
    for epoch in range(n_epochs):
        output = torch.sigmoid(X * theta[0] + theta[1])
        loss = get_loss(Y, output)
        loss.backward()
        
        if loss < min_loss:
            no_improve_since = 0
            min_loss = loss
        else:
            no_improve_since += 1
            if no_improve_since >= patience:
                print(f"reached patience {patience} after {epoch} epochs,  loss: {loss.item()}")
                break
        
        if loss < .25:
            print(f"converged after {epoch} epochs,  loss: {loss.item()}")
            break

        if debug:
            print(theta.grad.data)

        # back propagation
        theta.data = theta.data - theta.grad.data * lr
        grad_to_print = theta.grad.data
        theta.grad.data.zero_()

        if not print_every == -1 and epoch % print_every == 1:
            with torch.no_grad():
                pred = (output > 0.5)
                acc = torch.sum((pred == Y).float()) / Y.size()[0]
            print(f"epoch: {epoch - 1} \t acc: {acc} \t loss: {loss.item()} \t theta: {theta.data}")
        
        history['w1'].append(theta.detach().numpy()[0])
        history['w2'].append(theta.detach().numpy()[1])
        history['loss'].append(loss.item())
        
    with torch.no_grad():
        pred = (output > 0.5)
        acc = torch.sum((pred == Y).float()) / Y.size()[0]
    print(f"converged after {epoch - 1} epochs \t acc: {acc} \t loss: {loss.item()} \t theta: {theta.data}")
    
    return X, Y, history

## Plot and Save figures

In [60]:
def plot3D_and_save(X, Y, history):
    
    w1range = np.linspace(-60 , 60, 60)
    w2range = np.linspace(-60 , 60, 60)
    W1, W2 = np.meshgrid(w1range, w2range)

    J = np.array([get_loss(torch.sigmoid(theta[0] * X + theta[1]), Y).item()
                   for theta in zip(torch.from_numpy(np.ravel(W1)), torch.from_numpy(np.ravel(W2)))])
    J = J.reshape(W1.shape)


    # draw and save figure
    dirpath = f"./figures/#samples/{n_samples}"
    os.makedirs(dirpath, exist_ok=True)
    os.makedirs("./figures/compare (train)", exist_ok=True)
    os.makedirs("./figures/compare (contour)", exist_ok=True)
    os.makedirs("./figures/compare (surface)", exist_ok=True)
    os.makedirs("./figures/compare (surface + contour)", exist_ok=True)
    os.makedirs("./figures/compare (surface + train)", exist_ok=True)
    os.makedirs("./figures/compare (contour + train)", exist_ok=True)
    os.makedirs("./figures/compare (surface + contour + train)", exist_ok=True)
    

    fig = plt.figure(figsize=(20, 10))
    fig.suptitle(f"Loss Function - {n_samples}", fontsize=24, fontweight='bold')
    ax = fig.add_subplot(111, projection='3d')
    ax.set_xlabel('w1', labelpad=30, fontsize=24, fontweight='bold')
    ax.set_ylabel('w2', labelpad=30, fontsize=24, fontweight='bold')
    ax.set_zlabel('J(w1,w2)', labelpad=30, fontsize=24, fontweight='bold')
    # ax.view_init(elev=48., azim=42)
    ax.view_init(elev=48., azim=75)
    
    surface = ax.plot_surface(W1, W2, J, alpha=0.9, cmap=cm.jet)
    surface_alpha = ax.plot_surface(W1, W2, J, alpha=0.65, cmap=cm.jet)
    train = ax.plot(history['w1'], history['w2'], history['loss'] , color='k', markerfacecolor='k', markeredgecolor='k', marker='.', markersize=5)[0]
    train_z = ax.plot(history['w1'], history['w2'] , color='k', markerfacecolor='k', markeredgecolor='k', marker='.', markersize=1, zdir='z', alpha=0.7)[0]
    train_y = ax.plot(history['w2'], history['loss'] , color='k', markerfacecolor='k', markeredgecolor='k', marker='.', markersize=1, zdir='y', zs=-65, alpha=0.7)[0]
    train_x = ax.plot(history['w1'], history['loss'] , color='k', markerfacecolor='k', markeredgecolor='k', marker='.', markersize=1, zdir='x', zs=-65, alpha=0.7)[0]

    
    contourz = ax.contour(W1, W2, J, zdir='z', offset=0, cmap=cm.jet, alpha=1)
    contoury = ax.contour(W1, W2, J, zdir='y', offset=-65, cmap=cm.jet, alpha=0.4)
    contourx = ax.contour(W1, W2, J, zdir='x', offset=-65, cmap=cm.jet, alpha=0.4)
    contours = [contourx, contoury, contourz]

    def set_opacity(b1, b2, b3, b4):
        surface.set_visible(b1)
        surface_alpha.set_visible(b2)
        train.set_visible(b3)
        train_x.set_visible(b3)
        train_y.set_visible(b3)
        train_z.set_visible(b3)
        list(map(lambda contourset: list(map(lambda contour: contour.set_visible(b4), contourset.collections)), contours))


    # surface
    set_opacity(True, False, False, False)
    
    plt.savefig(os.path.join(dirpath, 'surface.png'), transparent=True)
    plt.savefig(f'./figures/compare (surface)/{n_samples}.png', transparent=True)
    
    
    # contour
    set_opacity(False, False, False, True)
    
    plt.savefig(os.path.join(dirpath, 'contour.png'), transparent=True)
    plt.savefig(f'./figures/compare (contour)/{n_samples}.png', transparent=True)
    
    
    # train
    set_opacity(False, False, True, False)
    
    plt.savefig(os.path.join(dirpath, 'train.png'), transparent=True)
    plt.savefig(f'./figures/compare (train)/{n_samples}.png', transparent=True)
    
    # surface + contour
    set_opacity(False, True, False, True)
    
    plt.savefig(os.path.join(dirpath, 'surface + contour.png'), transparent=True)
    plt.savefig(f'./figures/compare (surface + contour)/{n_samples}.png', transparent=True)
    
    # surface + train
    set_opacity(False, True, True, False)
    
    plt.savefig(os.path.join(dirpath, 'surface + train.png'), transparent=True)
    plt.savefig(f'./figures/compare (surface + train)/{n_samples}.png', transparent=True)
    
    # contour + train
    set_opacity(False, False, True, True)
    
    plt.savefig(os.path.join(dirpath, 'contour + train.png'), transparent=True)
    plt.savefig(f'./figures/compare (contour + train)/{n_samples}.png', transparent=True)
    
    # surface + contour + train
    set_opacity(False, True, True, True)
    
    plt.savefig(os.path.join(dirpath, 'surface + contour + train.png'), transparent=True)
    plt.savefig(f'./figures/compare (surface + contour + train)/{n_samples}.png', transparent=True)

    plt.show()

In [62]:
def plot2D_and_save(X, Y, history):
    # draw and save figure
    dirpath = f"./figures/#samples/{n_samples}"
    os.makedirs(dirpath, exist_ok=True)
    os.makedirs("./figures/compare (contourz + train)", exist_ok=True)
    os.makedirs("./figures/compare (contourz)", exist_ok=True)
    
    
    w1range = np.linspace(-60 , 60, 60)
    w2range = np.linspace(-60 , 60, 60)
    W1, W2 = np.meshgrid(w1range, w2range)

    J = np.array([get_loss(torch.sigmoid(theta[0] * X + theta[1]), Y).item()
                   for theta in zip(torch.from_numpy(np.ravel(W1)), torch.from_numpy(np.ravel(W2)))])
    J = J.reshape(W1.shape)
    
    plt.clf()
    fig = plt.figure(figsize=(20, 10))
    fig.suptitle(f"Loss Function - {n_samples}", fontsize=24, fontweight='bold')
    ax = fig.add_subplot(111)
    ax.set_xlabel('w1', labelpad=30, fontsize=24, fontweight='bold')
    ax.set_ylabel('w2', labelpad=30, fontsize=24, fontweight='bold')

    contourz = ax.contour(W1, W2, J, cmap=cm.jet)
    plt.savefig(os.path.join(dirpath, 'contourz.png'), transparent=True)
    plt.savefig(f'./figures/compare (contourz)/{n_samples}.png', transparent=True)
    
    train_z = ax.plot(history['w1'], history['w2'] , color='k', markerfacecolor='k', markeredgecolor='k', marker='.', markersize=1)[0]
    plt.savefig(os.path.join(dirpath, 'contourz + train.png'), transparent=True)
    plt.savefig(f'./figures/compare (contourz + train)/{n_samples}.png', transparent=True)
    
    plt.show()
    

## Save result and data for further processing

In [42]:
def save_to_csv(X, Y, history):
    dirpath = f'./result/{n_samples}'
    os.makedirs(dirpath, exist_ok=True)
    pd.DataFrame.from_dict(history).to_csv(os.path.join(dirpath, 'history.csv'))
    
    data = np.c_[X.detach().numpy(), Y.detach().numpy()]
    np.savetxt(os.path.join(dirpath, 'data.csv'), data)

In [43]:
def load_from_csv(n):
    history = pd.read_csv(f"./result/{n}/history.csv").reset_index().to_dict(orient='list')
    data = np.loadtxt(f"./result/{n}/data.csv")
    X = torch.from_numpy(data[:, 0])
    Y = torch.from_numpy(data[:, 1])
    return X, Y, history

## Full Experiment

In [69]:
%matplotlib
# for n_samples in [100000, 10000, 1000, 100, 50, 20, 10, 6, 4]:
for n_samples in [1000000]:
    X, Y = make_data(n_samples)
    X, Y, history = train(X, Y)
    # save_to_csv(X, Y, history)
    plot2D_and_save(X, Y, history)
    plot3D_and_save(X, Y, history)

Using matplotlib backend: MacOSX
epoch: 0 	 acc: 0.7432680130004883 	 loss: 0.9790044882843658 	 theta: tensor([39.9999, 39.9996])
epoch: 25000 	 acc: 0.7648209929466248 	 loss: 0.9758872108886384 	 theta: tensor([38.5499, 33.9701])
epoch: 50000 	 acc: 0.7911890149116516 	 loss: 0.9702877236293741 	 theta: tensor([35.9202, 26.1194])
epoch: 75000 	 acc: 0.8239780068397522 	 loss: 0.9561894149388893 	 theta: tensor([29.4430, 14.8813])
reached patience 1000 after 92247 epochs,  loss: 0.3976490785195139
converged after 92246 epochs 	 acc: 0.8591330051422119 	 loss: 0.3976490785195139 	 theta: tensor([ 0.8587, -0.0198])


In [63]:
%matplotlib
for n_samples in [100000, 10000, 1000, 100, 50, 20, 10]:
# for n_samples in [10000]:
    X, Y, history = load_from_csv(n_samples)
    # plot3D_and_save(X, Y, history)    
    plot2D_and_save(X, Y, history)

Using matplotlib backend: MacOSX


## An Example

In [46]:
'''
%matplotlib
n_samples = 100000
train_args = make_data()
plot_args = train(*train_args)
plot3D_and_save(*plot_args)
'''

'\n%matplotlib\nn_samples = 100000\ntrain_args = make_data()\nplot_args = train(*train_args)\nplot3D_and_save(*plot_args)\n'

## Plot Blank

In [54]:
'''
fig = plt.figure(figsize=(20, 10))
fig.suptitle(f"Loss Function Smoothness", fontsize=24, fontweight='bold')
ax = fig.add_subplot(111, projection='3d')
ax.set_xlabel('w1', labelpad=30, fontsize=24, fontweight='bold')
ax.set_ylabel('w2', labelpad=30, fontsize=24, fontweight='bold')
ax.set_zlabel('J(w1,w2)', labelpad=30, fontsize=24, fontweight='bold')
ax.view_init(elev=48., azim=42)
plt.show()
'''

'\nfig = plt.figure(figsize=(20, 10))\nfig.suptitle(f"Loss Function Smoothness", fontsize=24, fontweight=\'bold\')\nax = fig.add_subplot(111, projection=\'3d\')\nax.set_xlabel(\'w1\', labelpad=30, fontsize=24, fontweight=\'bold\')\nax.set_ylabel(\'w2\', labelpad=30, fontsize=24, fontweight=\'bold\')\nax.set_zlabel(\'J(w1,w2)\', labelpad=30, fontsize=24, fontweight=\'bold\')\nax.view_init(elev=48., azim=42)\nplt.show()\n'