In [3]:
""" 
NOTE:
Stuff on fitting motor programs to strokes is moved to :
"stroke_to_MP_scor..."

Here is testing parsing, generally from images, mostly based on pyBPL tutorials.
"""

' \nNOTE:\nStuff on fitting motor programs to strokes is moved to :\n"stroke_to_MP_scor..."\n\nHere is testing parsing, generally from images, mostly based on pyBPL tutorials.\n'

In [None]:
%load_ext autoreload
%autoreload 2

import pandas as pd
import pickle5 as pickle
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Load the dataset (`dat`) and its metadata (`metadat`) from the experiment's save directory.
# NOTE(review): SDIR is an absolute, machine-specific path — adjust for your environment.
SDIR = "/data2/analyses/database/expts/Red-lines5-formodeling-210309_102050"

# NOTE(review): pickle.load can execute arbitrary code; only load files you trust.
with open(f"{SDIR}/dat.pkl", "rb") as f:
    dat = pickle.load(f)
with open(f"{SDIR}/metadat.pkl", "rb") as f:
    metadat = pickle.load(f)

    
    

In [None]:
# === SET THESE PARAMS

# Are you trying to parse ground-truth images (that the monkey saw) or the image resulting from monkey behavior?
# Flip the literal below to switch between the two versions.
if True:
    stim_ver = "task_image" # stimulus monkey saw
else:
    stim_ver = "monkey_strokes" # drawing produced by monkey
    
    
# == coordinates of the sketchpad used by monkey
# max width or height (whichever one is greater), num pixels for
# half of page, so that sketchpad will be square with edges in 
# both dimensions of : (-WH, WH)
canvas_max_WH = np.max(np.abs(metadat["sketchpad_edges"])) # smallest square that bounds all the stimuli

# == coordinates that BPL parsing wants
# num pixels per dimension for the output image
image_WH = 105 # I think 105 is used for pyBPL. CONFIRM THIS.

# Get example stimulus (for tutorial purpose).
# NOTE(review): the index is drawn without a seed, so a different example is picked on every run.
import random
ind = random.randint(0, len(dat)-1)
if stim_ver=="task_image":
    strokes = dat["strokes_task"].values[ind] # in same format as behavior 'strokes'
elif stim_ver=="monkey_strokes":
    strokes = dat["strokes_beh"].values[ind] # in same format as behavior 'strokes'
else:
    # unknown stim_ver: fail loudly
    assert False



In [None]:
# == CONVERT TASK INTO BINARY IMAGE
from pythonlib.drawmodel.primitives import prog2pxl
from pythonlib.drawmodel.strokePlots import plotDatStrokes

# Rasterize the strokes onto an image_WH-sized canvas, then threshold at 0.5
# to obtain a boolean image.
I = prog2pxl(strokes, WHdraw=canvas_max_WH*2, WH=image_WH, smoothing=1)
I = np.array(I>0.5) # binarize

# ----- PLOTS
# plot strokes
fig, ax = plt.subplots(figsize=(10,10))
plotDatStrokes(strokes, ax, each_stroke_separate=True)

# plot hist of pixel values.
# The image is flattened and cast to int first: passing the 2-D boolean array
# directly would make plt.hist treat every column as a separate dataset.
plt.figure()
plt.hist(I.ravel().astype(int), log=True)

# plot
plt.figure()
plt.imshow(I, cmap="gray")
plt.colorbar()
plt.title("after binarize")

print(I.shape)



In [None]:
# This section deals with the issue that corners are in general not used as junctions in 
# the undirected graph that represents the drawing during parsing.
# For monkey, they often break down L-shaped things into two lines, so we want to consider
# parses where corners can have a junction.

# Solution: determine coordinates of all "segment" endpoints, which will generally include
# corners (since things like L's are usually represented as two lines). 
# Will just pass in all endpoints, since anything that is redundant with the 
# BPL junctions will be automatically discarded.

if stim_ver=="task_image":
    from pythonlib.drawmodel.image import convStrokecoordToImagecoord

    # Square sketchpad bounds, (-WH, WH) in both dimensions, and the matching image index bounds.
    sketchpad_edges = np.array([[-canvas_max_WH, canvas_max_WH], [-canvas_max_WH, canvas_max_WH]])
    image_edges = np.array([[1, image_WH-1], [1, image_WH-1]]) # 1 on edges, since there is a slight border.

    # Collect the first and last point of every stroke, converted to image coordinates.
    extra_junctions = []
    for pts in strokes:
        pts_image_inds = convStrokecoordToImagecoord(pts, sketchpad_edges, image_edges)
        extra_junctions.append(pts_image_inds[0])
        extra_junctions.append(pts_image_inds[-1])

    # Stack into a single array with one endpoint per row.
    extra_junctions = np.stack(extra_junctions, axis=0)
    print(extra_junctions)
elif stim_ver=="monkey_strokes":
    # then dont need to do this, since if monkey raises finger at corner, that will be
    # detected by BPL, since it will not be a perfectly clean corner, and so will
    # be considered a "cross". [have not verified this by testing]
    # NOTE(review): extra_junctions is not defined in this branch.
    pass
    
    

In [None]:
# === PARSE USING PYBPL
from pybpl.matlab.bottomup import generate_random_parses

# Keyword arguments shared by both stimulus versions.
parse_kwargs = dict(seed=3, max_ntrials=150, max_nwalk=150,
                    max_nstroke=100, ver="lucas")
# extra_junctions is only computed for task images; passing it unconditionally
# raises NameError when stim_ver is "monkey_strokes".
if stim_ver == "task_image":
    parse_kwargs["extra_junctions"] = extra_junctions

# generate random parses
parses = generate_random_parses(I, **parse_kwargs)
# parses = generate_random_parses(img, seed=3, ver="lucas")
# parses = generate_random_parses(img, seed=3,max_ntrials=150, max_nwalk=150,
#                                         max_nstroke=100, ver="reuben")

### RUNNING EXAMPLES FROM PYBPL AND PLAYING AROUND WITH IT

In [None]:
# Directory of the pyBPL examples; the example image is read from here.
# NOTE(review): absolute, machine-specific path — adjust for your environment.
examples_dir = "/data1/code/python/pyBPL/examples/"

###  examples/parse_image.py

In [None]:
import math
import imageio
import numpy as np
import matplotlib.pylab as plt

from pybpl.util import dist_along_traj
from pybpl.matlab.bottomup import generate_random_parses



def plot_stroke(ax, stk, color, lw=2):
    """Draw one stroke on `ax`, with the y coordinate negated.

    A stroke with more than one point and a trajectory length above 0.01 is
    drawn as a line; anything else is drawn as a single '.' marker at its
    first point.

    Parameters
    ----------
    ax : object with a matplotlib-style `plot` method
    stk : 2-D array indexed as stk[:, 0] (x) and stk[:, 1] (y)
    color : color passed through to `ax.plot`
    lw : line width passed through to `ax.plot`
    """
    style = dict(color=color, linewidth=lw)
    # Length check first so dist_along_traj is never called on a single point.
    has_extent = len(stk) > 1 and dist_along_traj(stk) > 0.01
    if not has_extent:
        ax.plot(stk[0,0], -stk[0,1], **style, marker='.')
        return
    ax.plot(stk[:,0], -stk[:,1], **style)

def plot_parse(ax, strokes, lw=2):
    """Draw every stroke of one parse on `ax` and fix the axes to a 105x105 frame.

    Parameters
    ----------
    ax : matplotlib-style axes to draw on
    strokes : sequence of strokes, each passed to `plot_stroke`
    lw : line width passed to `plot_stroke`
    """
    colors = ['r','g','b','m','c']
    # Cycle through the palette so parses with more than len(colors) strokes
    # do not raise IndexError.
    for i, stk in enumerate(strokes):
        plot_stroke(ax, stk, colors[i % len(colors)], lw)
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_xlim(0,105)
    ax.set_ylim(105,0)

def main():
    """Parse the example image with pyBPL and plot the parses in a grid.

    Reads `image_H.jpg` from `examples_dir`, thresholds it at 200, generates
    random parses, and shows the input in the first column followed by up to
    ten parses per row.
    """
    # load image to numpy binary array
    img = imageio.imread(f"{examples_dir}./image_H.jpg")
    img = np.array(img > 200)

    # generate random parses
    parses = generate_random_parses(img, seed=3)

    # plot parsing results
    nparse = len(parses)
    m = 10
    # At least one row, so the input image is still shown when there are no parses.
    n = max(1, math.ceil(nparse/m))
    # squeeze=False keeps `axes` 2-D even when there is a single row;
    # otherwise axes[0,0] fails for 10 or fewer parses.
    fig, axes = plt.subplots(n, m+1, figsize=(m+1, n), squeeze=False)
    # first column
    axes[0,0].imshow(img, cmap=plt.cm.binary)
    axes[0,0].set_xticks([]); axes[0,0].set_yticks([])
    axes[0,0].set_title('Input')
    for i in range(1,n):
        axes[i,0].set_axis_off()
    # remaining_columns
    for i in range(n):
        for j in range(1,m+1):
            ix = i*m + (j-1)
            if ix >= nparse:
                # no parse for this cell: hide it
                axes[i,j].set_axis_off()
                continue
            plot_parse(axes[i,j], parses[ix])
    plt.subplots_adjust(hspace=0., wspace=0.)
    plt.show()

main()

### examples/generate_character.py

In [None]:
import matplotlib.pyplot as plt

from pybpl.library import Library
from pybpl.model import CharacterModel


def display_type(c):
    """Print a text summary of character type `c`.

    Prints the stroke count `c.k`, then for each stroke index its sub-stroke
    ids (`c.part_types[i].ids`) and its relation category
    (`c.relation_types[i].category`), between BEGIN/END banner lines.
    """
    print('----BEGIN CHARACTER TYPE INFO----')
    n_strokes = c.k
    print('num strokes: %i' % n_strokes)
    for stroke_idx in range(n_strokes):
        print('Stroke #%i:' % stroke_idx)
        substroke_ids = c.part_types[stroke_idx].ids.numpy()
        print('\tsub-stroke ids: ', list(substroke_ids))
        relation = c.relation_types[stroke_idx]
        print('\trelation category: %s' % relation.category)
    print('----END CHARACTER TYPE INFO----')

def main():
    """Sample ten character types and show three sampled tokens of each.

    For every type, prints its description and log-likelihood, then draws
    three sampled token images in one row of a 10x3 grid.
    """
    n_types, n_tokens = 10, 3
    print('generating character...')
    lib = Library(use_hist=True)
    model = CharacterModel(lib)
    fig, axes = plt.subplots(nrows=n_types, ncols=n_tokens, figsize=(1.5, 5))
    # Tick settings shared by every panel: no ticks, no tick labels.
    hide_ticks = dict(
        which='both',
        bottom=False,
        left=False,
        labelbottom=False,
        labelleft=False,
    )
    for row in range(n_types):
        ctype = model.sample_type()
        ll = model.score_type(ctype)
        print('type %i' % row)
        display_type(ctype)
        print('log-likelihood: %0.2f \n' % ll.item())
        # sample a few character tokens and visualize them
        for col in range(n_tokens):
            panel = axes[row, col]
            ctoken = model.sample_token(ctype)
            panel.imshow(model.sample_image(ctoken), cmap='Greys')
            panel.tick_params(**hide_ticks)
        # label the row with the type index
        axes[row, 0].set_ylabel('%i' % row, fontsize=10)
    plt.show()

main()

### examples/optimize_type.py

In [None]:
"""
Sample a character type and then optimize its parameters to maximize the
likelihood of the type under the prior
"""
import argparse
import torch
import numpy as np
import matplotlib.pyplot as plt

from pybpl.library import Library
from pybpl.model import CharacterModel
from pybpl.objects import CharacterType



def optimize_type(model, c, lr, nb_iter, eps, show_examples=True):
    """
    Take a character type and optimize its parameters to maximize the
    likelihood under the prior, using gradient-based optimization (Adam),
    projecting the parameters back into their bounds after every step.

    Parameters
    ----------
    model : CharacterModel
        used to score the type and to sample tokens/images for display
    c : CharacterType
        the type to optimize; its parameters are updated in place
    lr : float
        learning rate passed to the Adam optimizer
    nb_iter : int
        number of iterations; rounded to the nearest 10 before use
    eps : float
        tolerance passed to `c.lbs` / `c.ubs` when computing the bounds
    show_examples : bool
        if True, draw 4 sampled tokens at each of 10 evenly spaced
        checkpoints in a 10x4 grid of axes

    Returns
    -------
    score_list : list of float
        log-likelihood of the type recorded at every iteration, before
        that iteration's update step

    """
    # round nb_iter to nearest 10
    nb_iter = np.round(nb_iter, -1)
    # get optimizable variables & their bounds
    c.train()
    params = c.parameters()
    lbs = c.lbs(eps)
    ubs = c.ubs(eps)
    # optimize the character type
    score_list = []
    optimizer = torch.optim.Adam(params, lr=lr)
    if show_examples:
        fig, axes = plt.subplots(10, 4, figsize=(4, 10))
    # progress is reported every `interval` iterations, i.e. 10 times in total
    interval = int(nb_iter / 10)
    for idx in range(nb_iter):
        if idx % interval == 0:
            # print optimization progress
            print('iteration #%i' % idx)
            if show_examples:
                # sample 4 tokens of current type (for visualization)
                for i in range(4):
                    token = model.sample_token(c)
                    img = model.sample_image(token)
                    axes[idx//interval, i].imshow(img, cmap='Greys')
                    axes[idx//interval, i].tick_params(
                        which='both',
                        bottom=False,
                        left=False,
                        labelbottom=False,
                        labelleft=False
                    )
                # label the row with the iteration number
                axes[idx//interval, 0].set_ylabel('%i' % idx)
        # zero optimizer gradients
        optimizer.zero_grad()
        # compute log-likelihood of the type
        score = model.score_type(c)
        score_list.append(score.item())
        # gradient descent step (minimize loss)
        loss = -score
        loss.backward()
        optimizer.step()
        # project all parameters into allowable range
        with torch.no_grad():
            for param, lb, ub in zip(params, lbs, ubs):
                # a bound of None means the parameter is unbounded on that side
                if lb is not None:
                    torch.max(param, lb, out=param)
                if ub is not None:
                    torch.min(param, ub, out=param)

    return score_list

def main(argv=None):
    """Sample a character type, optimize it under the prior, and plot the scores.

    Parameters
    ----------
    argv : list of str, optional
        Command-line style arguments (--ns, --lr, --eps, --nb_iter). When
        None, the process arguments are used. Unrecognized arguments are
        ignored, so this also runs inside a Jupyter kernel, whose own argv
        (e.g. `-f <connection file>`) would otherwise make argparse exit.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--ns', required=False, type=int,
                        help="number of strokes")
    parser.add_argument('--lr', default=1e-3, type=float,
                        help='learning rate')
    parser.add_argument('--eps', default=1e-4, type=float,
                        help='tolerance for constrained optimization')
    parser.add_argument('--nb_iter', default=1000, type=int,
                        help='number of optimization iterations')
    # parse_known_args tolerates the extra arguments a notebook kernel is
    # launched with; parse_args would raise SystemExit on them.
    args, _unknown = parser.parse_known_args(argv)

    # load the library
    lib = Library()
    # create the BPL graphical model
    model = CharacterModel(lib)

    # sample a character type (args.ns is None unless --ns was given)
    c = model.sample_type(k=args.ns)
    print('num strokes: %i' % c.k)
    print('num sub-strokes: ', [p.nsub.item() for p in c.part_types])

    # optimize the character type that we sampled
    score_list = optimize_type(model, c, args.lr, args.nb_iter, args.eps)

    # plot log-likelihood vs. iteration
    plt.figure()
    plt.plot(score_list)
    plt.ylabel('log-likelihood')
    plt.xlabel('iteration')
    plt.show()


main()

In [None]:
%tb

## LOAD PRE-TRAINED MODEL (README)

In [None]:
from pybpl.library import Library
from pybpl.model import CharacterModel

# load the hyperparameters of the BPL graphical model (i.e. the "library")

lib = Library(use_hist=True)

In [None]:
# create the BPL graphical model
model = CharacterModel(lib)

In [None]:
# Walk through the generative chain Type -> Token -> Image, scoring each step.

# sample a character type from the prior P(Type) and score its log-probability
char_type = model.sample_type()
ll_type = model.score_type(char_type)

# sample a character token from P(Token | Type=type) and score its log-probability
char_token = model.sample_token(char_type)
ll_token_given_type = model.score_token(char_type, char_token)

# sample an image from P(Image | Token=token)
image = model.sample_image(char_token)
ll_image_given_token = model.score_image(char_token, image)

# show the sampled image
import matplotlib.pyplot as plt
plt.figure()
plt.imshow(image, cmap="gray")
# plt.title(ll_image_given_token)

# print the three scores: type, token given type, image given token
print(ll_type, ll_token_given_type, ll_image_given_token)

In [None]:
## === scoring
# Re-score the token and image sampled in the cell above. The original cell
# ended in a truncated call (a SyntaxError) and used `ctok`, which is only
# defined in a later cell.
print(model.score_image(char_token, image))
print(model.score_token(char_type, char_token))

In [None]:
# Sample a fresh type and token, then display the array returned by model.get_pimg for that token.
# NOTE(review): presumably a per-pixel probability image — confirm against pyBPL.
ctype = model.sample_type()
ctok = model.sample_token(ctype)
pimg = model.get_pimg(ctok)
plt.figure()
plt.imshow(pimg)

In [None]:
# Sample a part type from the type distribution with argument 1.
# NOTE(review): the argument is presumably the number of strokes — confirm against pyBPL.
# Relies on `torch` having been imported by an earlier cell.
model.type_dist.sample_part_type(torch.tensor(1))

In [None]:
# Same as the cell above but with argument 2; keep the result and print it.
# Relies on `torch` having been imported by an earlier cell.
p = model.type_dist.sample_part_type(torch.tensor(2))
print(p)

In [None]:
# Inspect the optimizable parameters of the sampled character type.
char_type.parameters()

## ===== FIT IMAGE

In [None]:
# Call fit_image on the previously sampled `image`.
# NOTE: this rebinds `lib` to a Library built with use_hist=False, replacing the one created earlier.
from pybpl.model.model import fit_image
from pybpl.library import Library
lib = Library(use_hist=False)
fit_image(image, lib)

### LOAD character model and play around

In [None]:
# Rebuild the library with use_hist=True (the fit-image cell rebound `lib` with use_hist=False)
# and create a fresh character model from it.
lib = Library(use_hist=True)
model = CharacterModel(lib)


In [None]:
# Sample a type, a token of that type, and an image of that token, then show the image.
ctype = model.sample_type()
# NOTE: the type score is computed here but its value is discarded.
model.score_type(ctype)
ctoken = model.sample_token(ctype)
img = model.sample_image(ctoken)
plt.figure()
plt.imshow(img, cmap="Greys")

In [None]:
# Inspect the model's `token_dist` attribute.
model.token_dist

In [None]:
# Score the image and token sampled above, then list the model's attributes.
# NOTE: only the last expression (dir(model)) is displayed; the two scores are computed but not shown.
model.score_image(ctoken, img)
model.score_token(ctype, ctoken)
dir(model)