In [30]:
# Widen the notebook page container to 90% so wide DataFrames stay readable.
# `display`/`HTML` come from the public IPython.display module (the same module the
# import cell below uses); importing `display` from IPython.core.display is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width:90% !important; }</style>"))

In [31]:
import pandas as pd
import os
import re
import codecs
from IPython.display import display
from six.moves import cPickle as pickle
import string
from PIL import Image
import numpy as np
import h5py

In [32]:
# Notebook-wide pandas display settings: show up to 600 rows and 600 characters
# per cell, and do not limit the number of columns, the overall output width or
# the number of sequence items; frames are never wrapped and headers are left-aligned.
width = None  # shared "no limit" value used for both max_columns and width
pd.set_option('display.max_rows', 600)
pd.set_option('display.max_columns', width)
pd.set_option('display.max_colwidth', 600)
pd.set_option('display.width', width)
pd.set_option('display.max_seq_items', None)
pd.set_option('display.expand_frame_repr', False)
pd.set_option('display.colheader_justify', 'left')

In [33]:
import data_commons as dtc
import dl_commons as dlc

In [94]:
class VisualizeDir(object):
    """Browse the '<graph>_<step>.h5' snapshot files found in one run's store directory.

    On construction the run's 'hyper.pkl' and 'args.pkl' are loaded from the parent
    of `storedir`, and the vocabulary pickles are loaded from `gen_datadir` to build
    the id -> token-string table used by `words` and `strs`.
    """

    def __init__(self, storedir, gen_datadir='../data/generated2'):
        """
        Args:
            storedir:    directory holding the '<graph>_<step>.h5' files; its parent
                         directory must contain 'hyper.pkl' and 'args.pkl'.
            gen_datadir: directory holding 'dict_vocab.pkl' and 'dict_id2word.pkl'.
        """
        self._storedir = storedir
        self._logdir = os.path.join(storedir, '..')
        self._hyper = dtc.load(self._logdir, 'hyper.pkl')
        self._args = dtc.load(self._logdir, 'args.pkl')

        self._word2id = pd.read_pickle(os.path.join(gen_datadir, 'dict_vocab.pkl'))
        i2w = pd.read_pickle(os.path.join(gen_datadir, 'dict_id2word.pkl'))
        # Ids -1 .. -10 are not in the pickled table; render them as their own
        # decimal text (presumably padding / special ids -- TODO confirm).
        for i in range(-1, -11, -1):
            i2w[i] = '%d' % i
        self._id2word = {}
        ## Append space after all commands beginning with a backslash (except backslash alone)
        for i, w in i2w.items():
            # startswith() instead of w[0] so an empty token cannot raise IndexError.
            self._id2word[i] = (w + " ") if w.startswith('\\') else w
        self._id2word[self._word2id['id']['\\']] = '\\'

    @staticmethod
    def _step_of(filename):
        """Return the integer step encoded in a '<prefix>_<step>.<ext>' file name."""
        return int(os.path.basename(filename).split('_')[-1].split('.')[0])

    def _h5_path(self, graph, step):
        """Return the path of the snapshot file for `graph` at `step`."""
        return os.path.join(self._storedir, '%s_%d.h5' % (graph, step))

    def _to_words(self, df):
        """Return a copy of `df` with every id replaced by its token string."""
        # DataFrame.applymap is deprecated in newer pandas in favour of
        # DataFrame.map; use whichever the installed version provides.
        if hasattr(pd.DataFrame, 'map'):
            elementwise = pd.DataFrame.map
        else:
            elementwise = pd.DataFrame.applymap
        return elementwise(df, lambda x: self._id2word[x])

    @staticmethod
    def _join_rows(df_words):
        """Concatenate the token strings of each row into one string per row."""
        return ["".join(row) for row in df_words.itertuples(index=False)]

    @property
    def storedir(self):
        """Directory the snapshot files are read from."""
        return self._storedir

    @property
    def w2i(self):
        """Token-string -> id mapping (the 'id' entry of the vocabulary pickle)."""
        return self._word2id['id']

    @property
    def i2w(self):
        """Id -> token-string mapping, with the trailing-space rule applied."""
        return self._id2word

    @property
    def max_steps(self):
        """Tuple (highest step of any file, highest step of a 'validation*' file)."""
        filenames = os.listdir(self._storedir)
        steps = [self._step_of(f) for f in filenames]
        epoch_steps = [self._step_of(f) for f in filenames if f.startswith('validation')]
        return sorted(steps)[-1], sorted(epoch_steps)[-1]

    @property
    def args(self):
        """Object loaded from 'args.pkl'."""
        return self._args

    @property
    def hyper(self):
        """Object loaded from 'hyper.pkl'."""
        return self._hyper

    def keys(self, graph, step):
        """Return the list of top-level keys stored in the snapshot file.

        Args:
            graph: file-name prefix, e.g. 'training' or 'validation'
            step:  step whose snapshot is to be inspected
        """
        # Open read-only: inspecting a run must never create or modify files.
        with h5py.File(self._h5_path(graph, step), 'r') as h5:
            # Materialise the keys before the file is closed; a lazy keys view
            # is unusable once the `with` block exits.
            return list(h5.keys())

    def np(self, graph, step, key):
        """Return the dataset stored under `key` as an in-memory array.

        Args:
            graph: 'training' or 'validation'
            step:  step whose output is to be fetched
            key:   key of object to fetch - e.g. 'predicted_ids'
        """
        with h5py.File(self._h5_path(graph, step), 'r') as h5:
            return h5[key][...]

    def df(self, graph, step, key):
        """Return the dataset stored under `key` wrapped in a DataFrame."""
        return pd.DataFrame(self.np(graph, step, key))

    def words(self, graph, step, key, key2=None):
        """Return the ids stored under `key` translated to token strings.

        With `key2` given, both translated frames are returned side by side in
        one DataFrame whose top column level is the key name.
        """
        df_words = self._to_words(self.df(graph, step, key))
        if key2 is None:
            return df_words
        df_words2 = self._to_words(self.df(graph, step, key2))
        # A dict of DataFrames cannot be passed to the DataFrame constructor
        # (it raises ValueError), so the two frames are concatenated column-wise.
        return pd.concat([df_words, df_words2], axis=1, keys=['%s' % key, '%s' % key2])

    def strs(self, graph, step, key, key2=None, mingle=True):
        """Return one joined string per row for `key` (and optionally `key2`).

        Args:
            graph:  'training' or 'validation'
            step:   step whose output is to be fetched
            key:    key of the id array to render
            key2:   optional second key to render alongside `key`
            mingle: with `key2` given, True interleaves the rows of both keys in a
                    single column; False puts each key in its own column.
        """
        ## Tokens that begin with a backslash (except the lone backslash) carry a
        ## trailing space, so simply joining a row yields its string form.
        lines = self._join_rows(self.words(graph, step, key))
        if key2 is None:
            return pd.DataFrame(lines)

        lines2 = self._join_rows(self.words(graph, step, key2))
        if mingle:
            data = {'%s_%d %s / %s' % (graph, step, key, key2): [e for t in zip(lines, lines2) for e in t]}
        else:
            data = {'%s_%d.%s' % (graph, step, key): lines,
                    '%s_%d.%s' % (graph, step, key2): lines2}
        return pd.DataFrame(data)

    def prune_logs(self, save_epochs=1):
        """Report which 'training*' files are old enough to be pruned.

        Files starting with 'validation' mark the epoch steps. A training file is
        listed when its step is lower than the (save_epochs+1)-th latest epoch step
        and is not itself an epoch step. NOTE: this only prints the candidate
        files; nothing is deleted.

        Returns:
            False when no more than `save_epochs` epoch steps exist (nothing to
            report), otherwise None.
        """
        filenames = os.listdir(self._storedir)
        epoch_steps = [self._step_of(f) for f in filenames if f.startswith('validation')]
        if len(epoch_steps) <= save_epochs:
            # Format the count, not the list itself ('%d' % list raises TypeError).
            print('Only %d full epochs were found. Deleting nothing.' % len(epoch_steps))
            return False

        epoch_steps.sort(reverse=True)
        max_step = epoch_steps[save_epochs]
        keep_steps = set(epoch_steps)
        # Decide per file instead of testing membership in a filter() result,
        # which is a one-shot iterator on Python 3.
        files_to_remove = [f for f in filenames
                           if f.startswith('training')
                           and self._step_of(f) < max_step
                           and self._step_of(f) not in keep_steps]
        # Single pre-formatted argument keeps the output identical on Python 2 and 3.
        print('The following files will be removed %s' % (files_to_remove,))

class VisualizeStep(object):
    """View of a visualizer pinned to one step.

    Every method forwards to the same-named method of the wrapped visualizer,
    supplying the step given at construction, so callers only pass the graph
    name and key(s).
    """

    def __init__(self, visualizer, step):
        """
        Args:
            visualizer: object exposing keys/np/df/words/strs that take
                        (graph, step, ...) - e.g. a VisualizeDir
            step:       step that every call is bound to
        """
        self._step = step
        self._visualizer = visualizer

    def keys(self, graph):
        """Keys available for `graph` at the bound step."""
        return self._visualizer.keys(graph, self._step)

    def np(self, graph, key):
        """Array stored under `key` for `graph` at the bound step."""
        return self._visualizer.np(graph, self._step, key)

    def df(self, graph, key):
        """DataFrame of the data stored under `key` for `graph` at the bound step.

        The previous version took an extra `step` argument and called the
        non-existent `pd.DataFrame.df`, so it could never succeed; it now
        delegates with the bound step like every other method of this class.
        """
        return self._visualizer.df(graph, self._step, key)

    def words(self, graph, key, key2=None):
        """Token strings for `key` (and optionally `key2`) at the bound step."""
        return self._visualizer.words(graph, self._step, key, key2)

    def strs(self, graph, key, key2=None, mingle=True):
        """Joined row strings for `key` (and optionally `key2`) at the bound step."""
        return self._visualizer.strs(graph, self._step, key, key2, mingle)

class DiffParams(object):
    """Compare the pickled parameter files of two run directories."""

    def __init__(self, dir1, dir2):
        """Remember the two directories whose files will be compared."""
        self._dir1, self._dir2 = dir1, dir2

    def get(self, filename, to_str):
        """Load `filename` from both directories and return the pair (first, second).

        When `to_str` is truthy each loaded object is passed through
        `dlc.to_dict` before being returned.
        """
        loaded = [dtc.load(run_dir, filename) for run_dir in (self._dir1, self._dir2)]
        if to_str:
            loaded = [dlc.to_dict(params) for params in loaded]
        first, second = loaded
        return first, second

    def print_dict(self, filename, to_str):
        """Pretty-print the result of `dlc.diff_dict` for `filename` of both directories."""
        first, second = self.get(filename, to_str)
        difference = dlc.diff_dict(first, second)
        dtc.pprint(difference)

    def _table(self, filename):
        """Display the two parts returned by `dlc.diff_table` as DataFrames."""
        # The objects are compared as loaded (to_str=False).
        first, second = self.get(filename, False)
        head, tail = dlc.diff_table(first, second)
        for part in (head, tail):
            display(pd.DataFrame(part))

    def args(self, to_str=True):
        """Display the diff tables for 'args.pkl'. `to_str` is accepted but not used."""
        self._table('args.pkl')

    def hyper(self, to_str=True):
        """Display the diff tables for 'hyper.pkl'. `to_str` is accepted but not used."""
        self._table('hyper.pkl')

    def get_args(self):
        """Return the 'args.pkl' pair, each converted with `dlc.to_dict`."""
        return self.get('args.pkl', True)

    def get_hyper(self):
        """Return the 'hyper.pkl' pair, each converted with `dlc.to_dict`."""
        return self.get('hyper.pkl', True)

In [95]:
# Select the run whose store directory is inspected below. The commented-out lines
# are other runs kept for quick switching.
# NOTE(review): the first commented line calls `Visualize`, a name that is not
# defined anywhere in this notebook.
# v = Visualize('./tb_metrics_dev/2017-10-06 17-56-47 PDT/store', '../data/generated2')
# v = VisualizeDir('./tb_metrics/2017-10-08 12-26-45 PDT/store')
# v = VisualizeDir('./tb_metrics_dev/2017-10-09 12-45-15 PDT/store')
vd = VisualizeDir('./tb_metrics/2017-10-09 17-43-49 PDT/store')

In [96]:
# (highest step of any file in the store directory, highest step of a 'validation*' file)
vd.max_steps

(103600, 103600)

In [98]:
# Pin the visualizer to step 103600, the latest validation step reported by vd.max_steps.
vs = VisualizeStep(vd, 103600)
# Render 'predicted_ids' and 'y' as joined strings, one column per key (mingle=False).
# NOTE(review): 'y' is presumably the target sequence -- confirm against the training code.
vs.strs('validation', 'predicted_ids', 'y', mingle=False)

Unnamed: 0,validation_103600.predicted_ids,validation_103600.y
0,"{\cal A}_{{\bf j},{\bf \xi }_{\mu }}(z)=\frac {1}{a!}\frac {d^+}{p^+\over m^+}G_{\mu ,\nu }(p){\cal A}_{\mu }(p)=\frac {1}{k!}\frac {d}{d\kappa }\frac {1}{x!}{\cal R}_{\mu }(a)+\frac {1}{k^{2}+m^{2}}G_{\mu \nu ,\nu \sigma }\partial _{\sigma }\eos -1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1","\label {recurs1}{\cal A}_{n+1}(x)=\frac {1}{n}{\cal D}{\cal A}_n(x)=\frac {1}{n!}{\cal D}^{n+1}\ln {\cal A}_0(x)\;\;\\mbox{with}\\\{\cal A}_0(x)=\frac {x-1}{x\ln x}\,\eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos"
1,"\langle \psi _2^{\mu \nu }\rangle =\frac 1{2\pi \over \eta }\left (\frac {1}{g}-\frac {1}{\sqrt {2\pi ^2\alpha }}\frac {P^2}{\sqrt {2\pi ^+\gamma ^2}}(\frac {1}{2},\frac {p^2}{m})\biggr ]\frac {u_0}{\sqrt {2\pi ^2m}}\quad .\eos -1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1",\langle \psi ^{ml}_{{\xi }}\vert \frac {{\bf {p}}^2}{2m}\vert \psi ^{ml}_{{\xi }}\rangle =\frac {2\sqrt {\beta }}{\pi }\int _{-\infty }^{+\infty }\frac {dp}{(1+\beta p^2)^2}\frac {p^2}{2m}=\frac {1}{2m\beta }\quad .\eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos
2,"\partial _t\alpha =\partial ^{(1)}\Gamma ^0\Gamma \partial ^{(1+1)}\Gamma ^{(1)}\otimes \Theta ^{(2)}\partial _\pm {\bar \Gamma }^{(2)}\partial _-{\bar \Gamma }^{(1)}\partial _-{\Gamma ^{(2)}\Gamma ^{(1)}\Gamma ^{(1)}\Theta ^{(2)+}-{\bf R}\Gamma ^{(\pm )1}],\eos \eos -1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1","\partial _-(\partial _+\Gamma ^{(1)}\,\Gamma ^{(1)-1})=-\Gamma ^{(2)}\,\Gamma ^{(1)-1},\qquad \partial _-(\partial _+\Gamma ^{(2)}\,\Gamma ^{(2)-1})=\Gamma ^{(2)}\,\Gamma ^{(1)-1}.\label {51}\eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos"
3,\frac {\partial }{\partial x_{m}}\partial _{j}\frac {\partial }{\partial t_{j}}=\frac {\partial A_{k}}{\partial x_{k}}-\frac {\partial ^{2}g\sigma _{jk}}{\partial x_{k}}-\frac {\partial ^{2}g\sigma _{jk}}{\partial x_{k}}-\frac {\partial ^{2}g\partial _{j}\delta q_{k}}{\partial x_{k}}-\frac {\partial t(\varphi )}{\partial x_{j}}.\eos \eos \eos -1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1,"\frac {\partial ^2\tau }{\partial x_i\partial x_j}=0,\;(\theta _i-\theta _j)\frac {\partial ^2\alpha ^0}{\partial x_i\partial x_j}=2[\frac {\partial \tau }{\partial x_i}\frac {\partial \alpha ^0}{\partial x_j}-\frac {\partial \tau }{\partial x_j}\frac {\partial \alpha ^0}{\partial x_i}].\eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos"
4,"\label {eq:ts.09}\phi _{\beta _{s+1},\phi _{1},\phi _{2},\phi _{1},\phi _{1},\theta _{1},\theta _{1},\theta _{1},\beta _{8},\beta _{6}}(\phi _{0},\varphi _{8},\sigma _{8},\sigma _{8},0)=\Big (e^{-\phi _{1}+\phi _{s}}\phi _{s_{2s+1}+\delta _{s+2}}+\phi _{p_{s-1}-\phi _{2-s-1}}\biggr )\eos -1-1-1-1",\phi _{-n_2}\partial _{-n_4}\phi _{-n_3}+\phi _{-n_3}\partial _{-n_4}\phi _{-n_2}+\phi _{-n_4}\partial _{-n_3}\phi _{-n_2}+\left ({\phi _{-n_2}\phi _{-n_3}\phi _{-n_4}\over W'}\right )'\eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos
5,"\psi _l(0)=J_j+2\sum _{j=1}^{j_j}x_j\cdot x_j^{-k},\,\,e_j+dx_l^{-1}={&ds},\,\,\,\,e_j^{-1}\approx 0,\,\,\,\exp (0-t_{-1}^{-1}),\,\,\,x_0\zeta '_{-j}^{j_0}\zeta (.t_{-}.t_{-}),\eos \eos \eos \eos -1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1",J^i_N(\sigma )=J_0^i+\sum _{n=1}^\infty \cos n\sigma (J_n^i+J_{-n}^i)\quad {\rm and}\quad J^i_D(\sigma )=i\sum _{n=1}^\infty \sin n\sigma (J_n^i-J_{-n}^i).\eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos
6,"D_{\nu }D^{\mu }(\bar \psi _{\nu \nu }\Phi ^{\nu \nu }+\bar \psi _{\mu \nu }\bar \psi ^{\nu \nu })\psi _\mu (z^{\mu },\bar \beta ^{\nu \nu }+\bar \psi _{\mu \nu }\psi ^{N}\psi )\psi =0^2-\frac 42A_{\mu \nu }^{(4)}\psi ^{(4)}=0.\label {4}\eos -1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1",D_{\nu }D^{\nu }\psi +m^{2}\psi -\frac {i}{2}eF_{\mu \nu }S^{\mu }{}^{\nu }\psi -\frac {e}{2m}\left (\beta ^{\mu }\beta ^{\nu }\beta ^{\alpha }+\beta ^{\mu }\eta ^{\nu \alpha }\right )D_{\nu }\left (F_{\alpha \mu }\psi \right )=0.\label {eq49}\eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos
7,".=\sum _{j,j=1}^N\frac {2}{(z_j-x_j)}-\sum _{j_k=1}^{2}\frac {1}{(x_j-p_j)^2}\sum _{j=1,k_k=0}^{2}\frac {1}{(z_{j}-1)^{\eta _{j}}}\,.\eos -1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1","0=\left (\frac {\partial U}{\partial u_\nu }\right )_{u_\mu =e_\mu }\rightarrow \frac {1}{g}+\sum _{\mu =1(\neq \nu )}^{N}\frac {2}{e_\mu -e_\nu }=\sum _{j=1}^\Omega \frac {1}{z_j-e_\nu }\;,\label {51}\eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos"
8,"I_{E_{11}}V(X_{1/2})=\sum _{j}\int _{\chi _{\sigma }}X^{-{1\over 2}}\partial _{\mu }(x_{\mu })G^{\mu }(y^{\prime },\phi ^{\prime }(x_{\mu }))^{-1/2}-\frac {1}{2!}[m^{\prime }(p_{1}^{2})-f^{2}]\,.\eos \eos \eos \eos \eos \eos \eos -1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1","L_R=\sum _i\int _Vd^3{\bf x}e^{3Ht}\frac {1}{2}\left [(\partial _0\eta _i({\bf x},t))^2-(e^{-Ht}{\bf \nabla }\eta _i({\bf x},t))^2-\mu _i^2\eta _i^2({\bf x},t)\right ].\label {lres}\eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos"
9,"P_{(2)}(g,x^2)\,=\,\int \frac {d^4k}{(2\pi )^4}\,\delta ^4(x-x^2)\,e^{x^{-1}x(y)}\quad =~\frac {\dot {p}p^2}{(2\pi )^4}\delta (p^2-p^2)\,e^{kp-(y-x^0)}\quad \eos \eos \eos \eos -1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1","P(x,y)\;=\;\int \frac {d^4k}{(2\pi )^4}\:\hat {P}(k)\:e^{-ik(x-y)}\;\;,\;\;\;\;\;Q(x,y)\;=\;\int \frac {d^4k}{(2\pi )^4}\:\hat {Q}(k)\:e^{-ik(x-y)}\;,\eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos \eos"


In [43]:
# Choose the two run directories whose args.pkl / hyper.pkl are compared below.
# The commented-out lines are other pairings kept for quick switching.
# diff = DiffParams('./tb_metrics/2017-10-07 14-33-35 PDT', './tb_metrics_dev/2017-10-09 12-45-15 PDT')
# diff = DiffParams('./tb_metrics/2017-09-26 22-40-18 PDT', './tb_metrics/2017-10-07 14-33-35 PDT')
# diff = DiffParams('./tb_metrics/2017-10-07 14-33-35 PDT', './tb_metrics/2017-10-08 12-26-45 PDT')
diff = DiffParams('./tb_metrics/2017-09-26 22-40-18 PDT', './tb_metrics/2017-10-08 12-26-45 PDT')
# diff = DiffParams('./tb_metrics/2017-09-26 22-40-18 PDT/w=1', './tb_metrics/2017-10-08 12-26-45 PDT')
# diff = DiffParams('./tb_metrics/2017-09-26 22-40-18 PDT', './tb_metrics_dev/2017-10-09 12-45-15 PDT')

In [None]:
# get_hyper() returns the hyper.pkl contents as a (first dir, second dir) pair;
# show the 'CALSTM_STACK' entry of the first directory.
diff.get_hyper()[0]['CALSTM_STACK']

In [None]:
# Display the two diff tables for args.pkl of the selected directories.
diff.args()

In [None]:
# Show the 'squash_input_seq' entry of hyper.pkl from the second directory.
diff.get_hyper()[1]['squash_input_seq']

In [68]:
# Look up the vocabulary id of the lone backslash token. Uses `vd`, the
# VisualizeDir instance created earlier in this notebook; the former name `v`
# is only assigned in commented-out lines, so it is undefined on a fresh kernel.
vd.w2i['\\']

61