## RBM Name Generation

In [1]:
import argparse
import pickle
from sklearn.model_selection import train_test_split
from GridEncoder import GridEncoder
import Utils
from ShortTextCodec import ShortTextCodec, BinomialShortTextCodec
from RBM import BernoulliRBM
import Sampling
import sample
import sys
import colorama
import numpy as np
import sys
import os
import inspect

# Make the sibling ``RBM_Git`` directory importable so that ``import rbm`` below resolves.
# NOTE(review): this relies on inspect reporting a file located in the notebook's
# directory for the current frame -- confirm with the Jupyter/ipykernel version in use.
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
# os.path.join avoids the invalid '\R' escape sequence and the hard-coded Windows separator.
parent = os.path.join(currentdir, 'RBM_Git')
# Guard against stacking duplicate entries when the cell is re-run.
if parent not in sys.path:
    sys.path.insert(0, parent)
%reload_ext autoreload
%autoreload 2
%matplotlib inline
import time
import matplotlib.pyplot as plt
import numexpr  as ne
import profile
import rbm as Rbm
import pandas
from random import randint
from timeit import default_timer as timer

In [2]:
# Controls how the sampling temperature is applied to the visible biases in
# CharBernoulliSoftmax._sample_visibles: when falsy (as here) the biases are divided
# by `temperature`; when truthy they are divided by min(1.0, temperature) instead.
BIASED_PRIOR = 0

class CharBernoulli(BernoulliRBM):
    """BernoulliRBM that carries the text codec used to build its input vectors.

    On top of the base class it prints decoded "fantasy samples" during the
    wellness check and provides `corrupt` / `uncorrupt` helpers that perturb
    one active unit per input row and undo that perturbation.
    """

    def __init__(self, **kwargs):
        """
        codec is the ShortTextCodec used to create the vectors being fit. The
        most important function of the codec is as a proxy to the shape of the
        softmax units in the visible layer (if you're using the CharBernoulliRBMSoftmax
        subclass). It's also used to decode and print
        fantasy particles at the end of each epoch.

        `codec` is a required keyword argument (KeyError if missing); every
        other keyword argument is forwarded unchanged to BernoulliRBM.__init__.
        """
        # Attaching this to the object is really helpful later on when models
        # are loaded from pickle in visualize.py and sample.py
        self.codec = kwargs.pop("codec")
        # Read the shape from the codec that was passed in. The previous code used a
        # module-level name `codec`, which only worked when such a global happened to
        # exist and could silently pick up a different codec than the one supplied.
        self.softmax_shape = self.codec.shape()
        # Old-style class :(
        BernoulliRBM.__init__(self, **kwargs)

    def wellness_check(self, epoch, duration, train, validation):
        """Run the base-class wellness check, then print decoded fantasy samples.

        Up to three rows of `self.h_samples_` are turned into visible samples at
        temperature 0.1, decoded with the codec and printed joined by '|'.
        """
        BernoulliRBM.wellness_check(self, epoch, duration, train, validation)
        fantasy_samples = '|'.join([self.codec.decode(vec) for vec in
                                    self._sample_visibles(self.h_samples_[:3], temperature=0.1)])
        print("Fantasy samples: {}".format(fantasy_samples))

    def corrupt(self, v):
        """Change one active unit in every row of `v`, in place.

        `v` is modified through its `.indices` array (presumably a scipy CSR
        matrix with exactly `softmax_shape[0]` stored entries per row -- TODO
        confirm against the caller).

        Returns
        -------
        (v, state) where `state` is the tuple (positions into v.indices, offsets
        applied) that `uncorrupt` needs to reverse the change.
        """
        n_softmax, n_opts = self.softmax_shape
        # Select a random index in to the indices of the non-zero values of each input
        # TODO: In the char-RBM case, if I wanted to really challenge the model, I would avoid selecting any
        # trailing spaces here. Cause any dumb model can figure out that it should assign high energy to
        # any instance of /  [^ ]/
        meta_indices_to_corrupt = self.rng_.randint(0, n_softmax, v.shape[0]) + np.arange(0, n_softmax * v.shape[0], n_softmax)

        # Offset these indices by a random amount (but not 0 - we want to actually change them)
        offsets = self.rng_.randint(1, n_opts, v.shape[0])
        # Also, do some math to make sure we don't "spill over" into a different softmax.
        # E.g. if n_opts=5, and we're corrupting index 3, we should choose offsets from {-3, -2, -1, +1}
        # 1-d array that matches with meta_i_t_c but which contains the indices themselves
        indices_to_corrupt = v.indices[meta_indices_to_corrupt]
        # Wherever position-within-group + offset would reach n_opts, wrap the offset
        # around by subtracting n_opts so the result stays inside the same group.
        offsets = offsets - (n_opts * (((indices_to_corrupt % n_opts) + offsets.ravel()) >= n_opts))

        v.indices[meta_indices_to_corrupt] += offsets
        return v, (meta_indices_to_corrupt, offsets)

    def uncorrupt(self, visibles, state):
        """Undo `corrupt` in place, given the `state` tuple that `corrupt` returned."""
        mitc, offsets = state
        visibles.indices[mitc] -= offsets
        
class CharBernoulliSoftmax(CharBernoulli):
    """CharBernoulli whose visible layer is sampled as groups of softmax units."""

    def __init__(self, **kwargs):
        # No extra state: construction is delegated entirely to the parent class.
        CharBernoulli.__init__(self, **kwargs)

    def _sample_visibles(self, h, temperature=1.0):
        """Draw visible configurations from P(v|h) under the softmax constraint,
        i.e. sum(v) == softmax_shape[0] for any visible configuration v.

        Parameters
        ----------
        h : array-like, shape (n_samples, n_components)
            Values of the hidden layer to sample from.
        temperature : float
            Divides the weights; it also divides the visible biases, unless
            BIASED_PRIOR is truthy, in which case the biases are divided by
            min(1.0, temperature).

        Returns
        -------
        v : array-like, shape (n_samples, n_features)
            Values of the visible layer.
        """
        bias_temperature = min(1.0, temperature) if BIASED_PRIOR else temperature
        activations = np.dot(h, self.components_ / temperature)
        activations += self.intercept_visible_ / bias_temperature
        n_rows, n_cols = activations.shape
        # View each row as (groups, options) so sampling happens per softmax group,
        # then flatten back to the original (n_samples, n_features) layout.
        grouped = np.reshape(activations, (n_rows,) + self.softmax_shape)
        sampled = Utils.softmax_and_sample(grouped)
        return sampled.reshape((n_rows, n_cols))


In [3]:
# Codec for the English names data set. The first argument is empty here; the other
# positional arguments are passed through as-is (presumably 10 is the maximum string
# length, since the encoded width below is 10 x alphabet size -- TODO confirm).
codec_kls = ShortTextCodec
codec = codec_kls('', 10, 0, True, False)
codec.debug_description()

# Hyper-parameters for the character-level RBM.
model_kwargs = dict(
    codec=codec,
    n_components=100,
    learning_rate=0.06,
    lr_backoff=False,
    n_iter=200,
    verbose=1,
    batch_size=1,
    weight_cost=0.0001,
)
kls = CharBernoulliSoftmax
rbm = kls(**model_kwargs)

# Display the alphabet the codec will encode.
codec.alphabet

'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ$'

## English Names File

In [4]:
# Encode every line of the names file with the codec, then hold out half for validation.
vecs = Utils.vectors_from_txtfile("./names.txt", codec)
# A fixed random_state makes the split (and therefore the training run) repeatable
# across kernel restarts; without it every run trains on a different half.
train, validation = train_test_split(vecs, test_size=0.5, random_state=42)
print(train.shape, validation.shape)

(28429, 530) (28429, 530)


In [5]:
# Train on the English names; the per-iteration log below includes decoded fantasy samples.
rbm.fit(train, validation)

[CharBernoulliSoftmax] Iteration 1/200	t = 35.75s
Pseudo-log-likelihood sum: -37429.76	Average per instance: -1.32
E(vali):	-1.17	E(train):	-1.20	difference: 0.03
Fantasy samples: Branen$$$$
[CharBernoulliSoftmax] Iteration 2/200	t = 38.21s
Pseudo-log-likelihood sum: -34209.58	Average per instance: -1.20
E(vali):	-1.60	E(train):	-1.71	difference: 0.10
Fantasy samples: Alerin$$$$
[CharBernoulliSoftmax] Iteration 3/200	t = 38.10s
Pseudo-log-likelihood sum: -31990.49	Average per instance: -1.13
E(vali):	-2.65	E(train):	-2.78	difference: 0.13
Fantasy samples: Mchar$$$$$
[CharBernoulliSoftmax] Iteration 4/200	t = 40.89s
Pseudo-log-likelihood sum: -30874.14	Average per instance: -1.09
E(vali):	-2.95	E(train):	-3.14	difference: 0.19
Fantasy samples: Freschier$
[CharBernoulliSoftmax] Iteration 5/200	t = 39.66s
Pseudo-log-likelihood sum: -31023.83	Average per instance: -1.09
E(vali):	-4.32	E(train):	-4.54	difference: 0.22
Fantasy samples: Targensen$
[CharBernoulliSoftmax] Iteration 6/200	t = 37

E(vali):	-5.81	E(train):	-6.24	difference: 0.42
Fantasy samples: Labbie$$$$
[CharBernoulliSoftmax] Iteration 44/200	t = 43.61s
Pseudo-log-likelihood sum: -26175.84	Average per instance: -0.92
E(vali):	-5.28	E(train):	-5.75	difference: 0.47
Fantasy samples: Ala$$$$$$$
[CharBernoulliSoftmax] Iteration 45/200	t = 44.32s
Pseudo-log-likelihood sum: -25655.48	Average per instance: -0.90
E(vali):	-7.85	E(train):	-8.26	difference: 0.41
Fantasy samples: Esconazi$$
[CharBernoulliSoftmax] Iteration 46/200	t = 43.10s
Pseudo-log-likelihood sum: -26438.77	Average per instance: -0.93
E(vali):	-7.48	E(train):	-7.89	difference: 0.41
Fantasy samples: Heighuet$$
[CharBernoulliSoftmax] Iteration 47/200	t = 38.92s
Pseudo-log-likelihood sum: -26142.92	Average per instance: -0.92
E(vali):	-3.88	E(train):	-4.34	difference: 0.46
Fantasy samples: Manei$$$$$
[CharBernoulliSoftmax] Iteration 48/200	t = 37.48s
Pseudo-log-likelihood sum: -27529.19	Average per instance: -0.97
E(vali):	-8.88	E(train):	-9.26	differenc

E(vali):	-8.72	E(train):	-9.13	difference: 0.41
Fantasy samples: Braks$$$$$
[CharBernoulliSoftmax] Iteration 87/200	t = 33.46s
Pseudo-log-likelihood sum: -25890.70	Average per instance: -0.91
E(vali):	-6.76	E(train):	-7.20	difference: 0.45
Fantasy samples: Roelman$$$
[CharBernoulliSoftmax] Iteration 88/200	t = 39.10s
Pseudo-log-likelihood sum: -29861.58	Average per instance: -1.05
E(vali):	-11.39	E(train):	-11.82	difference: 0.43
Fantasy samples: Lopkina$$$
[CharBernoulliSoftmax] Iteration 89/200	t = 44.94s
Pseudo-log-likelihood sum: -25600.37	Average per instance: -0.90
E(vali):	-8.34	E(train):	-8.79	difference: 0.45
Fantasy samples: Willer$$$$
[CharBernoulliSoftmax] Iteration 90/200	t = 37.64s
Pseudo-log-likelihood sum: -25459.15	Average per instance: -0.90
E(vali):	-9.86	E(train):	-10.27	difference: 0.42
Fantasy samples: Blauch$$$$
[CharBernoulliSoftmax] Iteration 91/200	t = 34.00s
Pseudo-log-likelihood sum: -26080.06	Average per instance: -0.92
E(vali):	-6.31	E(train):	-6.77	differ

E(vali):	-8.03	E(train):	-8.46	difference: 0.43
Fantasy samples: Alvovics$$
[CharBernoulliSoftmax] Iteration 130/200	t = 46.14s
Pseudo-log-likelihood sum: -26598.29	Average per instance: -0.94
E(vali):	-8.76	E(train):	-9.21	difference: 0.45
Fantasy samples: Backe$$$$$
[CharBernoulliSoftmax] Iteration 131/200	t = 40.36s
Pseudo-log-likelihood sum: -29672.64	Average per instance: -1.04
E(vali):	-13.08	E(train):	-13.47	difference: 0.39
Fantasy samples: Altzma$$$$
[CharBernoulliSoftmax] Iteration 132/200	t = 45.10s
Pseudo-log-likelihood sum: -27389.29	Average per instance: -0.96
E(vali):	-8.33	E(train):	-8.78	difference: 0.45
Fantasy samples: Golss$$$$$
[CharBernoulliSoftmax] Iteration 133/200	t = 37.62s
Pseudo-log-likelihood sum: -25960.45	Average per instance: -0.91
E(vali):	-8.07	E(train):	-8.53	difference: 0.46
Fantasy samples: Marli$$$$$
[CharBernoulliSoftmax] Iteration 134/200	t = 35.16s
Pseudo-log-likelihood sum: -26531.06	Average per instance: -0.93
E(vali):	-9.73	E(train):	-10.20	d

Fantasy samples: Mohla$$$$$
[CharBernoulliSoftmax] Iteration 172/200	t = 40.31s
Pseudo-log-likelihood sum: -24822.95	Average per instance: -0.87
E(vali):	-9.68	E(train):	-10.11	difference: 0.43
Fantasy samples: Rier$$$$$$
[CharBernoulliSoftmax] Iteration 173/200	t = 39.65s
Pseudo-log-likelihood sum: -26738.11	Average per instance: -0.94
E(vali):	-10.30	E(train):	-10.72	difference: 0.42
Fantasy samples: Taberithi$
[CharBernoulliSoftmax] Iteration 174/200	t = 37.22s
Pseudo-log-likelihood sum: -27380.60	Average per instance: -0.96
E(vali):	-10.36	E(train):	-10.85	difference: 0.49
Fantasy samples: Roben$$$$$
[CharBernoulliSoftmax] Iteration 175/200	t = 40.26s
Pseudo-log-likelihood sum: -27147.62	Average per instance: -0.95
E(vali):	-12.05	E(train):	-12.46	difference: 0.41
Fantasy samples: Standlouet
[CharBernoulliSoftmax] Iteration 176/200	t = 39.49s
Pseudo-log-likelihood sum: -28835.02	Average per instance: -1.01
E(vali):	-9.70	E(train):	-10.16	difference: 0.46
Fantasy samples: Balema$$$$

CharBernoulliSoftmax()

In [6]:
# Snapshots collected by horizontal_cb; one entry per callback invocation.
SAMPLES = []


def horizontal_cb(strings, i, energy=None):
    """Sampling callback: append one snapshot of decoded particles to SAMPLES.

    Parameters
    ----------
    strings : sequence of str
        Decoded particles for this snapshot.
    i : int
        Not used here; accepted because the sampler passes it to its callback.
    energy : sequence, optional
        When given, the snapshot is stored as a list of (string, energy) pairs
        instead of the bare strings.
    """
    global SAMPLES
    if energy is not None:
        # zip() is a one-shot iterator in Python 3; store a list so the snapshot
        # supports len() and indexing, which print_columns relies on.
        SAMPLES.append(list(zip(strings, energy)))
    else:
        SAMPLES.append(strings)


def _render_particle(entry):
    """Return the text to print for one snapshot entry (a str or a (str, energy) pair)."""
    if isinstance(entry, tuple):
        text, energy = entry
        return "{} ({:.2f})".format(text, energy)
    return entry


def print_columns(maxlen):
    """Print SAMPLES as a table: one row per particle, one column per snapshot.

    Each cell is left-justified to `maxlen + 2` characters. Nothing is printed
    when no snapshot has been recorded yet.
    """
    if not SAMPLES:
        return
    col_width = maxlen + 2
    for fantasy_index in range(len(SAMPLES[0])):
        print("".join(_render_particle(s[fantasy_index]).ljust(col_width) for s in SAMPLES))
        
# Ask for a callback at index 999 only (presumably the last of the 1000 sampling
# iterations requested below -- TODO confirm against Sampling.sample_model).
sample_indices = [1000 - 1]
kwargs = {
    'start_temp': 1.0,
    'final_temp': 1.0,
    'sample_energy': False,
    'callback': horizontal_cb,
}

# Positional arguments 30 and 1000 are passed through unchanged; the output below
# lists 30 generated strings.
vis = Sampling.sample_model(rbm, 30, 1000, sample_indices, **kwargs)
print_columns(rbm.codec.maxlen)

# Summarise the free energy of the final visible configurations.
fe = rbm._free_energy(vis)
print('Final energy: {:.2f} (stdev={:.2f})\n'.format(fe.mean(), fe.std()))

Vani        
Rima        
Wudde       
Genzir      
Hika        
Henas       
Ring        
Duney       
Enta        
Iyti        
Zolkan      
Mard        
Vasa        
Packowski   
Sanas       
Saliwe      
Hepa        
Gade        
Ramkon      
Rabit       
Hungon      
Mutes       
Heugen      
Jica        
Lebbr       
Waxmon      
Huin        
Sanos       
Kelton      
Kyma        
Final energy: -23.55 (stdev=2.71)



## Spanish Dictionary File

In [7]:
# Codec for the Spanish word list: the first argument supplies the accented
# characters, which show up appended to the base alphabet in the printed output.
codec_kls = ShortTextCodec
codec = codec_kls('áéíóúñÁÉÍÓÚÑ', 10, 0, True, False)
codec.debug_description()

# Same hyper-parameters as the English model except for a larger hidden layer
# (200 units) and a larger batch size (5).
model_kwargs = {'codec': codec,
                'n_components': 200,
                'learning_rate': 0.06,
                'lr_backoff': False,
                'n_iter': 200,
                'verbose': 1,
                'batch_size': 5,
                'weight_cost': 0.0001,
                }
print(codec.alphabet)
kls = CharBernoulliSoftmax
rbm = kls(**model_kwargs)

vecs = Utils.vectors_from_txtfile("./spanish_Dict.txt", codec)
# A fixed random_state makes the split (and therefore the training run) repeatable
# across kernel restarts; without it every run trains on a different half.
train, validation = train_test_split(vecs, test_size=0.5, random_state=42)
print(train.shape, validation.shape)

abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ$áéíóúñÁÉÍÓÚÑ
(33822, 650) (33822, 650)


In [8]:
# Train on the Spanish word list; the per-iteration log below includes decoded fantasy samples.
rbm.fit(train, validation)

[CharBernoulliSoftmax] Iteration 1/200	t = 24.58s
Pseudo-log-likelihood sum: -39089.13	Average per instance: -1.16
E(vali):	-19.10	E(train):	-19.14	difference: 0.04
Fantasy samples: haparo$$$$|entaror$$$|arerinaro$
[CharBernoulliSoftmax] Iteration 2/200	t = 26.36s
Pseudo-log-likelihood sum: -32407.29	Average per instance: -0.96
E(vali):	-15.99	E(train):	-16.06	difference: 0.08
Fantasy samples: moriadora$|pracaro$$$|asuitado$$
[CharBernoulliSoftmax] Iteration 3/200	t = 26.94s
Pseudo-log-likelihood sum: -29804.77	Average per instance: -0.88
E(vali):	-15.60	E(train):	-15.72	difference: 0.13
Fantasy samples: ceiba$$$$$|alaparía$$|cranrlo$$$
[CharBernoulliSoftmax] Iteration 4/200	t = 25.78s
Pseudo-log-likelihood sum: -27337.50	Average per instance: -0.81
E(vali):	-15.94	E(train):	-16.10	difference: 0.16
Fantasy samples: empuladaro|alanacer$$|amedal$$$$
[CharBernoulliSoftmax] Iteration 5/200	t = 25.14s
Pseudo-log-likelihood sum: -25885.63	Average per instance: -0.77
E(vali):	-17.89	E(train):

Fantasy samples: embrico$$$|chancueado|abosa$$$$$
[CharBernoulliSoftmax] Iteration 39/200	t = 26.77s
Pseudo-log-likelihood sum: -19415.11	Average per instance: -0.57
E(vali):	-34.97	E(train):	-35.78	difference: 0.81
Fantasy samples: muida$$$$$|gurente$$$|escretilla
[CharBernoulliSoftmax] Iteration 40/200	t = 26.94s
Pseudo-log-likelihood sum: -20354.14	Average per instance: -0.60
E(vali):	-34.67	E(train):	-35.50	difference: 0.83
Fantasy samples: goróndico$|tarainillo|arro$$$$$$
[CharBernoulliSoftmax] Iteration 41/200	t = 26.25s
Pseudo-log-likelihood sum: -19306.91	Average per instance: -0.57
E(vali):	-33.36	E(train):	-34.22	difference: 0.85
Fantasy samples: cornitolla|carrata$$$|pona$$$$$$
[CharBernoulliSoftmax] Iteration 42/200	t = 25.81s
Pseudo-log-likelihood sum: -19593.91	Average per instance: -0.58
E(vali):	-31.69	E(train):	-32.58	difference: 0.90
Fantasy samples: breblógico|procridad$|relanco$$$
[CharBernoulliSoftmax] Iteration 43/200	t = 25.95s
Pseudo-log-likelihood sum: -20067.8

E(vali):	-47.19	E(train):	-48.35	difference: 1.16
Fantasy samples: cocado$$$$|estrímira$|enchilar$$
[CharBernoulliSoftmax] Iteration 77/200	t = 25.80s
Pseudo-log-likelihood sum: -18056.89	Average per instance: -0.53
E(vali):	-47.58	E(train):	-48.69	difference: 1.11
Fantasy samples: mangranal$|pervicitar|torco$$$$$
[CharBernoulliSoftmax] Iteration 78/200	t = 25.13s
Pseudo-log-likelihood sum: -19423.00	Average per instance: -0.57
E(vali):	-47.68	E(train):	-48.83	difference: 1.15
Fantasy samples: empusitad$|desesator$|mancar$$$$
[CharBernoulliSoftmax] Iteration 79/200	t = 24.94s
Pseudo-log-likelihood sum: -18483.69	Average per instance: -0.55
E(vali):	-49.16	E(train):	-50.36	difference: 1.20
Fantasy samples: esgurtar$$|guancuelo$|sijallo$$$
[CharBernoulliSoftmax] Iteration 80/200	t = 25.01s
Pseudo-log-likelihood sum: -19321.39	Average per instance: -0.57
E(vali):	-48.37	E(train):	-49.59	difference: 1.22
Fantasy samples: tambisco$$|puchulan$$|resatiante
[CharBernoulliSoftmax] Iteration 81/

E(vali):	-59.38	E(train):	-60.70	difference: 1.32
Fantasy samples: panto$$$$$|lagurayona|cugente$$$
[CharBernoulliSoftmax] Iteration 115/200	t = 28.67s
Pseudo-log-likelihood sum: -19065.30	Average per instance: -0.56
E(vali):	-57.53	E(train):	-58.91	difference: 1.38
Fantasy samples: vestisceo$|zumbo$$$$$|mariera$$$
[CharBernoulliSoftmax] Iteration 116/200	t = 28.39s
Pseudo-log-likelihood sum: -19375.60	Average per instance: -0.57
E(vali):	-62.31	E(train):	-63.64	difference: 1.33
Fantasy samples: ponsorario|antolancia|empepacho$
[CharBernoulliSoftmax] Iteration 117/200	t = 26.83s
Pseudo-log-likelihood sum: -21392.92	Average per instance: -0.63
E(vali):	-61.65	E(train):	-62.99	difference: 1.34
Fantasy samples: estabador$|antilio$$$|cegófica$$
[CharBernoulliSoftmax] Iteration 118/200	t = 27.41s
Pseudo-log-likelihood sum: -18924.72	Average per instance: -0.56
E(vali):	-59.37	E(train):	-60.69	difference: 1.32
Fantasy samples: petrí$$$$$|eneonadura|enirete$$$
[CharBernoulliSoftmax] Iteration

E(vali):	-67.53	E(train):	-69.02	difference: 1.50
Fantasy samples: anezanar$$|caruducto$|monatorio$
[CharBernoulliSoftmax] Iteration 153/200	t = 27.85s
Pseudo-log-likelihood sum: -19970.16	Average per instance: -0.59
E(vali):	-67.72	E(train):	-69.11	difference: 1.39
Fantasy samples: acarisco$$|ropostosia|machana$$$
[CharBernoulliSoftmax] Iteration 154/200	t = 39.42s
Pseudo-log-likelihood sum: -20335.94	Average per instance: -0.60
E(vali):	-65.36	E(train):	-66.77	difference: 1.42
Fantasy samples: encominado|rdido$$$$$|caquinazo$
[CharBernoulliSoftmax] Iteration 155/200	t = 30.56s
Pseudo-log-likelihood sum: -29579.41	Average per instance: -0.87
E(vali):	-71.40	E(train):	-72.90	difference: 1.49
Fantasy samples: machero$$$|chichageco|grifo$$$$$
[CharBernoulliSoftmax] Iteration 156/200	t = 31.29s
Pseudo-log-likelihood sum: -20421.72	Average per instance: -0.60
E(vali):	-65.92	E(train):	-67.39	difference: 1.48
Fantasy samples: marte$$$$$|calzado$$$|conchera$$
[CharBernoulliSoftmax] Iteration

E(vali):	-74.38	E(train):	-75.78	difference: 1.40
Fantasy samples: trinsadere|anciesis$$|excresiro$
[CharBernoulliSoftmax] Iteration 191/200	t = 25.25s
Pseudo-log-likelihood sum: -61276.90	Average per instance: -1.81
E(vali):	-78.30	E(train):	-79.81	difference: 1.51
Fantasy samples: perdecera$|ameístado$|baquí$$$$$
[CharBernoulliSoftmax] Iteration 192/200	t = 25.25s
Pseudo-log-likelihood sum: -21053.40	Average per instance: -0.62
E(vali):	-74.46	E(train):	-75.83	difference: 1.37
Fantasy samples: delidino$$|fajo$$$$$$|neticueno$
[CharBernoulliSoftmax] Iteration 193/200	t = 25.25s
Pseudo-log-likelihood sum: -19783.81	Average per instance: -0.58
E(vali):	-73.80	E(train):	-75.23	difference: 1.43
Fantasy samples: perta$$$$$|gracamente|embolioso$
[CharBernoulliSoftmax] Iteration 194/200	t = 25.18s
Pseudo-log-likelihood sum: -19125.10	Average per instance: -0.57
E(vali):	-73.97	E(train):	-75.49	difference: 1.52
Fantasy samples: rebuscar$$|emboricar$|monólogo$$
[CharBernoulliSoftmax] Iteration

CharBernoulliSoftmax()

In [9]:
# Start from an empty snapshot list so this sampling run is printed on its own.
SAMPLES = []


def horizontal_cb(strings, i, energy=None):
    """Sampling callback: append one snapshot of decoded particles to SAMPLES.

    Parameters
    ----------
    strings : sequence of str
        Decoded particles for this snapshot.
    i : int
        Not used here; accepted because the sampler passes it to its callback.
    energy : sequence, optional
        When given, the snapshot is stored as a list of (string, energy) pairs
        instead of the bare strings.
    """
    global SAMPLES
    if energy is not None:
        # zip() is a one-shot iterator in Python 3; store a list so the snapshot
        # supports len() and indexing, which print_columns relies on.
        SAMPLES.append(list(zip(strings, energy)))
    else:
        SAMPLES.append(strings)


def _render_particle(entry):
    """Return the text to print for one snapshot entry (a str or a (str, energy) pair)."""
    if isinstance(entry, tuple):
        text, energy = entry
        return "{} ({:.2f})".format(text, energy)
    return entry


def print_columns(maxlen):
    """Print SAMPLES as a table: one row per particle, one column per snapshot.

    Each cell is left-justified to `maxlen + 2` characters. Nothing is printed
    when no snapshot has been recorded yet.
    """
    if not SAMPLES:
        return
    col_width = maxlen + 2
    for fantasy_index in range(len(SAMPLES[0])):
        print("".join(_render_particle(s[fantasy_index]).ljust(col_width) for s in SAMPLES))
# Ask for a callback at index 999 only (presumably the last of the 1000 sampling
# iterations requested below -- TODO confirm against Sampling.sample_model).
sample_indices = [1000 - 1]
kwargs = {
    'start_temp': 1.0,
    'final_temp': 1.0,
    'sample_energy': False,
    'callback': horizontal_cb,
}

# Positional arguments 30 and 1000 are passed through unchanged; the output below
# lists 30 generated strings.
vis = Sampling.sample_model(rbm, 30, 1000, sample_indices, **kwargs)
print_columns(rbm.codec.maxlen)

# Summarise the free energy of the final visible configurations.
fe = rbm._free_energy(vis)
print('Final energy: {:.2f} (stdev={:.2f})\n'.format(fe.mean(), fe.std()))

arricuar    
arruca      
procasión   
ojiladero   
golcactón   
arrancar    
ligurriano  
perqué      
cambédego   
asufólejo   
nestojero   
necato      
abepadero   
agopídero   
infontero   
nionación   
diolótico   
aprino      
cictálimo   
profosión   
asobólero   
atotódego   
chanquero   
atapísero   
agilídero   
arregar     
abreyar     
procación   
chircheco   
tablésido   
Final energy: -87.86 (stdev=7.10)



## Test with RBM Implementation

In [10]:
import sys
import os
import inspect

# Make the sibling ``RBM_Git`` directory importable so that ``import rbm`` below resolves.
# NOTE(review): this relies on inspect reporting a file located in the notebook's
# directory for the current frame -- confirm with the Jupyter/ipykernel version in use.
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
# os.path.join avoids the invalid '\R' escape sequence and the hard-coded Windows separator.
parent = os.path.join(currentdir, 'RBM_Git')
# Guard against stacking duplicate entries when the cell is re-run.
if parent not in sys.path:
    sys.path.insert(0, parent)
%reload_ext autoreload
%autoreload 2
%matplotlib inline
import time
import matplotlib.pyplot as plt
import numexpr  as ne
import profile
import rbm as Rbm
import pandas
from random import randint
from timeit import default_timer as timer

In [11]:
# Re-encode the Spanish word list for the hand-written RBM implementation.
codec_kls = ShortTextCodec
codec = codec_kls('áéíóúñÁÉÍÓÚÑ', 10, 0, True, False)
vecs = Utils.vectors_from_txtfile("./spanish_Dict.txt", codec)

# Model dimensions: one visible unit per encoded feature column.
visible_dim = vecs.shape[1]
hidden_dim = 200

# Training settings.
# NOTE(review): the fit cells in this notebook pass literal `epochs` and
# `batch_size` values rather than these names -- confirm which values are intended.
epochs = 100
K = 1
lr = 0.1
batch_size = 10

In [12]:
# Build the RBM with a fixed seed; mu and sigma are passed through to the
# constructor (presumably the weight-initialisation distribution -- TODO confirm).
rbm_ = Rbm.RBM(
    visible_dim=visible_dim,
    hidden_dim=hidden_dim,
    seed=42,
    mu=0,
    sigma=0.3,
    monitor_time=True,
)
# Show the parameter shapes and the model object as the cell output.
(rbm_.W.shape, rbm_.b.shape, rbm_)

((650, 200), (650,), <rbm.RBM at 0x2060766fb70>)

In [13]:
# Densify the encoded vectors into a float64 array for the RBM's fit().
test_Data_Vector_Aux = np.array(vecs.toarray(), dtype=np.float64)

In [14]:
%%time
rbm_.fit(test_Data_Vector_Aux, 
         method='CDK',
         K=K,
         lr=lr,
         epochs=1,
         batch_size=300,
         plot_weights=False)

	Last epoch:ime per epoch: 119.79	total time: 119.80 0 	time per epoch: 119.79	total time: 119.80
	Training finished


Wall time: 1min 59s


In [15]:
%%time
rbm_.fit(test_Data_Vector_Aux, 
         method='vectorized_CDK',
         K=K,
         lr=0.01,
         epochs=1,
         batch_size=128,
         plot_weights=False)

	Last epoch:ime per epoch: 4.99	total time: 4.99 0 	time per epoch: 4.99	total time: 4.99
	Training finished


Wall time: 5.01 s


In [16]:
%%time
rbm_.fit(test_Data_Vector_Aux, 
         method='vectorized_CDK',
         K=K,
         lr=0.01,
         epochs=500,
         batch_size=128,
         plot_weights=False)

	epoch: 0 	time per epoch: 4.43	total time: 4.44 1 	time per epoch: 4.32	total time: 8.76 2 	time per epoch: 4.02	total time: 12.78 3 	time per epoch: 3.91	total time: 16.69 4 	time per epoch: 3.85	total time: 20.55 5 	time per epoch: 3.88	total time: 24.43 6 	time per epoch: 4.71	total time: 29.15 7 	time per epoch: 4.13	total time: 33.29 8 	time per epoch: 4.42	total time: 37.71 9 	time per epoch: 4.65	total time: 42.37 10 	time per epoch: 4.70	total time: 47.07 11 	time per epoch: 4.23	total time: 51.31 12 	time per epoch: 5.06	total time: 56.37 13 	time per epoch: 4.49	total time: 60.87 14 	time per epoch: 4.15	total time: 65.02 15 	time per epoch: 4.06	total time: 69.08 16 	time per epoch: 4.08	total time: 73.16 17 	time per epoch: 3.78	total time: 76.95 18 	time per epoch: 3.90	total time: 80.91 19 	time per epoch: 4.34	total time: 85.26 20 	time per epoch: 4.39	total time: 89.66 21 	time per epoch: 4.01	total time: 93.67 22 	time per epoch: 3.70	total time: 97.38 23 	time per ep

	epoch: 185 	time per epoch: 3.78	total time: 821.64 186 	time per epoch: 4.47	total time: 826.11 187 	time per epoch: 5.96	total time: 832.08 188 	time per epoch: 4.76	total time: 836.85 189 	time per epoch: 4.42	total time: 841.28 190 	time per epoch: 4.37	total time: 845.65 191 	time per epoch: 4.07	total time: 849.72 192 	time per epoch: 4.02	total time: 853.74 193 	time per epoch: 4.03	total time: 857.78 194 	time per epoch: 4.04	total time: 861.83 195 	time per epoch: 3.74	total time: 865.57 196 	time per epoch: 4.12	total time: 869.70 197 	time per epoch: 5.62	total time: 875.35 198 	time per epoch: 6.24	total time: 881.59 199 	time per epoch: 5.74	total time: 887.34 200 	time per epoch: 4.87	total time: 892.22 201 	time per epoch: 4.77	total time: 897.00 202 	time per epoch: 4.76	total time: 901.77 203 	time per epoch: 4.33	total time: 906.11 204 	time per epoch: 4.46	total time: 910.58 205 	time per epoch: 4.40	total time: 914.98 206 	time per epoch: 4.26	total time: 919.25 20

	Last epoch:	time per epoch: 4.27	total time: 1651.09 365 	time per epoch: 3.96	total time: 1655.06 366 	time per epoch: 3.96	total time: 1659.03 367 	time per epoch: 3.91	total time: 1662.94 368 	time per epoch: 3.93	total time: 1666.88 369 	time per epoch: 3.99	total time: 1670.88 370 	time per epoch: 5.41	total time: 1676.29 371 	time per epoch: 4.11	total time: 1680.41 372 	time per epoch: 3.86	total time: 1684.28 373 	time per epoch: 3.72	total time: 1688.00 374 	time per epoch: 4.60	total time: 1692.61 375 	time per epoch: 5.32	total time: 1697.97 376 	time per epoch: 3.98	total time: 1701.96 377 	time per epoch: 3.85	total time: 1705.82 378 	time per epoch: 3.90	total time: 1709.73 379 	time per epoch: 4.01	total time: 1713.75 380 	time per epoch: 4.05	total time: 1717.80 381 	time per epoch: 4.28	total time: 1722.09 382 	time per epoch: 4.09	total time: 1726.18 383 	time per epoch: 3.93	total time: 1730.11 384 	time per epoch: 4.18	total time: 1734.29 385 	time per epoch: 4.06	

In [17]:
# Encode a seed word with the codec; the printed shape shows a flat vector
# whose length matches the RBM's visible dimension.
word = codec.encode_onehot("acabar")
print(word.shape)

(650,)


In [18]:
# Run 2000 Gibbs steps starting from the encoded seed word. Two arrays come back
# (presumably the sampled visible state and its probabilities -- TODO confirm in rbm.py).
x_hat, x_hat_p = rbm_.sample_visible_from_visible(word, n_gibbs=2000)
print(x_hat.shape, x_hat_p.shape)

(650,) (650,)


In [19]:
# Decode the sampled visible vector back into text and show it.
fW = codec.decode(x_hat)
print(fW)
# Disabled experiment: draw ten more samples, each starting again from x_hat.
# for i in range(10):
#     x_hat1,x_hat_p1 = rbm_.sample_visible_from_visible(x_hat, n_gibbs=2000)
#     print(codec.decode(x_hat1))

reca?ía$$$
