In [0]:
# Mount Google Drive inside the Colab VM. Every path used by the later cells
# (dataset CSV, pickled mapping, checkpoints, saved model) lives under
# '/content/gdrive/My Drive/', so this cell has to run first.
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [0]:
import pandas as pd
from pickle import dump
import numpy as np

In [0]:
# Load the article table; only its 'Title' column is used below.
articles = pd.read_csv('/content/gdrive/My Drive/Notebook_train/all_data.csv', delimiter = ',')

# Join every title into one newline-separated corpus, so '\n' doubles as the
# end-of-title marker the model learns to emit.
text = articles['Title'].str.cat(sep='\n')
text_size = len(text)

# Character vocabulary: each distinct character gets an index in sorted order.
chars = sorted(set(text))
mapping = {c: i for i, c in enumerate(chars)}
# Persist the mapping so generation can run without rebuilding the corpus.
# The context manager makes sure the file is flushed and closed on Drive.
with open('/content/gdrive/My Drive/Notebook_train/mapping1.pkl', 'wb') as mapping_file:
    dump(mapping, mapping_file)

vocab_size = len(mapping)
print("Vocab size: ", vocab_size)

# The whole corpus as a flat list of character indices.
encoded_text = [mapping[char] for char in text]
encode_size = len(encoded_text)
print('Code size:', encode_size)

seqlen = 10        # number of input characters per training window
batchsize = 1024   # number of windows per generated batch
# Number of full batches such that every window still has a target character
# after it. Integer floor division avoids the float round-trip; clamped at 0
# so a corpus shorter than one window yields no batches.
batchnum = max((encode_size - seqlen) // batchsize, 0)

Vocab size:  158
Code size: 9641158


In [0]:
from keras.utils import to_categorical

def myGenerator():
    """Endlessly yield one-hot encoded (X_batch, y_batch) training batches.

    Reads the notebook-level globals ``encoded_text``, ``batchnum``,
    ``batchsize``, ``seqlen`` and ``vocab_size``. Batch ``b`` is built from the
    ``batchsize`` consecutive window start positions beginning at
    ``b * batchsize``; each window is ``seqlen`` character indices and its
    target is the single index that follows it. After ``batchnum`` batches the
    generator starts again from the beginning of the corpus.

    Yields:
        tuple: ``(X_batch, y_batch)`` as numpy arrays produced by
        ``to_categorical`` -- presumably shaped (batchsize, seqlen, vocab_size)
        and (batchsize, vocab_size); confirm against the Keras version in use.
    """
    while True:
        for batch_idx in range(batchnum):
            first = batch_idx * batchsize
            starts = range(first, first + batchsize)

            # Input windows and the one-element slice holding each window's target.
            windows = [encoded_text[k:k + seqlen] for k in starts]
            targets = [encoded_text[k + seqlen:k + seqlen + 1] for k in starts]

            X_batch = np.array([to_categorical(w, num_classes=vocab_size) for w in windows])
            y_batch = np.array(to_categorical(targets, num_classes=vocab_size))

            yield (X_batch, y_batch)


Using TensorFlow backend.


NameError: ignored

In [0]:
from keras.models import Sequential
from keras.layers import Input, Dense, LSTM, SimpleRNN
from keras.models import Model
from keras.callbacks import ModelCheckpoint

In [0]:
# load_model is otherwise only imported in the generation section further
# down, so on a fresh kernel (Restart & Run All) this cell would raise
# NameError. Import it here so the cell is self-contained.
from keras.models import load_model

# Resume from a previously saved full-model checkpoint on Drive.
model = load_model(r"/content/gdrive/My Drive/Notebook_train/Checkpoints_models/checkpt--08.hdf5")

In [0]:
# Architecture for training from scratch (disabled; `model` currently comes
# from the load_model cell above):
#model = Sequential()
#model.add(LSTM(300, return_sequences=True, input_shape=(seqlen, vocab_size)))
#model.add(LSTM(150, return_sequences=True))
#model.add(LSTM(75))
#model.add(Dense(vocab_size, activation='softmax'))
#print(model.summary())

my_generator = myGenerator()

# NOTE(review): `model` was restored with load_model; compiling it again
# presumably replaces whatever optimizer state the checkpoint carried --
# confirm this is intended when resuming training.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Save the full model (not just weights) every 4th epoch. No validation data
# is passed to fit_generator below; `monitor` should only matter when
# save_best_only=True (per the Keras docs) -- verify before enabling it.
checkpoint_path = r"/content/gdrive/My Drive/Notebook_train/Checkpoints_models/3rd_checkpt--{epoch:02d}.hdf5"
checkpoint = ModelCheckpoint(
    checkpoint_path,
    monitor='val_loss',
    verbose=1,
    save_best_only=False,
    save_weights_only=False,
    mode='auto',
    period=4,
)
callbacks_list = [checkpoint]

# One epoch is one full pass over the `batchnum` batches of the generator.
history = model.fit_generator(
    my_generator,
    steps_per_epoch=batchnum,
    epochs=12,
    callbacks=callbacks_list,
    verbose=1,
)

# Persist the final model separately from the periodic checkpoints.
full_model_path = "/content/gdrive/My Drive/Notebook_train/full_model_3lay.h5"
model.save(full_model_path)


Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12

Epoch 00004: saving model to /content/gdrive/My Drive/Notebook_train/Checkpoints_models/3rd_checkpt--04.hdf5
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12

Epoch 00008: saving model to /content/gdrive/My Drive/Notebook_train/Checkpoints_models/3rd_checkpt--08.hdf5
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12

Epoch 00012: saving model to /content/gdrive/My Drive/Notebook_train/Checkpoints_models/3rd_checkpt--12.hdf5


In [0]:
from pickle import load
from keras.models import load_model
from keras.utils import to_categorical
from keras.preprocessing.sequence import pad_sequences
import random

In [0]:
def generate_seq(model, mapping, seq_length, seed_text, n_chars):
    """Sample text one character at a time from a character-level model.

    Starting from ``seed_text``, the last ``seq_length`` character indices are
    padded/truncated to a fixed window, one-hot encoded and passed to
    ``model``. The next character is drawn at random, weighted by the model's
    output row, and appended. Generation stops after ``n_chars`` steps or as
    soon as a newline is produced, whichever comes first.

    Args:
        model: Object exposing ``predict(x, verbose=0)`` that returns one row
            of ``len(mapping)`` non-negative weights per input sequence.
        mapping: Dict from character to integer index.
        seq_length: Number of characters the model takes as input.
        seed_text: Text to start from; it is included in the result.
        n_chars: Maximum number of characters to generate.

    Returns:
        str: ``seed_text`` followed by the generated characters (including the
        terminating newline when one was sampled).

    Raises:
        KeyError: If ``seed_text`` contains a character missing from ``mapping``.
    """
    # Size the one-hot encoding and the sampling population from the mapping
    # itself, not from a notebook-level global that may not exist when only
    # the generation cells are run.
    n_classes = len(mapping)

    # Build the index -> character table once instead of scanning the mapping
    # for every generated character. setdefault keeps the first character seen
    # for an index, matching a first-match linear scan.
    index_to_char = {}
    for char, index in mapping.items():
        index_to_char.setdefault(index, char)

    # Encode the seed once and extend it as characters are sampled, rather
    # than re-encoding the whole growing text on every step.
    encoded_ids = [mapping[char] for char in seed_text]
    in_text = seed_text
    print("Generating text now . . .")
    for _ in range(n_chars):
        # Only the most recent seq_length indices feed the model; shorter
        # inputs are left-padded by pad_sequences (pad value 0 by default,
        # which is also a valid character index in `mapping`).
        window = pad_sequences([encoded_ids[-seq_length:]], maxlen=seq_length, truncating='pre')
        one_hot = to_categorical(window, num_classes=n_classes)
        # predict() returns the softmax output directly; predict_proba() was
        # only an alias for it and no longer exists in recent Keras releases.
        probs = model.predict(one_hot, verbose=0)
        yhat = random.choices(range(n_classes), weights=probs[0], k=1)[0]
        out_char = index_to_char.get(yhat, '')
        if yhat in index_to_char:
            encoded_ids.append(yhat)
        in_text += out_char
        # Test the sampled character itself, not a leaked loop variable.
        if out_char == "\n":
            break
    return in_text

In [0]:
# Load the final model written at the end of training (swap in the
# commented-out line to sample from an intermediate checkpoint instead).
trained_model = load_model(r"/content/gdrive/My Drive/Notebook_train/full_model_3lay.h5")
#trained_model = load_model(r"/content/gdrive/My Drive/Notebook_train/Checkpoints_models/Checkpoints_models/checkpt--08.hdf5")
# Restore the character -> index mapping saved by the data-preparation cell.
# The context manager closes the file handle once the pickle has been read.
# NOTE: unpickling can execute arbitrary code -- only load files you created.
with open('/content/gdrive/My Drive/Notebook_train/mapping1.pkl', 'rb') as mapping_file:
    mapping = load(mapping_file)

In [0]:
# Print 100 sampled headlines, all seeded with the same prefix. Each sample
# gets a random character budget; generate_seq also stops at the first newline.
# NOTE: `seqlen` comes from the data-preparation cell, which must have run.
for i in range(100):
    n_chars = random.randint(200, 400)
    headline = generate_seq(trained_model, mapping, seqlen, 'Donald Tusk', n_chars)
    print(headline)

Generating text now . . .
Donald Tusk - wzroślił na mieście: więcej Woli juniorską koszykówki

Generating text now . . .
Donald Tusk - poruszał zamknąć Spójnię "komponauw. Pierwszy raz 255 zł?

Generating text now . . .
Donald Tusk wicetarg Peszko

Generating text now . . .
Donald Tusk ochronienie

Generating text now . . .
Donald Tusk na zwy zapłacisz dzieci przed zima: Co za inwestycji wie, wygrana Imprezy

Generating text now . . .
Donald Tusk na Motławie

Generating text now . . .
Donald Tusk skończyła tunele stracą galaktyką

Generating text now . . .
Donald Tusk na wandalizmu. Tyson przy ul. Zobacz fotele, kadłowie z Gdańska wysok na lodowę z las ORE

Generating text now . . .
Donald Tusk Polska i Gdańsk i Zagłębia na lotnisku. Enabek z Arką Ślonowe niedopingu

Generating text now . . .
Donald Tusk w I lidze

Generating text now . . .
Donald Tusk był tydzień

Generating text now . . .
Donald Tusk i Ciebie będą robić? Siedem atrakcji na Starym Prasą na Wyspach Stoczniowca recenzji