In [None]:
import numpy as np
import random
import os
import json
from google.colab import drive
import keras
from keras import layers

In [None]:
# Mount Google Drive at /content/drive. The corpus, vocabulary metadata and
# model checkpoint paths used by the other cells all live under this mount point.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Read the whole training corpus into memory as a single string.
with open("/content/drive/MyDrive/rapbot_text_all.txt", "r", encoding="utf-8") as f:
    text_All = f.read()

# Read the character-vocabulary metadata. A context manager closes the file
# deterministically, and the explicit encoding avoids depending on the
# platform default when the vocabulary contains non-ASCII characters.
with open("/content/drive/MyDrive/rapbot_char_meta.json", "r", encoding="utf-8") as f:
    meta = json.load(f)

chars = meta["chars"]
char_indices = meta["char_indices"]
# JSON object keys are always strings, so turn them back into integer indices.
indices_char = {int(k): v for k, v in meta["indices_char"].items()}

print("Corpus size:", len(text_All))
print("Unikalūs simboliai:", len(chars))

Corpus size: 1378646
Unikalūs simboliai: 40


In [None]:
maxlen = 60                 # number of characters in each input sequence
step = 2                    # stride between the starts of consecutive sequences
max_corpus_window = 150000  # maximum number of corpus characters used per call


def get_x_y(text=None, char_to_index=None, vocab_size=None):
    """Build one-hot (x, y) next-character training arrays from a text window.

    If the text is longer than ``max_corpus_window`` characters, a contiguous
    window of that length is drawn at a random position (using the ``random``
    module); otherwise the whole text is used. The window is cut into
    overlapping sequences of ``maxlen`` characters taken every ``step``
    characters, each paired with the character that immediately follows it.

    Args:
        text: Text to sample from. Defaults to the notebook-level ``text_All``.
        char_to_index: Mapping from character to its integer index. Defaults
            to the notebook-level ``char_indices``.
        vocab_size: Size of the one-hot axis. Defaults to ``len(chars)`` when
            ``char_to_index`` is not given, else to ``len(char_to_index)``.

    Returns:
        Tuple ``(x, y)`` of boolean arrays with shapes
        ``(n_sequences, maxlen, vocab_size)`` and ``(n_sequences, vocab_size)``.
        Both are empty along the first axis when the window is not longer
        than ``maxlen``.

    Raises:
        KeyError: If a character used by a sequence is missing from
            ``char_to_index``.
    """
    # Resolve defaults at call time so a plain get_x_y() keeps using the
    # notebook globals exactly as before.
    if vocab_size is None:
        vocab_size = len(chars) if char_to_index is None else len(char_to_index)
    if char_to_index is None:
        char_to_index = char_indices
    if text is None:
        text = text_All

    if len(text) <= max_corpus_window:
        text_chunk = text
    else:
        start_pos = random.randint(0, len(text) - max_corpus_window)
        text_chunk = text[start_pos:start_pos + max_corpus_window]

    # Start offset of every sequence; empty when the chunk is too short.
    starts = np.arange(0, len(text_chunk) - maxlen, step)
    n_sequences = len(starts)

    x = np.zeros((n_sequences, maxlen, vocab_size), dtype="bool")
    y = np.zeros((n_sequences, vocab_size), dtype="bool")
    if n_sequences == 0:
        return x, y

    # Encode each character once instead of once per overlapping sequence.
    # Only the prefix that the sequences and their targets actually read is
    # encoded, so a trailing unused character is never looked up.
    used = int(starts[-1]) + maxlen + 1
    codes = np.fromiter(
        (char_to_index[ch] for ch in text_chunk[:used]),
        dtype=np.intp,
        count=used,
    )

    # (n_sequences, maxlen) matrix of character indices, one row per sequence.
    window_codes = codes[starts[:, None] + np.arange(maxlen)]

    rows = np.arange(n_sequences)
    x[rows[:, None], np.arange(maxlen), window_codes] = True
    y[rows, codes[starts + maxlen]] = True

    return x, y

In [None]:
# RMSprop optimizer with the gradient norm clipped at 1.0.
optimizer = keras.optimizers.RMSprop(learning_rate=0.003, clipnorm=1.0)

# Character-level model: a single LSTM layer reads a one-hot window of shape
# (maxlen, len(chars)) and a softmax layer scores every vocabulary character.
model = keras.Sequential()
model.add(keras.Input(shape=(maxlen, len(chars))))
model.add(layers.LSTM(128, dropout=0.2, recurrent_dropout=0.2))
model.add(layers.Dense(len(chars), activation="softmax"))

model.compile(optimizer=optimizer, loss="categorical_crossentropy")

model.summary()

In [None]:
# Number of outer training rounds; each round runs a single Keras epoch over a
# freshly built (x, y) set.
epochs = 60
batch_size = 128

# Checkpoint file on the mounted Drive; overwritten after every round.
model_path = "/content/drive/MyDrive/rap_lyrics_lstm_v3.keras"

print("\n--- Pradedamas treniravimas ---")

for epoch in range(epochs):
    print(f"\nEpocha {epoch+1}/{epochs}")

    x, y = get_x_y()   # new random corpus window each round (at most max_corpus_window characters, not 500k)
    model.fit(x, y, batch_size=batch_size, epochs=1)

    # Save the model to Drive after each round so progress survives a lost session.
    model.save(model_path)
    print("Modelis išsaugotas:", model_path)



--- Pradedamas treniravimas ---

Epocha 1/60
[1m586/586[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m98s[0m 165ms/step - loss: 1.7570
Modelis išsaugotas: /content/drive/MyDrive/rap_lyrics_lstm_v3.keras

Epocha 2/60
[1m586/586[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m98s[0m 168ms/step - loss: 1.7514
Modelis išsaugotas: /content/drive/MyDrive/rap_lyrics_lstm_v3.keras

Epocha 3/60
[1m586/586[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m97s[0m 166ms/step - loss: 1.7872
Modelis išsaugotas: /content/drive/MyDrive/rap_lyrics_lstm_v3.keras

Epocha 4/60
[1m586/586[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m96s[0m 163ms/step - loss: 1.6763
Modelis išsaugotas: /content/drive/MyDrive/rap_lyrics_lstm_v3.keras

Epocha 5/60
[1m586/586[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m97s[0m 166ms/step - loss: 1.7210
Modelis išsaugotas: /content/drive/MyDrive/rap_lyrics_lstm_v3.keras

Epocha 6/60
[1m586/586[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m96s[0m 163ms/step - loss