# MANIERODESYLABIZATOR

### Importy

In [13]:
import time
import random
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from keras.models import Sequential
from keras.layers import LSTM, Dense
from keras.models import load_model

### Funkcje Pomocnicze

In [14]:
# Alphabet understood by the encoder. The position of each letter fixes its
# one-hot column, so the order (including 'x' before 'v' and 'w') must not
# change for a model that was trained with it.
slownik = list('aąbcćdeęfghijklłmnńoópqrsśtuxvwyzźż')
# Letter -> 0-based position in `slownik`.
char_to_int = {letter: position for position, letter in enumerate(slownik)}
# Number of character positions encoded per word.
max_length = 32

In [15]:
def resizer(X, length):
    """Return list `X` brought to exactly `length` items.

    Shorter lists are right-padded with zeros; lists that are already long
    enough are cut down to their first `length` items.
    """
    missing = length - len(X)
    if missing > 0:
        return X + [0] * missing
    return X[:length]

def word_to_indices(word, length):
    """Encode `word` as 1-based positions in `slownik`, sized to `length`.

    Index 0 is left free for the padding that `resizer` appends.
    A character that is not in `slownik` raises ValueError (from `list.index`).
    """
    encoded = []
    for letter in word:
        encoded.append(slownik.index(letter) + 1)
    return resizer(encoded, length)

def locate_syllables(locations, length):
    """Build a 0/1 list of size `length` with ones at the given `locations`.

    Each entry of `locations` is used directly as a list index, so an index
    outside the list raises IndexError.
    """
    marks = [0] * length
    for position in locations:
        marks[position] = 1
    return marks

### Dane treningowe

In [16]:
# Read the training set. Each useful line has the form "<word> <int> <int> ...".
# Alternative dataset file: "training_set_dummy.txt".
with open("training_set_expert.txt", "r", encoding="utf-8") as data_file:
    # The context manager closes the file handle as soon as the text is read.
    data = data_file.read().split("\n")
# Keep only lines that contain a space; this drops empty lines.
data = [d for d in data if ' ' in d]

# Encode the first token of every line as 1-based alphabet indices,
# padded/truncated to max_length (0 marks padding).
words = [word_to_indices(line.split()[0], max_length) for line in data]
# One-hot encode: X_train[i, j, k] == 1 when letter j of word i is slownik[k];
# padding positions stay all-zero.
X_train = np.zeros((len(words), max_length, len(slownik)))
for i, word in enumerate(words):
    for j, char in enumerate(word):
        if char != 0:
            X_train[i, j, char-1] = 1
# Flatten each word to a single vector so it can feed a plain Dense network.
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1]*X_train.shape[2]))

# Targets: the integers that follow the word on each line.
# NOTE(review): presumably max_length 0/1 flags per line - confirm against the data file.
y_train = [list(map(int, d.split()[1:])) for d in data]
y_train = np.array(y_train)

### Model

In [17]:
def create_model(max_length, learning_rate):
    """Build and compile the dense network used for syllable marking.

    The input is a flattened one-hot word of `max_length * len(slownik)`
    values; the output has `max_length` sigmoid units. All hidden layers use
    sigmoid activations (256 units, then three layers of 128).

    Args:
        max_length: number of character positions per word.
        learning_rate: learning rate passed to the Adam optimizer.

    Returns:
        The compiled `tf.keras.Sequential` model (binary cross-entropy loss,
        accuracy metric).
    """
    adam = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    input_width = max_length * len(slownik)

    layers = [
        tf.keras.layers.Dense(256, activation='sigmoid', input_shape=(input_width,)),
    ]
    for _ in range(3):
        layers.append(tf.keras.layers.Dense(128, activation='sigmoid'))
    # One output unit per character position.
    layers.append(tf.keras.layers.Dense(max_length, activation='sigmoid'))

    network = tf.keras.Sequential(layers)
    network.compile(optimizer=adam, loss='binary_crossentropy', metrics=['accuracy'])
    return network

### Trening

In [18]:
# Optionally continue training the saved model on the loaded data.
train = input("Train model? (y/n): ")
if train == 'y':
    # Resume from the model stored on disk; to start from scratch, use
    # create_model(max_length, learning_rate=0.0005) here instead.
    model = load_model('model_desylabizator.h5')
    history = model.fit(X_train, y_train, epochs=50)
    # Write the trained model back to the same file.
    model.save('model_desylabizator.h5')
    # Stack the loss and accuracy curves in two subplots.
    for position, title, metric in ((211, 'Loss', 'loss'), (212, 'Accuracy', 'accuracy')):
        plt.subplot(position)
        plt.title(title)
        plt.plot(history.history[metric], label='train')
    # Called once after the loop, so the legend lands on the last (accuracy) subplot.
    plt.legend()
    plt.show()

### Testy

In [19]:
# Load the saved model from disk for the tests below.
# (load_model is also imported at the top of the file.)
from keras.models import load_model
model = load_model('model_desylabizator.h5')

In [23]:
def test_model(model, input_wrod, max_len):
    """Insert hyphens into a word at the boundaries predicted by `model`.

    Args:
        model: object whose `predict(X, verbose=0)` returns one row of
            per-position scores for a flattened one-hot input of shape
            `(1, max_len * len(slownik))`.
        input_wrod: the word to split. Only its first `max_len` characters
            are encoded; each of them must be a key of `char_to_int`.
        max_len: number of character positions encoded for the model.

    Returns:
        The full word with '-' placed before every position (other than the
        first) whose rounded score is 1. Characters beyond the model's
        output length are appended without hyphens.

    Raises:
        KeyError: if one of the encoded characters is not in `char_to_int`.
    """
    X_new = np.zeros((1, max_len, len(slownik)))
    for j, char in enumerate(input_wrod[:max_len]):
        X_new[0, j, char_to_int[char]] = 1
    # Flatten to the 2D layout the dense model expects.
    X_new = X_new.reshape((X_new.shape[0], X_new.shape[1]*X_new.shape[2]))

    y_pred = model.predict(X_new, verbose=0)[0]
    y_pred = [int(round(x)) for x in y_pred]

    output = []
    for i, char in enumerate(input_wrod):
        # Never hyphenate before the first letter: the result must not depend
        # on the model flagging position 0 (previously the first character was
        # unconditionally sliced off). Positions past the prediction vector
        # have no score, so they get no hyphen instead of raising IndexError.
        if 0 < i < len(y_pred) and y_pred[i] == 1:
            output.append('-')
        output.append(char)
    return "".join(output)

In [27]:
# Interactive loop: split every entered word until the user types "q".
while True:
    word = input("Podaj słowo (q - wyjście)")
    if word != "q":
        desylabizowane = test_model(model, word, max_length)
        print(word, "\t=>", desylabizowane)
    else:
        break

mechanika 	=> me-cha-ni-ka
resuscytacja 	=> re-su-scy-ta-cja
kaloryfer 	=> ka-lo-ry-fer
kwantyzacja 	=> kwan-ty-za-cja
miriady 	=> mir-ia-dy
