# Imports

In [None]:
# Put the directory two levels above the current working directory on the
# module search path so that custom modules located there can be imported.
import os
import sys

sys.path.append(os.path.abspath(os.path.join(os.pardir, os.pardir)))

In [None]:
import cgael
from cgael.models.extras.LanguageDiscriminator import *
from cgael.metrics import brevity

import tensorflow as tf
import tensorflow.keras as keras
import tensorflow.keras.layers as layer

import pygad
import pygad.kerasga

import numpy as np
import pandas as pd

from PIL import Image

from matplotlib import pyplot as plt

# Set Up

In [None]:
# Output location for this experiment's artifacts:
#   <root_folder>/<experiment_folder>/<experiment_prefix>_<n>...
# NOTE(review): root_folder is an absolute, machine-specific path; adjust it
# when running on another machine.
root_folder = os.path.join(
    "C:", os.sep,
    "Users", "nicho",
    "PyProjects", "CGAEL_Results",
)
experiment_folder = "discriminator"  # sub-directory under root_folder
experiment_prefix = "eng_L10"        # prefix of every generated experiment id

In [None]:
# Create the experiment's output directory (and any missing parents); an
# already existing directory is not an error.
output_dir = os.path.join(root_folder, experiment_folder)
os.makedirs(output_dir, exist_ok=True)

In [None]:
# Template for experiment ids: "<experiment_prefix>_<i>".
ID_FORMAT = "{experiment_prefix}_{i}"

# Pick the lowest index i >= 1 whose "<id>.npy" file does not exist yet in the
# experiment folder, so an earlier run's saved weights are never overwritten.
i = 0
while True:
    i += 1
    experiment_id = ID_FORMAT.format(experiment_prefix=experiment_prefix, i=i)
    filename = f"{experiment_id}.npy"
    filepath = os.path.join(root_folder, experiment_folder, filename)
    if not os.path.exists(filepath):
        break

# Show where this run's weights will be written.
print(filepath)

# Training

In [None]:
# Token set shared by the generator and by eval() (via ts.encode).
# NOTE(review): presumably "CHAT" lists the usable letters and '-' is a
# padding/blank token — confirm against cgael.LanguageTokenSet.
ts = cgael.LanguageTokenSet("CHAT", '-')

In [None]:
# Length value shared by the data generator (encode_length=), the
# discriminator (word_length=) and the ts.encode() call in eval().
encode_length = 10

In [None]:
# The "real" vocabulary handed to the generator as real_words.
# Every entry is spelled using only the letters C, H, A and T; entries are
# grouped below by word length (1 to 5 letters).
english_words = {
    "A",
    "AT", "HA",
    "ACT", "CAT", "HAT",
    "CHAT", "TACT", "THAT",
    "CATCH", "HATCH",
}

In [None]:
# Data source used to train the discriminator (and, later, to produce
# gibberish samples via gen.gibberish()).
# NOTE(review): judging by the keyword names, this presumably yields
# `batch_count` batches of `batch_size` samples mixing `real_words` with
# generated non-words — confirm against LanguageDiscriminatorGenerator.
gen = LanguageDiscriminatorGenerator(
    tokens=ts, 
    real_words=english_words, 
    encode_length=encode_length, 
    batch_size=16, 
    batch_count=100
)

In [None]:
# Build the discriminator for words of length `encode_length` and train it on
# the batches from `gen` for 100 epochs.
# `history` is read below as history.history["loss"] / ["accuracy"], so it is
# presumably a Keras History object — TODO confirm against
# LanguageDiscriminatorModel.train.
discrim = LanguageDiscriminatorModel(word_length=encode_length)
history = discrim.train(gen, epochs=100)

In [None]:
# Persist the trained weights to this run's .npy file.
# The weight list is wrapped in an object-dtype array, which NumPy stores via
# pickle: loading it back requires np.load(filepath, allow_pickle=True).
# NOTE(review): np.array(..., dtype="object") on a list of arrays can raise
# for some combinations of shapes with equal leading dimensions — confirm this
# holds for the model's actual weight shapes.
trained_weights = np.array(discrim.model.get_weights(), dtype="object")
np.save(filepath, trained_weights)

# Evaluation

In [None]:
# Plot the recorded training loss and store the figure next to the weights as
# "<experiment_id>_loss.png".
# NOTE(review): the x-axis is labelled "Generation", but the values presumably
# hold one entry per training epoch — confirm the intended label.
loss_values = history.history["loss"]
loss_plot_path = os.path.join(
    root_folder, experiment_folder, f"{experiment_id}_loss.png"
)

plt.plot(loss_values)
plt.title(f"{experiment_id} Loss")
plt.xlabel("Generation")
plt.ylabel("Loss")
# Save before show(): once the figure has been shown it may no longer be the
# current figure, so saving afterwards can write an empty image.
plt.savefig(loss_plot_path)
plt.show()

In [None]:
# Plot the recorded training accuracy and store the figure next to the weights
# as "<experiment_id>_accuracy.png".
# NOTE(review): the x-axis is labelled "Generation", but the values presumably
# hold one entry per training epoch — confirm the intended label.
accuracy_values = history.history["accuracy"]
accuracy_plot_path = os.path.join(
    root_folder, experiment_folder, f"{experiment_id}_accuracy.png"
)

plt.plot(accuracy_values)
plt.title(f"{experiment_id} Accuracy")
plt.xlabel("Generation")
plt.ylabel("Accuracy")
# Save before show(): once the figure has been shown it may no longer be the
# current figure, so saving afterwards can write an empty image.
plt.savefig(accuracy_plot_path)
plt.show()

In [None]:
# Column names of the table produced by eval().
KEY_WORD = "word"
KEY_SCORE = "score"

def eval(words):
    """Score every word with the trained discriminator and display a table.

    NOTE: this function shadows the built-in ``eval`` for the rest of the
    notebook session.

    Args:
        words: Iterable of strings to score. It is consumed exactly once.

    Returns:
        None. A DataFrame with one row per word (columns ``"word"`` and
        ``"score"``) is rendered through ``display``.
    """
    word_list = list(words)
    # Each word is encoded on its own with target shape (1, encode_length) and
    # the model output is reduced to a plain Python scalar via .item().
    score_list = [
        discrim.model(ts.encode(word, (1, encode_length))).numpy().item()
        for word in word_list
    ]
    table = pd.DataFrame(data={KEY_WORD: word_list, KEY_SCORE: score_list})
    display(table)

In [None]:
# Show the discriminator's score for each word of the real vocabulary.
eval(english_words)

In [None]:
# Show the discriminator's score for ten samples produced by gen.gibberish(),
# for comparison with the real-word scores.
eval([gen.gibberish() for _ in range(10)])