# Setup

In [23]:
from fastai.text.all import Path, get_text_files,DataBlock,language_model_learner, \
                            partial,TextBlock,RandomSplitter,AWD_LSTM,accuracy, perplexity, valley, slide
import pickle
# Root directory of the documents; it is created (with parents) if missing.
path = Path("/notebooks/fa") # Directory of documents
path.mkdir(parents=True, exist_ok=True)

# Dataloaders

In [25]:
def txt(path, size=3000):
    """Return a random sample of the text files under ``path``'s ``docs`` folder.

    Args:
        path: Root directory that is searched with ``get_text_files``.
        size: Number of files to draw.

    Returns:
        The selected subset of the collection returned by ``get_text_files``.

    Raises:
        ValueError: If no text files are found, so nothing can be sampled.

    NOTE(review): indices are drawn independently, so the same file can
    appear more than once in the sample (sampling with replacement) --
    confirm that this is intended.
    """
    # Imported locally because the file's import cell does not bring numpy in.
    import numpy as np

    files = get_text_files(path, folders='docs')
    if len(files) == 0:
        # np.random.randint(0, 0, ...) would fail with an opaque "low >= high".
        raise ValueError(f"No text files found in the 'docs' folder of {path}")
    picked = np.random.randint(0, len(files), size).tolist()
    return files[picked]

# Item getter for the DataBlock: get_text_files restricted to the 'docs' folder.
text_file = partial(get_text_files, folders= 'docs')

In [26]:
def loaders(path, txt_path, vocab=None, bs=64, seq_len=80,
            lm=True, split_pr=0.1, back=False):
    """Build the text dataloaders for the documents under ``path``.

    Args:
        path: Root directory of the documents; used both as the source of the
            items and as the dataloaders' working path.
        txt_path: Callable passed as ``get_items`` to collect the text files.
        vocab: Optional vocabulary forwarded to ``TextBlock.from_folder``.
        bs: Batch size of the resulting dataloaders.
        seq_len: Sequence length forwarded to ``TextBlock.from_folder``.
        lm: Forwarded as ``is_lm`` (language-model style data when true).
        split_pr: Fraction passed to ``RandomSplitter`` for the validation set.
        back: Forwarded as ``backwards`` to ``TextBlock.from_folder``.

    Returns:
        The dataloaders produced by ``DataBlock.dataloaders``.
    """
    text_block = TextBlock.from_folder(path, is_lm=lm, seq_len=seq_len,
                                       backwards=back, vocab=vocab)
    # Fixed seed so the train/validation split is the same on every run.
    splitter = RandomSplitter(split_pr, seed=123)
    data_block = DataBlock(text_block, get_items=txt_path, splitter=splitter)
    # Honour the caller's batch size (it was previously hard-coded to 64).
    return data_block.dataloaders(path, path, bs=bs)

In [27]:
# back=True is forwarded as `backwards=True`: tokenized text is read right to left.
dls = loaders(path, text_file, back=True)

# Modeling

In [28]:
def model(dls, d=0.1, unfreez=True, pretrain_path=None, pre_t=False,
          lr=False):
    """Create an AWD_LSTM language-model learner in mixed precision.

    Args:
        dls: Dataloaders the learner is built on.
        d: Value passed as ``drop_mult`` to ``language_model_learner``.
        unfreez: When true, ``unfreeze()`` is called on the learner.
        pretrain_path: Optional checkpoint handed to ``learn.load``; skipped
            when ``None``.
        pre_t: Value passed as ``pretrained`` to ``language_model_learner``.
        lr: When true, run the learning-rate finder with the ``valley`` and
            ``slide`` suggestion functions.

    Returns:
        The configured learner, tracking accuracy and perplexity.
    """
    learner_options = dict(
        drop_mult=d,
        pretrained=pre_t,
        metrics=[accuracy, perplexity],
    )
    lm_learner = language_model_learner(dls, AWD_LSTM, **learner_options).to_fp16()

    if pretrain_path is not None:
        lm_learner.load(pretrain_path)
    if unfreez:
        lm_learner.unfreeze()
    if lr:
        lm_learner.lr_find(suggest_funcs=(valley, slide))
    return lm_learner

# Alternative, without loading a checkpoint: learner = model(dls,d=0.3)
# Resume from the saved checkpoint "fa_b_func25" with drop_mult=0.3.
learner = model(dls,pretrain_path="/notebooks/model/fa_b_func25",d=0.3)

In [None]:
# Train the language model for one epoch with the one-cycle policy,
# using 15e-4 as the learning-rate argument and an explicit momentum schedule.
learner.fit_one_cycle(1, 15e-4, moms=(0.8,0.7,0.8))

epoch,train_loss,valid_loss,accuracy,perplexity,time


# Saving

In [None]:
def save_ml(name,learn = learner ,dls=dls, 
            path = "/notebooks/model", 
            v = True, with_opt=True):
    """
    Save a model checkpoint and, optionally, its vocabulary.

    Args:
        name: Base name of the files to write.
        learn: Learner to save. It is switched to fp32 via ``to_fp32()``
            before saving.
        dls: Dataloaders whose ``vocab`` is pickled.
        path: Output directory; created (with parents) if missing.
        v: When true, pickle ``dls.vocab`` to ``vocab_{name}``.
        with_opt: When true, write an additional ``{name}_w`` checkpoint with
            ``with_opt=False`` (no optimizer state).

    The ``{name}`` checkpoint is always written with ``learn.save``'s default
    options. NOTE(review): presumably that default includes the optimizer
    state -- confirm against the fastai version in use.
    """
    pth = Path(path)
    pth.mkdir(exist_ok=True, parents=True)

    # Use the learner passed by the caller, not the notebook-level global.
    fp32_learn = learn.to_fp32()
    fp32_learn.save(pth/f"{name}")
    if with_opt:
        fp32_learn.save(pth/f"{name}_w", with_opt=False)
    if v:
        with open(pth/f"vocab_{name}", "wb") as f:
            pickle.dump(dls.vocab, f)


# Save the trained model as "fa_b_func26" using save_ml's default arguments.
save_ml("fa_b_func26")