# Training a language model with fastai
- Fine-tune a language model on the curated IMDB dataset, starting from the pre-trained AWD_LSTM model


In [1]:
#hide
!pip install -Uqq fastbook
import fastbook
fastbook.setup_book()

In [2]:
#hide
from fastbook import *
from fastai.text.all import *

In [3]:
# Run tag: appended to the file names of the exported learner, the saved
# weights and the saved encoder so artifacts from different runs don't collide.
modifier = "mar3"

# Training a language model
- Take a pretrained model and fine-tune it further on the IMDB dataset

In [4]:
%%time
# Fetch the IMDB dataset through fastai's `untar_data` and keep the returned
# dataset root in `path`; the trailing expression lists that folder's contents.
path = untar_data(URLs.IMDB)
path.ls()

CPU times: user 4.77 ms, sys: 159 µs, total: 4.93 ms
Wall time: 23.1 ms


(#7) [Path('/storage/data/imdb/README'),Path('/storage/data/imdb/tmp_lm'),Path('/storage/data/imdb/imdb.vocab'),Path('/storage/data/imdb/tmp_clas'),Path('/storage/data/imdb/test'),Path('/storage/data/imdb/train'),Path('/storage/data/imdb/unsup')]

In [5]:
%%time
# create TextDataLoaders object
get_imdb = partial(get_text_files, folders=['train', 'test', 'unsup'])
dls = TextDataLoaders.from_folder(path, valid = 'test', is_lm=True, bs=16)

CPU times: user 5.81 s, sys: 6.73 s, total: 12.5 s
Wall time: 55 s


In [6]:
# Sanity-check the data: display up to two samples, each with its input text
# and its target text side by side.
dls.show_batch(max_n=2)

Unnamed: 0,text,text_
0,"xxbos 50 years old , this musical comedy fantasy might look its age , but it wears it with dignity . \n\n xxmaj this film is still great fun . xxmaj crosby was never really romantic lead material , but he delivers the material with the lightly humorous edge it needs . xxmaj bendix plays broad and is huge fun in a part which calls upon his strengths . xxmaj hardwicke -","50 years old , this musical comedy fantasy might look its age , but it wears it with dignity . \n\n xxmaj this film is still great fun . xxmaj crosby was never really romantic lead material , but he delivers the material with the lightly humorous edge it needs . xxmaj bendix plays broad and is huge fun in a part which calls upon his strengths . xxmaj hardwicke - how"
1,"something despite stating that he wants to escape so he can see his daughter again , and xxmaj kate becomes emotionally tough seconds after going to pieces over someone that ripped her off for a xxunk . xxmaj yeah . \n\n xxmaj after starting out as a "" this could happen to anyone "" movie , it quickly falls apart as it introduces ideas that make it more and more unrealistic .","despite stating that he wants to escape so he can see his daughter again , and xxmaj kate becomes emotionally tough seconds after going to pieces over someone that ripped her off for a xxunk . xxmaj yeah . \n\n xxmaj after starting out as a "" this could happen to anyone "" movie , it quickly falls apart as it introduces ideas that make it more and more unrealistic . a"


In [7]:
%%time
# define and train model
learn = language_model_learner(dls,AWD_LSTM,metrics=accuracy).to_fp16()
learn.fine_tune(1, 1e-2)

epoch,train_loss,valid_loss,accuracy,time
0,4.74151,4.345975,0.266929,34:07


epoch,train_loss,valid_loss,accuracy,time
0,4.238236,4.04406,0.29634,35:18


CPU times: user 50min 5s, sys: 12min 56s, total: 1h 3min 2s
Wall time: 1h 9min 32s


In [9]:
# Generate text with the fine-tuned language model: continue the prompt,
# asking for 20 generated words (n_words=20).
# Alternative prompt to try: 'The star is'
learn.predict("what comes next", n_words=20)

'what comes next is a same repugnantly western trash animation , badly written and watched TV comedy , largely written by'

In [10]:
# Export the whole Learner to a single file tagged with `modifier`.
# Learner.export writes straight to the target file and does not create missing
# parent folders, so make sure the directory exists first; otherwise this cell
# fails on a fresh environment where nothing has been saved yet.
export_path = Path('/notebooks/temp/models') / ('lm_model_' + modifier)
export_path.parent.mkdir(parents=True, exist_ok=True)
learn.export(export_path)

In [11]:
# Remember the learner's current path so it can be put back after the
# temporary override used for saving.
keep_path = learn.path

In [12]:
# Workaround: point the learner at a writable directory so the subsequent
# save calls write their files under it.
learn.path = Path('/notebooks/temp')

In [13]:
# Display the learner's path to confirm which directory it currently points at.
learn.path

Path('/notebooks/temp')

In [14]:
# Display the learner's model_dir, the subfolder name used for saved weights.
learn.model_dir

'models'

In [15]:
# Save the learner's weights under a name tagged with `modifier`; the call
# returns the path of the written file.
learn.save(f'lm_{modifier}')

Path('/notebooks/temp/models/lm_mar3.pth')

In [16]:
# Save only the encoder, tagged with `modifier`, so that it can be loaded
# later when building the classifier.
learn.save_encoder(f'ft_{modifier}')

In [17]:
# Restore the learner's original path now that the temporary override is no
# longer needed.
learn.path = keep_path