# Training a text classifier model with fastai
- This notebook assumes you have already run the text_model_training.ipynb notebook
- In this notebook, the IMDB dataset is ingested
- the first section

In [1]:
#hide
!pip install -Uqq fastbook
import fastbook
fastbook.setup_book()

In [2]:
#hide
from fastbook import *
from fastai.text.all import *

In [3]:
# Switch to control whether the direct TDL (presumably TextDataLoaders) or the DataBlocks definition is used.
# NOTE(review): `tdl` is not read by any of the cells shown here; the dataloaders below are
# always built with TextDataLoaders.from_folder — confirm whether this switch is still needed.
tdl = True

In [4]:
# Run-specific suffix: appended to 'ft_' to name the encoder file that is loaded later,
# and embedded in the file name used when the classifier is saved.
modifier = 'mar3'

# Ingest the dataset
- define the path for the dataset
- create a TextDataLoaders object

In [5]:
%%time
# Resolve the local path of the IMDB dataset and list its contents.
# No dataloaders object is created in this cell; it only sets `path`.
# NOTE(review): untar_data presumably downloads/extracts the archive when it is not cached — confirm.
path = untar_data(URLs.IMDB)
path.ls()

CPU times: user 4.55 ms, sys: 65 µs, total: 4.61 ms
Wall time: 124 ms


(#7) [Path('/storage/data/imdb/README'),Path('/storage/data/imdb/tmp_lm'),Path('/storage/data/imdb/imdb.vocab'),Path('/storage/data/imdb/tmp_clas'),Path('/storage/data/imdb/test'),Path('/storage/data/imdb/train'),Path('/storage/data/imdb/unsup')]

# Define the text classifier

In [6]:
# Build the classifier dataloaders from the IMDB folder tree;
# the 'test' folder is used as the validation set.
dls_clas = TextDataLoaders.from_folder(path=path, valid='test')


In [7]:
# Directory structure of the IMDB curated dataset (directories only; the listing of
# `path` above also shows the README and imdb.vocab files).
# The bare string below is for display only: the cell evaluates to the string itself.
'''
├── test
│   ├── neg
│   └── pos
├── tmp_clas
├── tmp_lm
├── train
│   ├── neg
│   └── pos
└── unsup
'''

'\n├── test\n│   ├── neg\n│   └── pos\n├── tmp_clas\n├── tmp_lm\n├── train\n│   ├── neg\n│   └── pos\n└── unsup\n'

In [8]:
# Show the root path recorded on the dataloaders object.
dls_clas.path

Path('/storage/data/imdb')

In [9]:
# Remember the dataset path so that learn_clas.path can be restored
# after the encoder has been loaded from a different directory.
keep_path = path
print("keep_path is: ", keep_path)

keep_path is:  /storage/data/imdb


In [10]:
%%time
# define a text_classifier_learner object
learn_clas = text_classifier_learner(dls_clas, AWD_LSTM, 
                                metrics=accuracy).to_fp16()

CPU times: user 7.07 s, sys: 1.45 s, total: 8.52 s
Wall time: 11.1 s


# Fine-tune the text classifier
Use the encoder created as part of training the language model to fine-tune the text classifier.

In [11]:
# Learner path before it is redirected; expected value: Path('/storage/data/imdb')
learn_clas.path

Path('/storage/data/imdb')

In [12]:
%%time
# Point the learner at the directory that holds the saved encoder so that it can be loaded.
# The original path is restored from keep_path once the encoder has been loaded.
# NOTE(review): the encoder file is presumably expected under <path>/models — confirm.
learn_clas.path = Path('/notebooks/temp')

CPU times: user 251 µs, sys: 60 µs, total: 311 µs
Wall time: 50.3 µs


In [13]:
# Load the encoder that was saved when the language model was trained.
# The file name is 'ft_' + modifier; the return value of load_encoder is rebound to learn_clas.
learn_clas = learn_clas.load_encoder('ft_'+modifier)

In [14]:
# Check that the learner still points at the encoder directory after loading.
learn_clas.path

Path('/notebooks/temp')

In [15]:
# Set the learner path back to the dataset path saved earlier in keep_path,
# now that the encoder has been loaded.
learn_clas.path = keep_path

In [16]:
# Verify the restore; expected value: Path('/storage/data/imdb')
# NOTE(review): the original comment read "ch 10 style" — presumably the path layout
# used in fastbook chapter 10; confirm.
learn_clas.path

Path('/storage/data/imdb')

In [17]:
%%time
# fine tune the model
learn_clas.fit_one_cycle(5, 2e-2)

epoch,train_loss,valid_loss,accuracy,time
0,0.432951,0.299519,0.8742,03:46
1,0.411521,0.291931,0.8786,03:29
2,0.395382,0.275534,0.88712,03:28
3,0.409314,0.263998,0.89004,03:28
4,0.379574,0.264123,0.89108,03:28


CPU times: user 12min 29s, sys: 4min 46s, total: 17min 15s
Wall time: 17min 41s


In [18]:
# Take one batch from the training dataloader and display the shape of the inputs,
# the shape of the targets, and the length of the training dataloader.
# NOTE(review): len(dls_clas.train) is presumably the number of batches — confirm.
x, y = first(dls_clas.train)
x.shape, y.shape, len(dls_clas.train)

(torch.Size([64, 3345]), torch.Size([64]), 390)

In [19]:
# Display a sample of the tokenized review texts together with their category labels.
dls_clas.show_batch()

Unnamed: 0,text,category
0,"xxbos xxmaj match 1 : xxmaj tag xxmaj team xxmaj table xxmaj match xxmaj bubba xxmaj ray and xxmaj spike xxmaj dudley vs xxmaj eddie xxmaj guerrero and xxmaj chris xxmaj benoit xxmaj bubba xxmaj ray and xxmaj spike xxmaj dudley started things off with a xxmaj tag xxmaj team xxmaj table xxmaj match against xxmaj eddie xxmaj guerrero and xxmaj chris xxmaj benoit . xxmaj according to the rules of the match , both opponents have to go through tables in order to get the win . xxmaj benoit and xxmaj guerrero heated up early on by taking turns hammering first xxmaj spike and then xxmaj bubba xxmaj ray . a xxmaj german xxunk by xxmaj benoit to xxmaj bubba took the wind out of the xxmaj dudley brother . xxmaj spike tried to help his brother , but the referee restrained him while xxmaj benoit and xxmaj guerrero",pos
1,"xxbos xxmaj director xxmaj douglas xxmaj sirk once said ` there 's a very short distance between high art and trash , and trash that contains craziness is by this very quality nearer to art ' . xxmaj this statement defines his cinema perfectly , a very unique body of work that includes classic stage adaptations , adventure and war films , westerns and of course , his famous melodramas . \n\n xxmaj sirk 's melodramas were , as the very word signifies , dramas with music . xxmaj the music sets the tone for his masterful style , and every stroke of his brush ( sirk was also a painter ) leaves a powerful image on the screen - turned - canvas . xxmaj but this ai n't life but its representation , an imitation of life . xxmaj sirk never tried to show reality , on the contrary",pos
2,"xxbos xxmaj in xxup nyc , seaman xxmaj michael o'hara ( orson xxmaj welles ) rescues xxmaj elsa xxmaj bannister ( rita xxmaj hayworth ) from a mugging & rape as she takes a horse & carriage through xxmaj central xxmaj park -and lives to regret it . xxmaj xxunk - haired xxmaj hayworth 's a platinum blonde in this one ; as dazzling as fresh - fallen snow -but nowhere near as pure … \n\n xxmaj to reveal any more of the convoluted plot in this seminal "" noir "" would be criminal . xxmaj it 's as deceptive as the mirrors used to cataclysmic effect in the final scenes -but the film holds far darker secrets : xxmaj from the xxup ny xxmaj times : "" childhood xxmaj shadows : xxmaj the xxmaj hidden xxmaj story xxmaj of xxmaj the xxmaj black xxmaj dahlia xxmaj murder "" by",pos
3,"xxbos xxmaj it is not as great a film as many people believe ( including my late aunt , who said it was her favorite movie ) . xxmaj but due to the better sections of this film noir , particularly that justifiably famous "" fun house "" finale , xxup the xxup lady xxup from xxup shanghai has gained a position of importance beyond it 's actual worth as a key to the saga of xxmaj orson xxmaj welles ' failure to conquer xxmaj hollywood . \n\n xxmaj by 1946 xxmaj welles ' position as a xxmaj hollywood figure was mixed . xxup citizen xxup kane was not recognized as the great movie it has since been seen as due to the way it was attacked by the xxmaj hearst press and by xxmaj hollywood insiders themselves . xxmaj welles ' attempt at total control ( direction and production",pos
4,"xxbos xxmaj how strange the human mind is ; this center of activity wherein perceptions of reality are formed and stored , and in which one 's view of the world hinges on the finely tuned functioning of the brain , this most delicate and intricate processor of all things sensory . xxmaj and how much do we really know of it 's inner - workings , of it 's depth or capacity ? xxmaj what is it in the mind that allows us to discern between reality and a dream ? xxmaj or can we ? xxmaj perhaps our sense of reality is no more than an impression of what we actually see , like looking at a painting by xxmaj monet , in which the vanilla sky of his vision becomes our reality . xxmaj it 's a concept visited by filmmaker xxmaj cameron xxmaj crowe in his",pos
5,"xxbos xxmaj well , i finally saw it . i did n't go when it first came out because , well , frankly , i was afraid . xxmaj afraid of how bad it might be , or how disappointing . xxmaj while not as bad as xxmaj menace , and better than xxmaj clones , it was n't particularly memorable , or satisfying . \n\n i was 11 years old when i saw xxmaj star xxmaj wars . i still remember sitting in the theater . xxmaj from the opening crawl to the final credits it was a movie experience xxmaj i 'll never forget . a timeless story of the bored farm - boy who just knows he was meant for more , saving the princess and the xxmaj galaxy from the evil menace while being mentored by the wise wizard , the rogue pirate and the various",neg
6,"xxbos * xxmaj some spoilers * \n\n xxmaj this movie is sometimes subtitled "" life xxmaj everlasting . "" xxmaj that 's often taken as reference to the final scene , but more accurately describes how dead and buried this once - estimable series is after this sloppy and illogical send - off . \n\n xxmaj there 's a "" hey kids , let 's put on a show air "" about this telemovie , which can be endearing in spots . xxmaj some fans will feel like insiders as they enjoy picking out all the various cameo appearances . xxmaj co - writer , co - producer xxmaj tom xxmaj fontana and his pals pack the goings - on with friends and favorites from other shows , as well as real xxmaj baltimore personages . \n\n xxmaj that 's on top of the returns of virtually all the members",neg
7,"xxbos i figured that it 's about time i let this one out . xxmaj pokémon fans are suffering in xxmaj america these days . xxmaj why ? xxmaj because we rely on xxmaj kids xxup wb and 4kids xxmaj entertainment to provide us with our beloved series and movies . xxmaj as far as the series goes , they do a pretty good job in bringing the fun and magic of the xxmaj japanese versions to television . xxmaj so what is their problem when it comes to the movies ? xxmaj honestly now , i have seen all three xxmaj pokémon movies in xxmaj japanese and i will definitely be seeing the fourth one . xxmaj they are excellent movies . xxmaj they are all enjoyable and fun to watch . xxmaj and , after seeing xxmaj pokémon 2 xxrep 3 0 in theaters , i ca n't",neg
8,"xxbos xxmaj the movie "" macarthur "" begins and ends at xxmaj gen . xxmaj douglas macarthur 's , xxmaj gregory xxmaj peck , xxmaj alma xxmaj mata the xxup us xxmaj military xxmaj academy of xxmaj west xxmaj point on the xxmaj hudson . xxmaj we see a frail 82 year old xxunk give the commencement speech to the graduating class of 1962 about what an honor it is to serve their country . xxmaj the film then goes into an almost two hour long flashback on xxmaj gen . macarthur 's brilliant as well as controversial career that starts in the darkest hours of xxup wwii on the besieged island of xxmaj corregidor in the xxmaj philippines in the early spring of 1942 . \n\n xxmaj told to leave he island for xxmaj australia before the xxmaj japanese military invade it xxmaj gen . macarthur for the very",pos


In [20]:
# Print the model summary: layer types, output shapes, parameter counts and trainable flags.
learn_clas.summary()

epoch,train_loss,valid_loss,accuracy,time
0,,,00:00,


SequentialRNN (Input shape: ['64 x 3345'])
Layer (type)         Output Shape         Param #    Trainable 
LSTM                 ['64 x 33 x 1152',   1,852,416  False     
________________________________________________________________
LSTM                 ['64 x 33 x 1152',   5,317,632  False     
________________________________________________________________
LSTM                 ['64 x 33 x 400', "  1,846,400  False     
________________________________________________________________
RNNDropout           64 x 33 x 400        0          False     
________________________________________________________________
RNNDropout           64 x 33 x 1152       0          False     
________________________________________________________________
RNNDropout           64 x 33 x 1152       0          False     
________________________________________________________________
BatchNorm1d          64 x 1200            2,400      True      
_______________________________________________________

# Exercise the text classifier
Apply the fine-tuned text classifier on some text samples.

In [21]:
# Run the classifier on a clearly negative review.
preds = learn_clas.predict("this film shows incredibly bad writing and is a complete disaster")

In [22]:
# Display the prediction for the negative sample.
# NOTE(review): the tuple looks like (label, class index, per-class probabilities) — confirm.
preds

('neg', TensorText(0), TensorText([0.9987, 0.0013]))

In [26]:
# Run the classifier on a clearly positive review (overwrites the previous `preds`).
preds = learn_clas.predict("this film shows incredible talent and is a complete triumph")

In [27]:
# Display the prediction for the positive sample.
# NOTE(review): the tuple looks like (label, class index, per-class probabilities) — confirm.
preds

('pos', TensorText(1), TensorText([2.8299e-05, 9.9997e-01]))

In [25]:
# Save the classifier model under /notebooks/temp; the value displayed by the cell is
# the path of the written .pth file returned by save().
# NOTE(review): the file name says "single_epoch", but fit_one_cycle was run for 5 epochs
# earlier in this notebook — confirm the name is intended before other notebooks rely on it.
# NOTE(review): learn_clas.path is left at /notebooks/temp after this cell; unlike the
# encoder-loading step it is not restored to keep_path.
learn_clas.path = Path('/notebooks/temp')
learn_clas.save('classifier_single_epoch_'+modifier+'d')

Path('/notebooks/temp/models/classifier_single_epoch_mar3d.pth')