# Training a text classifier model with fastai
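- This notebook assumes you have already run the text_model_training.ipynb notebook (the encoder saved when the language model was trained is loaded below).
- The first section ingests the IMDB dataset.
- The second section defines the classifier, trains it and saves it.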

In [101]:
#hide
!pip install -Uqq fastbook
import fastbook
fastbook.setup_book()

In [102]:
#hide
from fastbook import *
from fastai.text.all import *

In [103]:
# Switch selecting how the classifier dataloaders are built further down:
#   True  -> TextDataLoaders.from_folder ("TDL" route)
#   False -> DataBlock(...).dataloaders(...)
tdl = True

In [104]:
# Run tag: suffix of the saved encoder that is loaded later ('ft_' + modifier)
# and part of the classifier checkpoint name written at the end of the notebook.
modifier = 'mar3'

# Ingest the dataset
- define the path for the dataset
- create a TextDataLoaders object

In [105]:
%%time
# Resolve the local IMDB dataset directory and list its top-level contents.
# (untar_data is expected to download/extract the archive first if it is not
# already present — see the fastai docs.)
# The bare string below is an unexecuted note of the one-step alternative
# (dataloaders + learner); it has no effect when the cell runs.
'''dls = TextDataLoaders.from_folder(untar_data(URLs.IMDB), valid='test', bs=16)
learn = text_classifier_learner(dls, AWD_LSTM, drop_mult=0.5, metrics=accuracy)'''
# Note on drop_mult: an LSTM has several dropout probabilities for different parts
# of the model. drop_mult scales all of them at once, so they can be changed
# together while keeping their relative sizes.
path = untar_data(URLs.IMDB)
path.ls()

CPU times: user 3.73 ms, sys: 0 ns, total: 3.73 ms
Wall time: 3.61 ms


(#7) [Path('/storage/data/imdb/README'),Path('/storage/data/imdb/tmp_lm'),Path('/storage/data/imdb/imdb.vocab'),Path('/storage/data/imdb/tmp_clas'),Path('/storage/data/imdb/test'),Path('/storage/data/imdb/train'),Path('/storage/data/imdb/unsup')]

In [106]:
%%time
# dls = TextDataLoaders.from_folder(untar_data(URLs.IMDB), valid='test', bs=8)
# get_imdb = partial(get_text_files, folders=['train', 'test', 'unsup'])
dls = TextDataLoaders.from_folder(path, valid = 'test', is_lm=True, bs=16)

CPU times: user 3.64 s, sys: 99.1 ms, total: 3.74 s
Wall time: 3.76 s


In [107]:
# Show up to 4 language-model samples: `text` is the input and `text_` is the
# same token stream shifted one token ahead (the prediction target).
dls.show_batch(max_n=4)

Unnamed: 0,text,text_
0,"xxbos xxmaj this has to be one of the worst movies of all time . xxmaj the graphics were horrendous , the acting was b - movie and the effects were just plain xxmaj nintendo 64 qualified . xxmaj you would think that they would put a little more effort into it . xxmaj of course , it is a xxmaj scifi channel movie so you have to expect it to be","xxmaj this has to be one of the worst movies of all time . xxmaj the graphics were horrendous , the acting was b - movie and the effects were just plain xxmaj nintendo 64 qualified . xxmaj you would think that they would put a little more effort into it . xxmaj of course , it is a xxmaj scifi channel movie so you have to expect it to be low"
1,"hanging from a speeding bus by an umbrella , a whole new "" wow ! "" factor is added to the action . xxbos here , let me wave my hands over the keyboard , i 'll tell you what salad she 's going to order . over and over , works like a charm : he 's such a genius , omg how does he do it ? my bullshit detector","from a speeding bus by an umbrella , a whole new "" wow ! "" factor is added to the action . xxbos here , let me wave my hands over the keyboard , i 'll tell you what salad she 's going to order . over and over , works like a charm : he 's such a genius , omg how does he do it ? my bullshit detector freaks"
2,convincingly as the tough survivor with an exterior of cool / intelligent / innocence . xxmaj and full marks to xxmaj tarantino for choosing an unknown actress for the role - much more believeable to have a new face creating the part . xxmaj i 'm looking forward to the next film . xxbos xxmaj first of all i saw this movie without knowing anything about it i just knew that xxmaj,as the tough survivor with an exterior of cool / intelligent / innocence . xxmaj and full marks to xxmaj tarantino for choosing an unknown actress for the role - much more believeable to have a new face creating the part . xxmaj i 'm looking forward to the next film . xxbos xxmaj first of all i saw this movie without knowing anything about it i just knew that xxmaj joel
3,round but hats off to xxmaj anne xxmaj reid and xxmaj our xxmaj friends in the xxmaj north 's xxmaj daniel xxmaj craig ( the latter coming across as the next xxmaj david xxmaj thewlis ) . \n\n xxmaj and director xxmaj roger xxmaj michell ? xxmaj this is as far from xxmaj notting xxmaj hill as it 's possible to be . xxmaj thank xxmaj god . \n\n xxmaj watch this,but hats off to xxmaj anne xxmaj reid and xxmaj our xxmaj friends in the xxmaj north 's xxmaj daniel xxmaj craig ( the latter coming across as the next xxmaj david xxmaj thewlis ) . \n\n xxmaj and director xxmaj roger xxmaj michell ? xxmaj this is as far from xxmaj notting xxmaj hill as it 's possible to be . xxmaj thank xxmaj god . \n\n xxmaj watch this movie


# Define classifier

In [108]:
# dls definition cribbed from chapter 10
# this works
'''
dls_clas = DataBlock(
    blocks=(TextBlock.from_folder(path, vocab=dls.vocab),CategoryBlock),
    get_y = parent_label,
    get_items=partial(get_text_files, folders=['train', 'test']),
    splitter=GrandparentSplitter(valid_name='test')
).dataloaders(path, path=path, bs=128, seq_len=72)

DataBlock - Generic container to quickly build Datasets and DataLoaders
To build a DataBlock you need to give the library four things: 
- the types of your input/labels, 
- and at least two functions: get_items and splitter. 
You may also need to include get_x and get_y or a more generic list of getters that are 
applied to the results of get_items.

DataBlock(blocks=None, dl_type=None, getters=None, n_inp=None, 
item_tfms=None, batch_tfms=None, get_items=None, splitter=None, get_y=None, get_x=None)

DataBlock.dataloaders(source, path='.', verbose=False, bs=64, shuffle=False, num_workers=None, 
do_setup=True, pin_memory=False, timeout=0, batch_size=None, drop_last=False, indexed=None, 
n=None, device=None, persistent_workers=False, wif=None, before_iter=None, after_item=None, 
before_batch=None, after_batch=None, after_iter=None, create_batches=None, 
create_item=None, create_batch=None, retain=None, get_idxs=None, sample=None, 
shuffle_fn=None, do_batch=None)

Once those are provided, you automatically get a Datasets or a DataLoaders:

CategoryBlock = TransformBlock for single-label categorical targets

The independent variable is often referred to as x, and the dependent variable is often referred to as y.
Here, we are telling fastai what function to call to create the labels in our dataset: 
get_y=parent_label parent_label is a function provided by fastai that simply gets the name of the 
folder a file is in. Because we put each of our bear images into folders based on the type of bear, 
this is going to give us the labels that we need.

Partial functions allow us to fix a certain number of arguments of a function and generate a new function

get_items: get_items is completely decoupled from get_x and get_y: it is there to return all your items from 
the source. You can pass get_x and get_y (or a list of getters) to explain how to get your x and y from the result of 
get_items and they both default to noop (which is why when get_items return filenames, we don’t pass a get_x)

GrandparentSplitter - Split items from the grand parent folder names (train_name and valid_name).

seq_len: The LMDataLoader will concatenate all texts (maybe shuffled) in one big stream, 
split it in bs contiguous sentences, then go through those seq_len at a time.

'''

"\ndls_clas = DataBlock(\n    blocks=(TextBlock.from_folder(path, vocab=dls.vocab),CategoryBlock),\n    get_y = parent_label,\n    get_items=partial(get_text_files, folders=['train', 'test']),\n    splitter=GrandparentSplitter(valid_name='test')\n).dataloaders(path, path=path, bs=128, seq_len=72)\n\nDataBlock - Generic container to quickly build Datasets and DataLoaders\nTo build a DataBlock you need to give the library four things: \n- the types of your input/labels, \n- and at least two functions: get_items and splitter. \nYou may also need to include get_x and get_y or a more generic list of getters that are \napplied to the results of get_items.\n\nDataBlock(blocks=None, dl_type=None, getters=None, n_inp=None, \nitem_tfms=None, batch_tfms=None, get_items=None, splitter=None, get_y=None, get_x=None)\n\nDataBlock.dataloaders(source, path='.', verbose=False, bs=64, shuffle=False, num_workers=None, \ndo_setup=True, pin_memory=False, timeout=0, batch_size=None, drop_last=False, indexed=

In [109]:
'''
# details on sequence length meaining

bs,sl = 4,3
ints = L([0,1,2,3,4],[5,6,7,8,9,10],[11,12,13,14,15,16,17,18],[19,20],[21,22,23],[24]).map(tensor)

dl = LMDataLoader(ints, bs=bs, seq_len=sl)
test_eq(list(dl),
    [[tensor([[0, 1, 2], [6, 7, 8], [12, 13, 14], [18, 19, 20]]),
      tensor([[1, 2, 3], [7, 8, 9], [13, 14, 15], [19, 20, 21]])],
     [tensor([[3, 4, 5], [ 9, 10, 11], [15, 16, 17], [21, 22, 23]]),
      tensor([[4, 5, 6], [10, 11, 12], [16, 17, 18], [22, 23, 24]])]])
'''

'\n# details on sequence length meaining\n\nbs,sl = 4,3\nints = L([0,1,2,3,4],[5,6,7,8,9,10],[11,12,13,14,15,16,17,18],[19,20],[21,22,23],[24]).map(tensor)\n\ndl = LMDataLoader(ints, bs=bs, seq_len=sl)\ntest_eq(list(dl),\n    [[tensor([[0, 1, 2], [6, 7, 8], [12, 13, 14], [18, 19, 20]]),\n      tensor([[1, 2, 3], [7, 8, 9], [13, 14, 15], [19, 20, 21]])],\n     [tensor([[3, 4, 5], [ 9, 10, 11], [15, 16, 17], [21, 22, 23]]),\n      tensor([[4, 5, 6], [10, 11, 12], [16, 17, 18], [22, 23, 24]])]])\n'

In [110]:
'''
DEFINITION OF DATABLOCK:

Generic container to quickly build Datasets and DataLoaders

To build a DataBlock you need to give the library four things: 
- the types of your input/labels, 
- and at least two functions: get_items and splitter. 
- You may also need to include get_x and get_y 
or a more generic list of getters that are applied to the results of get_items.

Once those are provided, you automatically get a Datasets or a DataLoaders

DataBlock(
    blocks=None,
    dl_type=None,
    getters=None,
    n_inp=None,
    item_tfms=None,
    batch_tfms=None,
    *,
    get_items=None,
    splitter=None,
    get_y=None,
    get_x=None,
)
'''

'\nDEFINITION OF DATABLOCK:\n\nGeneric container to quickly build Datasets and DataLoaders\n\nTo build a DataBlock you need to give the library four things: \n- the types of your input/labels, \n- and at least two functions: get_items and splitter. \n- You may also need to include get_x and get_y \nor a more generic list of getters that are applied to the results of get_items.\n\nOnce those are provided, you automatically get a Datasets or a DataLoaders\n\nDataBlock(\n    blocks=None,\n    dl_type=None,\n    getters=None,\n    n_inp=None,\n    item_tfms=None,\n    batch_tfms=None,\n    *,\n    get_items=None,\n    splitter=None,\n    get_y=None,\n    get_x=None,\n)\n'

In [111]:
'''
DEFINITION OF TEXTBLOCK: https://docs.fast.ai/text.data.html#TextBlock

A TransformBlock (A basic wrapper that links defaults transforms for the data block API) for texts
'''

'\nDEFINITION OF TEXTBLOCK: https://docs.fast.ai/text.data.html#TextBlock\n\nA TransformBlock (A basic wrapper that links defaults transforms for the data block API) for texts\n'

In [112]:
'''
DEFINITION OF CATEGORYBLOCK

TransformBlock for single-label categorical targets https://docs.fast.ai/data.block.html#CategoryBlock

Signature: CategoryBlock(vocab=None, sort=True, add_na=False)
Source:   
def CategoryBlock(vocab=None, sort=True, add_na=False):
    "`TransformBlock` for single-label categorical targets"
    return TransformBlock(type_tfms=Categorize(vocab=vocab, sort=sort, add_na=add_na))
File:      /opt/conda/envs/fastai/lib/python3.8/site-packages/fastai/data/block.py
Type:      function

'''

'\nDEFINITION OF CATEGORYBLOCK\n\nTransformBlock for single-label categorical targets https://docs.fast.ai/data.block.html#CategoryBlock\n\nSignature: CategoryBlock(vocab=None, sort=True, add_na=False)\nSource:   \ndef CategoryBlock(vocab=None, sort=True, add_na=False):\n    "`TransformBlock` for single-label categorical targets"\n    return TransformBlock(type_tfms=Categorize(vocab=vocab, sort=sort, add_na=add_na))\nFile:      /opt/conda/envs/fastai/lib/python3.8/site-packages/fastai/data/block.py\nType:      function\n\n'

In [113]:
'''
signature:
TextDataLoaders(*loaders, path='.', device=None)

'''

"\nsignature:\nTextDataLoaders(*loaders, path='.', device=None)\n\n"

In [114]:
??parent_label

[0;31mSignature:[0m [0mparent_label[0m[0;34m([0m[0mo[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mSource:[0m   
[0;32mdef[0m [0mparent_label[0m[0;34m([0m[0mo[0m[0;34m)[0m[0;34m:[0m[0;34m[0m
[0;34m[0m    [0;34m"Label `item` with the parent folder name."[0m[0;34m[0m
[0;34m[0m    [0;32mreturn[0m [0mPath[0m[0;34m([0m[0mo[0m[0;34m)[0m[0;34m.[0m[0mparent[0m[0;34m.[0m[0mname[0m[0;34m[0m[0;34m[0m[0m
[0;31mFile:[0m      /opt/conda/envs/fastai/lib/python3.8/site-packages/fastai/data/transforms.py
[0;31mType:[0m      function


In [115]:
parent_label(path)

'data'

In [116]:
path

Path('/storage/data/imdb')

In [117]:
??TextDataLoaders

[0;31mInit signature:[0m [0mTextDataLoaders[0m[0;34m([0m[0;34m*[0m[0mloaders[0m[0;34m,[0m [0mpath[0m[0;34m=[0m[0;34m'.'[0m[0;34m,[0m [0mdevice[0m[0;34m=[0m[0;32mNone[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mSource:[0m        
[0;32mclass[0m [0mTextDataLoaders[0m[0;34m([0m[0mDataLoaders[0m[0;34m)[0m[0;34m:[0m[0;34m[0m
[0;34m[0m    [0;34m"Basic wrapper around several `DataLoader`s with factory methods for NLP problems"[0m[0;34m[0m
[0;34m[0m    [0;34m@[0m[0mclassmethod[0m[0;34m[0m
[0;34m[0m    [0;34m@[0m[0mdelegates[0m[0;34m([0m[0mDataLoaders[0m[0;34m.[0m[0mfrom_dblock[0m[0;34m)[0m[0;34m[0m
[0;34m[0m    [0;32mdef[0m [0mfrom_folder[0m[0;34m([0m[0mcls[0m[0;34m,[0m [0mpath[0m[0;34m,[0m [0mtrain[0m[0;34m=[0m[0;34m'train'[0m[0;34m,[0m [0mvalid[0m[0;34m=[0m[0;34m'valid'[0m[0;34m,[0m [0mvalid_pct[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m [0mseed[0m[0;34m=[0m[0;32mNone[0m[0;34m

In [118]:
??DataBlock

[0;31mInit signature:[0m
[0mDataBlock[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0mblocks[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mdl_type[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mgetters[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mn_inp[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mitem_tfms[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mbatch_tfms[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0;34m*[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mget_items[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0msplitter[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mget_y[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mget_x[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mSource:[0m  

In [119]:
??CategoryBlock

[0;31mSignature:[0m [0mCategoryBlock[0m[0;34m([0m[0mvocab[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m [0msort[0m[0;34m=[0m[0;32mTrue[0m[0;34m,[0m [0madd_na[0m[0;34m=[0m[0;32mFalse[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mSource:[0m   
[0;32mdef[0m [0mCategoryBlock[0m[0;34m([0m[0mvocab[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m [0msort[0m[0;34m=[0m[0;32mTrue[0m[0;34m,[0m [0madd_na[0m[0;34m=[0m[0;32mFalse[0m[0;34m)[0m[0;34m:[0m[0;34m[0m
[0;34m[0m    [0;34m"`TransformBlock` for single-label categorical targets"[0m[0;34m[0m
[0;34m[0m    [0;32mreturn[0m [0mTransformBlock[0m[0;34m([0m[0mtype_tfms[0m[0;34m=[0m[0mCategorize[0m[0;34m([0m[0mvocab[0m[0;34m=[0m[0mvocab[0m[0;34m,[0m [0msort[0m[0;34m=[0m[0msort[0m[0;34m,[0m [0madd_na[0m[0;34m=[0m[0madd_na[0m[0;34m)[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mFile:[0m      /opt/conda/envs/fastai/lib/python3.8/site-packages/fastai/data/block.py
[

In [120]:
x = parent_label
x

<function fastai.data.transforms.parent_label(o)>

In [121]:
path

Path('/storage/data/imdb')

In [122]:
# refactored definition of dataloader object
'''dls = TextDataLoaders.from_df(
    df_tok, path=path, 
    vocab = make_vocab(count),
    text_col = 'text',label_col='label')
    
    MAR 7 LEVEL OF TDL:
        print("tdl dataloader")
    dls_clas = TextDataLoaders.from_folder(path=path,
        blocks=(TextBlock.from_folder(path, vocab=dls.vocab),CategoryBlock),
    #path, vocab=dls.vocab,     
    #    text_col= 'text', 
    #   label_col='label',
        get_y = parent_label,
        get_items=partial(get_text_files, folders=['train', 'test']),
        splitter=GrandparentSplitter(valid_name='test'),
    #    get_items=partial(get_text_files, folders=['train', 'test']),
    #    splitter=GrandparentSplitter(valid_name='test'), 
        bs=128, seq_len=72)
    
    MAR 7 level of non-TDL
        dls_clas = DataBlock(
        blocks=(TextBlock.from_folder(path, vocab=dls.vocab),CategoryBlock),
        get_y = parent_label,
        get_items=partial(get_text_files, folders=['train', 'test']),
        splitter=GrandparentSplitter(valid_name='test')
    ).dataloaders(path, path=path, bs=128, seq_len=72)
  
  
  TextDataLoaders.from_folder(path, train='train', valid='valid', valid_pct=None, seed=None, 
  vocab=None, text_vocab=None, 
  is_lm=False, tok_tfm=None, seq_len=72, backwards=False, bs=64, val_bs=None, shuffle=True, device=None)
    
'''
# Build the classifier dataloaders (dls_clas). Two routes are kept, selected by `tdl`:
#   tdl=True  -> TextDataLoaders.from_folder factory
#   tdl=False -> DataBlock(...).dataloaders(...)
#
# The DataBlock is defined once, outside the branch, so that `blocko` exists
# whichever route is chosen (a later cell calls blocko.summary(...)).
#   - TextBlock.from_folder(..., vocab=dls.vocab): numericalize with the language-model vocab
#   - parent_label: label each file with its parent folder name (pos / neg)
#   - get_text_files over 'train' and 'test'; GrandparentSplitter uses 'test' for validation
blocko = DataBlock(
    blocks=(TextBlock.from_folder(path, vocab=dls.vocab), CategoryBlock),
    get_y=parent_label,
    get_items=partial(get_text_files, folders=['train', 'test']),
    splitter=GrandparentSplitter(valid_name='test'),
)

if tdl:
    print("tdl dataloader")
    # text_vocab passes the language-model vocab to the factory so that this route
    # numericalizes text the same way as the DataBlock route. Without it the factory
    # builds its own vocab, which need not match the encoder loaded later.
    # NOTE(review): the saved encoder must have been trained with this same vocab —
    # confirm against text_model_training.ipynb.
    # `path` is reused instead of calling untar_data(URLs.IMDB) a second time.
    dls_clas = TextDataLoaders.from_folder(path, valid='test', text_vocab=dls.vocab)
else:
    print("not tdl dataloader")
    dls_clas = blocko.dataloaders(path, path=path, bs=128, seq_len=72)

tdl dataloader


In [123]:
# Dry-run the DataBlock on `path`: prints each pipeline step while building one
# sample, then shows a batch.
blocko.summary(path,show_batch=True)

Setting-up type transforms pipelines
Collecting items from /storage/data/imdb
Found 50000 items
2 datasets of sizes 25000,25000
Setting up Pipeline: Tokenizer -> Numericalize
Setting up Pipeline: parent_label -> Categorize -- {'vocab': None, 'sort': True, 'add_na': False}

Building one sample
  Pipeline: Tokenizer -> Numericalize
    starting from
      /storage/data/imdb/train/neg/573_1.txt
    applying Tokenizer gives
      ['xxbos', 'xxmaj', 'the', 'last', 'reviewer', 'was', 'very', 'generous', '.', 'i', 'quiet', 'like', 'the', 'first', 'movie', ',', 'but', 'ca', "n't", 'say', 'i', 'enjoy', 'this', 'one', 'very', 'much', '.', 'xxmaj', 'the', 'beginning', 'is', 'bearable', ',', 'but', 'it', 'goes', 'downhill', 'pretty', 'quickly', '.', 'i', 'just', 'do', "n't", 'see', 'xxmaj', 'jon', 'xxmaj', 'bon', 'xxmaj', 'jovi', 'as', 'a', '"', 'bad', '-', 'ass', 'vampire', 'hunter', '"', 'and', 'the', 'vampire', 'princess', 'is', 'neither', 'sexy', 'nor', 'scary', '.', 'a', 'lot', 'of', 'the', '

Unnamed: 0,text,category
0,"xxbos xxmaj match 1 : xxmaj tag xxmaj team xxmaj table xxmaj match xxmaj bubba xxmaj ray and xxmaj spike xxmaj dudley vs xxmaj eddie xxmaj guerrero and xxmaj chris xxmaj benoit xxmaj bubba xxmaj ray and xxmaj spike xxmaj dudley started things off with a xxmaj tag xxmaj team xxmaj table xxmaj match against xxmaj eddie xxmaj guerrero and xxmaj chris xxmaj benoit . xxmaj according to the rules of the match , both opponents have to go through tables in order to get the win . xxmaj benoit and xxmaj guerrero heated up early on by taking turns hammering first xxmaj spike and then xxmaj bubba xxmaj ray . a xxmaj german xxunk by xxmaj benoit to xxmaj bubba took the wind out of the xxmaj dudley brother . xxmaj spike tried to help his brother , but the referee restrained him while xxmaj benoit and xxmaj guerrero",pos
1,"xxbos i was at first disgusted with director sun - woo xxmaj jang because i had felt that he cheated me . xxmaj jang had the potential to create a strong , deeply emotional film about sex and its effects on people , but instead chose to focus his strength on the pornography element more than the actual human element . i could n't see the characters at first and his sloppy introduction which blended both realism and cinema together was amateurish at best ▁ yet this film remained in my mind for days after i viewed it . xxmaj what stayed with me was n't the story , it was n't the characters , nor was it the apparent pornographic nature of the film , but the transition that xxmaj jang demonstrated between y and xxup j. xxmaj if you watch this film carefully , you will see that",neg
2,"xxbos i was but a babe in arms when xxmaj george xxmaj lucas was wowing the world with his out of this world xxmaj saga chronicling the adventures of young xxmaj luke xxmaj skywalker and the notorious xxmaj darth xxmaj vadar but even today 20 years on i can appreciate the genius that is xxmaj lucas and the incredible imagination he 's been blessed with . xxmaj in a xxmaj new xxmaj hope xxmaj lucas showed a new way to tell stories as he introduced us to such memorable characters as the plucky xxmaj princess xxmaj leia , the xxmaj xxunk xxmaj han xxmaj solo and the spirited xxmaj luke xxmaj skywalker as well as that best loved of villains , the sinister xxmaj darth xxmaj vadar . xxmaj in xxmaj the xxmaj empire xxmaj strikes xxmaj back he went all out to show us xxmaj special xxmaj effects can",pos
3,"xxbos xxmaj orca starts as crusty xxmaj irish sea captain xxmaj nolan ( richard xxmaj harris ) & his crew are trying to capture a xxmaj great xxmaj white xxmaj shark so they can sell it for big bucks , unfortunately when a hapless marine biologist called xxmaj ken ( robert xxmaj carradine ) comes under attack from it the xxmaj shark is killed by a xxmaj killer xxmaj whale , this raises xxmaj nolan 's interest in xxmaj killer xxmaj whales & decides he want 's to catch one of them instead . xxmaj however while trying to do so he catches a pregnant female & injuries it to the extent she aborts her unborn foetus on deck which makes a mess & enrages her mate , xxmaj nolan orders the xxmaj whale be dumped back in the sea which is what happens . xxmaj the male xxmaj killer",neg


In [124]:
'''
├── test
│   ├── neg
│   └── pos
├── tmp_clas
├── tmp_lm
├── train
│   ├── neg
│   └── pos
└── unsup



'''

'\n├── test\n│   ├── neg\n│   └── pos\n├── tmp_clas\n├── tmp_lm\n├── train\n│   ├── neg\n│   └── pos\n└── unsup\n\n\n\n'

In [125]:
dls_clas.path

Path('/storage/data/imdb')

In [126]:
keep_path = path

In [127]:
# ch 10 style Path('/storage/data/imdb')
path

Path('/storage/data/imdb')

In [128]:
%%time
# define a text_classifier_learner object
learn_clas = text_classifier_learner(dls_clas, AWD_LSTM, drop_mult=0.5, 
                                metrics=accuracy).to_fp16()

CPU times: user 6.92 s, sys: 1.29 s, total: 8.21 s
Wall time: 4.7 s


In [129]:
# Path('/storage/data/imdb')
learn_clas.path

Path('/storage/data/imdb')

In [130]:
%%time
# Temporarily point the learner at the directory that holds the saved encoder,
# so the load in the next cell can find it.
# NOTE(review): hard-coded absolute path — presumably where the language-model
# notebook saved its models; confirm, and consider making it a config constant.
learn_clas.path = Path('/notebooks/temp')

CPU times: user 301 µs, sys: 33 µs, total: 334 µs
Wall time: 53.2 µs


In [131]:
# Load the encoder that was saved when the language model was trained.
# The file name is 'ft_' + modifier; it is looked up relative to learn_clas.path,
# which the previous cell pointed at the saved-encoder directory.
# (presumably under its models/ subfolder — TODO confirm)
learn_clas = learn_clas.load_encoder('ft_'+modifier)

In [132]:
path

Path('/storage/data/imdb')

In [133]:
learn_clas.path

Path('/notebooks/temp')

In [134]:
# Encoder is loaded: point the learner back at the dataset directory.
learn_clas.path = path

In [135]:
# ch 10 style Path('/storage/data/imdb')
learn_clas.path

Path('/storage/data/imdb')

In [136]:
%%time
# First training pass: one epoch of one-cycle training with learning rate 2e-2.
# The model summary further down lists the LSTM layers as not trainable, so
# presumably only the classifier head is being trained at this stage.
learn_clas.fit_one_cycle(1, 2e-2)

epoch,train_loss,valid_loss,accuracy,time
0,0.412671,0.280616,0.88552,03:24


CPU times: user 2min 29s, sys: 53.8 s, total: 3min 22s
Wall time: 3min 24s


In [137]:
# Take one training batch and report: input shape, label shape, and the number
# of batches in the training dataloader.
# NOTE(review): `x` rebinds the name that an earlier cell assigned to parent_label.
x, y = first(dls_clas.train)
x.shape, y.shape, len(dls_clas.train)

(torch.Size([64, 3345]), torch.Size([64]), 390)

In [138]:
dls_clas.show_batch()

Unnamed: 0,text,category
0,"xxbos xxmaj match 1 : xxmaj tag xxmaj team xxmaj table xxmaj match xxmaj bubba xxmaj ray and xxmaj spike xxmaj dudley vs xxmaj eddie xxmaj guerrero and xxmaj chris xxmaj benoit xxmaj bubba xxmaj ray and xxmaj spike xxmaj dudley started things off with a xxmaj tag xxmaj team xxmaj table xxmaj match against xxmaj eddie xxmaj guerrero and xxmaj chris xxmaj benoit . xxmaj according to the rules of the match , both opponents have to go through tables in order to get the win . xxmaj benoit and xxmaj guerrero heated up early on by taking turns hammering first xxmaj spike and then xxmaj bubba xxmaj ray . a xxmaj german xxunk by xxmaj benoit to xxmaj bubba took the wind out of the xxmaj dudley brother . xxmaj spike tried to help his brother , but the referee restrained him while xxmaj benoit and xxmaj guerrero",pos
1,"xxbos xxmaj titanic directed by xxmaj james xxmaj cameron presents a fictional love story on the historical setting of the xxmaj titanic . xxmaj the plot is simple , xxunk , or not for those who love plots that twist and turn and keep you in suspense . xxmaj the end of the movie can be figured out within minutes of the start of the film , but the love story is an interesting one , however . xxmaj kate xxmaj winslett is wonderful as xxmaj rose , an aristocratic young lady betrothed by xxmaj cal ( billy xxmaj zane ) . xxmaj early on the voyage xxmaj rose meets xxmaj jack ( leonardo dicaprio ) , a lower class artist on his way to xxmaj america after winning his ticket aboard xxmaj titanic in a poker game . xxmaj if he wants something , he goes and gets it",pos
2,"xxbos xxmaj some have praised _ xxunk _ as a xxmaj disney adventure for adults . i do n't think so -- at least not for thinking adults . \n\n xxmaj this script suggests a beginning as a live - action movie , that struck someone as the type of crap you can not sell to adults anymore . xxmaj the "" crack staff "" of many older adventure movies has been done well before , ( think _ the xxmaj dirty xxmaj dozen _ ) but _ atlantis _ represents one of the worse films in that motif . xxmaj the characters are weak . xxmaj even the background that each member trots out seems stock and awkward at best . xxmaj an xxup md / xxmaj medicine xxmaj man , a tomboy mechanic whose father always wanted sons , if we have not at least seen these before",neg
3,"xxbos xxmaj raising xxmaj victor xxmaj vargas : a xxmaj review \n\n xxmaj you know , xxmaj raising xxmaj victor xxmaj vargas is like sticking your hands into a big , steaming bowl of oatmeal . xxmaj it 's warm and gooey , but you 're not sure if it feels right . xxmaj try as i might , no matter how warm and gooey xxmaj raising xxmaj victor xxmaj vargas became i was always aware that something did n't quite feel right . xxmaj victor xxmaj vargas suffers from a certain overconfidence on the director 's part . xxmaj apparently , the director thought that the ethnic backdrop of a xxmaj latino family on the lower east side , and an idyllic storyline would make the film critic proof . xxmaj he was right , but it did n't fool me . xxmaj raising xxmaj victor xxmaj vargas is",neg
4,"xxbos xxmaj i 've rented and watched this movie for the 1st time on xxup dvd without reading any reviews about it . xxmaj so , after 15 minutes of watching xxmaj i 've noticed that something is wrong with this movie ; it 's xxup terrible ! i mean , in the trailers it looked scary and serious ! \n\n i think that xxmaj eli xxmaj roth ( mr . xxmaj director ) thought that if all the characters in this film were stupid , the movie would be funny … ( so stupid , it 's funny … ? xxup wrong ! ) xxmaj he should watch and learn from better horror - comedies such xxunk xxmaj night "" , "" the xxmaj lost xxmaj boys "" and "" the xxmaj return xxmaj of the xxmaj living xxmaj dead "" ! xxmaj those are funny ! \n\n """,neg
5,"xxbos xxmaj it has said that xxmaj the xxmaj movies and xxmaj baseball both thrived during xxmaj the xxmaj great xxmaj depression . xxmaj it appears that the grim realities of a xxmaj nation caught up in the aftermath of this xxmaj economic xxmaj disaster created a need for occasional relief for the populace . a temporary escape could be found in the on going soap opera that is xxmaj baseball . \n\n xxmaj likewise , an occasional excursion of 2 or 3 hours into the darkened xxunk of the xxmaj cinema . xxmaj the presence of a xxmaj radio in just about everyone 's house hold kept xxmaj depression xxmaj era xxmaj america at once attuned to xxmaj world 's xxmaj events and provided many a xxmaj drama and ( especially ) xxmaj comedy xxmaj shows for a pleasant interlude from harsh reality . \n\n xxmaj the literature of",pos
6,"xxbos xxmaj billy xxmaj chung xxmaj siu xxmaj hung 's ( the bloody swordplay film xxmaj assassin from 1993 ) film xxmaj love xxmaj to xxmaj kill ( hong xxmaj kong , 1993 ) is among the strongest products of the xxmaj category xxrep 3 i boom that inhabited the xxup hk cinema in early nineties . xxmaj it consisted of films with strong sex , nudity and violence , more or less gratuitous and shock valued only . xxmaj love xxmaj to xxmaj kill definitely belongs to the "" more "" category with some unforgettable ideas and pieces of celluloid sickness . \n\n xxmaj the xxup hk psycho xxmaj anthony xxmaj wong ( from the award winning xxmaj the xxmaj untold xxmaj story by xxmaj herman xxmaj yau , from the same year ) plays a business man and a husband who likes to torture , humiliate and rape",neg
7,"xxbos xxmaj office work , especially in this era of computers , multi - functional copy machines , e - mail , voice mail , snail mail and ` temps , ' is territory ripe with satirical possibilities , a vein previously tapped in such films as ` clockwatchers ' and ` office xxmaj space , ' and very successfully . xxmaj this latest addition to the temp / humor pool , however , ` haiku xxmaj tunnel , ' directed by xxmaj josh xxmaj kornbluth and xxmaj jacob xxmaj kornbluth , fails to live up to it 's predecessors , and leaves the laughs somewhere outside the door , waiting for a chance to sneak in . xxmaj unfortunately for the audience , that chance never comes ; so what you get is a nice try , but as the man once said , no cigar . \n\n\t xxmaj",neg
8,"xxbos xxmaj anyone who visited drive - ins in the 1950s , 60s , and 70s , must have seen a film or two by xxmaj american xxmaj international xxmaj pictures , a distributor that resembled 1980s giant xxmaj cannon xxmaj films . xxmaj wherever movie - goers ventured , xxup aip would be right there to supply the latest en vogue titles - in the 50s came horror movies like ' voodoo xxmaj woman ' and ' the xxmaj undead ; ' in the 60s were xxmaj frankie avalon - annette xxmaj funicello beach comedies and biker flicks like ' the xxmaj glory xxmaj xxunk ; ' and into the 70s , xxup aip churned out grindhouse - level trash like ' cannibal xxmaj girls ' and ' sugar xxmaj hill . ' \n\n ' dillinger , ' released in 1973 , is one of the more ' highbrow",neg


In [139]:
learn_clas.summary()

epoch,train_loss,valid_loss,accuracy,time
0,,,00:00,


SequentialRNN (Input shape: ['64 x 3345'])
Layer (type)         Output Shape         Param #    Trainable 
LSTM                 ['64 x 33 x 1152',   1,852,416  False     
________________________________________________________________
LSTM                 ['64 x 33 x 1152',   5,317,632  False     
________________________________________________________________
LSTM                 ['64 x 33 x 400', "  1,846,400  False     
________________________________________________________________
RNNDropout           64 x 33 x 400        0          False     
________________________________________________________________
RNNDropout           64 x 33 x 1152       0          False     
________________________________________________________________
RNNDropout           64 x 33 x 1152       0          False     
________________________________________________________________
BatchNorm1d          64 x 1200            2,400      True      
_______________________________________________________

In [None]:
%%time
# Second training pass: freeze_to(-2) changes which parameter groups are trainable
# (per the fastai docs, everything except the last two groups stays frozen), then
# one more one-cycle epoch is run at the same learning rate.
# NOTE(review): this cell has no execution count or output in the saved notebook,
# so the predictions and checkpoint below may come from a run without it.
# NOTE(review): fastbook chapter 10 uses a discriminative learning-rate slice at
# this step rather than a single value — consider whether 2e-2 is intended.
learn_clas.freeze_to(-2)
learn_clas.fit_one_cycle(1, 2e-2)

In [None]:
# Smoke test: classify a clearly negative review; the result is shown in the next cell.
preds = learn_clas.predict("this film shows incredibly bad writing and is a complete disaster")

In [86]:
preds

('neg', TensorText(0), TensorText([9.9998e-01, 2.0704e-05]))

In [33]:
# Second smoke test with a short negative review (overwrites `preds`).
preds = learn_clas.predict("what a terrible film")

In [23]:
preds

('neg', TensorText(0), TensorText([9.9956e-01, 4.4422e-04]))

In [17]:
# Save the classifier under the name 'classifier_single_epoch_' + modifier + 'b'.
# NOTE(review): the recorded output path is under /notebooks/temp/models, while an
# earlier cell resets learn_clas.path to the dataset directory, and this cell's
# execution count is out of order. On a clean top-to-bottom run the file may be
# written somewhere else — confirm the intended destination.
learn_clas.save('classifier_single_epoch_'+modifier+'b')

Path('/notebooks/temp/models/classifier_single_epoch_mar3b.pth')