# AWD-LSTM

**For a fair comparison and benchmarking, no pretrained models are used.**

In [1]:
import os

# Environment flag consumed by fastai; it is set here, before the fastai import in the
# next cell. NOTE(review): presumably controls traceback-frame clearing — confirm in fastai docs.
os.environ["FASTAI_TB_CLEAR_FRAMES"] = "1"

In [2]:
from fastai.text import *
from fastai.data_block import *

In [3]:
# Directory (relative to this notebook) from which train.csv and test.csv are read.
data_path = Path('../../data/quora/')

In [4]:
# Read the train and test CSV files found under `data_path` into DataFrames
# (train first, then test).
train_df, test_df = (pd.read_csv(data_path / csv_name)
                     for csv_name in ("train.csv", "test.csv"))

In [5]:
# Display the (rows, columns) shape of the train frame followed by the test frame.
tuple(frame.shape for frame in (train_df, test_df))

((1306122, 3), (56370, 2))

In [6]:
# Peek at the first training row to see the available columns.
train_df.head(n=1)

Unnamed: 0,qid,question_text,target
0,00002165364db923c7e6,How did Quebec nationalists see their province...,0


In [8]:
# Reload the language-model and classifier DataBunches from the cache under ".".
# The classifier data is loaded with a batch size of 256.
# NOTE(review): this cell runs before the cells that build and save the data, so it only
# works when the cache was written in an earlier session; on a fresh checkout run the
# "LM AND CLAS DATA" cells first — confirm the intended order.
data_lm = TextLMDataBunch.load(".")
data_clas = TextClasDataBunch.load(".", bs=256)

### LM AND CLAS DATA

In [9]:
# Create the language-model data: the training questions are the LM training set and the
# test questions are used as the LM validation set.
# The text column is named explicitly rather than relying on fastai's positional default
# (`text_cols=1`), so this cell agrees with the `cols="question_text"` used for the
# classifier data and does not depend on the column order of the CSV files.
data_lm = TextLMDataBunch.from_df(path='.', train_df=train_df, valid_df=test_df,
                                  text_cols='question_text')

In [10]:
# Create the classification data, reusing the language-model vocabulary so that both
# models share the same token ids.
# A fixed seed makes the 5% validation split reproducible across kernel restarts, so
# metrics from separate runs are computed on the same held-out questions.
data_clas = (TextList.from_df(path=".", df=train_df, cols="question_text",
                              vocab=data_lm.train_ds.vocab)
                   .random_split_by_pct(0.05, seed=42)
                   .label_from_df(cols='target')
                   .add_test(TextList.from_df(path=".", df=test_df, cols="question_text",
                              vocab=data_lm.train_ds.vocab))
                   .databunch(bs=512))

In [11]:
# Save both DataBunches so that later experiments can reload them instead of rebuilding.
# NOTE(review): both objects were created with path "." and both calls use the default
# cache location, so the second save may overwrite files written by the first — TODO
# confirm, and if so pass distinct cache names here and in the matching `load` calls.
data_lm.save()
data_clas.save()

### LM Training

fastai now supports a generic language-model learner for the `AWD-LSTM`, `QRNN`, `Transformer`, and `Transformer XL` architectures.

In [9]:
# Hyper-parameters handed to the language-model learner as its `config`.
awd_lstm_lm_config = {
    # architecture
    "emb_sz": 300,
    "n_hid": 1150,
    "n_layers": 3,
    "pad_token": 1,
    "qrnn": False,
    # dropout probabilities
    "output_p": 0.25,
    "hidden_p": 0.1,
    "input_p": 0.2,
    "embed_p": 0.02,
    "weight_p": 0.15,
    # output layer options
    "tie_weights": True,
    "out_bias": True,
}

In [10]:
# Build the AWD-LSTM language-model learner from scratch (pretrained=False).
# A shallow copy of the config is passed because fastai's model factory modifies the dict
# it is given (it removes the output-layer keys); without the copy this cell fails when it
# is re-run without first re-running the config cell.
learn = language_model_learner(data=data_lm, arch=AWD_LSTM, config=awd_lstm_lm_config.copy(),
                               drop_mult=1.0, pretrained=False, clip=0)

In [None]:
# Train the language model with the one-cycle policy: 5 epochs, peak learning rate 1e-2.
learn.fit_one_cycle(cyc_len=5, max_lr=1e-2)

In [None]:
# Persist the language model's encoder under the name "awd-lstm-enc"; the classifier
# section later loads it with `load_encoder`.
learn.save_encoder("awd-lstm-enc")

### Metric

In [None]:
from utils import fbeta_binary

### Clas Training After LM - Full Data

In [None]:
# Hyper-parameters handed to the text-classifier learner as its `config`.
awd_lstm_clas_config = {
    # architecture (same sizes as the language-model config so the saved encoder fits)
    "emb_sz": 300,
    "n_hid": 1150,
    "n_layers": 3,
    "pad_token": 1,
    "qrnn": False,
    "bidir": False,
    # dropout probabilities
    "output_p": 0.4,
    "hidden_p": 0.2,
    "input_p": 0.6,
    "embed_p": 0.1,
    "weight_p": 0.5,
}

In [None]:
# Build the AWD-LSTM classifier learner.
# `pretrained=False` is passed explicitly: the fastai default is `pretrained=True`, which
# would try to fetch and load published weights, contradicting this notebook's
# "no pretrained models" rule and the language-model cell (which also passes pretrained=False).
# The encoder trained above is loaded separately with `load_encoder`.
# A shallow copy of the config is passed because fastai's model factory modifies the dict
# it is given, which would otherwise break a re-run of this cell.
learn = text_classifier_learner(data=data_clas, arch=AWD_LSTM, bptt=70,
                                max_len=1500, drop_mult=1.0, pretrained=False,
                                config=awd_lstm_clas_config.copy())

In [None]:
# Metrics: one `fbeta_binary` instance per threshold produced by the arange call
# (0.1 up to, but not including, 1.0 in steps of 0.2).
learn.metrics = [fbeta_binary(thresh=cutoff)
                 for cutoff in np.arange(start=0.1, stop=1., step=0.2)]

In [None]:
# Load the encoder saved at the end of language-model training ("awd-lstm-enc") into the
# classifier; the trailing semicolon suppresses the cell output.
learn.load_encoder("awd-lstm-enc");

In [None]:
# Stage 1: freeze every layer group except the last one.
learn.freeze_to(n=-1)

In [None]:
# Stage 1 training: 5 one-cycle epochs at a peak learning rate of 5e-2.
learn.fit_one_cycle(cyc_len=5, max_lr=5e-2)

In [None]:
# Stage 2: freeze every layer group except the last two.
learn.freeze_to(n=-2)

In [None]:
# Stage 2 training: 5 one-cycle epochs at a peak learning rate of 1e-3.
learn.fit_one_cycle(cyc_len=5, max_lr=1e-3)

In [None]:
# One additional one-cycle epoch at the same peak learning rate (1e-3) before saving stage 2.
learn.fit_one_cycle(cyc_len=1, max_lr=1e-3)

In [None]:
# Checkpoint the classifier after stage 2 under the name "clas-stage-2".
learn.save("clas-stage-2")

In [None]:
# Reload the cached classifier data with a smaller batch size (256 instead of the 512 used
# when the data was built) so that training the fully unfrozen model fits on the GPU.
data_clas = TextClasDataBunch.load(".", bs=256)

In [None]:
# Point the existing learner at the DataBunch that was just reloaded with the smaller batch size.
learn.data = data_clas

In [None]:
# Final stage: unfreeze the whole model so every layer group is trained.
learn.unfreeze()

In [None]:
# Final fine-tuning: 5 one-cycle epochs at a peak learning rate of 1e-4.
learn.fit_one_cycle(cyc_len=5, max_lr=1e-4)

In [None]:
# Checkpoint the fully fine-tuned classifier under the name "clas-stage-final".
learn.save("clas-stage-final")