# Configuration Section - Important

In [None]:
colab_flag = True  # Set to True when the notebook runs on Google Colab; gates the Colab setup cell below
imdb_phase2_finetuned_flag = False  # Set to True to load the saved phase-2 fine-tuned model instead of retraining it


# Importing Modules

## General Libraries

In [None]:
import pandas as pd
import numpy as np
import pickle

## For Colab

In [None]:
if colab_flag:
  
  !pip install -Uqq fastbook   
  from fastbook import * 
  from google.colab import drive 
  drive.mount('/content/drive')
  %cd /content/drive/My\ Drive/Colab\ Notebooks/

  gpu_info = !nvidia-smi
  gpu_info = '\n'.join(gpu_info)
  if gpu_info.find('failed') >= 0:
    print('Not connected to a GPU')
  else:
    print(gpu_info)

[K     |████████████████████████████████| 720 kB 4.0 MB/s 
[K     |████████████████████████████████| 189 kB 75.5 MB/s 
[K     |████████████████████████████████| 1.2 MB 66.5 MB/s 
[K     |████████████████████████████████| 46 kB 4.7 MB/s 
[K     |████████████████████████████████| 56 kB 5.9 MB/s 
[K     |████████████████████████████████| 51 kB 304 kB/s 
[?25hMounted at /content/drive
/content/drive/My Drive/Colab Notebooks
Sat Dec 11 11:04:15 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 495.44       Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off | 

## Deep Learning

In [None]:
from fastai.text.all import *

# Functions

# Building the LSTM model with a transfer learning approach

## Data loader object for the IMDb reviews

In [None]:
# Dataset root folder on the mounted Google Drive; the cells below read the
# 'train', 'test' and 'unsup' sub-folders from it.
# Spaces inside a Python string need no shell-style backslash escaping: a "\ "
# would keep the backslash and point to a folder that does not exist.
path = Path("/content/drive/My Drive/Colab Notebooks/abc")

In [None]:
# File getter for the language model: text files from the 'train', 'test'
# and 'unsup' folders.
get_imdb = partial(
    get_text_files,
    folders=['train', 'test', 'unsup'],
)


In [None]:
# Sanity check: list the text files found under `path`.
# The file getters need the root folder as an argument; calling get_files()
# with no argument raises a TypeError.
get_imdb(path)

Path('/root/.fastai/data/imdb')

In [None]:
 # Language-model DataLoaders over the text files returned by get_imdb,
 # with a random 80/20 train/validation split.
 imdb_dls = DataBlock(
     blocks=TextBlock.from_folder(path, is_lm=True),
     get_items=get_imdb,
     splitter=RandomSplitter(0.2),
 ).dataloaders(
     path,
     path=path,
     bs=128,
     seq_len=80,
 )

## Phase 1 - Loading the general language model pre-trained on Wikipedia articles

In [None]:
# Language-model learner on the AWD_LSTM architecture, reporting accuracy and
# perplexity, converted to mixed precision with to_fp16().
learn = language_model_learner(
    imdb_dls,
    AWD_LSTM,
    drop_mult=0.3,
    metrics=[accuracy, Perplexity()],
).to_fp16()


## Phase 2 - Fine-tuning the general model with the IMDb reviews corpus

### First tuning - only the embeddings

In [None]:
# The pretrained model is frozen by default, so this first one-cycle run
# fine-tunes only the embeddings.
learn.fit_one_cycle(n_epoch=1, lr_max=2e-2)

epoch,train_loss,valid_loss,accuracy,perplexity,time
0,4.012154,3.902845,0.300364,49.543194,26:09


### Unfreezing the model and fine-tuning for 10 epochs



In [None]:
if not imdb_phase2_finetuned_flag:
  
  learn.unfreeze()
  learn.fit_one_cycle(10, 2e-3)
  learn.save("imdb_finetuned")
  %cd /root/.fastai/data/imdb/models/
  !cp imdb_finetuned.pth /content/drive/My\ Drive/Colab\ Notebooks/Data

else:
  
  learn.load("imdb_finetuned")

epoch,train_loss,valid_loss,accuracy,perplexity,time
0,3.766195,3.762601,0.316651,43.060276,28:10


epoch,train_loss,valid_loss,accuracy,perplexity,time
0,3.766195,3.762601,0.316651,43.060276,28:10
1,3.678445,3.671302,0.327101,39.30304,28:19
2,3.566799,3.613816,0.333645,37.107368,28:11
3,3.45461,3.584203,0.337989,36.024616,28:31
4,3.381629,3.585835,0.338268,36.083466,28:04


### Saving/Loading the encoder

In [None]:
learn.save_encoder("imdb_encoder")
%cd /root/.fastai/data/imdb/models/
!cp imdb_encoder.pth /content/drive/My\ Drive/Colab\ Notebooks/Data


### Generating a text review

In [None]:
# Generate a sample review: the language model continues the seed text for
# n_words tokens, sampling at temperature 0.75.
n_words = 40
text = "I liked this movie because"
prediction = learn.predict(text, n_words=n_words, temperature=0.75)

In [None]:
# Show the review text generated by learn.predict in the previous cell.
print(prediction)

i liked this movie because it was a very original and original story . It did n't seem to be a typical 80 's movie but it had some very good performances from James Garner and Susan Sarandon . i


## Phase 3 - Fine-tuning the classifier model

### Creating the Classifier DataLoaders

In [None]:
# Classifier DataLoaders: texts are numericalized with the language model's
# vocab, labels come from the parent folder name, and the 'test' folder is
# used as the validation set.
dls_clas = DataBlock(
    blocks=(TextBlock.from_folder(path, vocab=imdb_dls.vocab), CategoryBlock),
    get_items=partial(get_text_files, folders=['train', 'test']),
    get_y=parent_label,
    splitter=GrandparentSplitter(valid_name='test'),
).dataloaders(
    path,
    path=path,
    bs=128,
    seq_len=72,
)

In [None]:
# Preview up to three (text, category) samples from the classifier DataLoaders.
dls_clas.show_batch(max_n=3)

Unnamed: 0,text,category
0,"xxbos xxmaj match 1 : xxmaj tag xxmaj team xxmaj table xxmaj match xxmaj bubba xxmaj ray and xxmaj spike xxmaj dudley vs xxmaj eddie xxmaj guerrero and xxmaj chris xxmaj benoit xxmaj bubba xxmaj ray and xxmaj spike xxmaj dudley started things off with a xxmaj tag xxmaj team xxmaj table xxmaj match against xxmaj eddie xxmaj guerrero and xxmaj chris xxmaj benoit . xxmaj according to the rules of the match , both opponents have to go through tables in order to get the win . xxmaj benoit and xxmaj guerrero heated up early on by taking turns hammering first xxmaj spike and then xxmaj bubba xxmaj ray . a xxmaj german xxunk by xxmaj benoit to xxmaj bubba took the wind out of the xxmaj dudley brother . xxmaj spike tried to help his brother , but the referee restrained him while xxmaj benoit and xxmaj guerrero",pos
1,"xxbos xxmaj titanic directed by xxmaj james xxmaj cameron presents a fictional love story on the historical setting of the xxmaj titanic . xxmaj the plot is simple , xxunk , or not for those who love plots that twist and turn and keep you in suspense . xxmaj the end of the movie can be figured out within minutes of the start of the film , but the love story is an interesting one , however . xxmaj kate xxmaj winslett is wonderful as xxmaj rose , an aristocratic young lady betrothed by xxmaj cal ( billy xxmaj zane ) . xxmaj early on the voyage xxmaj rose meets xxmaj jack ( leonardo dicaprio ) , a lower class artist on his way to xxmaj america after winning his ticket aboard xxmaj titanic in a poker game . xxmaj if he wants something , he goes and gets it",pos
2,"xxbos xxmaj some have praised _ xxunk _ as a xxmaj disney adventure for adults . i do n't think so -- at least not for thinking adults . \n\n xxmaj this script suggests a beginning as a live - action movie , that struck someone as the type of crap you can not sell to adults anymore . xxmaj the "" crack staff "" of many older adventure movies has been done well before , ( think _ the xxmaj dirty xxmaj dozen _ ) but _ atlantis _ represents one of the worse films in that motif . xxmaj the characters are weak . xxmaj even the background that each member trots out seems stock and awkward at best . xxmaj an xxup md / xxmaj medicine xxmaj man , a tomboy mechanic whose father always wanted sons , if we have not at least seen these before",neg


### Creating the classifier model

In [None]:
# Text classifier learner on the AWD_LSTM architecture, reporting accuracy,
# converted to mixed precision with to_fp16().
# Note: this rebinds `learn`, which previously held the language-model learner.
learn = text_classifier_learner(
    dls_clas,
    AWD_LSTM,
    drop_mult=0.5,
    metrics=accuracy,
).to_fp16()

In [None]:
# Load the encoder saved as "imdb_encoder" from the fine-tuned language model
# into the classifier learner.
learn.load_encoder("imdb_encoder")

<fastai.text.learner.TextLearner at 0x7fead24e8f50>

### Fine-Tuning the Classifier

In [None]:
# First pass: one one-cycle epoch before any layers are progressively unfrozen.
learn.fit_one_cycle(n_epoch=1, lr_max=2e-2)

epoch,train_loss,valid_loss,accuracy,time
0,0.254971,0.197572,0.9216,01:40


In [None]:
# Progressively unfreeze the model, starting with the last two layers.
# The slice gives a learning-rate range from 1e-2 / 2.6**4 up to 1e-2.
learn.freeze_to(-2)
learn.fit_one_cycle(n_epoch=1, lr_max=slice(1e-2 / 2.6**4, 1e-2))

epoch,train_loss,valid_loss,accuracy,time
0,0.236382,0.176682,0.93228,01:53


In [None]:
# Unfreeze one more layer and train again with a lower learning-rate
# range (5e-3 / 2.6**4 up to 5e-3).
learn.freeze_to(-3)
learn.fit_one_cycle(n_epoch=1, lr_max=slice(5e-3 / 2.6**4, 5e-3))

epoch,train_loss,valid_loss,accuracy,time
0,0.191473,0.154162,0.94236,02:38


In [None]:
# Finally unfreeze the whole model and train two epochs with the lowest
# learning-rate range (1e-3 / 2.6**4 up to 1e-3).
learn.unfreeze()
learn.fit_one_cycle(n_epoch=2, lr_max=slice(1e-3 / 2.6**4, 1e-3))

epoch,train_loss,valid_loss,accuracy,time
0,0.160293,0.151415,0.94368,03:14
1,0.136745,0.152952,0.94388,03:15


### Exporting the model

In [None]:
learn.save("imdb_inferer")
%cd /root/.fastai/data/imdb/models/
!cp imdb_inferer.pth /content/drive/My\ Drive/Colab\ Notebooks/Data

Path('/root/.fastai/data/imdb/models/imdb_inferer.pth')