# Fast AI pre-processing

## Train on Movie dialogs

In [1]:
from fastai.text.all import *

In [2]:
# Root folder of the Cornell movie-dialogs data. It is used both as a string prefix
# for the CSV file name and as `path` for the fastai DataLoaders, so the trailing
# '/' is required.
cwd = 'D:/Pytorch Data/cornell_movie_dialogs/'

In [3]:
# Load the dialog table (it has 'input' and 'truth' text columns).
# `cwd` already ends with '/', so the file name is appended directly.
dialogs_csv = f'{cwd}movie_dialogs.csv'
data = pd.read_csv(dialogs_csv)

In [4]:
# `TextDataLoaders.from_df(..., valid_col='is_valid')` needs a boolean column that
# marks the validation rows; here the first 10% of the rows are held out.
#
# The flag is written with a single `data.loc[rows, column]` assignment. The chained
# form `data['is_valid'].loc[rows] = True` assigns into an intermediate Series:
# pandas reports that with SettingWithCopyWarning, and with Copy-on-Write enabled
# the write never reaches `data`, which would leave the validation set empty.
data['is_valid'] = False
n_valid = len(data) // 10
data.loc[data.index[:n_valid], 'is_valid'] = True

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)


In [5]:
# Sanity check: list the columns available after adding the `is_valid` flag.
data.columns

Index(['Unnamed: 0', 'input', 'truth', 'is_valid'], dtype='object')

## Train encoder on movie dialogs

In [6]:
# Language-model DataLoaders built from the 'input' lines only.
# NOTE(review): confirm that `max_vocab` is actually honoured when passed through
# `from_df`; if it is only swallowed as an extra keyword the vocabulary is not capped.
lm_loader_options = dict(
    path=cwd,
    text_col='input',
    is_lm=True,
    valid_col='is_valid',  # boolean train/validation flag created earlier
    max_vocab=10000,
    seq_len=10,            # short windows: each show_batch row holds 10 tokens
    num_workers=0,         # keep data loading in the main process
)
dls_lm = TextDataLoaders.from_df(data, **lm_loader_options)

# Preview a few (text, shifted-by-one target) pairs.
dls_lm.show_batch(max_n=3)

  return array(a, dtype, copy=False, order=order)


Unnamed: 0,text,text_
0,xxbos calm yourself xxunk . now slowly . who has,calm yourself xxunk . now slowly . who has finally
1,! xxbos i was wondering if you re not doing,xxbos i was wondering if you re not doing anything
2,on painkillers and we spent the whole afternoon together !,painkillers and we spent the whole afternoon together ! he


In [8]:
# AWD_LSTM language-model learner on the 'input' loaders, reporting accuracy and
# perplexity each epoch; `to_fp16()` switches the learner to mixed precision.
lm_metrics = [accuracy, Perplexity()]
learn = language_model_learner(dls_lm, AWD_LSTM, drop_mult=0.3, metrics=lm_metrics)
learn = learn.to_fp16()

In [9]:
# First pass: a single one-cycle epoch at the higher learning rate, before the
# model is unfrozen in a later cell.
learn.fit_one_cycle(n_epoch=1, lr_max=2e-2)

epoch,train_loss,valid_loss,accuracy,perplexity,time
0,4.306205,4.140295,0.299333,62.821354,13:09


In [10]:
# Checkpoint the learner after the first epoch; the file lands in
# <cwd>/models/1epoch.pth (see the returned path).
learn.save('1epoch')

Path('D:/Pytorch Data/cornell_movie_dialogs/models/1epoch.pth')

In [11]:
# Restore the '1epoch' checkpoint — presumably so the notebook can be resumed from
# this point without repeating the first epoch.
learn.load('1epoch')

<fastai.text.learner.LMLearner at 0x2893b5b3a00>

In [12]:
# Unfreeze and fine-tune for 10 epochs at a 10x lower learning rate.
# In the recorded run the validation loss bottoms out at epoch 4 (3.9448) and then
# climbs to 4.1048 while the training loss keeps falling, i.e. the later epochs
# overfit; fewer epochs would likely have been enough.
learn.unfreeze()
learn.fit_one_cycle(n_epoch=10, lr_max=2e-3)

epoch,train_loss,valid_loss,accuracy,perplexity,time
0,3.971489,4.012854,0.312596,55.304462,16:00
1,3.929687,3.995411,0.315101,54.348167,16:17
2,3.917242,3.967587,0.319055,52.856846,16:34
3,3.832657,3.95048,0.322253,51.9603,16:26
4,3.712217,3.944817,0.323507,51.666885,16:33
5,3.611965,3.952956,0.324157,52.089127,16:22
6,3.473314,3.98359,0.322781,53.709511,17:11
7,3.290017,4.025487,0.321653,56.007576,15:58
8,3.159003,4.079242,0.318292,59.10067,15:44
9,3.065054,4.104817,0.317337,60.631641,16:17


In [13]:
# Store only the encoder of the fine-tuned language model under the name
# 'encoded_model' (presumably for reuse in a downstream model — confirm).
learn.save_encoder('encoded_model')

In [20]:
# Sample continuations of a prompt from the fine-tuned language model.
input_text   = 'Hello there my good friend'
nr_words     = 10   # number of words requested per sample
nr_sentences = 1    # number of independent samples to draw

# temperature 0.5 (< 1) sharpens the sampling distribution towards likelier words.
preds = []
for _sample in range(nr_sentences):
    preds.append(learn.predict(input_text, nr_words, temperature = 0.5))

In [21]:
# Display the sampled continuation(s).
preds

['Hello there my good friend . oh no . what the hell is']

## Try Combining Each Line with the Truth

In [22]:
# Join each input line with its reply into one training string, using ' : ' as the
# turn separator (the same marker that ends the generation prompts further down).
turn_separator = ' : '
data['combined'] = data['input'] + turn_separator + data['truth']

In [23]:
# Language-model DataLoaders built from the combined "input : truth" text; all other
# settings match the loaders built from 'input' alone.
# NOTE(review): confirm that `max_vocab` is actually honoured when passed through
# `from_df`; if it is only swallowed as an extra keyword the vocabulary is not capped.
combined_loader_options = dict(
    path=cwd,
    text_col='combined',
    is_lm=True,
    valid_col='is_valid',
    max_vocab=10000,
    seq_len=10,
    num_workers=0,
)
dls_lm = TextDataLoaders.from_df(data, **combined_loader_options)

  return array(a, dtype, copy=False, order=order)


In [26]:
# Preview three rows of the combined-text loaders; the ':' separator token should be
# visible inside the samples.
dls_lm.show_batch(max_n=3)

Unnamed: 0,text,text_
0,xxbos you re not going to gray . : he,you re not going to gray . : he must
1,had . lemme tell you about sarge . . .,. lemme tell you about sarge . . . xxbos
2,enough for a xxunk . xxbos in the gut i,for a xxunk . xxbos in the gut i mean


In [27]:
# Fresh AWD_LSTM learner for the combined "input : truth" loaders (replaces the
# previous `learn`).
# NOTE(review): unlike the first learner this one is not switched to fp16 via
# `.to_fp16()` — confirm whether that is intentional.
lm_metrics = [accuracy, Perplexity()]
learn = language_model_learner(dls_lm, AWD_LSTM, drop_mult=0.3, metrics=lm_metrics)

In [28]:
# First pass on the combined text: a single one-cycle epoch at the higher learning
# rate, before the model is unfrozen in a later cell.
learn.fit_one_cycle(n_epoch=1, lr_max=2e-2)

epoch,train_loss,valid_loss,accuracy,perplexity,time
0,4.283886,4.166557,0.299896,64.493011,26:22


In [29]:
# Checkpoint the combined-text learner after its first epoch; the file lands in
# <cwd>/models/1epochcombined.pth (see the returned path).
learn.save('1epochcombined')

Path('D:/Pytorch Data/cornell_movie_dialogs/models/1epochcombined.pth')

In [30]:
# Restore the '1epochcombined' checkpoint — presumably so the notebook can be
# resumed from this point without repeating the first epoch.
learn.load('1epochcombined')

<fastai.text.learner.LMLearner at 0x289f8bcfaf0>

In [31]:
# Unfreeze and fine-tune for 5 epochs at a 10x lower learning rate.
# In the recorded run the validation loss is lowest at epoch 3 (3.9508) and has
# started to rise again by epoch 4 (3.9840).
learn.unfreeze()
learn.fit_one_cycle(n_epoch=5, lr_max=2e-3)

epoch,train_loss,valid_loss,accuracy,perplexity,time
0,3.972531,4.047938,0.313456,57.279217,32:31
1,3.890632,3.986928,0.322638,53.889114,30:42
2,3.709523,3.95193,0.327759,52.035702,30:20
3,3.536654,3.950764,0.328703,51.975056,30:20
4,3.35933,3.984048,0.327341,53.734116,30:59


In [32]:
# Store only the encoder of the combined-text language model under the name
# 'combined_encoded_model' (presumably for reuse in a downstream model — confirm).
learn.save_encoder('combined_encoded_model')

In [34]:
# Prompt that ends with the ':' turn separator, so the model continues with a reply.
input_text   = 'Hello there my good friend. :'
nr_words     = 10   # number of words requested per sample
nr_sentences = 6    # number of independent samples to draw

# temperature 0.5 (< 1) sharpens the sampling distribution towards likelier words.
preds = []
for _sample in range(nr_sentences):
    preds.append(learn.predict(input_text, nr_words, temperature = 0.5))

In [35]:
# Display the sampled replies.
preds

['Hello there my good friend . : how do you know ? i just saw him',
 'Hello there my good friend . : i m not sure i know that . i',
 'Hello there my good friend . : you re not a captain are you ? i',
 'Hello there my good friend . : oh . i ve been thinking about that .',
 'Hello there my good friend . : i thought you wanted to see me . i',
 'Hello there my good friend . : i m sorry that s all . i m']

In [38]:
# Same sampling recipe with a different prompt and slightly longer replies.
input_text   = 'What do you want? :'
nr_words     = 12
nr_sentences = 6

replies = []
for _sample in range(nr_sentences):
    replies.append(learn.predict(input_text, nr_words, temperature = 0.5))
replies

['What do you want ? : i m an old friend of mine . that s a',
 'What do you want ? : i m a writer . i m not sure i understand',
 'What do you want ? : i want to see my son . i m sorry .',
 'What do you want ? : the new york times . you know what i mean .',
 'What do you want ? : i m not a reporter . do nt say that .',
 'What do you want ? : i do nt know . i m not going to sit']

In [42]:
# Long samples (50 words): long enough that the model emits further ':' separators
# on its own, producing several dialog turns per sample.
input_text   = 'Who are you? :'
nr_words     = 50
nr_sentences = 6

replies = []
for _sample in range(nr_sentences):
    replies.append(learn.predict(input_text, nr_words, temperature = 0.5))
replies

['Who are you ? : i m the one who s going to be a member of the communist party . i m not sure . : well i m going to tell you something . i d like to talk to you . i had to go to the bathroom . :',
 'Who are you ? : i do nt know . i m not sure . : you know what i m saying to you ? i m not sure . : you re not going to tell me anything . i m not really asking you to . : i do nt',
 'Who are you ? : i am the man who killed him . he s in the same room with mr . carpenter . : well i m sorry . you re not going to let anybody go ? : i am not going to be a policeman anymore . well i',
 'Who are you ? : i m the one that came along . i do nt know . : you have to go back to the hospital . i do nt know . : i m sorry . i m not sure i can . : i guess . you re',
 'Who are you ? : i m not a communist . i m sorry i i m sorry . : no you re not . you re not . you re not . i m sorry . : i m sorry . i m not going anywhere . : i m not',
 'Who are you ? : i am a member of the nazi party . i ca nt go anywhere 

Note that the conversation-change token (` : `) can be read as the network signalling that it considers the current line finished. Generation can therefore be stopped at that token, which avoids cutting a sentence off in the middle of a line.

## Looking inside the model