In [1]:
'''
This is a model of Text sentiment classification.
Li Teng
10.11.2021


The process is carried out in the following steps:

1.load config

2.build model

3.load data and clean it.

4.training and recording 

5.validation

6.hotflip
'''

'\nThis is a model of Text sentiment classification.\nLi Teng\n10.11.2021\n\n\nThe process will be done in following step:\n\n1.load config\n\n2.build model\n\n3.load data and clean it.\n\n4.training and recording \n\n5.validation\n\n6.hotflip\n'

In [1]:
import torch
from config import DefaultConfig
from Text_Cnn import ConvNet, dynamical_padding, get_vocab_list, class_to_tensor
import os
import heapq
from utils import load_flattened_documents, load_datasets, get_label, get_meanloss_and_wi, load_documents
from datasets_preprocessing import clean_datasets, Movie_Classif_Dataset, get_part_of_speech
from datasets_preprocessing import doc_to_tag, get_word_tag_dict, get_wi_tag
from torch.utils.data import DataLoader
import torch.nn as nn
import wandb
import nltk

# Step 1: load the run configuration.
Conf = DefaultConfig()

# Hyper-parameters and runtime settings, read once from the config object so
# that the rest of the notebook can refer to plain module-level names.
VOCAB_SIZE, BATCH_SIZE = Conf.vocab_size, Conf.batch_size
Lr, EPOCHS = Conf.lr, Conf.epochs
DEVICE = Conf.device

In [7]:
# Step 2: build the model.
# ConvNet is constructed with its default arguments.
Text_CNN = ConvNet()

In [2]:
# Step 3: load the data and preprocess it.

# Load the raw, flattened documents and clean them.
data_root = os.path.join('Data', 'movies')
documents = clean_datasets(load_flattened_documents(data_root, None))

# The training split is deliberately bound to `train_split`, not `train`:
# `train` is also the name of the training function defined in a later cell,
# so binding the split to `train` would shadow that function whenever this
# cell is re-run after the function has been defined.
train_split, val, test = load_datasets(data_root)

# Training dataset built from the cleaned documents and the training split.
Train_Dataset = Movie_Classif_Dataset(documents, train_split)

# Batches are shuffled every epoch and collated with `dynamical_padding`.
Loader = DataLoader(dataset=Train_Dataset,
                    batch_size=BATCH_SIZE,
                    shuffle=True,
                    collate_fn=dynamical_padding)

In [3]:
'''
step 4: training and recording 
'''
def train(epochs,model,device,dataloader,Lr):
    '''
    Train `model` with SGD on an NLL loss and log running metrics to wandb.

    Args:
        epochs: number of passes over `dataloader`.
        model: network to optimise; its output is passed to `nn.NLLLoss` and
            to `argmax(dim=1)` to obtain the predicted labels.
        device: device the model and every batch are moved to.
        dataloader: iterable yielding `(x, y)` tensor batches.
        Lr: learning rate of the SGD optimiser.

    Side effects:
        Prints and logs (via `wandb.log`) the mean loss and accuracy of every
        window of 100 batches, and saves the whole model object to
        'Text_Cnn_1.pth' once training finishes. `wandb.init` must have been
        called by the caller beforehand.
    '''
    log_every = 100  # number of batches per reporting window
    loss_func = nn.NLLLoss()
    optimizer = torch.optim.SGD(model.parameters(),lr=Lr)
    model.to(device)

    # Make sure train-time behaviour is active while optimising, and remember
    # the previous mode so the caller gets the model back in the state it had.
    was_training = model.training
    model.train()

    # Register the wandb hooks once, not once per reporting window.
    wandb.watch(model)

    for e in range(epochs):
        loss_sum = 0.0
        correct = 0
        seen = 0
        for i,(x,y) in enumerate(dataloader):
            x = x.to(device)
            y = y.to(device)
            y_hat = model(x)
            loss = loss_func(y_hat,y)
            label_hat = torch.argmax(y_hat,dim=1)
            correct += torch.sum(label_hat == y).item()
            # Count the samples actually seen, so that a short final batch (or
            # a loader built with another batch size) does not skew accuracy.
            seen += y.size(0)
            # Accumulate a plain float: summing the loss tensors themselves
            # would keep autograd state referenced between reports.
            loss_sum += loss.item()
            # backward propagation
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            if (i+1)%log_every == 0:
                acc = correct/seen
                loss_average = loss_sum/log_every
                print('epoch:{}, batch:{}, loss:{}, acc:{}'.format(e,i,loss_average,acc))
                wandb.log({"loss": loss_average,"acc":acc})
                loss_sum = 0.0
                correct = 0
                seen = 0

    model.train(was_training)
    torch.save(model,'Text_Cnn_1.pth')

In [4]:
# Load the model from the checkpoint if one exists; otherwise train from scratch.
if os.path.exists('Text_Cnn_1.pth'):
    # `map_location` places the restored tensors on the GPU when one is
    # available and on the CPU otherwise. Without it, a checkpoint written
    # from a GPU run cannot be deserialized on a CPU-only machine.
    # NOTE(review): the checkpoint is a pickled model object and torch.load
    # unpickles it; only load checkpoint files from a trusted source.
    Text_CNN = torch.load('Text_Cnn_1.pth',
                          map_location='cuda' if torch.cuda.is_available() else 'cpu')
    print('load from Text_Cnn_1.pth')
else:
    print('training model.')
    wandb.init(project='Text_Cnn',entity='teng_li')
    train(EPOCHS,Text_CNN,DEVICE,Loader,Lr)

load from Text_Cnn_1.pth


In [10]:
# Optional experiment: train again for a fixed 20 epochs.
# Uncomment the next line to start from freshly initialised weights instead of
# continuing with the current Text_CNN.
#Text_CNN = ConvNet()
print('training model.')
# This run is logged to the 'Text_Cnn3' wandb project
# (the checkpoint cell uses 'Text_Cnn').
wandb.init(entity='teng_li', project='Text_Cnn3')
train(epochs=20, model=Text_CNN, device=DEVICE, dataloader=Loader, Lr=Lr)

training model.


wandb: wandb version 0.12.10 is available!  To upgrade, please run:
wandb:  $ pip install wandb --upgrade


epoch:0, batch:99, loss:0.7008103132247925, acc:0.515
epoch:0, batch:199, loss:0.6993375420570374, acc:0.52
epoch:0, batch:299, loss:0.7014205455780029, acc:0.465
epoch:0, batch:399, loss:0.6985161304473877, acc:0.505
epoch:0, batch:499, loss:0.6908286213874817, acc:0.515
epoch:0, batch:599, loss:0.6981061697006226, acc:0.465
epoch:0, batch:699, loss:0.6973797678947449, acc:0.48
epoch:0, batch:799, loss:0.6960407495498657, acc:0.495
epoch:1, batch:99, loss:0.6951991319656372, acc:0.495
epoch:1, batch:199, loss:0.6830211877822876, acc:0.585
epoch:1, batch:299, loss:0.6974428296089172, acc:0.525
epoch:1, batch:399, loss:0.6965981125831604, acc:0.48
epoch:1, batch:499, loss:0.6983054280281067, acc:0.43
epoch:1, batch:599, loss:0.6970250010490417, acc:0.485
epoch:1, batch:699, loss:0.6955912113189697, acc:0.51
epoch:1, batch:799, loss:0.6932725310325623, acc:0.525
epoch:2, batch:99, loss:0.6923266053199768, acc:0.535
epoch:2, batch:199, loss:0.6897321343421936, acc:0.56
epoch:2, batch:299,

KeyboardInterrupt: 

In [8]:
# Step 5: validation.

# Validation dataset built from the cleaned documents and the validation split.
Val_Dataset = Movie_Classif_Dataset(documents, val)

# One document per batch, in a fixed order.
Val_Loader = DataLoader(Val_Dataset, batch_size=1, shuffle=False)

def validation(model,val_dataloader):
    '''
    Print the accuracy of `model` over `val_dataloader`.

    Args:
        model: object exposing `predict(x)`; the value it returns is compared
            with the first label of each batch.
        val_dataloader: iterable of `(x, y)` pairs. Only `y[0]` is inspected,
            so every pair is scored as a single example.

    Prints the number of correct predictions, the number of examples and the
    accuracy. An empty loader reports an accuracy of nan instead of raising
    ZeroDivisionError.
    '''
    correct = 0
    total = 0
    # Inference only: no gradients are needed here.
    with torch.no_grad():
        for x,y in val_dataloader:
            if y[0] == model.predict(x):
                correct += 1
            total += 1
    # Guard the division so that an empty validation set does not crash.
    accuracy = correct/total if total else float('nan')
    print('correct:',correct)
    print('total:',total)
    print('acc = ',accuracy)
    
# Report the accuracy of the current model on the validation loader.
validation(model=Text_CNN, val_dataloader=Val_Loader)

correct: 175
total: 200
acc =  0.875


In [6]:
'''
step 6: hotflip
In this step we will try to:
6.0 get word_level documents and sentence_level documents, and some config
6.1 get gradient of one word (w0_gradient)
6.2 pick the N words whose flip is estimated to change the loss the most (min (w0_gradient * w0_embed ))
6.3 find the best replacement word (wi) for each flipped word (max (w0_gradient * wi_embed))
6.4 find best w0 and wi
6.5 replace w0 with wi 
'''

'\nstep 6: hotflip\nIn this step we will try to:\n6.0 get word_level documents and sentence_level documents, and some config\n6.1 get gradient of one word (w0_gradient)\n6.2 N words which has max loss changed to flip (min (w0_gradient * w0_embed ))\n6.3 the best word(wi) for each filped word (max (w0_gradient * wi_embed))\n6.4 find best w0 and wi\n6.5 replace w0 with wi \n'

In [5]:
# Step 6.0: gather the word-level and sentence-level documents, the word -> tag
# dictionary and the hotflip settings.
data_root = os.path.join('Data', 'movies')
word_level_docs = clean_datasets(load_flattened_documents(data_root, None))
sent_level_docs = load_documents(data_root)

# Loss used to score a document before and after each flip.
loss_func = nn.NLLLoss()

# Vocabulary list; it is indexed by word id when looking up replacement words.
Vocab_list = get_vocab_list()

# Hotflip settings, all read from the config object:
#   vocab_size      <- Conf.search_size
#   beam_size       <- Conf.beam_search_size
#   change_word_num <- Conf.change_word_num
vocab_size, beam_size, change_word_num = (
    Conf.search_size,
    Conf.beam_search_size,
    Conf.change_word_num,
)

# Word -> tag dictionary built from the sentence-level documents.
word_tag_dict = get_word_tag_dict(sent_level_docs)

In [33]:
# Only needed before flipping a second time: the hotflip loop overwrites the
# word-level documents (and the batches it iterates over) in place, so both are
# rebuilt here. The very first flip run does not need this cell.
Val_Loader = DataLoader(Val_Dataset, batch_size=1, shuffle=False)
word_level_docs = clean_datasets(load_flattened_documents(data_root, None))

In [None]:

# Hotflip: only the validation documents are processed here, as a sample.
#
# For every document, up to `change_word_num` words are replaced one at a time.
# Each round:
#   6.1 take the gradient for the current input,
#   6.2 keep the `beam_size` positions with the smallest gradient . embedding,
#   6.3 for each of them, search the first `vocab_size` vocabulary entries for
#       the replacement with the largest gradient . embedding that passes the
#       part-of-speech and cosine-similarity constraints,
#   6.4 pick the (position, replacement) pair with the largest estimated loss
#       increase,
#   6.5 apply that replacement to both the input tensor and the word list.
#
# NOTE(review): the best-replacement search starts from a score of 0, so
# candidates whose gradient . embedding is not positive are never selected;
# confirm that this is intended.

# for each doc in val_dataloader
for i,(x,y) in enumerate(Val_Loader):
    y_hat = Text_CNN.single_instance_forward(x)
    y_tensor = class_to_tensor(y)
    # record the prediction and loss before hotflip
    loss = loss_func(y_hat,y_tensor)
    print('loss before flip:',loss)
    print('class before flip:',Text_CNN.predict(x))
    y = get_label(y)
    # Val_Loader is unshuffled with batch_size=1, so batch i corresponds to val[i].
    docid = val[i].annotation_id
    print('docid:',docid)
    # get word_level and sent_level doc
    word_doc = word_level_docs[docid]
    sent_doc = sent_level_docs[docid]
    # get tag for each word in doc
    words_tag = doc_to_tag(sent_doc)
    for n in range(change_word_num):
        print('flip time:',n+1)
        # step 6.1: get gradient
        gradient = Text_CNN.get_gradient(x,y)
        # step 6.2: collect the loss estimate of every candidate position w0
        w0_list = []
        for w0_place,w0 in enumerate(word_doc):
            w0_id = x[0][0][w0_place]
            w0_embed = Text_CNN.get_word_embedding(w0_id)
            w0_gradient = gradient[0][0][0][w0_place]
            # positions whose gradient sums to exactly zero are skipped
            if w0_gradient.sum() != 0:
                # now min (w0_gradient * w0_embed )
                w0_loss_estimate = torch.dot(w0_gradient,w0_embed)
                w0_list.append({'w0':w0,'w0_place':w0_place,'w0_gradient':w0_gradient,
                                'w0_embed':w0_embed,'w0_loss_estimate':w0_loss_estimate})
        # keep the beam_size positions with the smallest estimate
        best_w0 = heapq.nsmallest(beam_size,w0_list,key=lambda s: s['w0_loss_estimate'])

        # step 6.3: the best word (wi) for each flipped word (max (w0_gradient * wi_embed))
        final_list = []
        for w0_info in best_w0:
            w0 = w0_info['w0']
            w0_place = w0_info['w0_place']
            w0_tag = words_tag[w0_place]
            w0_embed = w0_info['w0_embed']
            best_wi = {'wi_id':None,'wi_embed':None,'wi_loss_estimate':0}
            w0_gradient = w0_info['w0_gradient']
            for wi_id in range(vocab_size):
                # get word wi
                wi_index = torch.tensor(wi_id)
                wi = Vocab_list[wi_id]

                # POS constraint
                # get the tag set of wi; words missing from the dictionary are
                # tagged on their own
                if wi in word_tag_dict:
                    wi_tag = word_tag_dict[wi]
                else:
                    wi_tag = set()
                    wi_tag.add(get_part_of_speech(wi))
                # check w0_tag and wi_tag, if not same, skip to next word
                if w0_tag not in wi_tag:
                    continue
                # check if the tag changed after flip. if changed, skip
                tag = get_wi_tag(sent_doc,w0_place,wi)
                if tag != w0_tag:
                    continue
                # end of the POS constraint

                wi_embed = Text_CNN.get_word_embedding(wi_index)
                # cos_sim constraint
                if torch.cosine_similarity(w0_embed,wi_embed,dim=0)<0.3:
                    continue
                # end of cos_sim constraint
                # up to now the wi is qualified, then we can record the best wi
                wi_loss_estimate = torch.dot(w0_gradient,wi_embed)
                if wi_loss_estimate > best_wi['wi_loss_estimate']:
                    best_wi['wi_id'] = wi_id
                    best_wi['wi_embed'] = wi_embed
                    best_wi['wi_loss_estimate'] = wi_loss_estimate
            # now we get best wi for w0

            # Identity test: `== None` on a tensor is a tensor comparison,
            # not a "was anything found" check.
            if best_wi['wi_embed'] is None:
                continue
            wi_embed = best_wi['wi_embed']
            embed_diff = wi_embed - w0_embed
            loss_estimate = torch.dot(w0_gradient,embed_diff)
            final_list.append({'w0':w0_info['w0'],'w0_place':w0_info['w0_place'],'wi_id':best_wi['wi_id'],
                               'wi':Vocab_list[best_wi['wi_id']],'loss_estimate':loss_estimate})

        # No position produced a replacement that passes the constraints:
        # stop flipping this document instead of failing with an IndexError
        # on the empty candidate list.
        if not final_list:
            print('no valid flip found, stop flipping this doc')
            break

        # then step 6.4: get best w0 and wi
        final_flip = heapq.nlargest(1,final_list,key=lambda s:s['loss_estimate'])[0]
        print(final_flip)
        w0_place = final_flip['w0_place']
        wi_id = final_flip['wi_id']
        wi = final_flip['wi']
        # step 6.5: replace w0 with wi (both the input ids and the word list are
        # modified in place)
        x[0][0][w0_place] = wi_id
        word_doc[w0_place] = wi
        # predict the class of new doc
        print('predict after hotflip:',Text_CNN.predict(x))
        # compute loss after this flip
        y_hat = Text_CNN.single_instance_forward(x)
        loss = loss_func(y_hat,y_tensor)
        print('loss after flip:',loss)
        
        
   
      

loss before flip: tensor(0.0004, grad_fn=<NllLossBackward>)
class before flip: NEG
docid: negR_800.txt
flip time: 1
{'w0': 'knockoffs', 'w0_place': 38, 'wi_id': 1543, 'wi': 'drugs', 'loss_estimate': tensor(0.0009, grad_fn=<DotBackward>)}
predict after hotflip: NEG
loss after flip: tensor(0.0040, grad_fn=<NllLossBackward>)
flip time: 2
{'w0': 'plot', 'w0_place': 366, 'wi_id': 877, 'wi': 'land', 'loss_estimate': tensor(0.0150, grad_fn=<DotBackward>)}
predict after hotflip: NEG
loss after flip: tensor(0.0119, grad_fn=<NllLossBackward>)
flip time: 3
{'w0': 'worse', 'w0_place': 529, 'wi_id': 39, 'wi': 'more', 'loss_estimate': tensor(0.0465, grad_fn=<DotBackward>)}
predict after hotflip: NEG
loss after flip: tensor(0.0268, grad_fn=<NllLossBackward>)
flip time: 4
{'w0': 'attempts', 'w0_place': 474, 'wi_id': 926, 'wi': 'calls', 'loss_estimate': tensor(0.0656, grad_fn=<DotBackward>)}
predict after hotflip: NEG
loss after flip: tensor(0.0923, grad_fn=<NllLossBackward>)
flip time: 5
{'w0': 'attem

{'w0': 'in', 'w0_place': 174, 'wi_id': 897, 'wi': 'throughout', 'loss_estimate': tensor(0.2400, grad_fn=<DotBackward>)}
predict after hotflip: POS
loss after flip: tensor(10.6522, grad_fn=<NllLossBackward>)
loss before flip: tensor(3.9696e-05, grad_fn=<NllLossBackward>)
class before flip: NEG
docid: negR_807.txt
flip time: 1
{'w0': 'clumsy', 'w0_place': 864, 'wi_id': 286, 'wi': 'little', 'loss_estimate': tensor(6.5800e-06, grad_fn=<DotBackward>)}
predict after hotflip: NEG
loss after flip: tensor(4.1365e-05, grad_fn=<NllLossBackward>)
flip time: 2
{'w0': 'bad', 'w0_place': 1414, 'wi_id': 235, 'wi': 'lot', 'loss_estimate': tensor(8.4178e-06, grad_fn=<DotBackward>)}
predict after hotflip: NEG
loss after flip: tensor(4.2199e-05, grad_fn=<NllLossBackward>)
flip time: 3
{'w0': 'mess', 'w0_place': 46, 'wi_id': 493, 'wi': 'thing', 'loss_estimate': tensor(8.1044e-06, grad_fn=<DotBackward>)}
predict after hotflip: NEG
loss after flip: tensor(4.7802e-05, grad_fn=<NllLossBackward>)
flip time: 4
{

{'w0': 'bad', 'w0_place': 564, 'wi_id': 267, 'wi': 'great', 'loss_estimate': tensor(0.0632, grad_fn=<DotBackward>)}
predict after hotflip: NEG
loss after flip: tensor(0.0372, grad_fn=<NllLossBackward>)
flip time: 5
{'w0': 'dull', 'w0_place': 155, 'wi_id': 1535, 'wi': 'nice', 'loss_estimate': tensor(0.1625, grad_fn=<DotBackward>)}
predict after hotflip: NEG
loss after flip: tensor(0.0432, grad_fn=<NllLossBackward>)
loss before flip: tensor(0.0132, grad_fn=<NllLossBackward>)
class before flip: NEG
docid: negR_814.txt
flip time: 1
{'w0': 'badly', 'w0_place': 183, 'wi_id': 112, 'wi': 'well', 'loss_estimate': tensor(0.0902, grad_fn=<DotBackward>)}
predict after hotflip: NEG
loss after flip: tensor(0.3543, grad_fn=<NllLossBackward>)
flip time: 2
{'w0': 'failed', 'w0_place': 279, 'wi_id': 583, 'wi': 'wanted', 'loss_estimate': tensor(3.1411, grad_fn=<DotBackward>)}
predict after hotflip: POS
loss after flip: tensor(1.6754, grad_fn=<NllLossBackward>)
flip time: 3
{'w0': 'plot', 'w0_place': 173,

In [29]:
# Debug check. NOTE: w0_embed and wi_embed are whatever the hotflip loop last
# left behind, so this cell only works after that loop has run.
print(w0_embed.shape, wi_embed.shape, sep='\n')
print(torch.cosine_similarity(w0_embed, wi_embed, dim=0))

torch.Size([300])
torch.Size([300])
tensor(0.0028, grad_fn=<DivBackward0>)


In [None]:
'''
finished
'''  