In [None]:
# Mount Google Drive at /content/gdrive so later cells can read and write files there.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
!pip install datasets
import gensim
import pandas as pd
import argparse
import numpy as np
from collections import Counter
from datasets import load_dataset
import os
import torch
import pickle
import re
import time
import copy
import math
from torch.utils.data import DataLoader, Dataset
import torch.optim as optimizer 
import torch.nn.functional as F
from torch import nn
from sklearn.metrics import accuracy_score

import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
# Plot styling: white grid, Times New Roman serif font, 1.2x font scale.
# `sns.set` re-applies seaborn's default style and font every time it is called, so all
# settings are passed in this single call; rcParams / set_style edits made before a later
# `sns.set(font_scale=...)` would be overwritten by it.
sns.set(
    style="whitegrid",
    font="serif",
    font_scale=1.2,
    rc={"font.serif": ["Times New Roman"]},
)


Collecting datasets
  Downloading datasets-1.16.1-py3-none-any.whl (298 kB)
[K     |████████████████████████████████| 298 kB 5.2 MB/s 
[?25hCollecting xxhash
  Downloading xxhash-2.0.2-cp37-cp37m-manylinux2010_x86_64.whl (243 kB)
[K     |████████████████████████████████| 243 kB 42.1 MB/s 
Collecting aiohttp
  Downloading aiohttp-3.8.1-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (1.1 MB)
[K     |████████████████████████████████| 1.1 MB 38.6 MB/s 
[?25hCollecting huggingface-hub<1.0.0,>=0.1.0
  Downloading huggingface_hub-0.2.1-py3-none-any.whl (61 kB)
[K     |████████████████████████████████| 61 kB 432 kB/s 
Collecting fsspec[http]>=2021.05.0
  Downloading fsspec-2021.11.1-py3-none-any.whl (132 kB)
[K     |████████████████████████████████| 132 kB 46.6 MB/s 
Collecting async-timeout<5.0,>=4.0.0a3
  Downloading async_timeout-4.0.1-py3-none-any.whl (5.7 kB)
Collecting yarl<2.0,>=1.0
  Downloading yarl-1.7.2-cp37-cp37m-manylinux_2_

In [None]:
# Google Drive folder used for the cached vocabulary / embedding files and saved checkpoints.
data_dir = '/content/gdrive/MyDrive/530proj_me'
def clean_text(w):
    """Normalise a statement before whitespace tokenisation.

    The text is lower-cased, the punctuation characters . , ' ! ? " ( ) * # : ;
    are deleted, and '-' and '/' are turned into spaces.
    """
    lowered = w.lower()
    stripped = re.sub(r"([.,'!?\"()*#:;])", '', lowered)
    dashes_split = stripped.replace('-', ' ')
    return dashes_split.replace('/', ' ')
def preprocessing(train=False, eval=False, test=False):
  """Load one split of the Hugging Face "liar" dataset.

  Exactly one flag is expected. If several are set, `test` wins over `eval`,
  which wins over `train`; only the selected split is loaded.

  Returns:
    Whatever `load_dataset("liar", split=...)` returns for the chosen split.

  Raises:
    ValueError: if no flag is set (previously this surfaced as an
      UnboundLocalError on the return statement).
  """
  # label col:  "pants-fire" : 0, "false" : 1, "barely-true" : 2, "half-true" : 3, "mostly-true" : 4, "true" : 5
  if test:
    split = 'test'
  elif eval:
    split = 'validation'
  elif train:
    split = 'train'
  else:
    raise ValueError("preprocessing() needs one of train=True, eval=True or test=True")
  return load_dataset('liar', split=split)

def get_word2vec_embedding(statements, data_dir,
                           w2v_path='/content/gdrive/MyDrive/530project/GoogleNews-vectors-negative300.bin.gz'):
  """Build (or load from cache) the vocabulary and its word2vec embedding matrix.

  Args:
    statements: iterable of raw statement strings used to build the vocabulary.
    data_dir: directory holding the cache files `token_to_ix.pkl` and `train_w2v.npy`.
    w2v_path: binary word2vec file read with gensim when no cache exists.

  Returns:
    (token2ix, pretrained_emb): a dict mapping token -> row index ('PAD' is 0,
    'UNK' is 1) and a NumPy array of shape (len(token2ix), 300). Both the cached
    and the freshly built path return a NumPy array, so callers can always use
    `torch.from_numpy` on the result.
  """
  token_file = os.path.join(data_dir,'token_to_ix.pkl')
  w2v_file = os.path.join(data_dir,'train_w2v.npy')

  if os.path.exists(w2v_file) and os.path.exists(token_file):
    print("Loading train language files")
    # NOTE: pickle can execute arbitrary code on load; only point data_dir at trusted caches.
    with open(token_file, "rb") as fh:
      token2ix = pickle.load(fh)
    return token2ix, np.load(w2v_file)

  # Index tokens in first-seen order after the two reserved entries.
  token2ix = {'PAD': 0, 'UNK': 1}
  for s in statements:
    for word in clean_text(s).split():
      if word not in token2ix:
        token2ix[word] = len(token2ix)

  w2vmodel = gensim.models.KeyedVectors.load_word2vec_format(w2v_path, binary=True)
  # Rows for tokens missing from word2vec keep their random normal initialisation.
  pretrained_emb = torch.randn([len(token2ix),300])
  for word, ix in token2ix.items():
    if word in w2vmodel:
      pretrained_emb[ix, :] = torch.from_numpy(w2vmodel[word])

  pretrained_emb = pretrained_emb.numpy()
  np.save(w2v_file, pretrained_emb)
  with open(token_file, "wb") as fh:
    pickle.dump(token2ix, fh)
  return token2ix, pretrained_emb

def embed_text(x, max_len, token2ix):
  """Convert a statement into a fixed-length vector of token ids.

  Args:
    x: raw statement text; it is normalised with `clean_text` and split on whitespace.
    max_len: output length. Longer statements are truncated, shorter ones are
      right-padded with 0.
    token2ix: dict mapping token -> id. Tokens not in the dict get id 1, which
      is the 'UNK' entry in vocabularies built by `get_word2vec_embedding`.

  Returns:
    np.ndarray of dtype int64 and shape (max_len,).
  """
  ques_ix = np.zeros(max_len, np.int64)
  # Slicing first also covers max_len == 0, where the index loop used to raise IndexError.
  tokens = clean_text(x).split()[:max_len]
  for ix, word in enumerate(tokens):
    ques_ix[ix] = token2ix.get(word, 1)
  return ques_ix
def category_from_output(output):
  """Return, for each row of `output`, the index of its largest score as a Python int."""
  # topk(1) gives (values, indices); take the single index of each row.
  return [row.topk(1)[1][0].item() for row in output]

In [None]:
# Run configuration. LEARNING_RATE and BATCH_SIZE are re-assigned by later cells.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
EMBEDDING_DIM = 300
BATCH_SIZE = 64
LEARNING_RATE = 0.005
EPOCH = 11

# Vocabulary and embedding matrix are built from the training statements only.
train_dataset = pd.DataFrame(preprocessing(train=True))
statements = train_dataset['statement']
token2ix, pretrained_emb = get_word2vec_embedding(statements, data_dir)
print(pretrained_emb.shape) # (len(vocab), embedding_dim)
# Sequence length = 90th percentile of whitespace-token counts of the raw statements.
# NOTE(review): counts use the raw text, while embed_text tokenises after clean_text
# (which turns '-' and '/' into spaces), so the two token counts can differ.
lengths = [len(x.split()) for x in statements]
max_len = int(np.percentile(lengths,90))

# Every split is encoded with the training vocabulary and the same max_len.
train_dataset['embedded'] = train_dataset['statement'].apply(lambda x: embed_text(x, max_len, token2ix))
dev_dataset = pd.DataFrame(preprocessing(eval=True))
dev_dataset['embedded'] = dev_dataset['statement'].apply(lambda x: embed_text(x, max_len, token2ix))
test_dataset = pd.DataFrame(preprocessing(test=True))
test_dataset['embedded'] = test_dataset['statement'].apply(lambda x: embed_text(x, max_len, token2ix))

Downloading:   0%|          | 0.00/2.33k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.68k [00:00<?, ?B/s]

Using custom data configuration default


Downloading and preparing dataset liar/default (download: 989.82 KiB, generated: 3.26 MiB, post-processed: Unknown size, total: 4.22 MiB) to /root/.cache/huggingface/datasets/liar/default/1.0.0/479463e757b7991eed50ffa7504d7788d6218631a484442e2098dabbf3b44514...


Downloading:   0%|          | 0.00/1.01M [00:00<?, ?B/s]

0 examples [00:00, ? examples/s]

0 examples [00:00, ? examples/s]

0 examples [00:00, ? examples/s]

Dataset liar downloaded and prepared to /root/.cache/huggingface/datasets/liar/default/1.0.0/479463e757b7991eed50ffa7504d7788d6218631a484442e2098dabbf3b44514. Subsequent calls will reuse this data.
Loading train language files
(12969, 300)


Using custom data configuration default
Reusing dataset liar (/root/.cache/huggingface/datasets/liar/default/1.0.0/479463e757b7991eed50ffa7504d7788d6218631a484442e2098dabbf3b44514)
Using custom data configuration default
Reusing dataset liar (/root/.cache/huggingface/datasets/liar/default/1.0.0/479463e757b7991eed50ffa7504d7788d6218631a484442e2098dabbf3b44514)


In [None]:
def process_col(current_dataset, train = False, col_cnts = None, col = 'speaker', count_threshold = 60):
  """One-hot encode a metadata column, bucketing infrequent values together.

  Args:
    current_dataset: DataFrame containing `col`. It is not modified.
    train: when True, the value -> index mapping is learned from this frame;
      otherwise it is read from `col_cnts[col]`.
    col_cnts: dict of column name -> mapping learned on the training split
      (required when `train` is False).
    col: name of the column to encode.
    count_threshold: in train mode, only values occurring more than this many
      times get their own index.

  Returns:
    (frame, names, col_cnt) where `frame` is a new DataFrame with an integer
    column `col + '_'` plus one indicator column per entry of `names`, `names`
    lists the indicator column names, and `col_cnt` is the value -> index mapping.
    Values outside the mapping share the extra index `len(col_cnt)`.
  """
  if train:
    # most_common() sorts by descending count and keeps first-seen order for ties.
    frequent = [value for value, n in Counter(current_dataset[col]).most_common() if n > count_threshold]
    col_cnt = {value: idx for idx, value in enumerate(frequent)}
  else:
    col_cnt = col_cnts[col]

  other_ix = len(col_cnt)
  encoded = current_dataset[col].map(lambda value: col_cnt.get(value, other_ix))

  # Always emit the full set of indicator columns: a split that happens to lack
  # some category would otherwise produce fewer columns than the training split
  # and break column selection by the training names.
  names = ['%s_%d' % (col, ix) for ix in range(other_ix + 1)]
  dummies = pd.get_dummies(encoded, prefix=col, dtype='uint8')
  dummies = dummies.reindex(columns=names, fill_value=0).astype('uint8')

  # assign() returns a new frame, so the caller's DataFrame is left untouched.
  result = pd.concat((current_dataset.assign(**{col + '_': encoded}), dummies), axis = 1)
  return result, names, col_cnt

def process_metadata(current_dataset, meta_cols, train = False, col_cnts = None):
  """Apply `process_col` to every column in `meta_cols`, one after another.

  In train mode a fresh dict of per-column mappings is built and returned;
  otherwise the supplied `col_cnts` is passed through to `process_col` unchanged.

  Returns:
    (frame, dummy_name, col_cnts): the frame returned by the last `process_col`
    call, the concatenated indicator column names, and the per-column mappings.
  """
  if train:
    col_cnts = {}
  dummy_name = []
  for col in meta_cols:
    current_dataset, names, mapping = process_col(
        current_dataset, train = train, col_cnts = col_cnts, col = col)
    dummy_name.extend(names)
    if train:
      col_cnts[col] = mapping
  return current_dataset, dummy_name, col_cnts
  
# Metadata columns to one-hot encode. The mappings are learned on the training split
# and reused for dev/test through col_cnts.
meta_cols = ['subject','speaker','job_title','state_info','party_affiliation','context']
train_dataset_meta, dummy_name, col_cnts = process_metadata(train_dataset, meta_cols, train = True, col_cnts = None)
dev_dataset_meta, _, _ = process_metadata(dev_dataset,meta_cols, train = False, col_cnts = col_cnts)
test_dataset_meta, _, _ = process_metadata(test_dataset,meta_cols, train = False, col_cnts = col_cnts)

In [None]:
# Inspect the indicator column names; only the last expression (their count) is displayed.
dummy_name
len(dummy_name)

109

In [None]:
class liar_dataset(Dataset):
  """Dataset yielding (token ids, label, metadata indicators) for one statement.

  `dst` must provide the keys 'embedded' and 'label' and be indexable by the
  list of column names `dummy_name`.
  """
  def __init__(self, dst, dummy_name):
    self.embedded, self.label, self.meta = (
        np.array(dst['embedded']),
        np.array(dst['label']),
        np.array(dst[dummy_name]),
    )

  def __getitem__(self, index):
    sample = (self.embedded[index], self.label[index], self.meta[index])
    return sample

  def __len__(self):
    # One label per example, so the label array defines the dataset size.
    return len(self.label)
BATCH_SIZE = 64
# Only the training loader is shuffled. Dev/test keep dataset order so that repeated
# evaluations iterate the examples identically (accuracy itself is order-independent).
train_dst = liar_dataset(train_dataset_meta, dummy_name)
train_data_iter = DataLoader(train_dst, batch_size=BATCH_SIZE, shuffle=True)
dev_dst = liar_dataset(dev_dataset_meta, dummy_name)
dev_data_iter = DataLoader(dev_dst, batch_size=BATCH_SIZE, shuffle=False)
test_dst = liar_dataset(test_dataset_meta, dummy_name)
test_data_iter = DataLoader(test_dst, batch_size=BATCH_SIZE, shuffle=False)

In [None]:
def train(epoch,train_data_iter,dev_data_iter,opt,criteon, net, device):
  """Train `net` and periodically evaluate it on the dev set.

  Args:
    epoch: number of passes over `train_data_iter`.
    train_data_iter, dev_data_iter: iterables of (text, label, meta) batches.
    opt: optimizer already bound to `net`'s parameters.
    criteon: loss called as `criteon(output, label)`.
    net: model called as `net(text, meta)`.
    device: device the batches are moved to.

  Returns:
    (train_losses, dev_losses, dev_acc_list, best_model). Losses/accuracy are
    recorded every 2 epochs; `best_model` is a deep copy of `net` taken at the
    evaluation with the highest dev accuracy.
  """
  def timeSince(since):
      elapsed = time.time() - since
      minutes = math.floor(elapsed / 60)
      return '%dm %ds' % (minutes, elapsed - minutes * 60)

  train_losses, dev_losses, dev_acc_list = [], [], []
  best_model, best_val_acc = None, float('-inf')
  cnt_step = 0
  current_loss = 0.0
  plot_every = 2
  dev_every = 2
  print('train len:',len(train_data_iter),'dev len:',len(dev_data_iter))
  # Report what is actually used for this run rather than notebook-level globals.
  print('learning_rate',opt.param_groups[0]['lr'],'n_iters',epoch, 'batch size', getattr(train_data_iter, 'batch_size', None), 'optim',type(opt).__name__, 'lr_scheduler',None, 'device',device)
  start = time.time()
  for e in range(epoch):
    print('Epoch', e)
    net.train()
    for text, label, meta in train_data_iter:
      text, label, meta = text.to(device), label.to(device), meta.to(device)
      output = net(text,meta)
      loss = criteon(output,label)
      opt.zero_grad()
      loss.backward()
      opt.step()
      # .item() detaches the value; summing the tensor itself keeps autograd history alive.
      current_loss += loss.item()
      cnt_step += 1
    if e==0:
      print(time.time()-start)
    if e % plot_every == 0:
      # Mean training loss over the steps since the previous report.
      tmp_loss = current_loss / max(cnt_step, 1)
      train_losses.append(tmp_loss)
      current_loss, cnt_step = 0.0, 0
      print('%d %d%% (%s) %.4f ' % (e, e / epoch * 100, timeSince(start), tmp_loss))
    if e % dev_every ==0:
      net.eval()
      eval_loss = 0.0
      y_pred, y_true = [], []
      cnt_eval_step = 0
      last_pred, last_true = [], []
      with torch.no_grad():  # no gradients are needed for evaluation
        for text, label, meta in dev_data_iter:
          text, label, meta = text.to(device), label.to(device), meta.to(device)
          output = net(text, meta)
          last_pred = category_from_output(output)
          last_true = label.tolist()
          eval_loss += criteon(output,label).item()
          cnt_eval_step += 1

          y_pred += last_pred
          y_true += last_true
      mean_eval_loss = eval_loss / max(cnt_eval_step, 1)
      dev_losses.append(mean_eval_loss)
      acc = accuracy_score(y_true, y_pred)
      dev_acc_list.append(acc)
      if acc>best_val_acc:
        best_val_acc = acc
        best_model = copy.deepcopy(net)
      # Also show the first predictions/labels of the last dev batch as a sanity check.
      print('%d %d%% (%s) %.4f %s %s %.4f' % (e, e / epoch * 100, timeSince(start), mean_eval_loss, last_pred[:4], last_true[:4], acc))
  print('best_val_acc',best_val_acc)
  return train_losses, dev_losses, dev_acc_list, best_model

In [None]:
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
def evaluate_p_r_f1_acc(y_pred, y_true, average='macro'):
  """Compute precision, recall, F1 and accuracy for predicted vs. true labels.

  Args:
    y_pred: predicted class labels.
    y_true: ground-truth class labels.
    average: averaging mode forwarded to the scikit-learn scorers; an explicit
      mode is required for multi-class labels such as the 6 LIAR classes.

  Returns:
    (precision, recall, fscore, acc)
  """
  # scikit-learn scorers take (y_true, y_pred); passing them reversed swaps precision and recall.
  precision = precision_score(y_true, y_pred, average=average, zero_division=0)
  recall = recall_score(y_true, y_pred, average=average, zero_division=0)
  fscore = f1_score(y_true, y_pred, average=average, zero_division=0)
  acc = accuracy_score(y_true, y_pred)
  return precision, recall, fscore, acc
def evaluate_model(model, data_iter):
  """Print the accuracy of `model` over all (text, label, meta) batches of `data_iter`.

  The model is switched to eval mode for the duration of the call (so dropout
  is disabled) and restored to its previous mode afterwards. Batches are moved
  to the device of the model's parameters.
  """
  was_training = model.training
  model.eval()
  first_param = next(model.parameters(), None)
  # Fall back to the notebook-level `device` only for parameter-free models.
  model_device = first_param.device if first_param is not None else device

  y_pred, y_true = [], []
  with torch.no_grad():
    for text, label, meta in data_iter:
      text, meta = text.to(model_device), meta.to(model_device)
      output = model(text, meta)
      y_pred += category_from_output(output)
      y_true += label.tolist()
  model.train(was_training)

  acc = accuracy_score(y_true, y_pred)
  print('acc: ', acc)
  

## CNN

In [None]:

class hybridCNN2(nn.Module):
    """Two stacked 1-D convolutions over word embeddings, followed by a linear classifier.

    Args:
        token_size: vocabulary size (number of embedding rows).
        pretrained_emb: array or tensor of shape (token_size, 300) copied into the
            embedding layer.
    """
    def __init__(self, token_size, pretrained_emb):
        super(hybridCNN2, self).__init__()
        num_class = 6
        dropout_rate = 0.5
        self.ksizes = [5,5,5]  # only the first two sizes are used by conv_unit
        print(dropout_rate,self.ksizes)
        self.embedding = nn.Embedding(
            num_embeddings=token_size,
            embedding_dim=300
        )
        # as_tensor accepts both NumPy arrays and tensors.
        self.embedding.weight.data.copy_(torch.as_tensor(pretrained_emb))
        self.conv_unit = nn.Sequential(
            nn.Conv1d(in_channels=300, out_channels=128, kernel_size=self.ksizes[0]),
            nn.Dropout(dropout_rate), nn.ReLU(),
            nn.Conv1d(in_channels=128, out_channels=128, kernel_size=self.ksizes[1]),
        )
        self.dropout = nn.Dropout(dropout_rate)
        self.fc = nn.Linear(128, num_class)

    def forward(self, x, meta):
        """Return class logits of shape [batch, 6]; `meta` is accepted but not used."""
        x = self.embedding(x)        # [batch, len, 300]
        x = torch.transpose(x,1,2)   # [batch, 300, len] as expected by Conv1d
        x = self.conv_unit(x)        # [batch, 128, len']
        x = self.dropout(x)
        # Classify from the last convolution position. No squeeze beforehand: when
        # len' == 1 a squeeze would drop the axis this index relies on.
        x = x[:,:,-1]                # [batch, 128]
        return self.fc(x)
    
# debug
# Smoke test: run a single training batch through a fresh hybridCNN2 and print the
# output shape. Note this rebinds the notebook-level `net` and `criteon`.
net = hybridCNN2(len(token2ix), pretrained_emb).to(device)
criteon = nn.CrossEntropyLoss().to(device)
for batch_idx, (text, label, meta) in enumerate(train_data_iter):
    text, label, meta = text.to(device), label.to(device), meta.to(device)
    output = net(text,meta)
    loss = criteon(output,label)
    print('1',output.shape)
    break

0.5 [5, 5, 5]
1 torch.Size([64, 6])


In [None]:
# Train hybridCNN2 for 11 epochs with Adam (lr 0.0002, weight decay 1e-4).
LEARNING_RATE = 0.0002
net = hybridCNN2(len(token2ix), pretrained_emb).to(device)
criteon = nn.CrossEntropyLoss().to(device)
opt = optimizer.Adam(net.parameters(), lr=LEARNING_RATE,weight_decay=1e-4)
train_losses, dev_losses, dev_acc_list, best_model = train(11,train_data_iter,dev_data_iter,opt,criteon, net, device)

0.3 [5, 5, 5]
train len: 161 dev len: 21
learning_rate 0.0002 n_iters 11 batch size 64 optim Adam lr_scheduler None device cpu
Epoch 0
19.760735511779785
0 0% (0m 19s) 1.7641 
0 0% (0m 20s) 1.7757 [1, 1, 1, 1] [5, 1, 5, 2] 0.2002
Epoch 1
Epoch 2
2 18% (0m 59s) 1.7569 
2 18% (1m 0s) 1.7706 [1, 1, 1, 0] [4, 4, 2, 5] 0.2298
Epoch 3
Epoch 4
4 36% (1m 39s) 1.7488 
4 36% (1m 39s) 1.7608 [0, 1, 0, 0] [3, 2, 2, 4] 0.2336
Epoch 5
Epoch 6
6 54% (2m 19s) 1.7364 
6 54% (2m 19s) 1.7705 [3, 0, 1, 0] [3, 3, 0, 5] 0.2251
Epoch 7
Epoch 8
8 72% (3m 4s) 1.7075 
8 72% (3m 4s) 1.7767 [0, 0, 0, 0] [4, 3, 0, 2] 0.2188
Epoch 9
Epoch 10
10 90% (3m 58s) 1.6424 
10 90% (3m 59s) 1.8344 [0, 0, 0, 0] [1, 4, 5, 1] 0.2126
best_val_acc 0.2336448598130841


In [None]:
# Accuracy of the best checkpoint from the training cell above on dev, then test.
evaluate_model(best_model, dev_data_iter)
evaluate_model(best_model, test_data_iter)

acc:  0.2336448598130841
acc:  0.2112236944660951


In [None]:
# torch.save(best_model.state_dict(), os.path.join(data_dir,'liar_CNN-acc2367-2182.pth'))

In [None]:
# Classical baselines fitted directly on the padded token-id vectors (no embeddings).
X_train, y_train = np.array([np.array(i) for i in train_dataset['embedded']]),np.array(train_dataset['label'])
X_dev, y_dev = np.array([np.array(i) for i in dev_dataset['embedded']]),np.array(dev_dataset['label'])
X_test, y_test = np.array([np.array(i) for i in test_dataset['embedded']]),np.array(test_dataset['label'])
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
models = {'logistic':LogisticRegression(),'svm':SVC(C=1,)}
for m in models:
  print(m)
  # `model` stays bound to the last fitted estimator after the loop ends.
  model = models[m]
  model.fit(X_train,y_train)
  y_pred = model.predict(X_dev)
  print('dev',accuracy_score(y_pred,y_dev))
  y_pred = model.predict(X_test)
  print('test',accuracy_score(y_pred,y_test))

logistic
dev 0.19626168224299065
test 0.21434138737334374
svm
dev 0.22741433021806853
test 0.1995323460639127


In [None]:
# Compare SVC kernels on the token-id features, reporting dev and test accuracy per kernel.
for ker in ['linear','poly','rbf','sigmoid']:
  print(ker)
  clf = SVC(kernel = ker)
  clf.fit(X_train, y_train)
  y_pred = clf.predict(X_dev)
  print('dev',accuracy_score(y_dev, y_pred))
  # Score the classifier fitted in this iteration, not the leftover `model` from an earlier cell.
  y_pred = clf.predict(X_test)
  print('test',accuracy_score(y_test, y_pred))

In [None]:
# Sweep SVC gamma, pairing each gamma with a C value from c_space.
gamma_space = [0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9]
c_space = [i/10 for i in range(5,15,2)]  # [0.5, 0.7, 0.9, 1.1, 1.3]
for i,g in enumerate(gamma_space):
  # Walks c_space from its last entry backwards; for i > 4 the negative index wraps around.
  c = c_space[4-i]
  print('g',g,'\tc',c)
  # The printed C is now actually passed to the classifier.
  clf = SVC(gamma = g, C = c)
  clf.fit(X_train, y_train)
  y_pred = clf.predict(X_dev)
  print('dev',accuracy_score(y_dev, y_pred))
  # Score the classifier fitted in this iteration, not the leftover `model` from an earlier cell.
  y_pred = clf.predict(X_test)
  print('test',accuracy_score(y_test, y_pred))

g 0.1 	c 1.3
dev 0.1985981308411215
test 0.1995323460639127
g 0.2 	c 1.1
dev 0.1985981308411215
test 0.1995323460639127
g 0.3 	c 0.9
dev 0.1985981308411215
test 0.1995323460639127
g 0.4 	c 0.7
dev 0.1985981308411215
test 0.1995323460639127
g 0.5 	c 0.5
dev 0.19314641744548286
test 0.1995323460639127


In [None]:
# bad strong baseline
class hybridCNN1(nn.Module):
    """Three parallel Conv1d + MaxPool1d branches over word embeddings with a linear classifier.

    Args:
        token_size: vocabulary size (number of embedding rows).
        pretrained_emb: array or tensor of shape (token_size, 300) copied into the
            embedding layer.
        hidden_dim, n_layers: size and depth of `self.rnn`.

    `conv_unit`, `rnn`, `meta_lstm` and `meta_lstm1` are constructed but not used by
    `forward`; they are kept so the module's state_dict keys stay the same.
    """
    def __init__(self, token_size, pretrained_emb, hidden_dim=32, n_layers=2):
        super(hybridCNN1, self).__init__()
        num_class = 6
        self.ksizes = [3,4,5]
        self.embedding = nn.Embedding(
            num_embeddings=token_size,
            embedding_dim=300
        )
        # as_tensor accepts both NumPy arrays and tensors.
        self.embedding.weight.data.copy_(torch.as_tensor(pretrained_emb))
        # The activation class is nn.ReLU; `nn.Relu` does not exist and raised AttributeError.
        self.conv_unit = nn.Sequential(
            nn.Conv1d(in_channels=300, out_channels=128, kernel_size=self.ksizes[0]),
            nn.Dropout(0.8), nn.ReLU(),
            nn.Conv1d(in_channels=128, out_channels=128, kernel_size=self.ksizes[1]),
            nn.Dropout(0.8), nn.ReLU(),
            nn.Conv1d(in_channels=128, out_channels=128, kernel_size=self.ksizes[2]),
            nn.Dropout(0.8), nn.ReLU(),
        )

        self.conv_unit1 = nn.Sequential(
            torch.nn.Conv1d(in_channels=300, out_channels=128, kernel_size=self.ksizes[0]),
            torch.nn.MaxPool1d(kernel_size=self.ksizes[0]),
        )
        self.conv_unit2 = nn.Sequential(
            torch.nn.Conv1d(in_channels=300, out_channels=128, kernel_size=self.ksizes[1]),
            torch.nn.MaxPool1d(kernel_size=self.ksizes[1]),
        )
        self.conv_unit3 = nn.Sequential(
            torch.nn.Conv1d(in_channels=300, out_channels=128, kernel_size=self.ksizes[2]),
            torch.nn.MaxPool1d(kernel_size=self.ksizes[2]),
        )
        # A single dropout layer; 0.8 is the rate that was in effect after the
        # earlier duplicate assignment.
        self.dropout = nn.Dropout(0.8)
        self.fc = nn.Linear(128, num_class)

        self.hidden_dim = hidden_dim
        self.n_layers = n_layers
        self.rnn = nn.LSTM(300, hidden_dim, num_layers=n_layers, bidirectional=True, dropout=0.5)

        self.meta_lstm = nn.LSTM(input_size=300, hidden_size = 5, num_layers = 2,
                                 batch_first = True, bidirectional = True)
        self.meta_lstm1 = nn.LSTM(input_size=35, hidden_size = 16, num_layers = 2,
                                 batch_first = True, bidirectional = True)

    def forward(self, x, meta):
        """Return class logits of shape [batch, 6]; `meta` is accepted but not used."""
        x = self.embedding(x)        # [batch, len, 300]
        x = torch.transpose(x,1,2)   # [batch, 300, len] as expected by Conv1d
        x1 = self.conv_unit1(x)      # [batch, 128, len_a]
        x2 = self.conv_unit2(x)      # [batch, 128, len_b]
        x3 = self.conv_unit3(x)      # [batch, 128, len_c]
        x = torch.cat((x1,x2,x3), dim=2)  # branches concatenated along the length axis
        x = self.dropout(x)
        # NOTE(review): only the last concatenated position is classified, and that
        # position comes from conv_unit3 (concatenated last) - confirm this is intended.
        x = x[:,:,-1]                # [batch, 128]
        return self.fc(x)
    
# debug
# Smoke test: run a single training batch through a fresh hybridCNN1 and print the
# output shape. Note this rebinds the notebook-level `net` and `criteon`.
net = hybridCNN1(len(token2ix), pretrained_emb).to(device)
criteon = nn.CrossEntropyLoss().to(device)
for batch_idx, (text, label, meta) in enumerate(train_data_iter):
    text, label, meta = text.to(device), label.to(device), meta.to(device)
    output = net(text,meta)
    loss = criteon(output,label)
    print('1',output.shape)
    break

1 torch.Size([64, 6])


In [None]:
# Learning rate passed to Adam in the hybridCNN1 training cell below.
LEARNING_RATE = 0.0005

In [None]:
# Train hybridCNN1 for 11 epochs with Adam at the current LEARNING_RATE.
net = hybridCNN1(len(token2ix), pretrained_emb).to(device)
criteon = nn.CrossEntropyLoss().to(device)
opt = optimizer.Adam(net.parameters(), lr=LEARNING_RATE)
train_losses, dev_losses, dev_acc_list, best_model = train(11,train_data_iter,dev_data_iter,opt,criteon, net, device)

train len: 161 dev len: 21
learning_rate 0.0005 n_iters 11 batch size 64 optim Adam lr_scheduler None device cpu
Epoch 0
29.711714267730713
0 0% (0m 29s) 1.8109 
0 0% (0m 30s) 1.7576 [1, 2, 1, 2] [1, 3, 3, 1] 0.2173
Epoch 1
Epoch 2
2 18% (1m 28s) 1.7730 
2 18% (1m 29s) 1.7641 [3, 3, 3, 0] [0, 2, 4, 4] 0.2274
Epoch 3
Epoch 4
4 36% (2m 30s) 1.7435 
4 36% (2m 31s) 1.7594 [0, 1, 0, 1] [2, 2, 2, 0] 0.2290
Epoch 5
Epoch 6
6 54% (3m 28s) 1.7203 
6 54% (3m 29s) 1.7578 [1, 1, 3, 1] [2, 2, 4, 4] 0.2220
Epoch 7
Epoch 8
8 72% (4m 26s) 1.6885 
8 72% (4m 27s) 1.7577 [0, 2, 0, 0] [4, 1, 2, 2] 0.2336
Epoch 9
Epoch 10
10 90% (5m 24s) 1.6257 
10 90% (5m 25s) 1.7691 [1, 0, 2, 2] [5, 0, 2, 2] 0.2235
best_val_acc 0.2336448598130841


In [None]:
# Accuracy of the best hybridCNN1 checkpoint on dev, then test.
evaluate_model(best_model, dev_data_iter)
evaluate_model(best_model, test_data_iter)

acc:  0.2336448598130841
acc:  0.20420888542478566


In [None]:
# Persist only the weights (state_dict) of the best model to the project folder.
torch.save(best_model.state_dict(), os.path.join(data_dir,'liar_CNN-acc2328-2135.pth'))

In [None]:
# Rebuild hybridCNN1, restore saved weights and re-evaluate.
# NOTE(review): this loads 'liar_CNN-acc2336.pth', which is not the file name written
# by the save cell above - confirm the intended checkpoint.
best_model = hybridCNN1(len(token2ix), pretrained_emb).to(device)
criteon = nn.CrossEntropyLoss().to(device)
model_path = os.path.join(data_dir, 'liar_CNN-acc2336.pth')
# map_location lets a checkpoint saved on GPU be restored on a CPU-only runtime.
best_model.load_state_dict(torch.load(model_path, map_location=device))
# A freshly constructed module is in training mode; switch to eval so dropout is off.
best_model.eval()
evaluate_model(best_model, dev_data_iter)
evaluate_model(best_model, test_data_iter)

acc:  0.21728971962616822


## Previous

In [None]:
# trying
class hybrid_BiLSTM_Attention(nn.Module):
    """BiLSTM with additive attention over the statement, fused with metadata features.

    Args:
        token_size: vocabulary size (number of embedding rows).
        pretrained_emb: array or tensor of shape (token_size, 300) copied into the
            embedding layer.
        hidden_dim: LSTM hidden size per direction.
        n_layers: number of stacked LSTM layers.
        meta_dim: width of the metadata vector. When omitted, the metadata layer
            infers its input width on the first forward pass (nn.LazyLinear);
            passing it explicitly is preferred.

    The metadata projection and the classifier are registered submodules, so they
    are trained, moved by `.to(device)` and saved in the state_dict. Building them
    inside `forward` produced new random, untrained, CPU-only layers on every call.
    """
    def __init__(self, token_size, pretrained_emb, hidden_dim=128, n_layers=2, meta_dim=None):
        super(hybrid_BiLSTM_Attention, self).__init__()
        self.embedding = nn.Embedding(
            num_embeddings=token_size,
            embedding_dim=300
        )
        print('hidden_dim',hidden_dim)
        self.embedding.weight.data.copy_(torch.as_tensor(pretrained_emb).type(torch.float))

        self.hidden_dim = hidden_dim
        self.n_layers = n_layers
        self.rnn = nn.LSTM(300, hidden_dim, num_layers=n_layers, bidirectional=True, dropout=0.5)
        self.dropout = nn.Dropout(0.5)

        # Attention parameters: projection matrix and scoring vector.
        self.w_omega = nn.Parameter(torch.Tensor(hidden_dim * 2, hidden_dim * 2))
        self.u_omega = nn.Parameter(torch.Tensor(hidden_dim * 2, 1))
        nn.init.uniform_(self.w_omega, -0.1, 0.1)
        nn.init.uniform_(self.u_omega, -0.1, 0.1)

        meta_hidden = 128
        if meta_dim is not None:
            self.meta_fc = nn.Linear(meta_dim, meta_hidden)
        else:
            self.meta_fc = nn.LazyLinear(meta_hidden)
        # Classifier over [attention context ; projected metadata].
        self.fc = nn.Linear(hidden_dim * 2 + meta_hidden, 6)

    def attention_net(self, x):       #x:[batch, seq_len, hidden_dim*2]
        u = torch.tanh(torch.matmul(x, self.w_omega))         #[batch, seq_len, hidden_dim*2]
        att = torch.matmul(u, self.u_omega)                   #[batch, seq_len, 1]
        att_score = F.softmax(att, dim=1)                     # weights over seq_len
        scored_x = x * att_score                              #[batch, seq_len, hidden_dim*2]
        context = torch.sum(scored_x, dim=1)                  #[batch, hidden_dim*2]
        return context

    def forward(self, x, meta):
        """Return class logits [batch, 6] for token ids `x` [batch, seq_len] and `meta` [batch, meta_dim]."""
        embedding = self.dropout(self.embedding(x))       #[batch, seq_len, 300]
        embedding = torch.transpose(embedding,0,1)        #[seq_len, batch, 300] (LSTM is not batch_first)
        output, _ = self.rnn(embedding)                   #[seq_len, batch, hidden_dim*2]
        output = output.permute(1, 0, 2)                  #[batch, seq_len, hidden_dim*2]
        attn_output = self.attention_net(output)          #[batch, hidden_dim*2]

        meta = self.meta_fc(meta.float())                 #[batch, 128]
        fused = torch.cat((attn_output,meta), dim=1)
        return self.fc(fused)
# bilstm_hidden = 32
# Smoke test: run a single training batch through a fresh hybrid_BiLSTM_Attention.
bilstm = hybrid_BiLSTM_Attention(len(token2ix), pretrained_emb).to(device)
criteon = nn.CrossEntropyLoss().to(device)
for batch_idx, (text, label, meta) in enumerate(train_data_iter):
    text, label, meta = text.to(device), label.to(device), meta.to(device)
    # Feed the token ids as the text input, matching how train() calls the model;
    # the metadata indicators were previously passed in their place.
    output = bilstm(text,meta)
    loss = criteon(output,label)
    break

hidden_dim 128


In [None]:
# Learning rate passed to Adam in the hybrid_BiLSTM_Attention training cell below.
LEARNING_RATE = 0.001

In [None]:
# Train hybrid_BiLSTM_Attention for a single epoch with Adam at the current LEARNING_RATE.
net = hybrid_BiLSTM_Attention(len(token2ix), pretrained_emb).to(device)
criteon = nn.CrossEntropyLoss().to(device)
opt = optimizer.Adam(net.parameters(), lr=LEARNING_RATE)
train_losses, dev_losses, dev_acc_list, best_model = train(1,train_data_iter,dev_data_iter,opt,criteon, net, device)

hidden_dim 128
train len: 161 dev len: 21
learning_rate 0.001 n_iters 1 batch size 64 optim Adam lr_scheduler None device cpu
Epoch 0
84.79648303985596
0 0% (1m 24s) 3.4699 
0 0% (1m 28s) 3.4764 [20, 20, 20, 20] [4, 0, 4, 5] 0.0413
best_val_acc 0.04127725856697819


## BiLSTM

In [None]:
class BiLSTM_Attention(nn.Module):
    """Bidirectional LSTM with additive attention over the statement tokens.

    Args:
        token_size: vocabulary size (number of embedding rows).
        pretrained_emb: array or tensor of shape (token_size, 300) copied into the
            embedding layer.
        hidden_dim: LSTM hidden size per direction.
        n_layers: number of stacked LSTM layers.
    """
    def __init__(self, token_size, pretrained_emb, hidden_dim=64, n_layers=2):
        super(BiLSTM_Attention, self).__init__()
        self.embedding = nn.Embedding(
            num_embeddings=token_size,
            embedding_dim=300
        )
        print('hidden_dim',hidden_dim)
        # as_tensor accepts both NumPy arrays and tensors.
        self.embedding.weight.data.copy_(torch.as_tensor(pretrained_emb).type(torch.float))

        self.hidden_dim = hidden_dim
        self.n_layers = n_layers
        self.rnn = nn.LSTM(300, hidden_dim, num_layers=n_layers, bidirectional=True, dropout=0.5)
        self.dropout = nn.Dropout(0.5)

        # Attention parameters: projection matrix and scoring vector.
        self.w_omega = nn.Parameter(torch.Tensor(hidden_dim * 2, hidden_dim * 2))
        self.u_omega = nn.Parameter(torch.Tensor(hidden_dim * 2, 1))

        nn.init.uniform_(self.w_omega, -0.1, 0.1)
        nn.init.uniform_(self.u_omega, -0.1, 0.1)
        self.fc = nn.Linear(hidden_dim * 2, 6)

    def attention_net(self, x):       #x:[batch, seq_len, hidden_dim*2]
        u = torch.tanh(torch.matmul(x, self.w_omega))         #[batch, seq_len, hidden_dim*2]
        att = torch.matmul(u, self.u_omega)                   #[batch, seq_len, 1]
        att_score = F.softmax(att, dim=1)                     # weights over seq_len
        scored_x = x * att_score                              #[batch, seq_len, hidden_dim*2]
        context = torch.sum(scored_x, dim=1)                  #[batch, hidden_dim*2]
        return context

    def forward(self, x, meta):
        """Return class logits [batch, 6] for token ids `x` [batch, seq_len]; `meta` is not used."""
        embedding = self.dropout(self.embedding(x))       #[batch, seq_len, 300]
        embedding = torch.transpose(embedding,0,1)        #[seq_len, batch, 300] (LSTM is not batch_first)
        output, _ = self.rnn(embedding)                   #[seq_len, batch, hidden_dim*2]
        output = output.permute(1, 0, 2)                  #[batch, seq_len, hidden_dim*2]
        attn_output = self.attention_net(output)
        # The metadata-fusion statements that followed this return were unreachable
        # and have been removed.
        return self.fc(attn_output)
# bilstm_hidden = 32
# Smoke test: run a single training batch through a fresh BiLSTM_Attention.
bilstm = BiLSTM_Attention(len(token2ix), pretrained_emb).to(device)
criteon = nn.CrossEntropyLoss().to(device)
for batch_idx, (text, label, meta) in enumerate(train_data_iter):
    text, label, meta = text.to(device), label.to(device), meta.to(device)
    # Feed the token ids as the text input, matching how train() calls the model;
    # the metadata indicators were previously passed in their place. The unused
    # second model that was built inside the loop has been dropped.
    output = bilstm(text,meta)
    loss = criteon(output,label)
    break

hidden_dim 64
hidden_dim 64


In [None]:
# Candidate learning rate for the BiLSTM_Attention run; whichever LEARNING_RATE cell
# was executed last before the training cell is the one that takes effect.
LEARNING_RATE = 0.001

In [None]:
# Alternative, much smaller learning rate; running this cell overrides the value above.
LEARNING_RATE = 2e-5 # 0.00002

In [None]:
# Train BiLSTM_Attention for 11 epochs with Adam at the current LEARNING_RATE.
net = BiLSTM_Attention(len(token2ix), pretrained_emb).to(device)
criteon = nn.CrossEntropyLoss().to(device)
opt = optimizer.Adam(net.parameters(), lr=LEARNING_RATE)
train_losses, dev_losses, dev_acc_list, best_model = train(11,train_data_iter,dev_data_iter,opt,criteon, net, device)

hidden_dim 64
train len: 161 dev len: 21
learning_rate 0.001 n_iters 11 batch size 64 optim Adam lr_scheduler None device cpu
Epoch 0
28.021397352218628
0 0% (0m 28s) 1.7459 
0 0% (0m 29s) 1.7246 [1, 1, 1, 0] [3, 4, 5, 4] 0.2469
Epoch 1
Epoch 2
2 18% (1m 20s) 1.6521 
2 18% (1m 21s) 1.7357 [0, 2, 2, 0] [0, 3, 1, 2] 0.2671
Epoch 3
Epoch 4
4 36% (2m 11s) 1.4083 
4 36% (2m 12s) 1.9078 [1, 5, 1, 0] [1, 4, 4, 5] 0.2570
Epoch 5
Epoch 6
6 54% (3m 3s) 1.1335 
6 54% (3m 4s) 2.2464 [3, 0, 2, 3] [4, 1, 2, 5] 0.2414
Epoch 7
Epoch 8
8 72% (3m 57s) 0.9084 
8 72% (3m 58s) 2.5837 [2, 3, 5, 4] [1, 2, 0, 1] 0.2422
Epoch 9
Epoch 10
10 90% (4m 51s) 0.7392 
10 90% (4m 52s) 2.7517 [2, 0, 0, 1] [2, 4, 3, 4] 0.2360
best_val_acc 0.26713395638629284


In [None]:
# Dev accuracy (left) and train/dev loss (right) from the most recent train() call.
# x and y are passed by keyword: current seaborn releases reject positional data vectors,
# and older ones only warned, which the blanket warnings filter used to hide.
fig, (ax1, ax2) = plt.subplots(1,2,figsize = (15,5))
# train() records one point every 2 epochs, so the x positions are epoch numbers 0, 2, 4, ...
x_axis = [i*2 for i in range(len(dev_losses))]
sns.lineplot(x=x_axis, y=dev_acc_list, ax = ax1)
ax1.set_ylabel('Accuracy')
ax1.set_xlabel("Number of Iterations")
sns.lineplot(x=x_axis, y=train_losses, ax = ax2, label = 'train loss')
sns.lineplot(x=x_axis, y=dev_losses, ax = ax2, label = 'dev loss')
ax2.set_ylabel("Loss")
ax2.set_xlabel("Number of Iterations")
ax2.legend()
plt.tight_layout()
plt.show()

In [None]:
# torch.save(best_model.state_dict(), os.path.join(data_dir,'liar_biLSTM-acc2671.pth'))

In [None]:
# Dev accuracy of the best BiLSTM_Attention checkpoint.
evaluate_model(best_model, dev_data_iter)

acc:  0.26713395638629284


In [None]:
# Test accuracy of the best BiLSTM_Attention checkpoint.
evaluate_model(best_model, test_data_iter)

acc:  0.2720187061574435


In [None]:
class hybridCNN(nn.Module):
    """Three parallel Conv1d + MaxPool1d branches (kernel size 3) with a linear classifier.

    Args:
        token_size: vocabulary size (number of embedding rows).
        pretrained_emb: array or tensor of shape (token_size, 300) copied into the
            embedding layer.
        hidden_dim, n_layers: size and depth of `self.rnn`.

    `rnn`, `w_omega`, `u_omega` and `attention_net` are not used by `forward`; they
    are kept so the module's state_dict keys and public methods stay the same.
    """
    def __init__(self, token_size, pretrained_emb, hidden_dim=32, n_layers=2):
        super(hybridCNN, self).__init__()
        num_class = 6
        ksizes = [3,3,3]
        self.embedding = nn.Embedding(
            num_embeddings=token_size,
            embedding_dim=300
        )
        # as_tensor accepts both NumPy arrays and tensors.
        self.embedding.weight.data.copy_(torch.as_tensor(pretrained_emb))
        self.conv_unit1 = nn.Sequential(
            torch.nn.Conv1d(in_channels=300, out_channels=128, kernel_size=ksizes[0]),
            torch.nn.MaxPool1d(kernel_size=ksizes[0]),
        )
        self.conv_unit2 = nn.Sequential(
            torch.nn.Conv1d(in_channels=300, out_channels=128, kernel_size=ksizes[1]),
            torch.nn.MaxPool1d(kernel_size=ksizes[1]),
        )
        self.conv_unit3 = nn.Sequential(
            torch.nn.Conv1d(in_channels=300, out_channels=128, kernel_size=ksizes[2]),
            torch.nn.MaxPool1d(kernel_size=ksizes[2]),
        )
        # A single dropout layer; 0.8 is the rate that was in effect after the
        # earlier duplicate assignment.
        self.dropout = nn.Dropout(0.8)
        self.fc = nn.Linear(128, num_class)

        self.hidden_dim = hidden_dim
        self.n_layers = n_layers
        self.rnn = nn.LSTM(300, hidden_dim, num_layers=n_layers, bidirectional=True, dropout=0.5)

        self.w_omega = nn.Parameter(torch.Tensor(hidden_dim * 2, hidden_dim * 2))
        self.u_omega = nn.Parameter(torch.Tensor(hidden_dim * 2, 1))

        nn.init.uniform_(self.w_omega, -0.1, 0.1)
        nn.init.uniform_(self.u_omega, -0.1, 0.1)

    def attention_net(self, x):       #x:[batch, seq_len, hidden_dim*2]
        u = torch.tanh(torch.matmul(x, self.w_omega))         #[batch, seq_len, hidden_dim*2]
        att = torch.matmul(u, self.u_omega)                   #[batch, seq_len, 1]
        att_score = F.softmax(att, dim=1)                     # weights over seq_len
        scored_x = x * att_score                              #[batch, seq_len, hidden_dim*2]
        context = torch.sum(scored_x, dim=1)                  #[batch, hidden_dim*2]
        return context

    def forward(self, x, meta):
        """Return class logits of shape [batch, 6]; `meta` is accepted but not used."""
        x = self.embedding(x)        # [batch, len, 300]
        x = torch.transpose(x,1,2)   # [batch, 300, len] as expected by Conv1d
        x1 = self.conv_unit1(x)      # [batch, 128, len_a]
        x2 = self.conv_unit2(x)      # [batch, 128, len_b]
        x3 = self.conv_unit3(x)      # [batch, 128, len_c]
        x = torch.cat((x1,x2,x3), dim=2)  # branches concatenated along the length axis
        x = self.dropout(x)
        # NOTE(review): only the last concatenated position is classified, and that
        # position comes from conv_unit3 (concatenated last) - confirm this is intended.
        x = x[:,:,-1]                # [batch, 128]
        # The metadata/attention statements that followed this return were unreachable
        # and have been removed together with the commented-out experiments.
        return self.fc(x)
    
# debug
# Smoke test: run a single training batch through a fresh hybridCNN and print the
# output shape. Note this rebinds the notebook-level `net` and `criteon`.
net = hybridCNN(len(token2ix), pretrained_emb).to(device)
criteon = nn.CrossEntropyLoss().to(device)
for batch_idx, (text, label, meta) in enumerate(train_data_iter):
    text, label, meta = text.to(device), label.to(device), meta.to(device)
    output = net(text,meta)
    loss = criteon(output,label)
    print('1',output.shape)
    break

1 torch.Size([64, 6])
