In [1]:
import pandas as pd
import torch
from torch import nn
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader, TensorDataset
import numpy as np
from torch.nn.utils.rnn import pad_sequence

In [2]:
from google.colab import drive
drive.mount('/content/drive')
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
path = '/content/drive/My Drive/Colab Notebooks/HWK4_testing/'

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
train = pd.read_table(path+'train',header=None,sep=' |\n', names=['index','word','tag'], doublequote = False, keep_default_na=False) 
dev = pd.read_table(path+'dev',header=None,sep=' |\n', names=['index','word','tag'], doublequote = False, keep_default_na=False) 
train['word'] = train['word'].astype(str)
dev['word'] = dev['word'].astype(str)

  return read_csv(**locals())


In [4]:
# ------------------------------Load Data------------------------------
# X
unique_words_list = sorted(list(train['word'].unique()))
num_unique_words = len(unique_words_list)
values = range(0,num_unique_words)
word_index_dict = dict(zip(unique_words_list, values))  

# lower_word_index_dict
lower_unique_words_list = (map(lambda x: x.lower(), word_index_dict))
lower_word_index_dict = dict(zip(lower_unique_words_list, values))  

# add unknown words
word_index_dict['<unk>'] = num_unique_words
word_index_dict['<unk_digit>'] = num_unique_words+1
word_index_dict['<unk_alnum>'] = num_unique_words+2

#word_index_dict


# embedding
embedding = nn.Embedding(num_unique_words+3,100)  #+2 for <unk>, <unk_digit>
torch.save(embedding.weight.data,path+'embedding_weight.pt')  #embedding.weight.data

In [5]:
def add_features(s):
  try:
    lookup_tensor = torch.tensor([word_index_dict[s]], dtype=torch.long)
    word_embed = embedding(lookup_tensor)
    temp = word_embed.tolist()
    feature_list = temp[0]
  except:
    #try:
    #  lookup_tensor = torch.tensor([lower_word_index_dict[s]], dtype=torch.long)
    #except:
    if s[0].isdigit():
      lookup_tensor = torch.tensor([word_index_dict['<unk_digit>']], dtype=torch.long)
    elif s.isalnum():
      lookup_tensor = torch.tensor([word_index_dict['<unk_alnum>']], dtype=torch.long)
    else:
      lookup_tensor = torch.tensor([word_index_dict['<unk>']], dtype=torch.long)
    word_embed = embedding(lookup_tensor)
    temp = word_embed.tolist()
    feature_list = temp[0]
    
    #feature_list = [1]*100
  return feature_list

train['all_features'] = train['word'].apply(add_features) 
dev['all_features'] = dev['word'].apply(add_features) 
train.head(3)

Unnamed: 0,index,word,tag,all_features
0,1,EU,B-ORG,"[-1.6467961072921753, -0.2072284072637558, 1.9..."
1,2,rejects,O,"[0.6387030482292175, 1.2546366453170776, 0.266..."
2,3,German,B-MISC,"[0.5691825151443481, 1.7315154075622559, 0.789..."


In [6]:
def getXY(index_l,feature_l):
  all_sentences = []
  current_sentence = []
  for i in range(0,len(index_l)):
    current_index = index_l[i]
    current_feature = feature_l[i]
    if current_index==1:
      all_sentences.append(torch.tensor(current_sentence)) # torch.tensor(  np.array(
      current_sentence = [current_feature]
    else:
      current_sentence.append(current_feature)
  all_sentences.append(torch.tensor(current_sentence)) # torch.tensor(  np.array(
  all_sentences = all_sentences[1:]
  return all_sentences

In [7]:
# X
train_index_list = train["index"].tolist()
train_feature_list = train["all_features"].tolist()
dev_index_list = dev["index"].tolist()
dev_feature_list = dev["all_features"].tolist()
X_train = getXY(train_index_list,train_feature_list)
X_dev = getXY(dev_index_list,dev_feature_list)

In [8]:
# Y
Y_train_df = pd.get_dummies(train.tag, prefix='y')
Y_dev_df = pd.get_dummies(dev.tag, prefix='y')
#Y_train_df['y_paddding'] = 0
#Y_dev_df['y_paddding'] = 0
Y_train_df['target']= Y_train_df.values.tolist()
Y_dev_df['target']= Y_dev_df.values.tolist()
train_target_list = Y_train_df["target"].tolist()
dev_target_list = Y_dev_df["target"].tolist()
#train_target_list

In [9]:
Y_train = getXY(train_index_list,train_target_list)
Y_dev = getXY(dev_index_list,dev_target_list)

In [10]:
train_dataset = list(zip(X_train,Y_train))
dev_dataset = list(zip(X_dev,Y_dev))

In [11]:

def pad_collate(batch):
  result = []
  all_x_batch = []
  all_y_batch = []
  sentence_len_list = []
  for each_tuple in batch:
    (x,y) = each_tuple
    all_x_batch.append(x)
    all_y_batch.append(y)
    sentence_len_list.append(len(x))
  xx_pad = pad_sequence(all_x_batch, batch_first=True, padding_value=0)
  yy_pad = pad_sequence(all_y_batch, batch_first=True, padding_value=0)
  return (xx_pad,yy_pad,sentence_len_list)

BATCH_SIZE = 32
train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE,collate_fn=pad_collate, shuffle=False) 
dev_loader = DataLoader(dataset=dev_dataset, batch_size=BATCH_SIZE, collate_fn=pad_collate, shuffle=False)


In [12]:
# Hyperparameters
embedding_dim = 100
num_layers = 1
hidden_size = 256
num_classes = len(train['tag'].unique()) # =9
LEARNING_RATE = 0.003
linear_output_size = 128

# Create a bidirectional LSTM
class BRNN(nn.Module):
    def __init__(self, embedding_dim, hidden_size, num_layers, num_classes):
        super(BRNN, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.dropout1 = nn.Dropout(p=0.33)
        self.lstm = nn.LSTM(
            embedding_dim, hidden_size, num_layers, batch_first=True, bidirectional=True
        )
        self.dropout2 = nn.Dropout(p=0.33)
        self.fc1 = nn.Linear(hidden_size * 2, linear_output_size)
        self.ELU = nn.ELU() #alpha=1.0, inplace=False
        self.fc2 = nn.Linear(linear_output_size, num_classes)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers * 2, x.size(0), self.hidden_size).to(device)
        c0 = torch.zeros(self.num_layers * 2, x.size(0), self.hidden_size).to(device)

        x = self.dropout1(x)

        out, _ = self.lstm(x, (h0, c0))
        #out = self.fc(out[:, -1, :])

        x = self.dropout2(out[:, -1, :])
        x = self.fc1(out) #x
        x = self.ELU(x)
        x = self.fc2(x)

        return x

In [13]:
b_model = BRNN(embedding_dim, hidden_size, num_layers, num_classes).to(device)
optimizer = torch.optim.SGD(b_model.parameters(), lr=LEARNING_RATE)

In [14]:
def my_loss(output, target,sentence_l):
  (batch_N,max_l,num_labels) = output.shape

  m = nn.Softmax(dim=2)
  temp = m(output)
  temp = torch.log(temp)
  #print(temp.shape)
  #print(target.shape)
  temp = temp*target
  loss = torch.sum(temp)
  return loss*(-40)/sum(sentence_l)

In [15]:
# train model and save the parameters for least validation loss

ceof = 40
N_EPOCHS = 100
valid_loss_min = np.Inf
for e in range(0, N_EPOCHS):
  train_loss = 0
  valid_loss = 0
  b_model.train()
  for X_batch, y_batch,sll in train_loader:
    X_batch = X_batch.float()
    optimizer.zero_grad()
    X_batch, y_batch = X_batch.to(device), y_batch.to(device)
    y_pred = b_model(X_batch)

    loss = my_loss(y_pred, y_batch,sll)

    loss.backward()
    optimizer.step()
    train_loss += loss.item()

  b_model.eval() # prep model for evaluation
  for data, target, sll in dev_loader:
      data = data.float()

      data, target = data.to(device), target.to(device)
      output = b_model(data)
      # calculate the loss
      loss = my_loss(output, target,sll)
      valid_loss += loss.item()

  if (valid_loss/len(dev_loader)) <= valid_loss_min:
      print('Validation loss decreased ({:.6f} --> {:.6f}).  Saving model ...'.format(
      valid_loss_min/ceof,
      valid_loss/(len(dev_loader)*ceof)) )
      torch.save(b_model.state_dict(), path+'model.pt')
      valid_loss_min = valid_loss/len(dev_loader)
      

  print(f'Epoch {e+0:03}: | Train Loss: {train_loss/(len(train_loader)*ceof):.5f} | Validation Loss: {valid_loss/(len(dev_loader)*ceof):.5f}')


Validation loss decreased (inf --> 0.793117).  Saving model ...
Epoch 000: | Train Loss: 0.83420 | Validation Loss: 0.79312
Validation loss decreased (0.793117 --> 0.720971).  Saving model ...
Epoch 001: | Train Loss: 0.73533 | Validation Loss: 0.72097
Validation loss decreased (0.720971 --> 0.636146).  Saving model ...
Epoch 002: | Train Loss: 0.66153 | Validation Loss: 0.63615
Validation loss decreased (0.636146 --> 0.579246).  Saving model ...
Epoch 003: | Train Loss: 0.59914 | Validation Loss: 0.57925
Validation loss decreased (0.579246 --> 0.528334).  Saving model ...
Epoch 004: | Train Loss: 0.55464 | Validation Loss: 0.52833
Validation loss decreased (0.528334 --> 0.495243).  Saving model ...
Epoch 005: | Train Loss: 0.51981 | Validation Loss: 0.49524
Validation loss decreased (0.495243 --> 0.462558).  Saving model ...
Epoch 006: | Train Loss: 0.48971 | Validation Loss: 0.46256
Validation loss decreased (0.462558 --> 0.419540).  Saving model ...
Epoch 007: | Train Loss: 0.46357 

In [16]:
# Load the state of model
b_model.load_state_dict(torch.load(path+'model.pt'))

<All keys matched successfully>

In [17]:
# predict dev data
num_labels = len(train['tag'].unique())

def predict(model, dataloader):
    temp = list(Y_dev_df)
    tag_list = []
    for each_name in temp:
      tag_list.append(each_name[2:])

    prediction_list = []
    with torch.no_grad():
      for X_batch,y,sll in dataloader:
        X_batch = X_batch.float()
        X_batch, y = X_batch.to(device), y.to(device)
        output = model(X_batch)
        temp = output.tolist()

        for i in range(0,len(temp)):
          current_s_output = temp[i]
          current_s_len = sll[i]
          for j in range(0,current_s_len):
            current_w_output = current_s_output[j]
            max_index = current_w_output.index(max(current_w_output[:num_labels]))
            prediction_list.append(tag_list[max_index])
    
    return prediction_list
  
all_prediction = predict(b_model, dev_loader)

In [18]:
dev['pred'] = all_prediction

In [19]:
# output to greedy.out

t1_output_df = dev.drop('all_features', 1)
t1_output_list = t1_output_df.values.tolist()
first_flag = True
with open(path+'dev1.out', 'w') as f:
    for each_output in t1_output_list:
        idx = each_output[0]
        word = each_output[1]
        gold = each_output[2]
        pred = each_output[3]

        if idx==1:
            if first_flag:
                first_flag = False
            else:
                f.write('\n')
        try:
          f.write(str(idx)+' '+word+' '+gold+' '+pred)
          f.write('\n')
        except:
          print(counter)
          print(idx)
          print(word)

f.close()

In [20]:
torch.save(b_model, path+'blstm1.pt')

In [None]:
########################################## testing task 1 ####################################################

In [1]:
import pandas as pd
import torch
from torch import nn
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader, TensorDataset
import numpy as np
from torch.nn.utils.rnn import pad_sequence

In [2]:
#device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
#path = '/content/drive/My Drive/Colab Notebooks/HWK4_testing/'

In [3]:
train = pd.read_table(path+'train',header=None,sep=' |\n', names=['index','word','tag'], doublequote = False, keep_default_na=False) 
dev = pd.read_table(path+'dev',header=None,sep=' |\n', names=['index','word','tag'], doublequote = False, keep_default_na=False) 
train['word'] = train['word'].astype(str)
dev['word'] = dev['word'].astype(str)

  return read_csv(**locals())


In [4]:
# ------------------------------Load Data------------------------------
# X
unique_words_list = sorted(list(train['word'].unique()))
num_unique_words = len(unique_words_list)
values = range(0,num_unique_words)
word_index_dict = dict(zip(unique_words_list, values))  

# lower_word_index_dict
lower_unique_words_list = (map(lambda x: x.lower(), word_index_dict))
lower_word_index_dict = dict(zip(lower_unique_words_list, values))  

# add unknown words
word_index_dict['<unk>'] = num_unique_words
word_index_dict['<unk_digit>'] = num_unique_words+1
word_index_dict['<unk_alnum>'] = num_unique_words+2

In [5]:
# load word embedding
embedding_weight = torch.load(path+'embedding_weight.pt')
embedding = nn.Embedding.from_pretrained(embedding_weight)

In [6]:
def add_features(s):
  try:
    lookup_tensor = torch.tensor([word_index_dict[s]], dtype=torch.long)
    word_embed = embedding(lookup_tensor)
    temp = word_embed.tolist()
    feature_list = temp[0]
  except:
    #try:
    #  lookup_tensor = torch.tensor([lower_word_index_dict[s]], dtype=torch.long)
    #except:
    if s[0].isdigit():
      lookup_tensor = torch.tensor([word_index_dict['<unk_digit>']], dtype=torch.long)
    elif s.isalnum():
      lookup_tensor = torch.tensor([word_index_dict['<unk_alnum>']], dtype=torch.long)
    else:
      lookup_tensor = torch.tensor([word_index_dict['<unk>']], dtype=torch.long)
    word_embed = embedding(lookup_tensor)
    temp = word_embed.tolist()
    feature_list = temp[0]
    
    #feature_list = [1]*100
  return feature_list

train['all_features'] = train['word'].apply(add_features) 
dev['all_features'] = dev['word'].apply(add_features) 

In [7]:
def getXY(index_l,feature_l):
  all_sentences = []
  current_sentence = []
  for i in range(0,len(index_l)):
    current_index = index_l[i]
    current_feature = feature_l[i]
    if current_index==1:
      all_sentences.append(torch.tensor(current_sentence)) # torch.tensor(  np.array(
      current_sentence = [current_feature]
    else:
      current_sentence.append(current_feature)
  all_sentences.append(torch.tensor(current_sentence)) # torch.tensor(  np.array(
  all_sentences = all_sentences[1:]
  return all_sentences

In [8]:
# X
train_index_list = train["index"].tolist()
train_feature_list = train["all_features"].tolist()
dev_index_list = dev["index"].tolist()
dev_feature_list = dev["all_features"].tolist()
X_train = getXY(train_index_list,train_feature_list)
X_dev = getXY(dev_index_list,dev_feature_list)

In [9]:
# Y
Y_train_df = pd.get_dummies(train.tag, prefix='y')
Y_dev_df = pd.get_dummies(dev.tag, prefix='y')
#Y_train_df['y_paddding'] = 0
#Y_dev_df['y_paddding'] = 0
Y_train_df['target']= Y_train_df.values.tolist()
Y_dev_df['target']= Y_dev_df.values.tolist()
train_target_list = Y_train_df["target"].tolist()
dev_target_list = Y_dev_df["target"].tolist()
#train_target_list
Y_train = getXY(train_index_list,train_target_list)
Y_dev = getXY(dev_index_list,dev_target_list)

In [10]:
train_dataset = list(zip(X_train,Y_train))
dev_dataset = list(zip(X_dev,Y_dev))

In [11]:
def pad_collate(batch):
  result = []
  all_x_batch = []
  all_y_batch = []
  sentence_len_list = []
  for each_tuple in batch:
    (x,y) = each_tuple
    all_x_batch.append(x)
    all_y_batch.append(y)
    sentence_len_list.append(len(x))
  xx_pad = pad_sequence(all_x_batch, batch_first=True, padding_value=0)
  yy_pad = pad_sequence(all_y_batch, batch_first=True, padding_value=0)
  return (xx_pad,yy_pad,sentence_len_list)

BATCH_SIZE = 32
train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE,collate_fn=pad_collate, shuffle=False) 
dev_loader = DataLoader(dataset=dev_dataset, batch_size=BATCH_SIZE, collate_fn=pad_collate, shuffle=False)

In [12]:
# Hyperparameters
embedding_dim = 100
num_layers = 1
hidden_size = 256
num_classes = len(train['tag'].unique()) # =9
LEARNING_RATE = 0.003
linear_output_size = 128

# Create a bidirectional LSTM
class BRNN(nn.Module):
    def __init__(self, embedding_dim, hidden_size, num_layers, num_classes):
        super(BRNN, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.dropout1 = nn.Dropout(p=0.33)
        self.lstm = nn.LSTM(
            embedding_dim, hidden_size, num_layers, batch_first=True, bidirectional=True
        )
        self.dropout2 = nn.Dropout(p=0.33)
        self.fc1 = nn.Linear(hidden_size * 2, linear_output_size)
        self.ELU = nn.ELU() #alpha=1.0, inplace=False
        self.fc2 = nn.Linear(linear_output_size, num_classes)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers * 2, x.size(0), self.hidden_size).to(device)
        c0 = torch.zeros(self.num_layers * 2, x.size(0), self.hidden_size).to(device)

        x = self.dropout1(x)

        out, _ = self.lstm(x, (h0, c0))
        #out = self.fc(out[:, -1, :])

        x = self.dropout2(out[:, -1, :])
        x = self.fc1(out) #x
        x = self.ELU(x)
        x = self.fc2(x)

        return x

In [13]:
brnn_model = torch.load(path+'blstm1.pt')
brnn_model = brnn_model.to(device)

In [14]:
# predict dev data
num_labels = len(train['tag'].unique())

def predict(model, dataloader):
    temp = list(Y_dev_df)
    tag_list = []
    for each_name in temp:
      tag_list.append(each_name[2:])

    prediction_list = []
    with torch.no_grad():
      for X_batch,y,sll in dataloader:
        X_batch = X_batch.float()
        X_batch, y = X_batch.to(device), y.to(device)
        output = model(X_batch)
        temp = output.tolist()

        for i in range(0,len(temp)):
          current_s_output = temp[i]
          current_s_len = sll[i]
          for j in range(0,current_s_len):
            current_w_output = current_s_output[j]
            max_index = current_w_output.index(max(current_w_output[:num_labels]))
            prediction_list.append(tag_list[max_index])
    
    return prediction_list
  
all_prediction = predict(brnn_model, dev_loader)
dev['pred'] = all_prediction

In [15]:
# output to task 1
t1_output_df = dev.drop('all_features', 1)
t1_output_list = t1_output_df.values.tolist()
first_flag = True
with open(path+'dev1.out', 'w') as f:
    for each_output in t1_output_list:
        idx = each_output[0]
        word = each_output[1]
        gold = each_output[2]
        pred = each_output[3]

        if idx==1:
            if first_flag:
                first_flag = False
            else:
                f.write('\n')
        try:
          f.write(str(idx)+' '+word+' '+gold+' '+pred)
          f.write('\n')
        except:
          print(counter)
          print(idx)
          print(word)

f.close()

In [None]:
############################################## task 2 start #######################################################

In [None]:
################################################# TASK 2 ############################################################

In [17]:
# ------------------------------Load GloVe------------------------------
GloVe = pd.read_table(path+'glove.6B.100d.txt',header=None,sep=' |\n', doublequote = False) 
GloVe[0] = GloVe[0].astype(str)
GloVe_word_list= GloVe[0].tolist()
GloVe.drop(GloVe.columns[0], axis=1, inplace=True)
GloVe['target']= GloVe.values.tolist()
GloVe_target_list = GloVe['target'].tolist()
#[str(i) for i in lst]
GloVe_word_list = [str(i) for i in GloVe_word_list]
GloVe_word_index_dict = dict(zip(GloVe_word_list, GloVe_target_list))

  return read_csv(**locals())


In [18]:
GloVe_lower_word_list = (map(lambda x: x.lower(), GloVe_word_list))
GloVe_lowcase_word_index_dict = dict(zip(GloVe_lower_word_list, GloVe_target_list))

In [19]:
def add_features(s):
  try:
    feature_list = GloVe_word_index_dict[s]
  except:
    try:
      feature_list = GloVe_lowcase_word_index_dict[s.lower()]
    except:
      if s[0].isdigit():
        lookup_tensor = torch.tensor([word_index_dict['<unk_digit>']], dtype=torch.long)
      elif s.isalnum():
        lookup_tensor = torch.tensor([word_index_dict['<unk_alnum>']], dtype=torch.long)
      else:
        lookup_tensor = torch.tensor([word_index_dict['<unk>']], dtype=torch.long)
      word_embed = embedding(lookup_tensor)
      temp = word_embed.tolist()
      feature_list = temp[0]
    
    #feature_list = [1]*100
  return feature_list

train['all_features'] = train['word'].apply(add_features) 
dev['all_features'] = dev['word'].apply(add_features) 

In [20]:
# X
train_index_list = train["index"].tolist()
train_feature_list = train["all_features"].tolist()
dev_index_list = dev["index"].tolist()
dev_feature_list = dev["all_features"].tolist()
X_train = getXY(train_index_list,train_feature_list)
X_dev = getXY(dev_index_list,dev_feature_list)

In [21]:
# Y
Y_train_df = pd.get_dummies(train.tag, prefix='y')
Y_dev_df = pd.get_dummies(dev.tag, prefix='y')
#Y_train_df['y_paddding'] = 0
#Y_dev_df['y_paddding'] = 0
Y_train_df['target']= Y_train_df.values.tolist()
Y_dev_df['target']= Y_dev_df.values.tolist()
train_target_list = Y_train_df["target"].tolist()
dev_target_list = Y_dev_df["target"].tolist()
#train_target_list

In [22]:
Y_train = getXY(train_index_list,train_target_list)
Y_dev = getXY(dev_index_list,dev_target_list)

In [23]:
train_dataset = list(zip(X_train,Y_train))
dev_dataset = list(zip(X_dev,Y_dev))

In [24]:
#################### data loader ####################
train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE,collate_fn=pad_collate, shuffle=False) 
dev_loader = DataLoader(dataset=dev_dataset, batch_size=BATCH_SIZE, collate_fn=pad_collate, shuffle=False)

In [41]:
b_model_2 = BRNN(embedding_dim, hidden_size, num_layers, num_classes).to(device)
optimizer_2 = torch.optim.SGD(b_model_2.parameters(), lr=LEARNING_RATE)
LEARNING_RATE = 0.029

In [42]:
def my_loss_2(output, target,sentence_l):
  (batch_N,max_l,num_labels) = output.shape

  m = nn.Softmax(dim=2)
  temp = m(output)
  temp = torch.log(temp)
  temp = temp*target
  loss = torch.sum(temp)
  return loss*(-20)/sum(sentence_l)

In [43]:
# train model and save the parameters for least validation loss
ceof = 20
N_EPOCHS = 100
valid_loss_min = np.Inf
for e in range(0, N_EPOCHS):
  train_loss = 0
  valid_loss = 0
  b_model_2.train()
  for X_batch, y_batch,sll in train_loader:
    X_batch = X_batch.float()
    optimizer_2.zero_grad()
    X_batch, y_batch = X_batch.to(device), y_batch.to(device)
    y_pred = b_model_2(X_batch)

    loss = my_loss_2(y_pred, y_batch,sll)
    #print(loss)

    loss.backward()
    optimizer_2.step()
    train_loss += loss.item()

  b_model_2.eval() # prep model for evaluation
  for data, target, sll in dev_loader:
      data = data.float()

      data, target = data.to(device), target.to(device)
      output = b_model_2(data)
      # calculate the loss
      loss = my_loss_2(output, target,sll)
      valid_loss += loss.item()

  if (valid_loss/len(dev_loader)) <= valid_loss_min:
      print('Validation loss decreased ({:.6f} --> {:.6f}).  Saving model ...'.format(
      valid_loss_min/ceof,
      valid_loss/(len(dev_loader)*ceof)) )
      torch.save(b_model_2.state_dict(), path+'model_2.pt')
      valid_loss_min = valid_loss/len(dev_loader)
      

  print(f'Epoch {e+0:03}: | Train Loss: {train_loss/(len(train_loader)*ceof):.5f} | Validation Loss: {valid_loss/(len(dev_loader)*ceof):.5f}')

Validation loss decreased (inf --> 0.307884).  Saving model ...
Epoch 000: | Train Loss: 0.46346 | Validation Loss: 0.30788
Validation loss decreased (0.307884 --> 0.219940).  Saving model ...
Epoch 001: | Train Loss: 0.27547 | Validation Loss: 0.21994
Validation loss decreased (0.219940 --> 0.181630).  Saving model ...
Epoch 002: | Train Loss: 0.21868 | Validation Loss: 0.18163
Validation loss decreased (0.181630 --> 0.153190).  Saving model ...
Epoch 003: | Train Loss: 0.18602 | Validation Loss: 0.15319
Epoch 004: | Train Loss: 0.16464 | Validation Loss: 0.15631
Validation loss decreased (0.153190 --> 0.147467).  Saving model ...
Epoch 005: | Train Loss: 0.15225 | Validation Loss: 0.14747
Validation loss decreased (0.147467 --> 0.132127).  Saving model ...
Epoch 006: | Train Loss: 0.14199 | Validation Loss: 0.13213
Validation loss decreased (0.132127 --> 0.129463).  Saving model ...
Epoch 007: | Train Loss: 0.13334 | Validation Loss: 0.12946
Validation loss decreased (0.129463 --> 0.

In [44]:
# Load the state of model
b_model_2.load_state_dict(torch.load(path+'model_2.pt'))

<All keys matched successfully>

In [45]:
# predict dev data
num_labels = len(train['tag'].unique())  
all_prediction = predict(b_model_2, dev_loader)
dev['pred'] = all_prediction

In [46]:
# output to greedy.out
# df = df.drop('column_name', 1)
t2_output_df = dev.drop('all_features', 1)
t2_output_list = t2_output_df.values.tolist()
first_flag = True
with open(path+'dev2.out', 'w') as f:
    for each_output in t2_output_list:
        idx = each_output[0]
        word = each_output[1]
        gold = each_output[2]
        pred = each_output[3]

        if idx==1:
            if first_flag:
                first_flag = False
            else:
                f.write('\n')
        try:
          f.write(str(idx)+' '+word+' '+gold+' '+pred)
          f.write('\n')
        except:
          print(idx)
          print(word)

f.close()

In [49]:
torch.save(b_model_2, path+'blstm2.pt')

In [7]:
#################################################### Task 2 testing ########################################################

In [16]:
# ------------------------------Load GloVe------------------------------
GloVe = pd.read_table(path+'glove.6B.100d.txt',header=None,sep=' |\n', doublequote = False) 
GloVe[0] = GloVe[0].astype(str)
GloVe_word_list= GloVe[0].tolist()
GloVe.drop(GloVe.columns[0], axis=1, inplace=True)
GloVe['target']= GloVe.values.tolist()
GloVe_target_list = GloVe['target'].tolist()
#[str(i) for i in lst]
GloVe_word_list = [str(i) for i in GloVe_word_list]
GloVe_word_index_dict = dict(zip(GloVe_word_list, GloVe_target_list))

  return read_csv(**locals())


In [17]:
GloVe_lower_word_list = (map(lambda x: x.lower(), GloVe_word_list))
GloVe_lowcase_word_index_dict = dict(zip(GloVe_lower_word_list, GloVe_target_list))

In [18]:
def add_features(s):
  try:
    feature_list = GloVe_word_index_dict[s]
  except:
    try:
      feature_list = GloVe_lowcase_word_index_dict[s.lower()]
    except:
      if s[0].isdigit():
        lookup_tensor = torch.tensor([word_index_dict['<unk_digit>']], dtype=torch.long)
      elif s.isalnum():
        lookup_tensor = torch.tensor([word_index_dict['<unk_alnum>']], dtype=torch.long)
      else:
        lookup_tensor = torch.tensor([word_index_dict['<unk>']], dtype=torch.long)
      word_embed = embedding(lookup_tensor)
      temp = word_embed.tolist()
      feature_list = temp[0]
    
    #feature_list = [1]*100
  return feature_list

train['all_features'] = train['word'].apply(add_features) 
dev['all_features'] = dev['word'].apply(add_features) 

In [19]:
# X
train_index_list = train["index"].tolist()
train_feature_list = train["all_features"].tolist()
dev_index_list = dev["index"].tolist()
dev_feature_list = dev["all_features"].tolist()
X_train = getXY(train_index_list,train_feature_list)
X_dev = getXY(dev_index_list,dev_feature_list)

In [20]:
# Y
Y_train_df = pd.get_dummies(train.tag, prefix='y')
Y_dev_df = pd.get_dummies(dev.tag, prefix='y')
#Y_train_df['y_paddding'] = 0
#Y_dev_df['y_paddding'] = 0
Y_train_df['target']= Y_train_df.values.tolist()
Y_dev_df['target']= Y_dev_df.values.tolist()
train_target_list = Y_train_df["target"].tolist()
dev_target_list = Y_dev_df["target"].tolist()
#train_target_list
Y_train = getXY(train_index_list,train_target_list)
Y_dev = getXY(dev_index_list,dev_target_list)

In [21]:
train_dataset = list(zip(X_train,Y_train))
dev_dataset = list(zip(X_dev,Y_dev))

In [22]:
#################### data loader ####################
train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE,collate_fn=pad_collate, shuffle=False) 
dev_loader = DataLoader(dataset=dev_dataset, batch_size=BATCH_SIZE, collate_fn=pad_collate, shuffle=False)

In [23]:
brnn_model_2 = torch.load(path+'blstm2.pt')
brnn_model_2 = brnn_model_2.to(device)

In [25]:
# predict dev data
num_labels = len(train['tag'].unique())  
all_prediction = predict(brnn_model_2, dev_loader)
dev['pred'] = all_prediction

In [26]:
# output to greedy.out
# df = df.drop('column_name', 1)
t2_output_df = dev.drop('all_features', 1)
t2_output_list = t2_output_df.values.tolist()
first_flag = True
with open(path+'dev2.out', 'w') as f:
    for each_output in t2_output_list:
        idx = each_output[0]
        word = each_output[1]
        gold = each_output[2]
        pred = each_output[3]

        if idx==1:
            if first_flag:
                first_flag = False
            else:
                f.write('\n')
        try:
          f.write(str(idx)+' '+word+' '+gold+' '+pred)
          f.write('\n')
        except:
          print(idx)
          print(word)

f.close()