In [9]:
# Mount Google Drive inside the Colab runtime so that files stored on Drive
# become reachable through the local path /content/gdrive.
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [10]:
import os
import sys
sys.path.append('/content/gdrive/My Drive/stanford-nlp/a3')
os.chdir("/content/gdrive/MyDrive/stanford-nlp/a3")
!ls

collect_submission.sh  local_env.yml	parser_transitions.py  README.txt  run.py
data		       parser_model.py	__pycache__	       results	   utils


In [11]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
CS224N 2021-2022: Homework 3
run.py: Run the dependency parser.
Sahil Chopra <schopra8@stanford.edu>
Haoshen Hong <haoshen@stanford.edu>
"""
from datetime import datetime
import os
import pickle
import math
import time
import argparse

from torch import nn, optim
import torch
from tqdm import tqdm

from parser_model import ParserModel
from utils.parser_utils import minibatches, load_and_preprocess_data, AverageMeter

# parser = argparse.ArgumentParser(description='Train neural dependency parser in pytorch')
# parser.add_argument('-d', '--debug', action='store_true', help='whether to enter debug mode')
# args = parser.parse_args()

# -----------------
# Primary Functions
# -----------------
def train(parser, train_data, dev_data, output_path, batch_size=1024, n_epochs=10, lr=0.0006):
    """ Fit the parser's model and checkpoint the best-scoring epoch.

    An Adam optimizer and a cross-entropy loss (default `mean` reduction) are
    created once and reused for every epoch. After each epoch the dev UAS
    returned by `train_for_epoch` is compared with the best value seen so far
    (starting from 0); only a strictly higher score triggers a save of the
    model's state dict to `output_path`.

    @param parser (Parser): Neural Dependency Parser; its `model` attribute is what gets trained
    @param train_data (): training examples, forwarded unchanged to `train_for_epoch`
    @param dev_data (): development examples, forwarded unchanged to `train_for_epoch`
    @param output_path (str): Path to which the best model weights are written.
    @param batch_size (int): Number of examples in a single batch
    @param n_epochs (int): Number of training epochs
    @param lr (float): Learning rate handed to Adam
    """
    # Optimizer over every trainable parameter of the parser's network.
    adam = optim.Adam(parser.model.parameters(), lr=lr)
    # Default reduction for CrossEntropyLoss is 'mean'.
    criterion = nn.CrossEntropyLoss()

    top_score = 0
    for epoch_number in range(1, n_epochs + 1):
        print("Epoch {:} out of {:}".format(epoch_number, n_epochs))
        score = train_for_epoch(parser, train_data, dev_data, adam, criterion, batch_size)

        if not (score > top_score):
            # No improvement on the dev set: keep the previous checkpoint.
            print("")
            continue

        top_score = score
        print("New best dev UAS! Saving model.")
        torch.save(parser.model.state_dict(), output_path)
        print("")


def train_for_epoch(parser, train_data, dev_data, optimizer, loss_func, batch_size):
    """ Run one pass over the training data, then score the model on dev data.

    The model is switched to train mode (dropout applied) for the minibatch
    updates and to eval mode (dropout not applied) before the dev set is parsed.

    @param parser (Parser): Neural Dependency Parser
    @param train_data (): training examples; must support `len()` and be accepted by `minibatches`
    @param dev_data (): development examples, passed to `parser.parse`
    @param optimizer (nn.Optimizer): Adam Optimizer
    @param loss_func (nn.CrossEntropyLoss): Cross Entropy Loss Function
    @param batch_size (int): batch size

    @return dev_UAS (float): Unlabeled Attachment Score (UAS) for dev data
    """
    net = parser.model
    net.train()  # train mode, i.e. the dropout layer is active

    total_batches = math.ceil(len(train_data) / batch_size)
    running_loss = AverageMeter()

    with tqdm(total=(total_batches)) as bar:
        for batch_x, batch_y in minibatches(train_data, batch_size):
            # Clear gradients left over from the previous step.
            optimizer.zero_grad()

            inputs = torch.from_numpy(batch_x).long()
            # Column index of each non-zero entry is used as the target class;
            # presumably batch_y is one-hot per row -- TODO confirm against `minibatches`.
            targets = torch.from_numpy(batch_y.nonzero()[1]).long()

            # Forward pass, loss, backprop, parameter update.
            batch_loss = loss_func(net(inputs), targets)
            batch_loss.backward()
            optimizer.step()

            bar.update(1)
            running_loss.update(batch_loss.item())

    print("Average Train Loss: {}".format(running_loss.avg))

    print("Evaluating on dev set")
    net.eval()  # eval mode, i.e. the dropout layer is not applied
    dev_UAS, _ = parser.parse(dev_data)
    print("- dev UAS: {:.2f}".format(dev_UAS * 100.0))
    return dev_UAS


#if __name__ == "__main__":
    # debug = args.debug

    # assert (torch.__version__.split(".") >= ["1", "0", "0"]), "Please install torch version >= 1.0.0"

    # print(80 * "=")
    # print("INITIALIZING")
    # print(80 * "=")
    # parser, embeddings, train_data, dev_data, test_data = load_and_preprocess_data(debug)

    # start = time.time()
    # model = ParserModel(embeddings, hidden_size=256, dropout_prob=0.3)
    # parser.model = model
    # print("took {:.2f} seconds\n".format(time.time() - start))

    # print(80 * "=")
    # print("TRAINING")
    # print(80 * "=")
    # output_dir = "results/{:%Y%m%d_%H%M%S}/".format(datetime.now())
    # output_path = output_dir + "model.weights"

    # if not os.path.exists(output_dir):
    #     os.makedirs(output_dir)

    # train(parser, train_data, dev_data, output_path, batch_size=1024, n_epochs=10, lr=0.0005)

    # if not debug:
    #     print(80 * "=")
    #     print("TESTING")
    #     print(80 * "=")
    #     print("Restoring the best model weights found on the dev set")
    #     parser.model.load_state_dict(torch.load(output_path))
    #     print("Final evaluation on test set",)
    #     parser.model.eval()
    #     UAS, dependencies = parser.parse(test_data)
    #     print("- test UAS: {:.2f}".format(UAS * 100.0))
    #     print("Done!")

In [12]:
# Driver cell: build the parser, train it, then score the best checkpoint on the test set.
# `debug` replaces the command-line flag used by the script version of this code.
# debug = args.debug
debug = False

# Compare the major version numerically. Comparing lists of strings is
# lexicographic and only happens to work for a "1.0.0" threshold.
assert int(torch.__version__.split(".")[0]) >= 1, "Please install torch version >= 1.0.0"

print(80 * "=")
print("INITIALIZING")
print(80 * "=")
parser, embeddings, train_data, dev_data, test_data = load_and_preprocess_data(debug)

# Time only the model construction and attachment to the parser.
start = time.time()
model = ParserModel(embeddings, hidden_size=256, dropout_prob=0.3)
parser.model = model
print("took {:.2f} seconds\n".format(time.time() - start))

print(80 * "=")
print("TRAINING")
print(80 * "=")
# Each run gets its own timestamped directory under results/ (relative to the cwd).
# NOTE(review): the ':' characters in this name are not valid on Windows
# filesystems; the commented-out script template uses %Y%m%d_%H%M%S instead.
output_dir = "results/{:%Y.%m.%d_%H:%M:%S}/".format(datetime.now())
output_path = os.path.join(output_dir, "model.weights")

# exist_ok avoids the separate exists() check and the race between check and create.
os.makedirs(output_dir, exist_ok=True)

train(parser, train_data, dev_data, output_path, batch_size=1024, n_epochs=10, lr=0.0006)

if not debug:
    print(80 * "=")
    print("TESTING")
    print(80 * "=")
    print("Restoring the best model weights found on the dev set")
    # `train` only writes output_path when an epoch beats the best dev UAS so far.
    parser.model.load_state_dict(torch.load(output_path))
    print("Final evaluation on test set")
    parser.model.eval()  # eval mode, i.e. the dropout layer is not applied
    UAS, dependencies = parser.parse(test_data)
    print("- test UAS: {:.2f}".format(UAS * 100.0))
    print("Done!")

INITIALIZING
Loading data...
took 2.30 seconds
Building parser...
took 2.13 seconds
Loading pretrained embeddings...
took 3.16 seconds
Vectorizing data...
took 1.38 seconds
Preprocessing training data...
took 52.15 seconds
took 0.01 seconds

TRAINING
Epoch 1 out of 10


100%|██████████| 1848/1848 [02:21<00:00, 13.10it/s]


Average Train Loss: 0.16808760447177665
Evaluating on dev set


1445850it [00:00, 26447393.52it/s]      


- dev UAS: 85.14
New best dev UAS! Saving model.

Epoch 2 out of 10


100%|██████████| 1848/1848 [02:17<00:00, 13.41it/s]


Average Train Loss: 0.09798737496411775
Evaluating on dev set


1445850it [00:00, 36332742.83it/s]      


- dev UAS: 87.14
New best dev UAS! Saving model.

Epoch 3 out of 10


100%|██████████| 1848/1848 [02:17<00:00, 13.39it/s]


Average Train Loss: 0.08408330067250377
Evaluating on dev set


1445850it [00:00, 37570995.84it/s]      


- dev UAS: 88.19
New best dev UAS! Saving model.

Epoch 4 out of 10


100%|██████████| 1848/1848 [02:17<00:00, 13.41it/s]


Average Train Loss: 0.0749703907544898
Evaluating on dev set


1445850it [00:00, 35156784.81it/s]      


- dev UAS: 88.25
New best dev UAS! Saving model.

Epoch 5 out of 10


100%|██████████| 1848/1848 [02:21<00:00, 13.10it/s]


Average Train Loss: 0.06791776689616116
Evaluating on dev set


1445850it [00:00, 36906087.21it/s]      


- dev UAS: 88.99
New best dev UAS! Saving model.

Epoch 6 out of 10


100%|██████████| 1848/1848 [02:17<00:00, 13.43it/s]


Average Train Loss: 0.06209659648180266
Evaluating on dev set


1445850it [00:00, 35233995.90it/s]      


- dev UAS: 89.39
New best dev UAS! Saving model.

Epoch 7 out of 10


100%|██████████| 1848/1848 [02:18<00:00, 13.37it/s]


Average Train Loss: 0.05717202987183224
Evaluating on dev set


1445850it [00:00, 34419874.56it/s]      


- dev UAS: 88.83

Epoch 8 out of 10


100%|██████████| 1848/1848 [02:18<00:00, 13.38it/s]


Average Train Loss: 0.05274030378842283
Evaluating on dev set


1445850it [00:00, 34205926.13it/s]      


- dev UAS: 89.03

Epoch 9 out of 10


100%|██████████| 1848/1848 [02:17<00:00, 13.42it/s]


Average Train Loss: 0.04901887655250115
Evaluating on dev set


1445850it [00:00, 34909274.50it/s]      


- dev UAS: 89.07

Epoch 10 out of 10


100%|██████████| 1848/1848 [02:18<00:00, 13.34it/s]


Average Train Loss: 0.045506820811394164
Evaluating on dev set


1445850it [00:00, 36720604.78it/s]      


- dev UAS: 89.28

TESTING
Restoring the best model weights found on the dev set
Final evaluation on test set


2919736it [00:00, 53269160.50it/s]      

- test UAS: 89.69
Done!



