Check GPU information

In [None]:
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Not connected to a GPU')
else:
  print(gpu_info)

Install SuPar

In [None]:
# Install (or upgrade to) the latest SuPar release.
# NOTE(review): later cells overwrite files inside the installed supar package
# with copies from supar_modified/, so the installed version has to be
# compatible with those files — consider pinning the version they were written for.
!pip install -U supar

Mount Google Drive or prepare data as you like. Make sure all the following data paths are correct.

In [None]:
# Mount Google Drive at /content/drive (Colab-only API).
from google.colab import drive
drive.mount('/content/drive')
# Copy the dataset folder from Drive into the working directory as ./data.
# NOTE(review): the relative source path `drive/...` only resolves when the
# working directory is /content — confirm if the notebook changes directory.
!cp -r drive/MyDrive/AdvTree/data .

Pre-training with all training data on UD Turkish

In [None]:
# Train SuPar's biaffine dependency parser on the UD Turkish train/dev/test
# files. The resulting model is written to the `-p` path
# (udtur_biaffine_pretrainall), which is copied to a backup right after.
# NOTE(review): `--embed ""` presumably disables pre-trained word embeddings and
# `-f char` presumably selects character-level features — confirm against the SuPar CLI.
!python -u -m supar.cmds.biaffine_dep train -b -d 0 -c biaffine-dep-en -p udtur_biaffine_pretrainall -f char \
    --train data/ud2.3_tur/train.conllu  \
    --dev data/ud2.3_tur/dev.conllu  \
    --test data/ud2.3_tur/test.conllu  \
    --embed ""
# Keep an untouched copy of the trained model under a separate name.
!cp udtur_biaffine_pretrainall udtur_biaffine_pretrainall_backup

Pre-training with all training data and BERT (RoBERTa-large) on PTB

In [None]:
# Train SuPar's biaffine dependency parser on PTB with a roberta-large encoder.
# The resulting model is written to the `-p` path
# (ptb_biaffine_roberta_pretrainall) and copied to a backup right after.
# NOTE(review): the section heading says "all training data", but --train points
# at train.subset10.conllx — confirm this is the intended training file.
!python -u -m supar.cmds.biaffine_dep train -b -d 0 -c biaffine-dep-roberta-en -p ptb_biaffine_roberta_pretrainall \
    --train data/ptb_sd330/train.subset10.conllx  \
    --dev data/ptb_sd330/dev.conllx  \
    --test data/ptb_sd330/test.conllx  \
    --encoder=bert  \
    --bert=roberta-large  \
    --lr=5e-5  \
    --lr-rate=20  \
    --epochs=30  \
    --update-steps=4
# Keep an untouched copy: the PTB fine-tuning loop later restores the model
# from this backup before every run.
!cp ptb_biaffine_roberta_pretrainall ptb_biaffine_roberta_pretrainall_backup

Pre-training with all training data and BERT (Chinese ELECTRA-large) on CTB

In [None]:
# Train SuPar's biaffine dependency parser on CTB5 with the
# hfl/chinese-electra-180g-large-discriminator encoder. The resulting model is
# written to the `-p` path and copied to a backup right after.
# NOTE(review): the section heading says "all training data", but --train points
# at train.subset1000.conll — confirm this is the intended training file.
!python -u -m supar.cmds.biaffine_dep train -b -d 0 -c biaffine-dep-electra-zh \
        -p ctb5_biaffine_electra_zh_pretrainall \
        --train data/ctb5_sd330/train.subset1000.conll  \
        --dev data/ctb5_sd330/dev.conll  \
        --test data/ctb5_sd330/test.conll  \
        --epochs=30  \
        --encoder=bert  \
        --bert=hfl/chinese-electra-180g-large-discriminator
# Keep an untouched copy of the trained model under a separate name.
!cp ctb5_biaffine_electra_zh_pretrainall ctb5_biaffine_electra_zh_pretrainall_backup

Pre-training with all training data and BERT (XLM-RoBERTa-large) on UD Dutch

In [None]:
# Train SuPar's biaffine dependency parser on the UD Dutch (ud2.3_nl_las)
# train/dev/test files with an xlm-roberta-large encoder. The resulting model
# is written to the `-p` path and copied to a backup right after.
!python -u -m supar.cmds.biaffine_dep train -b -d 0 -c biaffine-dep-xlmr -p ud23nllas_biaffine_xlmr_pretrainall \
    --train data/ud2.3_nl_las/train.conllu  \
    --dev data/ud2.3_nl_las/dev.conllu  \
    --test data/ud2.3_nl_las/test.conllu  \
    --epochs=30 \
    --encoder=bert  \
    --bert=xlm-roberta-large
# Keep an untouched copy of the trained model under a separate name.
!cp ud23nllas_biaffine_xlmr_pretrainall ud23nllas_biaffine_xlmr_pretrainall_backup

In [None]:
# Sanity check: load the PTB model trained above and score it on the PTB
# test file. The value of the last expression is shown as the cell output.
from supar import Parser

parser = Parser.load('ptb_biaffine_roberta_pretrainall')
parser.evaluate(
    'data/ptb_sd330/test.conllx',
    verbose=True,
)

Modify SuPar code for pre-training on UD Turkish

In [None]:
!cp supar_modified/models_dep_binceloss.py /usr/local/lib/python3.7/dist-packages/supar/models/dep.py
!cp supar_modified/parsers_dep_nolossineval.py /usr/local/lib/python3.7/dist-packages/supar/parsers/dep.py
!cp supar_modified/parsers_parser_noearlystop.py /usr/local/lib/python3.7/dist-packages/supar/parsers/parser.py
!cp supar_modified/utils_metric_bestuas.py /usr/local/lib/python3.7/dist-packages/supar/utils/metric.py

In [None]:
# For each of 5 trials and each training-subset size, train a UD Turkish
# parser on that trial's subset file, then rename the result so every
# (trial, subset) pair keeps its own model file.
for trial_idx in range(5):
    for n_train in [10, 50, 100, 1000]:
        # IPython expands {trial_idx} and {n_train} from the Python loop
        # variables before handing the command to the shell.
        !python -u -m supar.cmds.biaffine_dep train -b -d 0 -c biaffine-dep-en -p udtur_biaffine_pretrainall -f char \
            --train data/ud2.3_tur/train.trial{trial_idx}.subset{n_train}.conllu \
            --dev data/ud2.3_tur/dev.conllu \
            --test data/ud2.3_tur/test.conllu \
            --embed ""
        # The training run always writes to the same `-p` path, so move the
        # result aside before the next iteration overwrites it.
        !mv udtur_biaffine_pretrainall udtur_biaffine_pretrainall_trial{trial_idx}_subset{n_train}

Modify SuPar code and train the final layer on UD Turkish

In [None]:
!cp supar_modified/models_dep_hlocalarc.py /usr/local/lib/python3.7/dist-packages/supar/models/dep.py
!cp supar_modified/parsers_dep_nolossineval.py /usr/local/lib/python3.7/dist-packages/supar/parsers/dep.py
!cp supar_modified/parsers_parser_freezeallbutlastlayer.py /usr/local/lib/python3.7/dist-packages/supar/parsers/parser.py
!cp supar_modified/utils_metric_bestuas.py /usr/local/lib/python3.7/dist-packages/supar/utils/metric.py

In [None]:
# For each of 5 trials and each training-subset size, run training again on
# the per-(trial, subset) UD Turkish model produced by the earlier loop, using
# the same subset file it was trained on.
# NOTE(review): `--checkpoint` presumably makes SuPar continue from the model
# already stored at the `-p` path instead of building a new one — confirm
# against the SuPar CLI.
for trial_idx in range(5):
    for n_train in [10, 50, 100, 1000]:
        # IPython expands {trial_idx} and {n_train} from the Python loop
        # variables before handing the command to the shell.
        !python -u -m supar.cmds.biaffine_dep train -b -d 0 -c biaffine-dep-en -p udtur_biaffine_pretrainall_trial{trial_idx}_subset{n_train} -f char \
            --train data/ud2.3_tur/train.trial{trial_idx}.subset{n_train}.conllu \
            --dev data/ud2.3_tur/dev.conllu \
            --test data/ud2.3_tur/test.conllu \
            --checkpoint \
            --embed ""

Modify SuPar code and train the final layer on PTB

In [None]:
!cp supar_modified/models_dep_hlocalarc.py /usr/local/lib/python3.7/dist-packages/supar/models/dep.py
!cp supar_modified/parsers_dep_nolossineval.py /usr/local/lib/python3.7/dist-packages/supar/parsers/dep.py
!cp supar_modified/parsers_parser_freezeallbutlastlayer.py /usr/local/lib/python3.7/dist-packages/supar/parsers/parser.py
!cp supar_modified/utils_metric_bestuas.py /usr/local/lib/python3.7/dist-packages/supar/utils/metric.py

In [None]:
# For each of 5 trials and each training-subset size: restore the pre-trained
# PTB model from its backup, run training on that trial's subset file, then
# rename the result so every (trial, subset) pair keeps its own model file.
# NOTE(review): `--checkpoint` presumably makes SuPar continue from the model
# already stored at the `-p` path instead of building a new one — confirm
# against the SuPar CLI.
for trial_idx in range(5):
    for n_train in [10, 50, 100, 1000]:
        # Start every run from the same pre-trained weights.
        !cp ptb_biaffine_roberta_pretrainall_backup ptb_biaffine_roberta_pretrainall
        # IPython expands {trial_idx} and {n_train} from the Python loop
        # variables before handing the command to the shell.
        !python -u -m supar.cmds.biaffine_dep train -b -d 0 -c biaffine-dep-roberta-en -p ptb_biaffine_roberta_pretrainall \
            --train data/ptb_sd330/train.trial{trial_idx}.subset{n_train}.conllx  \
            --dev data/ptb_sd330/dev.conllx  \
            --test data/ptb_sd330/test.conllx  \
            --checkpoint \
            --encoder=bert  \
            --bert=roberta-large  \
            --lr=5e-5  \
            --lr-rate=20  \
            --epochs=30  \
            --update-steps=4
        # Move the result aside so the next iteration can reuse the `-p` path.
        !mv ptb_biaffine_roberta_pretrainall ptb_biaffine_roberta_pretrainall_trial{trial_idx}_subset{n_train}

Modify SuPar code to evaluate the models and get the datasets with pre-trained features for our DRO methods

In [None]:
!cp supar_modified/models_dep_getfeature.py /usr/local/lib/python3.7/dist-packages/supar/models/dep.py
!cp supar_modified/parsers_dep_getfeature.py /usr/local/lib/python3.7/dist-packages/supar/parsers/dep.py
!cp supar_modified/parsers_parser_freezeallbutlastlayer.py /usr/local/lib/python3.7/dist-packages/supar/parsers/parser.py
!cp supar_modified/utils_metric_bestuas.py /usr/local/lib/python3.7/dist-packages/supar/utils/metric.py

In [None]:
import sys

import numpy as np

# SuPar's source files were overwritten on disk by the preceding cell. If
# `supar` was already imported in this kernel (the earlier PTB sanity-check
# cell does so), a plain `import supar` would reuse the cached modules and
# silently ignore the patched files this cell relies on. Drop every cached
# `supar` module first so the import below reads the files currently on disk.
for _cached_name in [m for m in sys.modules if m == 'supar' or m.startswith('supar.')]:
    del sys.modules[_cached_name]

import supar

# Loop-invariant settings, hoisted out of the nested loops.
base_folder = 'data/ptb_sd330/'
set_names = ['train', 'dev', 'test']

# Test-set metric of every (subset size, trial) model, in evaluation order.
test_metric_list = []
for n_train in [10, 50, 100, 1000]:
    for trial_idx in range(5):
        run_name = 'ptb_biaffine_roberta_pretrainall_trial{}_subset{}'.format(trial_idx, n_train)
        model = supar.Parser.load(run_name)
        # The train split is specific to this trial/subset; dev and test are shared.
        file_names = ['train.trial{}.subset{}'.format(trial_idx, n_train), 'dev', 'test']
        overall_data = {}
        for set_name, file_name in zip(set_names, file_names):
            file_path = base_folder + '{}.conllx'.format(file_name)
            # NOTE(review): the three-value return presumably comes from the
            # patched (getfeature) version of evaluate() — confirm.
            _, cur_metric, processed_data_list = model.evaluate(file_path, verbose=True)
            if set_name == 'test':
                test_metric_list.append(cur_metric)
            overall_data[set_name] = processed_data_list
        # np.save stores the dict as a pickled object array, so it has to be
        # read back with np.load(..., allow_pickle=True).
        np.save(base_folder + run_name + '.npy', overall_data)

for cur_metric in test_metric_list:
    print('{}'.format(cur_metric))

In [None]:
# Copy every .npy file from the local PTB data folder to the matching folder
# on Google Drive.
# NOTE(review): the relative `drive/...` path assumes Drive is mounted and the
# working directory is unchanged since the mount cell — confirm.
!cp data/ptb_sd330/*.npy drive/MyDrive/AdvTree/data/ptb_sd330/