generated from edobobo/p-lightning-template
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Update WSDModel, add MFS baseline, add unit testing
- Loading branch information
1 parent
cf3dd21
commit a159f6f
Showing 35 changed files with 1,999 additions and 291 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,12 +1,53 @@ | ||
train_path: 'data/train.tsv' | ||
validation_path: 'data/validation.tsv' | ||
test_path: 'data/test.tsv' | ||
|
||
train_ds: 'semcor' | ||
semcor_data_path: 'data/WSD_Training_Corpora/SemCor/semcor.data.xml' | ||
semcor_key_path: 'data/WSD_Training_Corpora/SemCor/semcor.gold.key.txt' | ||
semcor_omsti_data_path: 'data/WSD_Training_Corpora/SemCor+OMSTI/semcor+omsti.data.xml' | ||
semcor_omsti_key_path: 'data/WSD_Training_Corpora/SemCor+OMSTI/semcor+omsti.gold.key.txt' | ||
|
||
batch_size: 16 | ||
num_workers: 1 | ||
train_path: "data/train.tsv" | ||
validation_path: "data/validation.tsv" | ||
test_path: "data/test.tsv" | ||
|
||
train_ds: "semcor" | ||
val_ds: "semeval2007" | ||
test_ds: "semeval2015" | ||
|
||
preprocessed_dir: "data/preprocessed/" | ||
force_preprocessing: False | ||
dump_preprocessed: True | ||
use_synset_vocab: True | ||
|
||
wordnet: | ||
glosses: "data/wordnet/means/glosses.json" | ||
lemma_means: "data/wordnet/means/lemma_means.json" | ||
lexeme_means: "data/wordnet/means/lexeme_means.json" | ||
sense_means: "data/wordnet/means/sense_means.json" | ||
|
||
corpora: | ||
semcor: | ||
data_path: "data/WSD_Training_Corpora/SemCor/semcor.data.xml" | ||
key_path: "data/WSD_Training_Corpora/SemCor/semcor.gold.key.txt" | ||
semcor+omsti: | ||
data_path: "data/WSD_Training_Corpora/SemCor+OMSTI/semcor+omsti.data.xml" | ||
key_path: "data/WSD_Training_Corpora/SemCor+OMSTI/semcor+omsti.gold.key.txt" | ||
omsti: | ||
data_path: "data/WSD_Training_Corpora/SemCor+OMSTI/semcor+omsti.data.xml" | ||
key_path: "data/WSD_Training_Corpora/SemCor+OMSTI/semcor+omsti.gold.key.txt" | ||
semeval_all: | ||
data_path: "data/WSD_Unified_Evaluation_Datasets/ALL/ALL.data.xml" | ||
key_path: "data/WSD_Unified_Evaluation_Datasets/ALL/ALL.gold.key.txt" | ||
semeval2007: | ||
data_path: "data/WSD_Unified_Evaluation_Datasets/semeval2007/semeval2007.data.xml" | ||
key_path: "data/WSD_Unified_Evaluation_Datasets/semeval2007/semeval2007.gold.key.txt" | ||
semeval2013: | ||
data_path: "data/WSD_Unified_Evaluation_Datasets/semeval2013/semeval2013.data.xml" | ||
key_path: "data/WSD_Unified_Evaluation_Datasets/semeval2013/semeval2013.gold.key.txt" | ||
semeval2015: | ||
data_path: "data/WSD_Unified_Evaluation_Datasets/semeval2015/semeval2015.data.xml" | ||
key_path: "data/WSD_Unified_Evaluation_Datasets/semeval2015/semeval2015.gold.key.txt" | ||
senseval2: | ||
data_path: "data/WSD_Unified_Evaluation_Datasets/senseval2/senseval2.data.xml" | ||
key_path: "data/WSD_Unified_Evaluation_Datasets/senseval2/senseval2.gold.key.txt" | ||
senseval3: | ||
data_path: "data/WSD_Unified_Evaluation_Datasets/senseval3/senseval3.data.xml" | ||
key_path: "data/WSD_Unified_Evaluation_Datasets/senseval3/senseval3.gold.key.txt" | ||
|
||
batch_size: 32 | ||
num_workers: 0 | ||
|
||
min_freq_senses: 1 | ||
allow_multiple_senses: False |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
log: False | ||
|
||
wandb_logger: | ||
_target_: pytorch_lightning.loggers.WandbLogger | ||
entity: LeonardoEmili | ||
project: neural-wsd | ||
|
||
watch: | ||
log: 'all' | ||
log_freq: 100 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,25 @@ | ||
tokenizer: 'bert-base-cased' | ||
model_name: 'bert-base-cased' | ||
learning_rate: 1e-3 | ||
min_learning_rate: 1e-4 | ||
language_model_learning_rate: 1e-5 | ||
language_model_min_learning_rate: 1e-6 | ||
language_model_weight_decay: 1e-4 | ||
use_lemma_mask: False | ||
use_lexeme_mask: False | ||
|
||
word_encoder: | ||
_target_: src.layers.word_encoder.WordEncoder | ||
fine_tune: False | ||
word_dropout: 0.2 | ||
model_name: ${model.model_name} | ||
|
||
sequence_encoder: lstm | ||
lstm_encoder: | ||
_target_: torch.nn.LSTM | ||
input_size: 512 | ||
hidden_size: 256 | ||
bidirectional: True | ||
batch_first: True | ||
num_layers: 2 | ||
dropout: 0.40 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,9 +1,15 @@ | ||
# Required to make the "experiments" dir the default one for the output of the models | ||
hydra: | ||
run: | ||
dir: ./experiments/${train.model_name}/${now:%Y-%m-%d}/${now:%H-%M-%S} | ||
dir: ./experiments/${model.model_name}/${now:%Y-%m-%d}/${now:%H-%M-%S} | ||
|
||
# Debug mode | ||
debug: False | ||
max_samples: 1000 | ||
|
||
defaults: | ||
- train: default_train | ||
- model: default_model | ||
- data: default_data | ||
- logging: wandb_logging | ||
- test: default_test |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
checkpoint_path: <MODEL_CHECKPOINT_PATH> | ||
latest_checkpoint_path: experiments/bert-base-cased/2021-11-16/23-06-26/default_name/epoch=2-step=3485.ckpt | ||
use_latest: false |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,35 +1,33 @@ | ||
# reproducibility | ||
seed: 42 | ||
|
||
# model name | ||
model_name: default_name # used to name the directory in which model's checkpoints will be stored (experiments/model_name/...) | ||
# experiment name | ||
experiment_name: default_name | ||
|
||
# pl_trainer | ||
pl_trainer: | ||
_target_: pytorch_lightning.Trainer | ||
gpus: 1 | ||
accumulate_grad_batches: 4 | ||
gradient_clip_val: 10.0 | ||
val_check_interval: 1.0 # you can specify an int "n" here => validation every "n" steps | ||
max_steps: 100_000 | ||
# uncomment the lines below for training with mixed precision | ||
max_epochs: 20 | ||
fast_dev_run: False | ||
# precision: 16 | ||
# amp_level: O2 | ||
|
||
|
||
# early stopping callback | ||
# "early_stopping_callback: null" will disable early stopping | ||
early_stopping_callback: | ||
_target_: pytorch_lightning.callbacks.EarlyStopping | ||
monitor: val_loss | ||
mode: min | ||
monitor: val_f1_micro | ||
mode: max | ||
patience: 50 | ||
|
||
# model_checkpoint_callback | ||
# "model_checkpoint_callback: null" will disable model checkpointing | ||
model_checkpoint_callback: | ||
_target_: pytorch_lightning.callbacks.ModelCheckpoint | ||
monitor: val_loss | ||
mode: min | ||
monitor: val_f1_micro | ||
mode: max | ||
verbose: True | ||
save_top_k: 5 | ||
dirpath: experiments/${train.model_name} | ||
dirpath: ${train.experiment_name}/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
[tool.black] | ||
line-length = 120 | ||
target-version = ['py36', 'py37', 'py38'] | ||
include = '\.pyi?$' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
{ | ||
"python.defaultInterpreterPath": "/root/miniconda3/envs/neural-wsd/bin/python", | ||
"python.formatting.provider": "black", | ||
"python.formatting.blackArgs": [ | ||
"--line-length", | ||
"120" | ||
], | ||
"files.exclude": { | ||
"**/.classpath": true, | ||
"**/.project": true, | ||
"**/.settings": true, | ||
"**/.factorypath": true | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
#!/bin/bash | ||
|
||
# Download Miniconda, install it non-interactively (-b = batch mode) and initialise conda for the shell | ||
wget -q https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh | ||
sh Miniconda3-latest-Linux-x86_64.sh -b | ||
export PATH="/root/miniconda3/bin:${PATH}" | ||
conda init | ||
|
||
# Creates the environment | ||
echo "Creating the environment" | ||
source ~/miniconda3/etc/profile.d/conda.sh | ||
conda create -qyn neural-wsd python=3.9.7 | ||
conda activate neural-wsd | ||
pip install -r /content/neural-wsd/requirements.txt | ||
|
||
# Install the VS Code Python extension and overwrite the machine-level settings with the project's settings.json | ||
code --install-extension ms-python.python | ||
cp /content/neural-wsd/src/colab/settings.json /root/.vscode-server/data/Machine/settings.json |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,102 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"# Google Colab + VSCode" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"colab": { | ||
"base_uri": "https://localhost:8080/" | ||
}, | ||
"id": "yxChFURdSJfQ", | ||
"outputId": "09dde9a4-528c-4557-e1b2-2f4a7d0a0578" | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"!nvidia-smi" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"colab": { | ||
"base_uri": "https://localhost:8080/", | ||
"height": 381 | ||
}, | ||
"id": "aqXACjLhFX1C", | ||
"outputId": "ffce36c3-93e9-41b1-90f7-4211ffbfd122" | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"!pip install -q colab_ssh python-dotenv --upgrade\n", | ||
"\n", | ||
"copy_env_from_gdrive = False\n", | ||
"if copy_env_from_gdrive:\n", | ||
" from google.colab import drive\n", | ||
"\n", | ||
" drive.mount(\"/content/drive\")\n", | ||
"\n", | ||
"import os\n", | ||
"from dotenv import load_dotenv\n", | ||
"\n", | ||
"load_dotenv()\n", | ||
"\n", | ||
"from colab_ssh import launch_ssh_cloudflared, init_git_cloudflared\n", | ||
"\n", | ||
"launch_ssh_cloudflared(password=os.getenv(\"CLOUDFLARED_PASSWORD\"))\n", | ||
"\n", | ||
"init_git_cloudflared(\n", | ||
" repository_url=os.getenv(\"GITHUB_REPO_URL\"),\n", | ||
" personal_token=os.getenv(\"GITHUB_PERSONAL_ACCESS_TOKEN\"),\n", | ||
" branch=os.getenv(\"GITHUB_BRANCH\"),\n", | ||
" email=os.getenv(\"GITHUB_EMAIL\"),\n", | ||
" username=os.getenv(\"GITHUB_USERNAME\"),\n", | ||
")" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"colab": { | ||
"base_uri": "https://localhost:8080/" | ||
}, | ||
"id": "-tTM1EMGJH0z", | ||
"outputId": "8839b443-f0a6-4cc8-fea9-180b0b34002c" | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"%%bash\n", | ||
"# Install dependencies and configure bash\n", | ||
"source neural-wsd/src/colab/setup.sh\n", | ||
"echo \"cd /content/neural-wsd/\" >> ~/.bashrc\n", | ||
"echo \"source ~/miniconda3/etc/profile.d/conda.sh\" >> ~/.bashrc\n", | ||
"echo \"conda activate neural-wsd\" >> ~/.bashrc" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"accelerator": "GPU", | ||
"colab": { | ||
"collapsed_sections": [], | ||
"name": "setup_colab.ipynb", | ||
"provenance": [] | ||
}, | ||
"kernelspec": { | ||
"display_name": "Python 3", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"name": "python" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 0 | ||
} |
Oops, something went wrong.