Skip to content

Commit

Permalink
updates requirements and conda env to hopefully fix readthedocs build…
Browse files Browse the repository at this point in the history
… issue
  • Loading branch information
Justin Sybrandt committed Apr 29, 2020
1 parent ba1e5b1 commit a859a8d
Show file tree
Hide file tree
Showing 7 changed files with 115 additions and 164 deletions.
5 changes: 1 addition & 4 deletions agatha/ml/abstract_generator/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,13 @@
from agatha.ml.abstract_generator.abstract_generator import AbstractGenerator
from agatha.ml.abstract_generator.generation_util import evaluate, name_thy_self
from agatha.ml.abstract_generator.path_util import get_paths
from agatha.ml.abstract_generator.prep_training_data import prep, extract_predicates
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import ModelCheckpoint
import torch


# Eval added as an alias for evaluate
MODES = ["train", "evaluate", "prep", "eval", "name", "extract_predicates"]
MODES = ["train", "evaluate", "prep", "eval", "name"]


def get_model_from_config(
Expand Down Expand Up @@ -94,5 +93,3 @@ def train(config:cpb.AbstractGeneratorConfig):
evaluate(config)
if config.mode == "name":
name_thy_self(config)
if config.mode == "extract_predicates":
extract_predicates(config)
95 changes: 0 additions & 95 deletions agatha/ml/abstract_generator/predicate_util.py

This file was deleted.

29 changes: 0 additions & 29 deletions agatha/ml/abstract_generator/prep_training_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
from agatha.construct import dask_checkpoint, file_util
from agatha.ml.abstract_generator.misc_util import items_to_ordered_index
from agatha.ml.abstract_generator.path_util import get_paths
from agatha.ml.abstract_generator import predicate_util
from agatha.util.misc_util import Record
import random
import sentencepiece as spm
Expand All @@ -15,34 +14,6 @@
from typing import Iterable, Optional
from agatha.construct import dask_process_global as dpg

def extract_predicates(config:cpb.AbstractGeneratorConfig):
paths = get_paths(config)
dask_client = connect_to_dask_cluster(config)

preloader = dpg.WorkerPreloader()
preloader.register(*predicate_util.get_scispacy_initalizer(
config.predicate_spacy_model
))
preloader.register(*predicate_util.get_stopwordlist_initializer(
config.predicate_stopword_list
))
dpg.add_global_preloader(client=dask_client, preloader=preloader)

abstracts = file_util.load(
paths["checkpoint_dir"]
.joinpath("medline_documents")
)

predicates = abstracts.map_partitions(predicate_util.abstracts_to_predicates)
predicates = dask_checkpoint.checkpoint(
predicates,
name="predicates",
checkpoint_dir=paths["model_ckpt_dir"],
overwrite=False,
)
predicates.compute()


def prep(config:cpb.AbstractGeneratorConfig):
# all important paths
paths = get_paths(config)
Expand Down
7 changes: 7 additions & 0 deletions data_requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Spacy

# en_ner_bionlp13cg_md
https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.2.4/en_ner_bionlp13cg_md-0.2.4.tar.gz

# en_core_sci_lg
https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.2.4/en_core_sci_lg-0.2.4.tar.gz
102 changes: 102 additions & 0 deletions docs/environment.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
name: agatha_readthedocs
channels:
- conda-forge
- defaults
dependencies:
- _libgcc_mutex=0.1=main
Expand All @@ -25,23 +26,107 @@ dependencies:
- xz=5.2.5=h7b6447c_0
- zlib=1.2.11=h7b6447c_3
- pip:
- absl-py==0.9.0
- alabaster==0.7.12
- attrs==19.3.0
- awscli==1.18.49
- babel==2.8.0
- bcrypt==3.1.7
- blis==0.4.1
- bokeh==2.0.2
- boto==2.49.0
- boto3==1.12.49
- botocore==1.15.49
- cachetools==4.1.0
- catalogue==1.0.0
- cffi==1.14.0
- chardet==3.0.4
- cloudpickle==1.4.1
- colorama==0.4.3
- commonmark==0.9.1
- conllu==2.3.2
- cryptography==2.9.2
- cycler==0.10.0
- cymem==2.0.3
- dask==2.15.0
- decorator==4.4.2
- distributed==2.15.1
- docutils==0.16
- faiss-cpu==1.6.3
- filelock==3.0.12
- fire==0.3.1
- fsspec==0.7.3
- future==0.18.2
- gensim==3.8.2
- google-api-core==1.17.0
- google-api-python-client==1.8.2
- google-auth==1.14.1
- google-auth-httplib2==0.0.3
- google-auth-oauthlib==0.4.1
- googleapis-common-protos==1.51.0
- grpcio==1.28.1
- h5py==2.10.0
- heapdict==1.0.1
- httplib2==0.17.3
- idna==2.9
- imagesize==1.2.0
- jinja2==2.11.2
- jmespath==0.9.5
- kiwisolver==1.2.0
- lxml==4.5.0
- markdown==3.2.1
- markupsafe==1.1.1
- matplotlib==3.2.1
- more-itertools==8.2.0
- msgpack==1.0.0
- murmurhash==1.0.2
- networkx==2.4
- nltk==3.5
- nmslib==2.0.6
- numpy==1.18.3
- oauth2client==4.1.3
- oauthlib==3.1.0
- packaging==20.3
- pandas==1.0.3
- paramiko==2.7.1
- pbr==5.4.5
- pillow==7.1.2
- plac==1.1.3
- pluggy==0.13.1
- preshed==3.0.2
- psutil==5.7.0
- py==1.8.1
- pyasn1==0.4.8
- pyasn1-modules==0.2.8
- pybind11==2.5.0
- pycparser==2.20
- pygments==2.6.1
- pygsheets==2.0.3.1
- pymongo==3.10.1
- pynacl==1.3.0
- pyparsing==2.4.7
- pysbd==0.2.3
- pysocks==1.7.1
- pytest==5.4.1
- python-dateutil==2.8.1
- pytorch-lightning==0.7.5
- pytz==2020.1
- recommonmark==0.6.0
- regex==2020.4.4
- requests==2.23.0
- requests-oauthlib==1.3.0
- rsa==4.0
- s3transfer==0.3.3
- sacremoses==0.0.41
- scikit-learn==0.22.2.post1
- scipy==1.4.1
- scispacy==0.2.4
- sentencepiece==0.1.86
- sklearn==0.0
- smart-open==2.0.0
- snowballstemmer==2.0.0
- sortedcontainers==2.1.0
- spacy==2.2.4
- sphinx==3.0.3
- sphinx-autodoc-typehints==1.10.3
- sphinx-rtd-theme==0.4.3
Expand All @@ -52,6 +137,23 @@ dependencies:
- sphinxcontrib-jsmath==1.0.1
- sphinxcontrib-qthelp==1.0.3
- sphinxcontrib-serializinghtml==1.1.4
- srsly==1.0.2
- tblib==1.6.0
- tensorboard==2.2.1
- tensorboard-plugin-wit==1.6.0.post3
- termcolor==1.1.0
- thinc==7.4.0
- tokenizers==0.5.2
- toolz==0.10.0
- torch==1.5.0
- tornado==6.0.4
- transformers==2.8.0
- typing-extensions==3.7.4.2
- uritemplate==3.0.1
- urllib3==1.25.9
- wasabi==0.6.0
- wcwidth==0.1.9
- werkzeug==1.0.1
- zict==2.0.0
prefix: /home/jsybran/anaconda3/envs/agatha_readthedocs

39 changes: 4 additions & 35 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,53 +4,22 @@
# pip install -r requirements.txt

################################################################################
# Required to Construct the Semantic Network ###################################
# Required to Construct the Semantic Network And Train Models ##################
################################################################################

faiss-cpu
fsspec
gensim
lxml
networkx
nltk
pandas
pygsheets
pymongo
spacy
transformers

# Required by the semmeddb conversion tool
pymysql

################################################################################
# Required for Topic Model Queries #############################################
################################################################################

gensim
networkx

################################################################################
# Required for Replication - Agatha ############################################
################################################################################

scispacy
spacy

# en_core_sci_lg
https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.2.4/en_core_sci_lg-0.2.4.tar.gz

################################################################################
# Required for Replication - CBAG ##############################################
################################################################################

git+https://github.com/hltcoe/PredPatt.git
scispacy
spacy
spacy_conll

# Used to store generated abstracts online
pygsheets
transformers

# en_ner_bionlp13cg_md
https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.2.4/en_ner_bionlp13cg_md-0.2.4.tar.gz

################################################################################
# Quality of Life ##############################################################
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ def run(self):
"numpy",
"pandas",
"protobuf",
"pytorch-lightning",
"pytorch-lightning",
"sentencepiece", # needed to run CBAG
"sklearn", # needed to run Agatha predictor
"sqlitedict", # needed for CBAG
Expand Down

0 comments on commit a859a8d

Please sign in to comment.