@comment{lrec2024.bib --- BibTeX database. The GitHub page chrome and
line-number gutter that preceded the first entry were web-scrape
artifacts, not bibliography data, and have been removed.}
@inproceedings{agrawal_large_2022,
  title = {Large {{Language Models}} Are {{Few-Shot Clinical Information Extractors}}},
  booktitle = {Proceedings of the 2022 {{Conference}} on {{Empirical Methods}} in {{Natural Language Processing}}},
  author = {Agrawal, Monica and Hegselmann, Stefan and Lang, Hunter and Kim, Yoon and Sontag, David},
  year = {2022},
  pages = {1998--2022},
  abstract = {A long-running goal of the clinical NLP community is the extraction of important variables trapped in clinical notes. However, roadblocks have included dataset shift from the general domain and a lack of public clinical corpora and annotations. In this work, we show that large language models, such as InstructGPT (Ouyang et al., 2022), perform well at zero- and few-shot information extraction from clinical text despite not being trained specifically for the clinical domain. Whereas text classification and generation performance have already been studied extensively in such models, here we additionally demonstrate how to leverage them to tackle a diverse set of NLP tasks which require more structured outputs, including span identification, token-level sequence classification, and relation extraction. Further, due to the dearth of available data to evaluate these systems, we introduce new datasets for benchmarking few-shot clinical information extraction based on a manual re-annotation of the CASI dataset (Moon et al., 2014) for new tasks. On the clinical extraction tasks we studied, the GPT-3 systems significantly outperform existing zero- and few-shot baselines.},
  langid = {english},
  file = {/home/lisa/Zotero/storage/SH8S6AFF/Agrawal et al. - Large Language Models are Few-Shot Clinical Inform.pdf}
}
@article{doi:10.1080/13691180801946150,
  author = {Hargittai, Eszter and Walejko, Gina},
  title = {The Participation Divide: Content Creation and Sharing in the Digital Age},
  journal = {Information, Communication \& Society},
  volume = {11},
  number = {2},
  pages = {239--256},
  year = {2008},
  publisher = {Routledge},
  doi = {10.1080/13691180801946150},
}
@inproceedings{wagner2015s,
  title = {It's a Man's {Wikipedia}? Assessing Gender Inequality in an Online Encyclopedia},
  author = {Wagner, Claudia and Garcia, David and Jadidi, Mohsen and Strohmaier, Markus},
  booktitle = {Proceedings of the International {AAAI} Conference on Web and Social Media},
  volume = {9},
  number = {1},
  pages = {454--463},
  year = {2015}
}
@inproceedings{alimova_machine_2017,
  author = {Alimova, Ilseyar and Tutubalina, Elena and Alferova, Julia and Gafiyatullina, Guzel},
  title = {A Machine Learning Approach to Classification of Drug Reviews in {{Russian}}},
  booktitle = {2017 {{Ivannikov ISPRAS Open Conference}} ({{ISPRAS}})},
  year = {2017},
  month = nov,
  pages = {64--69},
  doi = {10.1109/ISPRAS.2017.00018},
  abstract = {The automatic extraction of drug side effects from social media has gained popularity in pharmacovigilance. Information extraction methods tailored to medical subjects are essential for the task of drug repurposing and finding drug reactions. In this article, we focus on extracting information about side effects and symptoms in users' reviews about medications in Russian. We manually develop a real-world dataset by crawling user reviews from a health-related website and annotate a set of reviews on a sentence level. The paper addresses the classification problem with more than two classes, comparing a simple bag-of-words baseline and a feature-rich machine learning approach.},
  keywords = {Clinical trials,Diseases,drug side effects,Drugs,machine learning,Russian,social media,Social network services,Speech,Support vector machines,Task analysis,text mining},
  file = {/home/lisa/Zotero/storage/ND9NSTRZ/8273300.html;/home/lisa/Zotero/storage/XNPDPJRN/8273300.html}
}
@inproceedings{arase_annotation_2020,
  title = {Annotation of Adverse Drug Reactions in Patients' {{Weblogs}}},
  author = {Arase, Yuki and Kajiwara, Tomoyuki and Chu, Chenhui},
  year = {2020},
  booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},
  pages = {6769--6776},
  abstract = {Adverse drug reactions are a severe problem that significantly degrade quality of life, or even threaten the life of patients. Patient-generated texts available on the web have been gaining attention as a promising source of information in this regard. While previous studies annotated such patient-generated content, they only reported on limited information, such as whether a text described an adverse drug reaction or not. Further, they only annotated short texts of a few sentences crawled from online forums and social networking services. The dataset we present in this paper is unique for the richness of annotated information, including detailed descriptions of drug reactions with full context. We crawled patient's weblog articles shared on an online patient-networking platform and annotated the effects of drugs therein reported. We identified spans describing drug reactions and assigned labels for related drug names, standard codes for the symptoms of the reactions, and types of effects. As a first dataset, we annotated 677 drug reactions with these detailed labels based on 169 weblog articles by Japanese lung cancer patients. Our annotation dataset is made publicly available for further research on the detection of adverse drug reactions and more broadly, on patient-generated text processing.},
  langid = {english},
  file = {/home/lisa/Zotero/storage/2TBGNTVR/Arase et al. - Annotation of Adverse Drug Reactions in Patients' .pdf}
}
@inproceedings{bahdanau_neural_2015,
  author = {Bahdanau, Dzmitry and Cho, Kyunghyun and Bengio, Yoshua},
  editor = {Bengio, Yoshua and LeCun, Yann},
  title = {Neural {{Machine Translation}} by {{Jointly Learning}} to {{Align}} and {{Translate}}},
  booktitle = {3rd {{International Conference}} on {{Learning Representations}}, {{ICLR}} 2015, {{San Diego}}, {{CA}}, {{USA}}, {{May}} 7-9, 2015, {{Conference Track Proceedings}}},
  year = {2015},
  urldate = {2023-08-04}
}
@inproceedings{baldini_soares_matching_2019,
  title = {Matching the {{Blanks}}: {{Distributional Similarity}} for {{Relation Learning}}},
  shorttitle = {Matching the {{Blanks}}},
  booktitle = {Proceedings of the 57th {{Annual Meeting}} of the {{Association}} for {{Computational Linguistics}}},
  author = {Baldini Soares, Livio and FitzGerald, Nicholas and Ling, Jeffrey and Kwiatkowski, Tom},
  year = {2019},
  month = jul,
  pages = {2895--2905},
  publisher = {{Association for Computational Linguistics}},
  address = {{Florence, Italy}},
  doi = {10.18653/v1/P19-1279},
  urldate = {2023-10-10},
  abstract = {General purpose relation extractors, which can model arbitrary relations, are a core aspiration in information extraction. Efforts have been made to build general purpose extractors that represent relations with their surface forms, or which jointly embed surface forms with relations from an existing knowledge graph. However, both of these approaches are limited in their ability to generalize. In this paper, we build on extensions of Harris' distributional hypothesis to relations, as well as recent advances in learning text representations (specifically, BERT), to build task agnostic relation representations solely from entity-linked text. We show that these representations significantly outperform previous work on exemplar based relation extraction (FewRel) even without using any of that task's training data. We also show that models initialized with our task agnostic representations, and then tuned on supervised relation extraction datasets, significantly outperform the previous methods on SemEval 2010 Task 8, KBP37, and TACRED.},
  langid = {english},
  file = {/home/lisa/Zotero/storage/LBFSWEJR/Baldini Soares et al. - 2019 - Matching the Blanks Distributional Similarity for.pdf}
}
@inproceedings{baldini_soares_matching_2019-1,
  title = {Matching the {{Blanks}}: {{Distributional Similarity}} for {{Relation Learning}}},
  shorttitle = {Matching the {{Blanks}}},
  booktitle = {Proceedings of the 57th {{Annual Meeting}} of the {{Association}} for {{Computational Linguistics}}},
  author = {Baldini Soares, Livio and FitzGerald, Nicholas and Ling, Jeffrey and Kwiatkowski, Tom},
  year = {2019},
  month = jul,
  pages = {2895--2905},
  publisher = {{Association for Computational Linguistics}},
  address = {{Florence, Italy}},
  doi = {10.18653/v1/P19-1279},
  urldate = {2023-10-19},
  abstract = {General purpose relation extractors, which can model arbitrary relations, are a core aspiration in information extraction. Efforts have been made to build general purpose extractors that represent relations with their surface forms, or which jointly embed surface forms with relations from an existing knowledge graph. However, both of these approaches are limited in their ability to generalize. In this paper, we build on extensions of Harris' distributional hypothesis to relations, as well as recent advances in learning text representations (specifically, BERT), to build task agnostic relation representations solely from entity-linked text. We show that these representations significantly outperform previous work on exemplar based relation extraction (FewRel) even without using any of that task's training data. We also show that models initialized with our task agnostic representations, and then tuned on supervised relation extraction datasets, significantly outperform the previous methods on SemEval 2010 Task 8, KBP37, and TACRED},
  internal-note = {Review: exact duplicate of key baldini_soares_matching_2019 (same paper, same DOI); consolidate citations onto a single key and remove one entry},
  file = {/home/lisa/Zotero/storage/VRBT2AMB/Baldini Soares et al. - 2019 - Matching the Blanks Distributional Similarity for.pdf}
}
@article{bojanowski_enriching_2017-1,
  author = {Bojanowski, Piotr and Grave, Edouard and Joulin, Armand and Mikolov, Tomas},
  title = {Enriching {{Word Vectors}} with {{Subword Information}}},
  journal = {Transactions of the Association for Computational Linguistics},
  year = {2017},
  volume = {5},
  pages = {135--146},
  publisher = {{MIT Press}},
  address = {{Cambridge, MA}},
  doi = {10.1162/tacl_a_00051},
  urldate = {2023-10-17},
  abstract = {Continuous word representations, trained on large unlabeled corpora are useful for many natural language processing tasks. Popular models that learn such representations ignore the morphology of words, by assigning a distinct vector to each word. This is a limitation, especially for languages with large vocabularies and many rare words. In this paper, we propose a new approach based on the skipgram model, where each word is represented as a bag of character n-grams. A vector representation is associated to each character n-gram; words being represented as the sum of these representations. Our method is fast, allowing to train models on large corpora quickly and allows us to compute word representations for words that did not appear in the training data. We evaluate our word representations on nine different languages, both on word similarity and analogy tasks. By comparing to recently proposed morphological word representations, we show that our vectors achieve state-of-the-art performance on these tasks.},
  file = {/home/lisa/Zotero/storage/339U2VCL/Bojanowski et al. - 2017 - Enriching Word Vectors with Subword Information.pdf}
}
@inproceedings{chen_multilingual_2022,
  author = {Chen, Yuxuan and Harbecke, David and Hennig, Leonhard},
  title = {Multilingual {{Relation Classification}} via {{Efficient}} and {{Effective Prompting}}},
  booktitle = {Proceedings of the 2022 {{Conference}} on {{Empirical Methods}} in {{Natural Language Processing}}},
  year = {2022},
  month = dec,
  pages = {1059--1075},
  publisher = {{Association for Computational Linguistics}},
  address = {{Abu Dhabi, United Arab Emirates}},
  urldate = {2023-07-03},
  abstract = {Prompting pre-trained language models has achieved impressive performance on various NLP tasks, especially in low data regimes. Despite the success of prompting in monolingual settings, applying prompt-based methods in multilingual scenarios has been limited to a narrow set of tasks, due to the high cost of handcrafting multilingual prompts. In this paper, we present the first work on prompt-based multilingual relation classification (RC), by introducing an efficient and effective method that constructs prompts from relation triples and involves only minimal translation for the class labels. We evaluate its performance in fully supervised, few-shot and zero-shot scenarios, and analyze its effectiveness across 14 languages, prompt variants, and English-task training in cross-lingual settings. We find that in both fully supervised and few-shot scenarios, our prompt method beats competitive baselines: fine-tuning XLM-R\_EM and null prompts. It also outperforms the random baseline by a large margin in zero-shot experiments. Our method requires little in-language knowledge and can be used as a strong baseline for similar multilingual classification tasks.},
  file = {/home/lisa/Zotero/storage/ZGYSBNX2/Chen et al. - 2022 - Multilingual Relation Classification via Efficient.pdf}
}
@inproceedings{chowdhury_multi-task_2018,
  author = {Chowdhury, Shaika and Zhang, Chenwei and Yu, Philip S.},
  title = {Multi-{{Task Pharmacovigilance Mining}} from {{Social Media Posts}}},
  booktitle = {Proceedings of the 2018 {{World Wide Web Conference}}},
  series = {{{WWW}} '18},
  year = {2018},
  month = apr,
  pages = {117--126},
  publisher = {{International World Wide Web Conferences Steering Committee}},
  address = {{Republic and Canton of Geneva, CHE}},
  doi = {10.1145/3178876.3186053},
  isbn = {978-1-4503-5639-8},
  urldate = {2023-10-10},
  abstract = {Social media has grown to be a crucial information source for pharmacovigilance studies where an increasing number of people post adverse reactions to medical drugs that are previously unreported. Aiming to effectively monitor various aspects of Adverse Drug Reactions (ADRs) from diversely expressed social medical posts, we propose a multi-task neural network framework that learns several tasks associated with ADR monitoring with different levels of supervisions collectively. Besides being able to correctly classify ADR posts and accurately extract ADR mentions from online posts, the proposed framework is also able to further understand reasons for which the drug is being taken, known as \guillemotright indications\guillemotright, from the given social media post. A coverage-based attention mechanism is adopted in our framework to help the model properly identify \guillemotright phrasal\guillemotright{} ADRs and Indications that are attentive to multiple words in a post. Our framework is applicable in situations where limited parallel data for different pharmacovigilance tasks are available. We evaluate the proposed framework on real-world Twitter datasets, where the proposed model outperforms the state-of-the-art alternatives of each individual task consistently.},
  keywords = {adverse drug reaction,attention mechanism,coverage,multi-task learning,pharmacovigilance,recurrent neural network,social media},
  file = {/home/lisa/Zotero/storage/9KLA9G49/Chowdhury et al. - 2018 - Multi-Task Pharmacovigilance Mining from Social Me.pdf}
}
@inproceedings{conneau_unsupervised_2020,
  author = {Conneau, Alexis and Khandelwal, Kartikay and Goyal, Naman and Chaudhary, Vishrav and Wenzek, Guillaume and Guzm{\'a}n, Francisco and Grave, Edouard and Ott, Myle and Zettlemoyer, Luke and Stoyanov, Veselin},
  title = {Unsupervised Cross-Lingual Representation Learning at Scale},
  booktitle = {Proceedings of the 58th {{Annual Meeting}} of the {{Association}} for {{Computational Linguistics}}},
  year = {2020},
  pages = {8440--8451},
  publisher = {{Association for Computational Linguistics}},
  address = {{Online}},
  doi = {10.18653/v1/2020.acl-main.747},
  langid = {english},
  keywords = {XLM-RoBERTa},
  file = {/home/lisa/Zotero/storage/5YT4WCZV/Conneau et al. - 2020 - Unsupervised Cross-lingual Representation Learning.pdf}
}
@inproceedings{dai_effective_2020-2,
  author = {Dai, Xiang and Karimi, Sarvnaz and Hachey, Ben and Paris, Cecile},
  title = {An {{Effective Transition-based Model}} for {{Discontinuous NER}}},
  booktitle = {Proceedings of the 58th {{Annual Meeting}} of the {{Association}} for {{Computational Linguistics}}},
  year = {2020},
  month = jul,
  pages = {5860--5870},
  publisher = {{Association for Computational Linguistics}},
  address = {{Online}},
  doi = {10.18653/v1/2020.acl-main.520},
  urldate = {2023-10-19},
  abstract = {Unlike widely used Named Entity Recognition (NER) data sets in generic domains, biomedical NER data sets often contain mentions consisting of discontinuous spans. Conventional sequence tagging techniques encode Markov assumptions that are efficient but preclude recovery of these mentions. We propose a simple, effective transition-based model with generic neural encoding for discontinuous NER. Through extensive experiments on three biomedical data sets, we show that our model can effectively recognize discontinuous mentions without sacrificing the accuracy on continuous mentions.},
  file = {/home/lisa/Zotero/storage/I4VVADGM/Dai et al. - 2020 - An Effective Transition-based Model for Discontinu.pdf}
}
@article{denck_machine-learning-based_2023,
  author = {Denck, Jonas and Ozkirimli, Elif and Wang, Ken},
  title = {Machine-Learning-Based Adverse Drug Event Prediction from Observational Health Data: A Review},
  shorttitle = {Machine-Learning-Based Adverse Drug Event Prediction from Observational Health Data},
  journal = {Drug Discovery Today},
  year = {2023},
  month = jul,
  pages = {103715},
  issn = {1359-6446},
  doi = {10.1016/j.drudis.2023.103715},
  urldate = {2023-07-21},
  abstract = {Adverse drug events (ADEs) are responsible for a significant number of hospital admissions and fatalities. Machine learning models have been developed to assess individual patient risk of having an ADE. In this article, we have reviewed studies addressing the prediction of ADEs in observational health data with machine learning. The field of individualised ADE prediction is rapidly emerging through the increasing availability of additional data modalities (e.g., genetic data, screening data, wearables data) and advanced deep learning models such as transformers. Consequently, personalised adverse drug event predictions are becoming more feasible and tangible.},
  langid = {english},
  keywords = {adverse drug event,electronic health record,Machine learning,prediction model},
  file = {/home/lisa/Zotero/storage/97JQWL8Z/S1359644623002313.html}
}
@inproceedings{devlin_bert_2019,
  title = {{{BERT}}: {{Pre-training}} of {{Deep Bidirectional Transformers}} for {{Language Understanding}}},
  shorttitle = {{{BERT}}},
  booktitle = {Proceedings of the 2019 {{Conference}} of the {{North American Chapter}} of the {{Association}} for {{Computational Linguistics}}: {{Human Language Technologies}}},
  author = {Devlin, Jacob and Chang, Ming-Wei and Lee, Kenton and Toutanova, Kristina},
  year = {2019},
  pages = {4171--4186},
  publisher = {{Association for Computational Linguistics}},
  address = {{Minneapolis, Minnesota, USA}},
  doi = {10.18653/v1/n19-1423},
  abstract = {We introduce a new language representation model called BERT, which stands for Bidirectional Encoder Representations from Transformers. Unlike recent language representation models, BERT is designed to pre-train deep bidirectional representations from unlabeled text by jointly conditioning on both left and right context in all layers. As a result, the pre-trained BERT model can be fine-tuned with just one additional output layer to create state-of-the-art models for a wide range of tasks, such as question answering and language inference, without substantial task-specific architecture modifications. BERT is conceptually simple and empirically powerful. It obtains new state-of-the-art results on eleven natural language processing tasks, including pushing the GLUE score to 80.5\% (7.7\% point absolute improvement), MultiNLI accuracy to 86.7\% (4.6\% absolute improvement), SQuAD v1.1 question answering Test F1 to 93.2 (1.5 point absolute improvement) and SQuAD v2.0 Test F1 to 83.1 (5.1 point absolute improvement).},
  keywords = {annotated},
  file = {/home/lisa/Zotero/storage/62SGE433/Devlin et al. - 2019 - BERT Pre-training of Deep Bidirectional Transform.pdf}
}
@inproceedings{dirkson_fuzzybio_2021-1,
  author = {Dirkson, Anne and Verberne, Suzan and Kraaij, Wessel},
  title = {{{FuzzyBIO}}: {{A Proposal}} for {{Fuzzy Representation}} of {{Discontinuous Entities}}},
  shorttitle = {{{FuzzyBIO}}},
  booktitle = {Proceedings of the 12th {{International Workshop}} on {{Health Text Mining}} and {{Information Analysis}}},
  year = {2021},
  month = apr,
  pages = {77--82},
  publisher = {{Association for Computational Linguistics}},
  address = {{online}},
  urldate = {2023-10-19},
  abstract = {Discontinuous entities pose a challenge to named entity recognition (NER). These phenomena occur commonly in the biomedical domain. As a solution, expansions of the BIO representation scheme that can handle these entity types are commonly used (i.e. BIOHD). However, the extra tag types make the NER task more difficult to learn. In this paper we propose an alternative; a fuzzy continuous BIO scheme (FuzzyBIO). We focus on the task of Adverse Drug Response extraction and normalization to compare FuzzyBIO to BIOHD. We find that FuzzyBIO improves recall of NER for two of three data sets and results in a higher percentage of correctly identified disjoint and composite entities for all data sets. Using FuzzyBIO also improves end-to-end performance for continuous and composite entities in two of three data sets. Since FuzzyBIO improves performance for some data sets and the conversion from BIOHD to FuzzyBIO is straightforward, we recommend investigating which is more effective for any data set containing discontinuous entities.},
  file = {/home/lisa/Zotero/storage/TMLJMR9N/Dirkson et al. - 2021 - FuzzyBIO A Proposal for Fuzzy Representation of D.pdf}
}
@article{edwards_adverse_2000-1,
  author = {Edwards, I. Ralph and Aronson, Jeffrey K.},
  title = {Adverse Drug Reactions: Definitions, Diagnosis, and Management},
  shorttitle = {Adverse Drug Reactions},
  journal = {The Lancet},
  year = {2000},
  month = oct,
  volume = {356},
  number = {9237},
  pages = {1255--1259},
  publisher = {{Elsevier}},
  issn = {0140-6736, 1474-547X},
  doi = {10.1016/S0140-6736(00)02799-9},
  urldate = {2023-04-04},
  langid = {english},
  pmid = {11072960}
}
@article{eronen_zero-shot_2023,
  author = {Eronen, Juuso and Ptaszynski, Michal and Masui, Fumito},
  title = {Zero-Shot Cross-Lingual Transfer Language Selection Using Linguistic Similarity},
  journal = {Information Processing \& Management},
  year = {2023},
  month = may,
  volume = {60},
  number = {3},
  pages = {103250},
  issn = {0306-4573},
  doi = {10.1016/j.ipm.2022.103250},
  urldate = {2023-07-03},
  abstract = {We study the selection of transfer languages for different Natural Language Processing tasks, specifically sentiment analysis, named entity recognition and dependency parsing. In order to select an optimal transfer language, we propose to utilize different linguistic similarity metrics to measure the distance between languages and make the choice of transfer language based on this information instead of relying on intuition. We demonstrate that linguistic similarity correlates with cross-lingual transfer performance for all of the proposed tasks. We also show that there is a statistically significant difference in choosing the optimal language as the transfer source instead of English. This allows us to select a more suitable transfer language which can be used to better leverage knowledge from high-resource languages in order to improve the performance of language applications lacking data. For the study, we used datasets from eight different languages from three language families.},
  langid = {english},
  keywords = {Language similarity,Linguistics,Multilingual natural language processing,Transfer learning,Zero-shot learning},
  file = {/home/lisa/Zotero/storage/83FJM6HK/Eronen et al. - 2023 - Zero-shot cross-lingual transfer language selectio.pdf;/home/lisa/Zotero/storage/I7GSLVBQ/S030645732200351X.html}
}
@article{feng_dkade_2023,
  title = {{{DKADE}}: A Novel Framework Based on Deep Learning and Knowledge Graph for Identifying Adverse Drug Events and Related Medications},
  shorttitle = {{{DKADE}}},
  author = {Feng, Ze-Ying and Wu, Xue-Hong and Ma, Jun-Long and Li, Min and He, Ge-Fei and Cao, Dong-Sheng and Yang, Guo-Ping},
  year = {2023},
  month = jun,
  journal = {Briefings in Bioinformatics},
  pages = {bbad228},
  issn = {1477-4054},
  doi = {10.1093/bib/bbad228},
  abstract = {Adverse drug events (ADEs) are common in clinical practice and can cause significant harm to patients and increase resource use. Natural language processing (NLP) has been applied to automate ADE detection, but NLP systems become less adaptable when drug entities are missing or multiple medications are specified in clinical narratives. Additionally, no Chinese-language NLP system has been developed for ADE detection due to the complexity of Chinese semantics, despite {$>$}10 million cases of drug-related adverse events occurring annually in China. To address these challenges, we propose DKADE, a deep learning and knowledge graph-based framework for identifying ADEs. DKADE infers missing drug entities and evaluates their correlations with ADEs by combining medication orders and existing drug knowledge. Moreover, DKADE can automatically screen for new adverse drug reactions. Experimental results show that DKADE achieves an overall F1-score value of 91.13\%. Furthermore, the adaptability of DKADE is validated using real-world external clinical data. In summary, DKADE is a powerful tool for studying drug safety and automating adverse event monitoring.},
  langid = {english},
  pmid = {37344167},
  keywords = {adverse drug events,Chinese natural language processing,deep learning,knowledge graph}
}
@inproceedings{gencoglu_sentence_2020,
  author = {Gencoglu, Oguzhan},
  title = {Sentence {{Transformers}} and {{Bayesian Optimization}} for {{Adverse Drug Effect Detection}} from {{Twitter}}},
  booktitle = {Proceedings of the {{Fifth Social Media Mining}} for {{Health Applications Workshop}} \& {{Shared Task}}},
  year = {2020},
  month = dec,
  pages = {161--164},
  publisher = {{Association for Computational Linguistics}},
  address = {{Barcelona, Spain (Online)}},
  urldate = {2023-06-04},
  abstract = {This paper describes our approach for detecting adverse drug effect mentions on Twitter as part of the Social Media Mining for Health Applications (SMM4H) 2020, Shared Task 2. Our approach utilizes multilingual sentence embeddings (sentence-BERT) for representing tweets and Bayesian hyperparameter optimization of sample weighting parameter for counterbalancing high class imbalance.},
  file = {/home/lisa/Zotero/storage/DIUTTCGL/Gencoglu - 2020 - Sentence Transformers and Bayesian Optimization fo.pdf}
}
@proceedings{gonzalez-hernandez_proceedings_2020,
  title = {Proceedings of the {{Fifth Social Media Mining}} for {{Health Applications Workshop}} \& {{Shared Task}}},
  editor = {{Gonzalez-Hernandez}, Graciela and Klein, Ari Z. and Flores, Ivan and Weissenbacher, Davy and Magge, Arjun and O'Connor, Karen and Sarker, Abeed and Minard, Anne-Lyse and Tutubalina, Elena and Miftahutdinov, Zulfat and Alimova, Ilseyar},
  year = {2020},
  month = dec,
  publisher = {{Association for Computational Linguistics}},
  address = {{Barcelona, Spain (Online)}},
  urldate = {2023-03-09},
  file = {/home/lisa/Zotero/storage/UHIHJRPY/Gonzalez-Hernandez et al. - 2020 - Proceedings of the Fifth Social Media Mining for H.pdf}
}
@misc{gu_distilling_2023,
author = {Gu, Yu and Zhang, Sheng and Usuyama, Naoto and Woldesenbet, Yonas and Wong, Cliff and Sanapathi, Praneeth and Wei, Mu and Valluri, Naveen and Strandberg, Erika and Naumann, Tristan and Poon, Hoifung},
title = {Distilling {{Large Language Models}} for {{Biomedical Knowledge Extraction}}: {{A Case Study}} on {{Adverse Drug Events}}},
shorttitle = {Distilling {{Large Language Models}} for {{Biomedical Knowledge Extraction}}},
year = {2023},
month = jul,
number = {arXiv:2307.06439},
eprint = {2307.06439},
primaryclass = {cs},
archiveprefix = {arxiv},
publisher = {{arXiv}},
doi = {10.48550/arXiv.2307.06439},
urldate = {2023-07-18},
abstract = {Large language models (LLMs), such as GPT-4, have demonstrated remarkable capabilities across a wide range of tasks, including health applications. In this paper, we study how LLMs can be used to scale biomedical knowledge curation. We find that while LLMs already possess decent competency in structuring biomedical text, by distillation into a task-specific student model through self-supervised learning, substantial gains can be attained over out-of-box LLMs, with additional advantages such as cost, efficiency, and white-box model access. We conduct a case study on adverse drug event (ADE) extraction, which is an important area for improving care. On standard ADE extraction evaluation, a GPT-3.5 distilled PubMedBERT model attained comparable accuracy as supervised state-of-the-art models without using any labeled data. Despite being over 1,000 times smaller, the distilled model outperformed its teacher GPT-3.5 by over 6 absolute points in F1 and GPT-4 by over 5 absolute points. Ablation studies on distillation model choice (e.g., PubMedBERT vs BioGPT) and ADE extraction architecture shed light on best practice for biomedical knowledge extraction. Similar gains were attained by distillation for other standard biomedical knowledge extraction tasks such as gene-disease associations and protected health information, further illustrating the promise of this approach.},
keywords = {Computer Science - Artificial Intelligence,Computer Science - Computation and Language},
file = {/home/lisa/Zotero/storage/AC5H9QKJ/Gu et al. - 2023 - Distilling Large Language Models for Biomedical Kn.pdf;/home/lisa/Zotero/storage/X2IY8SYQ/2307.html}
}
@article{hazell_under-reporting_2006,
title = {Under-Reporting of Adverse Drug Reactions: A Systematic Review},
shorttitle = {Under-Reporting of Adverse Drug Reactions},
author = {Hazell, Lorna and Shakir, Saad A. W.},
year = {2006},
journal = {Drug Safety},
volume = {29},
number = {5},
pages = {385--396},
issn = {0114-5916},
doi = {10.2165/00002018-200629050-00003},
abstract = {The purpose of this review was to estimate the extent of under-reporting of adverse drug reactions (ADRs) to spontaneous reporting systems and to investigate whether there are differences between different types of ADRs. A systematic literature search was carried out to identify studies providing a numerical estimate of under-reporting. Studies were included regardless of the methodology used or the setting, e.g. hospital versus general practice. Estimates of under-reporting were either extracted directly from the published study or calculated from the study data. These were expressed as the percentage of ADRs detected from intensive data collection that were not reported to the relevant local, regional or national spontaneous reporting systems. The median under-reporting rate was calculated across all studies and within subcategories of studies using different methods or settings. In total, 37 studies using a wide variety of surveillance methods were identified from 12 countries. These generated 43 numerical estimates of under-reporting. The median under-reporting rate across the 37 studies was 94\% (interquartile range 82-98\%). There was no significant difference in the median under-reporting rates calculated for general practice and hospital-based studies. Five of the ten general practice studies provided evidence of a higher median under-reporting rate for all ADRs compared with more serious or severe ADRs (95\% and 80\%, respectively). In comparison, for five of the eight hospital-based studies the median under-reporting rate for more serious or severe ADRs remained high (95\%). The median under-reporting rate was lower for 19 studies investigating specific serious/severe ADR-drug combinations but was still high at 85\%. This systematic review provides evidence of significant and widespread under-reporting of ADRs to spontaneous reporting systems including serious or severe ADRs. 
Further work is required to assess the impact of under-reporting on public health decisions and the effects of initiatives to improve reporting such as internet reporting, pharmacist/nurse reporting and direct patient reporting as well as improved education and training of healthcare professionals.},
langid = {english},
pmid = {16689555},
keywords = {Adverse Drug Reaction Reporting Systems,Drug Monitoring,Drug-Related Side Effects and Adverse Reactions,Humans}
}
@article{kamba_medical_2021,
title = {Medical {{Needs Extraction}} for {{Breast Cancer Patients}} from {{Question}} and {{Answer Services}}: {{Natural Language Processing-Based Approach}}},
shorttitle = {Medical {{Needs Extraction}} for {{Breast Cancer Patients}} from {{Question}} and {{Answer Services}}},
author = {Kamba, Masaru and Manabe, Masae and Wakamiya, Shoko and Yada, Shuntaro and Aramaki, Eiji and Odani, Satomi and Miyashiro, Isao},
year = {2021},
month = oct,
journal = {JMIR Cancer},
volume = {7},
number = {4},
pages = {e32005},
publisher = {{JMIR Publications}},
address = {{Toronto, Canada}},
doi = {10.2196/32005},
urldate = {2023-10-18},
abstract = {Background: A large number of patient narratives are available on various web services. As for web question and answer services, patient questions often relate to medical needs, and we expect these questions to provide clues for a better understanding of patients' medical needs. Objective: This study aimed to extract patients' needs and classify them into thematic categories. Clarifying patient needs is the first step in solving social issues that patients with cancer encounter. Methods: For this study, we used patient question texts containing the key phrase ``breast cancer,`` available at the Yahoo! Japan question and answer service, Yahoo! Chiebukuro, which contains over 60,000 questions on cancer. First, we converted the question text into a vector representation. Next, the relevance between patient needs and existing cancer needs categories was calculated based on cosine similarity. Results: The proportion of correct classifications in our proposed method was approximately 70\%. Considering the results of classifying questions, we found the variation and the number of needs. Conclusions: We created 3 corpora to classify the problems of patients with cancer. The proposed method was able to classify the problems considering the question text. Moreover, as an application example, the question text that included the side effect signaling of drugs and the unmet needs of cancer patients could be extracted. Revealing these needs is important to fulfill the medical needs of patients with cancer.},
copyright = {Unless stated otherwise, all articles are open-access distributed under the terms of the Creative Commons Attribution License (http://creativecommons.org/licenses/by/2.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work ("first published in the Journal of Medical Internet Research...") is properly cited with original URL and bibliographic citation information. The complete bibliographic information, a link to the original publication on http://www.jmir.org/, as well as this copyright and license information must be included.},
langid = {english},
file = {/home/lisa/Zotero/storage/NNAUS5C4/Kamba et al. - 2021 - Medical Needs Extraction for Breast Cancer Patient.pdf}
}
@article{karimi_cadec_2015,
ids = {karimi_cadec_2015-3},
author = {Karimi, Sarvnaz and {Metke-Jimenez}, Alejandro and Kemp, Madonna and Wang, Chen},
title = {Cadec: {{A}} Corpus of Adverse Drug Event Annotations},
shorttitle = {Cadec},
journal = {Journal of Biomedical Informatics},
year = {2015},
month = jun,
volume = {55},
pages = {73--81},
issn = {1532-0464},
doi = {10.1016/j.jbi.2015.03.010},
abstract = {CSIRO Adverse Drug Event Corpus (Cadec) is a new rich annotated corpus of medical forum posts on patient-reported Adverse Drug Events (ADEs). The corpus is sourced from posts on social media, and contains text that is largely written in colloquial language and often deviates from formal English grammar and punctuation rules. Annotations contain mentions of concepts such as drugs, adverse effects, symptoms, and diseases linked to their corresponding concepts in controlled vocabularies, i.e., SNOMED Clinical Terms and MedDRA. The quality of the annotations is ensured by annotation guidelines, multi-stage annotations, measuring inter-annotator agreement, and final review of the annotations by a clinical terminologist. This corpus is useful for studies in the area of information extraction, or more generally text mining, from social media to detect possible adverse drug reactions from direct patient reports. The corpus is publicly available at https://data.csiro.au.1The data can be used for research purposes only, under the CSIRO data licence.1},
langid = {english},
keywords = {Adverse drug reaction,Annotated corpus,Consumer reviews,Drug safety,Information extraction,MedDRA,Medical forum,SNOMED CT,Social media},
file = {/home/lisa/Zotero/storage/ZPZUNJER/Karimi et al. - 2015 - Cadec A corpus of adverse drug event annotations.pdf;/home/lisa/Zotero/storage/GPIVJ3QY/S1532046415000532.html;/home/lisa/Zotero/storage/LARWUPM7/S1532046415000532.html;/home/lisa/Zotero/storage/RNTI285C/S1532046415000532.html}
}
@inproceedings{klein_overview_2020,
title = {Overview of the {{Fifth Social Media Mining}} for {{Health Applications}} (\#{{SMM4H}}) {{Shared Tasks}} at {{COLING}} 2020},
booktitle = {Proceedings of the {{Fifth Social Media Mining}} for {{Health Applications Workshop}} \& {{Shared Task}}},
author = {Klein, Ari and Alimova, Ilseyar and Flores, Ivan and Magge, Arjun and Miftahutdinov, Zulfat and Minard, Anne-Lyse and O'Connor, Karen and Sarker, Abeed and Tutubalina, Elena and Weissenbacher, Davy and {Gonzalez-Hernandez}, Graciela},
year = {2020},
month = dec,
pages = {27--36},
publisher = {{Association for Computational Linguistics}},
address = {{Barcelona, Spain (Online)}},
abstract = {The vast amount of data on social media presents significant opportunities and challenges for utilizing it as a resource for health informatics. The fifth iteration of the Social Media Mining for Health Applications (\#SMM4H) shared tasks sought to advance the use of Twitter data (tweets) for pharmacovigilance, toxicovigilance, and epidemiology of birth defects. In addition to re-runs of three tasks, \#SMM4H 2020 included new tasks for detecting adverse effects of medications in French and Russian tweets, characterizing chatter related to prescription medication abuse, and detecting self reports of birth defect pregnancy outcomes. The five tasks required methods for binary classification, multi-class classification, and named entity recognition (NER). With 29 teams and a total of 130 system submissions, participation in the \#SMM4H shared tasks continues to grow.},
langid = {english},
file = {/home/lisa/Zotero/storage/LFBYJQS7/Klein et al. - Overview of the Fifth Social Media Mining for Heal.pdf}
}
@inproceedings{leaman_towards_2010-1,
author = {Leaman, Robert and Wojtulewicz, Laura and Sullivan, Ryan and Skariah, Annie and Yang, Jian and Gonzalez, Graciela},
title = {Towards {{Internet-Age Pharmacovigilance}}: {{Extracting Adverse Drug Reactions}} from {{User Posts}} in {{Health-Related Social Networks}}},
shorttitle = {Towards {{Internet-Age Pharmacovigilance}}},
booktitle = {Proceedings of the 2010 {{Workshop}} on {{Biomedical Natural Language Processing}}},
publisher = {{Association for Computational Linguistics}},
address = {{Uppsala, Sweden}},
year = {2010},
month = jul,
pages = {117--125},
urldate = {2023-06-02},
file = {/home/lisa/Zotero/storage/86BNW6PU/Leaman et al. - 2010 - Towards Internet-Age Pharmacovigilance Extracting.pdf}
}
@inproceedings{li_span-based_2021,
author = {Li, Fei and Lin, ZhiChao and Zhang, Meishan and Ji, Donghong},
title = {A {{Span-Based Model}} for {{Joint Overlapped}} and {{Discontinuous Named Entity Recognition}}},
booktitle = {Proceedings of the 59th {{Annual Meeting}} of the {{Association}} for {{Computational Linguistics}} and the 11th {{International Joint Conference}} on {{Natural Language Processing}} ({{Volume}} 1: {{Long Papers}})},
publisher = {{Association for Computational Linguistics}},
address = {{Online}},
year = {2021},
pages = {4814--4828},
doi = {10.18653/v1/2021.acl-long.372},
urldate = {2023-10-19},
abstract = {Research on overlapped and discontinuous named entity recognition (NER) has received increasing attention. The majority of previous work focuses on either overlapped or discontinuous entities. In this paper, we propose a novel span-based model that can recognize both overlapped and discontinuous entities jointly. The model includes two major steps. First, entity fragments are recognized by traversing over all possible text spans, thus, overlapped entities can be recognized. Second, we perform relation classification to judge whether a given pair of entity fragments to be overlapping or succession. In this way, we can recognize not only discontinuous entities, and meanwhile doubly check the overlapped entities. As a whole, our model can be regarded as a relation extraction paradigm essentially. Experimental results on multiple benchmark datasets (i.e., CLEF, GENIA and ACE05) show that our model is highly competitive for overlapped and discontinuous NER.},
langid = {english},
file = {/home/lisa/Zotero/storage/RPELIWYF/Li et al. - 2021 - A Span-Based Model for Joint Overlapped and Discon.pdf}
}
@misc{liu_roberta_2019-1,
title = {{{RoBERTa}}: {{A Robustly Optimized BERT Pretraining Approach}}},
shorttitle = {{{RoBERTa}}},
author = {Liu, Yinhan and Ott, Myle and Goyal, Naman and Du, Jingfei and Joshi, Mandar and Chen, Danqi and Levy, Omer and Lewis, Mike and Zettlemoyer, Luke and Stoyanov, Veselin},
year = {2019},
month = jul,
number = {arXiv:1907.11692},
eprint = {1907.11692},
primaryclass = {cs},
publisher = {{arXiv}},
doi = {10.48550/arXiv.1907.11692},
urldate = {2023-08-07},
abstract = {Language model pretraining has led to significant performance gains but careful comparison between different approaches is challenging. Training is computationally expensive, often done on private datasets of different sizes, and, as we will show, hyperparameter choices have significant impact on the final results. We present a replication study of BERT pretraining (Devlin et al., 2019) that carefully measures the impact of many key hyperparameters and training data size. We find that BERT was significantly undertrained, and can match or exceed the performance of every model published after it. Our best model achieves state-of-the-art results on GLUE, RACE and SQuAD. These results highlight the importance of previously overlooked design choices, and raise questions about the source of recently reported improvements. We release our models and code.},
archiveprefix = {arxiv},
langid = {english},
keywords = {Computer Science - Computation and Language},
file = {/home/lisa/Zotero/storage/QDUHLG8Q/Liu et al. - 2019 - RoBERTa A Robustly Optimized BERT Pretraining App.pdf}
}
@article{magge_deepademiner_2021-1,
ids = {magge_deepademiner_2021},
title = {{{DeepADEMiner}}: {{A}} Deep Learning Pharmacovigilance Pipeline for Extraction and Normalization of Adverse Drug Event Mentions on {{Twitter}}},
shorttitle = {{{DeepADEMiner}}},
author = {Magge, Arjun and Tutubalina, Elena and Miftahutdinov, Zulfat and Alimova, Ilseyar and Dirkson, Anne and Verberne, Suzan and Weissenbacher, Davy and {Gonzalez-Hernandez}, Graciela},
year = {2021},
month = sep,
journal = {Journal of the American Medical Informatics Association},
volume = {28},
number = {10},
pages = {2184--2192},
issn = {1527-974X},
doi = {10.1093/jamia/ocab114},
abstract = {Objective: Research on pharmacovigilance from social media data has focused on mining adverse drug events (ADEs) using annotated datasets, with publications generally focusing on 1 of 3 tasks: ADE classification, named entity recognition for identifying the span of ADE mentions, and ADE mention normalization to standardized terminologies. While the common goal of such systems is to detect ADE signals that can be used to inform public policy, it has been impeded largely by limited end-to-end solutions for large-scale analysis of social media reports for different drugs. Materials and Methods: We present a dataset for training and evaluation of ADE pipelines where the ADE distribution is closer to the average `natural balance' with ADEs present in about 7\% of the tweets. The deep learning architecture involves an ADE extraction pipeline with individual components for all 3 tasks. Results: The system presented achieved state-of-the-art performance on comparable datasets and scored a classification performance of F1 = 0.63, span extraction performance of F1 = 0.44 and an end-to-end entity resolution performance of F1 = 0.34 on the presented dataset. Discussion: The performance of the models continues to highlight multiple challenges when deploying pharmacovigilance systems that use social media data. We discuss the implications of such models in the downstream tasks of signal detection and suggest future enhancements. Conclusion: Mining ADEs from Twitter posts using a pipeline architecture requires the different components to be trained and tuned based on input data imbalance in order to ensure optimal performance on the end-to-end resolution task.},
langid = {english},
keywords = {HLP-ADE},
file = {/home/lisa/Zotero/storage/RUJJWM7K/Magge et al. - 2021 - DeepADEMiner a deep learning pharmacovigilance pi.pdf}
}
@proceedings{magge_proceedings_2021,
title = {Proceedings of the {{Sixth Social Media Mining}} for {{Health}} (\#{{SMM4H}}) {{Workshop}} and {{Shared Task}}},
editor = {Magge, Arjun and Klein, Ari and {Miranda-Escalada}, Antonio and {Al-garadi}, Mohammed Ali and Alimova, Ilseyar and Miftahutdinov, Zulfat and {Farre-Maduell}, Eulalia and Lopez, Salvador Lima and Flores, Ivan and O'Connor, Karen and Weissenbacher, Davy and Tutubalina, Elena and Sarker, Abeed and Banda, Juan M and Krallinger, Martin and {Gonzalez-Hernandez}, Graciela},
year = {2021},
month = jun,
publisher = {{Association for Computational Linguistics}},
address = {{Mexico City, Mexico}},
urldate = {2023-03-09},
file = {/home/lisa/Zotero/storage/CL3PT8B4/Magge et al. - 2021 - Proceedings of the Sixth Social Media Mining for H.pdf}
}
@inproceedings{meoni_large_2023,
author = {Meoni, Simon and {De la Clergerie}, Eric and Ryffel, Theo},
title = {Large {{Language Models}} as {{Instructors}}: {{A Study}} on {{Multilingual Clinical Entity Extraction}}},
shorttitle = {Large {{Language Models}} as {{Instructors}}},
booktitle = {The 22nd {{Workshop}} on {{Biomedical Natural Language Processing}} and {{BioNLP Shared Tasks}}},
publisher = {{Association for Computational Linguistics}},
address = {{Toronto, Canada}},
year = {2023},
month = jul,
pages = {178--190},
urldate = {2023-07-10},
abstract = {In clinical and other specialized domains, data are scarce due to their confidential nature. This lack of data is a major problem when fine-tuning language models.Nevertheless, very large language models (LLMs) are promising for the medical domain but cannot be used directly in healthcare facilities due to data confidentiality issues. We explore an approach of annotating training data with LLMs to train smaller models more adapted to our problem. We show that this method yields promising results for information extraction tasks.},
file = {/home/lisa/Zotero/storage/7723Z78J/Meoni et al. - 2023 - Large Language Models as Instructors A Study on M.pdf}
}
@inproceedings{metke-jimenez_evaluation_2014,
author = {{Metke-Jimenez}, Alejandro and Karimi, Sarvnaz and Paris, Cecile},
title = {Evaluation of Text-Processing Algorithms for Adverse Drug Event Extraction from Social Media},
booktitle = {Proceedings of the First International Workshop on {{Social}} Media Retrieval and Analysis},
series = {{{SoMeRA}} '14},
publisher = {{Association for Computing Machinery}},
address = {{New York, NY, USA}},
year = {2014},
month = jul,
pages = {15--20},
isbn = {978-1-4503-3022-0},
doi = {10.1145/2632188.2632200},
urldate = {2023-06-05},
abstract = {The discovery of suspected adverse drug reactions is no longer restricted to mining reports that pharmaceutical companies and health professionals send to regulators for possible safety signals. Patient forums and other social media are being studied for additional sources of information to assist in expediting adverse reaction discovery. Extracting information on drugs, adverse drug reactions, diseases and symptoms, or patient demographics from such media is an essential step of this process, but it is not straightforward. While most studies in this area use a lexicon-based information extraction methodology, they do not explicitly evaluate the impact of text-processing steps on their final results. We experimentally quantify the value of the most popular techniques to establish whether or not they benefit the information extraction process.},
keywords = {adverse drug reaction discovery,information extraction,social media,text processing},
file = {/home/lisa/Zotero/storage/ND67LPVU/Metke-Jimenez et al. - 2014 - Evaluation of text-processing algorithms for adver.pdf}
}
@inproceedings{miftahutdinov_kfu_2020,
author = {Miftahutdinov, Zulfat and Sakhovskiy, Andrey and Tutubalina, Elena},
title = {{{KFU NLP Team}} at {{SMM4H}} 2020 {{Tasks}}: {{Cross-lingual Transfer Learning}} with {{Pretrained Language Models}} for {{Drug Reactions}}},
shorttitle = {{{KFU NLP Team}} at {{SMM4H}} 2020 {{Tasks}}},
booktitle = {Proceedings of the {{Fifth Social Media Mining}} for {{Health Applications Workshop}} \& {{Shared Task}}},
publisher = {{Association for Computational Linguistics}},
address = {{Barcelona, Spain (Online)}},
year = {2020},
month = dec,
pages = {51--56},
urldate = {2023-06-04},
abstract = {This paper describes neural models developed for the Social Media Mining for Health (SMM4H) 2020 shared tasks. Specifically, we participated in two tasks. We investigate the use of a language representation model BERT pretrained on a large-scale corpus of 5 million health-related user reviews in English and Russian. The ensemble of neural networks for extraction and normalization of adverse drug reactions ranked first among 7 teams at the SMM4H 2020 Task 3 and obtained a relaxed F1 of 46\%. The BERT-based multilingual model for classification of English and Russian tweets that report adverse reactions ranked second among 16 and 7 teams at two first subtasks of the SMM4H 2019 Task 2 and obtained a relaxed F1 of 58\% on English tweets and 51\% on Russian tweets.},
file = {/home/lisa/Zotero/storage/7JIBG98Z/Miftahutdinov et al. - 2020 - KFU NLP Team at SMM4H 2020 Tasks Cross-lingual Tr.pdf}
}
@article{murphy_adverse_2023,
author = {Murphy, Rachel M. and Klopotowska, Joanna E. and de Keizer, Nicolette F. and Jager, Kitty J. and Leopold, Jan Hendrik and Dongelmans, Dave A. and {Abu-Hanna}, Ameen and Schut, Martijn C.},
title = {Adverse Drug Event Detection Using Natural Language Processing: {{A}} Scoping Review of Supervised Learning Methods},
shorttitle = {Adverse Drug Event Detection Using Natural Language Processing},
journal = {PLOS ONE},
year = {2023},
month = jan,
volume = {18},
number = {1},
pages = {e0279842},
publisher = {{Public Library of Science}},
issn = {1932-6203},
doi = {10.1371/journal.pone.0279842},
urldate = {2023-06-01},
abstract = {To reduce adverse drug events (ADEs), hospitals need a system to support them in monitoring ADE occurrence routinely, rapidly, and at scale. Natural language processing (NLP), a computerized approach to analyze text data, has shown promising results for the purpose of ADE detection in the context of pharmacovigilance. However, a detailed qualitative assessment and critical appraisal of NLP methods for ADE detection in the context of ADE monitoring in hospitals is lacking. Therefore, we have conducted a scoping review to close this knowledge gap, and to provide directions for future research and practice. We included articles where NLP was applied to detect ADEs in clinical narratives within electronic health records of inpatients. Quantitative and qualitative data items relating to NLP methods were extracted and critically appraised. Out of 1,065 articles screened for eligibility, 29 articles met the inclusion criteria. Most frequent tasks included named entity recognition (n = 17; 58.6\%) and relation extraction/classification (n = 15; 51.7\%). Clinical involvement was reported in nine studies (31\%). Multiple NLP modelling approaches seem suitable, with Long Short Term Memory and Conditional Random Field methods most commonly used. Although reported overall performance of the systems was high, it provides an inflated impression given a steep drop in performance when predicting the ADE entity or ADE relation class. When annotating corpora, treating an ADE as a relation between a drug and non-drug entity seems the best practice. Future research should focus on semi-automated methods to reduce the manual annotation effort, and examine implementation of the NLP methods in practice.},
langid = {english},
keywords = {Drug safety,Drug therapy,Electronic medical records,Language,Machine learning,Natural language processing,Primary care,Support vector machines,survey},
file = {/home/lisa/Zotero/storage/V6VZHAYH/Murphy et al. - 2023 - Adverse drug event detection using natural languag.pdf}
}
@article{neveol_clinical_2018,
ids = {neveol_clinical_2018-1},
author = {N{\'e}v{\'e}ol, Aur{\'e}lie and Dalianis, Hercules and Velupillai, Sumithra and Savova, Guergana and Zweigenbaum, Pierre},
title = {Clinical {{Natural Language Processing}} in Languages Other than {{English}}: Opportunities and Challenges},
shorttitle = {Clinical {{Natural Language Processing}} in Languages Other than {{English}}},
journal = {Journal of Biomedical Semantics},
year = {2018},
month = mar,
volume = {9},
number = {1},
pages = {12},
issn = {2041-1480},
doi = {10.1186/s13326-018-0179-8},
abstract = {BACKGROUND: Natural language processing applied to clinical text or aimed at a clinical outcome has been thriving in recent years. This paper offers the first broad overview of clinical Natural Language Processing (NLP) for languages other than English. Recent studies are summarized to offer insights and outline opportunities in this area. MAIN BODY: We envision three groups of intended readers: (1) NLP researchers leveraging experience gained in other languages, (2) NLP researchers faced with establishing clinical text processing in a language other than English, and (3) clinical informatics researchers and practitioners looking for resources in their languages in order to apply NLP techniques and tools to clinical practice and/or investigation. We review work in clinical NLP in languages other than English. We classify these studies into three groups: (i) studies describing the development of new NLP systems or components de novo, (ii) studies describing the adaptation of NLP architectures developed for English to another language, and (iii) studies focusing on a particular clinical application. CONCLUSION: We show the advantages and drawbacks of each method, and highlight the appropriate application context. Finally, we identify major challenges and opportunities that will affect the impact of NLP on clinical practice and public health studies in a context that encompasses English as well as other languages.},
langid = {english},
pmcid = {PMC5877394},
pmid = {29602312},
keywords = {Clinical Decision-Making,Humans,Languages other than English,Natural Language Processing,Semantics},
file = {/home/lisa/Zotero/storage/467WK3PZ/Névéol et al. - 2018 - Clinical Natural Language Processing in languages .pdf;/home/lisa/Zotero/storage/VEQCEIHI/Névéol et al. - 2018 - Clinical Natural Language Processing in languages .pdf}
}
@inproceedings{ouyang_training_2022,
title = {Training Language Models to Follow Instructions with Human Feedback},
author = {Ouyang, Long and Wu, Jeff and Jiang, Xu and Almeida, Diogo and Wainwright, Carroll L and Mishkin, Pamela and Zhang, Chong and Agarwal, Sandhini and Slama, Katarina and Ray, Alex and Schulman, John and Hilton, Jacob and Kelton, Fraser and Miller, Luke and Simens, Maddie and Askell, Amanda and Welinder, Peter and Christiano, Paul and Leike, Jan and Lowe, Ryan},
year = {2022},
booktitle = {36th Conference on Neural Information Processing Systems (NeurIPS 2022)},
abstract = {Making language models bigger does not inherently make them better at following a user's intent. For example, large language models can generate outputs that are untruthful, toxic, or simply not helpful to the user. In other words, these models are not aligned with their users. In this paper, we show an avenue for aligning language models with user intent on a wide range of tasks by fine-tuning with human feedback. Starting with a set of labeler-written prompts and prompts submitted through a language model API, we collect a dataset of labeler demonstrations of the desired model behavior, which we use to fine-tune GPT-3 using supervised learning. We then collect a dataset of rankings of model outputs, which we use to further fine-tune this supervised model using reinforcement learning from human feedback. We call the resulting models InstructGPT. In human evaluations on our prompt distribution, outputs from the 1.3B parameter InstructGPT model are preferred to outputs from the 175B GPT-3, despite having 100x fewer parameters. Moreover, InstructGPT models show improvements in truthfulness and reductions in toxic output generation while having minimal performance regressions on public NLP datasets. Even though InstructGPT still makes simple mistakes, our results show that fine-tuning with human feedback is a promising direction for aligning language models with human intent.},
langid = {english},
file = {/home/lisa/Zotero/storage/YUV57A8V/Ouyang et al. - Training language models to follow instructions wi.pdf}
}
@article{palleria_limitations_2013,
title = {Limitations and Obstacles of the Spontaneous Adverse Drugs Reactions Reporting: {{Two}} ``Challenging'' Case Reports},
shorttitle = {Limitations and Obstacles of the Spontaneous Adverse Drugs Reactions Reporting},
author = {Palleria, Caterina and Leporini, Christian and Chimirri, Serafina and Marrazzo, Giuseppina and Sacchetta, Sabrina and Bruno, Lucrezia and Lista, Rosaria M. and Staltari, Orietta and Scuteri, Antonio and Scicchitano, Francesca and Russo, Emilio},
year = {2013},
month = dec,
journal = {Journal of Pharmacology \& Pharmacotherapeutics},
volume = {4},
number = {Suppl1},
pages = {S66--S72},
issn = {0976-500X},
doi = {10.4103/0976-500X.120955},
urldate = {2023-04-05},
abstract = {Introduction: Nowadays, based on several epidemiological data, iatrogenic disease is an emerging public health problem, especially in industrialized countries. Adverse drugs reactions (ADRs) are extremely common and, therefore, clinically, socially, and economically worthy of attention. Spontaneous reporting system for suspected ADRs represents the cornerstone of the pharmacovigilance, because it allows rapid detection of potential alarm signals related to drugs use. However, spontaneous reporting system shows several limitations, which are mainly related to under-reporting. In this paper, we describe two particular case reports, which emphasize some reasons of under-reporting and other common criticisms of spontaneous reporting systems. Materials and Methods: We performed a computer-aided search of Medline, PubMed, Embase, Cochrane library databases, national and international databases of suspected ADRs reports in order to identify previous published case reports and spontaneous reports about the ADRs reviewed in this paper, and to examine the role of suspected drugs in the pathogenesis of the described adverse reactions. Results: First, we reported a case of tizanidine-induced hemorrhagic cystitis. In the second case report, we presented an episode of asthma exacerbation after taking bimatoprost. Through the review of these two cases, we highlighted some common criticisms of spontaneous reporting systems: under-reporting and false causality attribution. Discussion and Conclusion: Healthcare workers sometimes do not report ADRs because it is challenging to establish with certainty the causal relationship between drug and adverse reaction; however, according to a key principle of pharmacovigilance, it is always better to report even a suspicion to generate an alarm in the interest of protecting public health.},
pmcid = {PMC3853673},
pmid = {24347986},
file = {/home/lisa/Zotero/storage/JYE8KD9G/Palleria et al. - 2013 - Limitations and obstacles of the spontaneous adver.pdf}
}
@inproceedings{portelli_ailab-udinesmm4h22_2022,
title = {{{AILAB-Udine}}@{{SMM4H}}'22: {{Limits}} of {{Transformers}} and {{BERT Ensembles}}},
shorttitle = {{{AILAB-Udine}}@{{SMM4H}}'22},
booktitle = {Proceedings of {{The Seventh Workshop}} on {{Social Media Mining}} for {{Health Applications}}, {{Workshop}} \& {{Shared Task}}},
author = {Portelli, Beatrice and Scaboro, Simone and Chersoni, Emmanuele and Santus, Enrico and Serra, Giuseppe},
year = {2022},
month = oct,
pages = {130--134},
publisher = {{Association for Computational Linguistics}},
address = {{Gyeongju, Republic of Korea}},
urldate = {2023-07-20},
abstract = {This paper describes the models developed by the AILAB-Udine team for the SMM4H'22 Shared Task. We explored the limits of Transformer based models on text classification, entity extraction and entity normalization, tackling Tasks 1, 2, 5, 6 and 10. The main takeaways we got from participating in different tasks are: the overwhelming positive effects of combining different architectures when using ensemble learning, and the great potential of generative models for term normalization.},
file = {/home/lisa/Zotero/storage/MLTLLI7T/Portelli et al. - 2022 - AILAB-Udine@SMM4H'22 Limits of Transformers and B.pdf}
}
@article{radford_language_2019,
title = {Language {{Models}} Are {{Unsupervised Multitask Learners}}},
author = {Radford, Alec and Wu, Jeffrey and Child, Rewon and Luan, David and Amodei, Dario and Sutskever, Ilya},
year = {2019},
journal = {OpenAI Blog},
abstract = {Natural language processing tasks, such as question answering, machine translation, reading comprehension, and summarization, are typically approached with supervised learning on taskspecific datasets. We demonstrate that language models begin to learn these tasks without any explicit supervision when trained on a new dataset of millions of webpages called WebText. When conditioned on a document plus questions, the answers generated by the language model reach 55 F1 on the CoQA dataset - matching or exceeding the performance of 3 out of 4 baseline systems without using the 127,000+ training examples. The capacity of the language model is essential to the success of zero-shot task transfer and increasing it improves performance in a log-linear fashion across tasks. Our largest model, GPT-2, is a 1.5B parameter Transformer that achieves state of the art results on 7 out of 8 tested language modeling datasets in a zero-shot setting but still underfits WebText. Samples from the model reflect these improvements and contain coherent paragraphs of text. These findings suggest a promising path towards building language processing systems which learn to perform tasks from their naturally occurring demonstrations.},
langid = {english},
file = {/home/lisa/Zotero/storage/EHEBKNHY/Radford et al. - Language Models are Unsupervised Multitask Learner.pdf}
}
@article{raffel_exploring_2020,
title = {Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer},
author = {Raffel, Colin and Shazeer, Noam and Roberts, Adam and Lee, Katherine and Narang, Sharan and Matena, Michael and Zhou, Yanqi and Li, Wei and Liu, Peter J.},
year = {2020},
month = jan,
journal = {The Journal of Machine Learning Research},
volume = {21},
number = {1},
pages = {140:5485--140:5551},
issn = {1532-4435},
abstract = {Transfer learning, where a model is first pre-trained on a data-rich task before being fine-tuned on a downstream task, has emerged as a powerful technique in natural language processing (NLP). The effectiveness of transfer learning has given rise to a diversity of approaches, methodology, and practice. In this paper, we explore the landscape of transfer learning techniques for NLP by introducing a unified framework that converts all text-based language problems into a text-to-text format. Our systematic study compares pretraining objectives, architectures, unlabeled data sets, transfer approaches, and other factors on dozens of language understanding tasks. By combining the insights from our exploration with scale and our new "Colossal Clean Crawled Corpus", we achieve state-of-the-art results on many benchmarks covering summarization, question answering, text classification, and more. To facilitate future work on transfer learning for NLP, we release our data set, pre-trained models, and code.},
keywords = {attention based models,deep learning,multi-task learning,natural language processing,transfer learning},
file = {/home/lisa/Zotero/storage/AITZFR96/Raffel et al. - 2020 - Exploring the limits of transfer learning with a u.pdf}
}
@inproceedings{raithel_cross-lingual_2022,
ids = {raithel_2022},
title = {Cross-Lingual {{Approaches}} for the {{Detection}} of {{Adverse Drug Reactions}} in {{German}} from a {{Patient}}'s {{Perspective}}},
booktitle = {Proceedings of the {{Language Resources}} and {{Evaluation Conference}}},
author = {Raithel, Lisa and Thomas, Philippe and Roller, Roland and Sapina, Oliver and M{\"o}ller, Sebastian and Zweigenbaum, Pierre},
year = {2022},
pages = {3637--3649},
publisher = {{European Language Resources Association}},
address = {{Marseille}},
abstract = {In this work, we present the first corpus for German Adverse Drug Reaction (ADR) detection in patient-generated content. The data consists of 4,169 binary annotated documents from a German patient forum, where users talk about health issues and get advice from medical doctors. As is common in social media data in this domain, the class labels of the corpus are very imbalanced. This and a high topic imbalance make it a very challenging dataset, since often, the same symptom can have several causes and is not always related to a medication intake. We aim to encourage further multi-lingual efforts in the domain of ADR detection and provide preliminary experiments for binary classification using different methods of zero- and few-shot learning based on a multi-lingual model. When fine-tuning XLM-RoBERTa first on English patient forum data and then on the new German data, we achieve an F1-score of 37.52 for the positive class. We make the dataset and models publicly available for the community.},
copyright = {All rights reserved},
langid = {english},
file = {/home/lisa/Zotero/storage/N8IFCH5S/Raithel et al. - Cross-lingual Approaches for the Detection of Adve.pdf}
}
@inproceedings{ratinov_design_2009,
title = {Design Challenges and Misconceptions in Named Entity Recognition},
booktitle = {Proceedings of the {{Thirteenth Conference}} on {{Computational Natural Language Learning}} - {{CoNLL}} '09},
author = {Ratinov, Lev and Roth, Dan},
year = {2009},
pages = {147--155},
publisher = {{Association for Computational Linguistics}},
address = {{Boulder, Colorado}},
doi = {10.3115/1596374.1596399},
urldate = {2023-10-19},
abstract = {We analyze some of the fundamental design challenges and misconceptions that underlie the development of an efficient and robust NER system. In particular, we address issues such as the representation of text chunks, the inference approach needed to combine local NER decisions, the sources of prior knowledge and how to use them within an NER system. In the process of comparing several solutions to these challenges we reach some surprising conclusions, as well as develop an NER system that achieves 90.8 F1 score on the CoNLL-2003 NER shared task, the best reported result for this dataset.},
isbn = {978-1-932432-29-9},
langid = {english},
file = {/home/lisa/Zotero/storage/SBGVKPP6/Ratinov and Roth - 2009 - Design challenges and misconceptions in named enti.pdf}
}
@inproceedings{raval_exploring_2021-2,
title = {Exploring a {{Unified Sequence-To-Sequence Transformer}} for {{Medical Product Safety Monitoring}} in {{Social Media}}},
booktitle = {Findings of the {{Association}} for {{Computational Linguistics}}: {{EMNLP}} 2021},
author = {Raval, Shivam and Sedghamiz, Hooman and Santus, Enrico and Alhanai, Tuka and Ghassemi, Mohammad and Chersoni, Emmanuele},
year = {2021},
month = nov,
pages = {3534--3546},
publisher = {{Association for Computational Linguistics}},
address = {{Punta Cana, Dominican Republic}},
doi = {10.18653/v1/2021.findings-emnlp.300},
urldate = {2023-08-16},
abstract = {Adverse Events (AE) are harmful events resulting from the use of medical products. Although social media may be crucial for early AE detection, the sheer scale of this data makes it logistically intractable to analyze using human agents, with NLP representing the only low-cost and scalable alternative. In this paper, we frame AE Detection and Extraction as a sequence-to-sequence problem using the T5 model architecture and achieve strong performance improvements over the baselines on several English benchmarks (F1 = 0.71, 12.7\% relative improvement for AE Detection; Strict F1 = 0.713, 12.4\% relative improvement for AE Extraction). Motivated by the strong commonalities between AE tasks, the class imbalance in AE benchmarks, and the linguistic and structural variety typical of social media texts, we propose a new strategy for multi-task training that accounts, at the same time, for task and dataset characteristics. Our approach increases model robustness, leading to further performance gains. Finally, our framework shows some language transfer capabilities, obtaining higher performance than Multilingual BERT in zero-shot learning on French data.},
file = {/home/lisa/Zotero/storage/A6CAHSM2/Raval et al. - 2021 - Exploring a Unified Sequence-To-Sequence Transform.pdf}
}
@inproceedings{reimers_sentence-bert_2019,
title = {Sentence-{{BERT}}: {{Sentence Embeddings}} Using {{Siamese BERT-Networks}}},
shorttitle = {Sentence-{{BERT}}},
booktitle = {Proceedings of the 2019 {{Conference}} on {{Empirical Methods}} in {{Natural Language Processing}} and the 9th {{International Joint Conference}} on {{Natural Language Processing}} ({{EMNLP-IJCNLP}})},
author = {Reimers, Nils and Gurevych, Iryna},
year = {2019},
month = nov,
pages = {3982--3992},
publisher = {{Association for Computational Linguistics}},
address = {{Hong Kong, China}},
doi = {10.18653/v1/D19-1410},
urldate = {2023-08-18},
abstract = {BERT (Devlin et al., 2018) and RoBERTa (Liu et al., 2019) has set a new state-of-the-art performance on sentence-pair regression tasks like semantic textual similarity (STS). However, it requires that both sentences are fed into the network, which causes a massive computational overhead: Finding the most similar pair in a collection of 10,000 sentences requires about 50 million inference computations ({\textasciitilde}65 hours) with BERT. The construction of BERT makes it unsuitable for semantic similarity search as well as for unsupervised tasks like clustering. In this publication, we present Sentence-BERT (SBERT), a modification of the pretrained BERT network that use siamese and triplet network structures to derive semantically meaningful sentence embeddings that can be compared using cosine-similarity. This reduces the effort for finding the most similar pair from 65 hours with BERT / RoBERTa to about 5 seconds with SBERT, while maintaining the accuracy from BERT. We evaluate SBERT and SRoBERTa on common STS tasks and transfer learning tasks, where it outperforms other state-of-the-art sentence embeddings methods.},
file = {/home/lisa/Zotero/storage/JQJMUYBF/Reimers and Gurevych - 2019 - Sentence-BERT Sentence Embeddings using Siamese B.pdf}
}
@inproceedings{sanh_distilbert_2019,
title = {{{DistilBERT}}, a Distilled Version of {{BERT}}: Smaller, Faster, Cheaper and Lighter},
booktitle = {{{EMC}}\^2: 5th {{Edition Co-located}} with {{NeurIPS}}'19},
author = {Sanh, Victor and Debut, Lysandre and Chaumond, Julien and Wolf, Thomas},
year = {2019},
abstract = {As Transfer Learning from large-scale pre-trained models becomes more prevalent in Natural Language Processing (NLP), operating these large models in on-theedge and/or under constrained computational training or inference budgets remains challenging. In this work, we propose a method to pre-train a smaller generalpurpose language representation model, called DistilBERT, which can then be finetuned with good performances on a wide range of tasks like its larger counterparts. While most prior work investigated the use of distillation for building task-specific models, we leverage knowledge distillation during the pre-training phase and show that it is possible to reduce the size of a BERT model by 40\%, while retaining 97\% of its language understanding capabilities and being 60\% faster. To leverage the inductive biases learned by larger models during pre-training, we introduce a triple loss combining language modeling, distillation and cosine-distance losses. Our smaller, faster and lighter model is cheaper to pre-train and we demonstrate its capabilities for on-device computations in a proof-of-concept experiment and a comparative on-device study.},
langid = {english},
file = {/home/lisa/Zotero/storage/MZL6974N/Sanh - DistilBERT, a distilled version of BERT smaller, .pdf}
}
@article{sarker_utilizing_2015-1,
title = {Utilizing Social Media Data for Pharmacovigilance: {{A}} Review},
shorttitle = {Utilizing Social Media Data for Pharmacovigilance},
author = {Sarker, Abeed and Ginn, Rachel and Nikfarjam, Azadeh and O'Connor, Karen and Smith, Karen and Jayaraman, Swetha and Upadhaya, Tejaswi and Gonzalez, Graciela},
year = {2015},
month = apr,
journal = {Journal of Biomedical Informatics},
volume = {54},
pages = {202--212},
issn = {1532-0464},
doi = {10.1016/j.jbi.2015.02.004},
urldate = {2023-06-02},
abstract = {Objective: Automatic monitoring of Adverse Drug Reactions (ADRs), defined as adverse patient outcomes caused by medications, is a challenging research problem that is currently receiving significant attention from the medical informatics community. In recent years, user-posted data on social media, primarily due to its sheer volume, has become a useful resource for ADR monitoring. Research using social media data has progressed using various data sources and techniques, making it difficult to compare distinct systems and their performances. In this paper, we perform a methodical review to characterize the different approaches to ADR detection/extraction from social media, and their applicability to pharmacovigilance. In addition, we present a potential systematic pathway to ADR monitoring from social media. Methods: We identified studies describing approaches for ADR detection from social media from the Medline, Embase, Scopus and Web of Science databases, and the Google Scholar search engine. Studies that met our inclusion criteria were those that attempted to extract ADR information posted by users on any publicly available social media platform. We categorized the studies according to different characteristics such as primary ADR detection approach, size of corpus, data source(s), availability, and evaluation criteria. Results: Twenty-two studies met our inclusion criteria, with fifteen (68\%) published within the last two years. However, publicly available annotated data is still scarce, and we found only six studies that made the annotations used publicly available, making system performance comparisons difficult. In terms of algorithms, supervised classification techniques to detect posts containing ADR mentions, and lexiconbased approaches for extraction of ADR mentions from texts have been the most popular. 
Conclusion: Our review suggests that interest in the utilization of the vast amounts of available social media data for ADR monitoring is increasing. In terms of sources, both health-related and general social media data have been used for ADR detection\textemdash while health-related sources tend to contain higher proportions of relevant data, the volume of data from general social media websites is significantly higher. There is still very limited amount of annotated data publicly available , and, as indicated by the promising results obtained by recent supervised learning approaches, there is a strong need to make such data available to the research community.},
langid = {english},
file = {/home/lisa/Zotero/storage/CQIEL78P/Sarker et al. - 2015 - Utilizing social media data for pharmacovigilance.pdf}
}
@article{sboev_analysis_2022,
title = {Analysis of the {{Full-Size Russian Corpus}} of {{Internet Drug Reviews}} with {{Complex NER Labeling Using Deep Learning Neural Networks}} and {{Language Models}}},
author = {Sboev, Alexander and Sboeva, Sanna and Moloshnikov, Ivan and Gryaznov, Artem and Rybka, Roman and Naumov, Alexander and Selivanov, Anton and Rylkov, Gleb and Ilyin, Vyacheslav},
year = {2022},
month = jan,
journal = {Applied Sciences},
volume = {12},
number = {1},
pages = {491},
publisher = {{Multidisciplinary Digital Publishing Institute}},
issn = {2076-3417},
doi = {10.3390/app12010491},
urldate = {2023-10-10},
abstract = {The paper presents the full-size Russian corpus of Internet users' reviews on medicines with complex named entity recognition (NER) labeling of pharmaceutically relevant entities. We evaluate the accuracy levels reached on this corpus by a set of advanced deep learning neural networks for extracting mentions of these entities. The corpus markup includes mentions of the following entities: medication (33,005 mentions), adverse drug reaction (1778), disease (17,403), and note (4490). Two of them\textemdash medication and disease\textemdash include a set of attributes. A part of the corpus has a coreference annotation with 1560 coreference chains in 300 documents. A multi-label model based on a language model and a set of features has been developed for recognizing entities of the presented corpus. We analyze how the choice of different model components affects the entity recognition accuracy. Those components include methods for vector representation of words, types of language models pre-trained for the Russian language, ways of text normalization, and other pre-processing methods. The sufficient size of our corpus allows us to study the effects of particularities of annotation and entity balancing. We compare our corpus to existing ones by the occurrences of entities of different types and show that balancing the corpus by the number of texts with and without adverse drug event (ADR) mentions improves the ADR recognition accuracy with no notable decline in the accuracy of detecting entities of other types. As a result, the state of the art for the pharmacological entity extraction task for the Russian language is established on a full-size labeled corpus. For the ADR entity type, the accuracy achieved is 61.1\% by the F1-exact metric, which is on par with the accuracy level for other language corpora with similar characteristics and ADR representativeness. 
The accuracy of the coreference relation extraction evaluated on our corpus is 71\%, which is higher than the results achieved on the other Russian-language corpora.},
copyright = {http://creativecommons.org/licenses/by/3.0/},
langid = {english},
keywords = {adverse drug events,annotated corpus,coreference relation extraction,deep learning,information extraction,language models,machine learning,MESHRUS,named entity recognition,neural networks,pharmacovigilance,social media,UMLS},
file = {/home/lisa/Zotero/storage/RE38CFVX/Sboev et al. - 2022 - Analysis of the Full-Size Russian Corpus of Intern.pdf}
}
@article{scaboro_extensive_2023-1,
title = {Extensive Evaluation of Transformer-Based Architectures for Adverse Drug Events Extraction},
author = {Scaboro, Simone and Portelli, Beatrice and Chersoni, Emmanuele and Santus, Enrico and Serra, Giuseppe},
year = {2023},
month = sep,
journal = {Knowledge-Based Systems},
volume = {275},
pages = {110675},
issn = {0950-7051},
doi = {10.1016/j.knosys.2023.110675},
urldate = {2023-10-12},
abstract = {Adverse Drug Event (ADE) extraction is one of the core tasks in digital pharmacovigilance, especially when applied to informal texts. This task has been addressed by the Natural Language Processing community using large pre-trained language models, such as BERT. Despite the great number of Transformer-based architectures used in the literature, it is unclear which of them has better performances and why. Therefore, in this paper we perform an extensive evaluation and analysis of 19 Transformer-based models for ADE extraction on informal texts. We compare the performance of all the considered models on two datasets with increasing levels of informality (forums posts and tweets). We also combine the purely Transformer-based models with two commonly-used additional processing layers (CRF and LSTM), and analyze their effect on the models performance. Furthermore, we use a well-established feature importance technique (SHAP) to correlate the performance of the models with a set of features that describe them: model category (AutoEncoding, AutoRegressive, Text-to-Text), pre-training domain, training from scratch, and model size in number of parameters. At the end of our analyses, we identify a list of take-home messages that can be derived from the experimental data.},
keywords = {Adverse drug events,Extraction,Side effects,Transformers},
file = {/home/lisa/Zotero/storage/G4WSIN8U/Scaboro et al. - 2023 - Extensive evaluation of transformer-based architec.pdf;/home/lisa/Zotero/storage/WPYY2PQD/S0950705123004252.html}
}
@article{scaboro_increasing_2022,
ids = {scaboro_increasing_2022-1},
title = {Increasing Adverse Drug Events Extraction Robustness on Social Media: {{Case}} Study on Negation and Speculation},
shorttitle = {Increasing Adverse Drug Events Extraction Robustness on Social Media},
author = {Scaboro, Simone and Portelli, Beatrice and Chersoni, Emmanuele and Santus, Enrico and Serra, Giuseppe},
year = {2022},
month = oct,
journal = {Experimental Biology and Medicine (Maywood, N.J.)},
pages = {15353702221128577},
issn = {1535-3699},
doi = {10.1177/15353702221128577},
abstract = {In the last decade, an increasing number of users have started reporting adverse drug events (ADEs) on social media platforms, blogs, and health forums. Given the large volume of reports, pharmacovigilance has focused on ways to use natural language processing (NLP) techniques to rapidly examine these large collections of text, detecting mentions of drug-related adverse reactions to trigger medical investigations. However, despite the growing interest in the task and the advances in NLP, the robustness of these models in face of linguistic phenomena such as negations and speculations is an open research question. Negations and speculations are pervasive phenomena in natural language and can severely hamper the ability of an automated system to discriminate between factual and non-factual statements in text. In this article, we take into consideration four state-of-the-art systems for ADE detection on social media texts. We introduce SNAX, a benchmark to test their performance against samples containing negated and speculated ADEs, showing their fragility against these phenomena. We then introduce two possible strategies to increase the robustness of these models, showing that both of them bring significant increases in performance, lowering the number of spurious entities predicted by the models by 60\% for negation and 80\% for speculations.},
langid = {english},
pmid = {36314865},
keywords = {Adverse drug events,deep learning,digital pharmacovigilance,linguistic phenomena,natural language processing,social media,Twitter},
file = {/home/lisa/Zotero/storage/2GD4NBAJ/Scaboro et al. - 2022 - Increasing adverse drug events extraction robustne.pdf;/home/lisa/Zotero/storage/Y9NW6JDE/Scaboro et al. - 2022 - Increasing adverse drug events extraction robustne.pdf}
}
@misc{see_get_2017,
title = {Get {{To The Point}}: {{Summarization}} with {{Pointer-Generator Networks}}},
shorttitle = {Get {{To The Point}}},
author = {See, Abigail and Liu, Peter J. and Manning, Christopher D.},
year = {2017},
month = apr,
number = {arXiv:1704.04368},
eprint = {1704.04368},
primaryclass = {cs},
publisher = {{arXiv}},
doi = {10.48550/arXiv.1704.04368},
urldate = {2023-10-16},
abstract = {Neural sequence-to-sequence models have provided a viable new approach for abstractive text summarization (meaning they are not restricted to simply selecting and rearranging passages from the original text). However, these models have two shortcomings: they are liable to reproduce factual details inaccurately, and they tend to repeat themselves. In this work we propose a novel architecture that augments the standard sequence-to-sequence attentional model in two orthogonal ways. First, we use a hybrid pointer-generator network that can copy words from the source text via pointing, which aids accurate reproduction of information, while retaining the ability to produce novel words through the generator. Second, we use coverage to keep track of what has been summarized, which discourages repetition. We apply our model to the CNN / Daily Mail summarization task, outperforming the current abstractive state-of-the-art by at least 2 ROUGE points.},
archiveprefix = {arxiv},
keywords = {Computer Science - Computation and Language},
file = {/home/lisa/Zotero/storage/FZRCE889/See et al. - 2017 - Get To The Point Summarization with Pointer-Gener.pdf;/home/lisa/Zotero/storage/UGG5TUJN/1704.html}
}
@inproceedings{segura-bedmar_detecting_2014,
ids = {segura-bedmar_detecting_2014-1},
title = {Detecting Drugs and Adverse Events from {{Spanish}} Social Media Streams},
booktitle = {Proceedings of the 5th {{International Workshop}} on {{Health Text Mining}} and {{Information Analysis}} ({{Louhi}})},
author = {{Segura-Bedmar}, Isabel and Revert, Ricardo and Mart{\'i}nez, Paloma},
year = {2014},
pages = {106--115},
publisher = {{Association for Computational Linguistics}},
address = {{Gothenburg, Sweden}},
doi = {10.3115/v1/W14-1117},
abstract = {To the best of our knowledge, this is the first work that does drug and adverse event detection from Spanish posts collected from a health social media. First, we created a goldstandard corpus annotated with drugs and adverse events from social media. Then, Textalytics, a multilingual text analysis engine, was applied to identify drugs and possible adverse events. Overall recall and precision were 0.80 and 0.87 for drugs, and 0.56 and 0.85 for adverse events.},
langid = {english},
file = {/home/lisa/Zotero/storage/C9LDQ356/Segura-Bedmar et al. - 2014 - Detecting drugs and adverse events from Spanish so.pdf;/home/lisa/Zotero/storage/UK9I6H72/Segura-Bedmar et al. - 2014 - Detecting drugs and adverse events from Spanish so.pdf}
}
@misc{touvron_llama_2023-1,
title = {{{LLaMA}}: {{Open}} and {{Efficient Foundation Language Models}}},
shorttitle = {{{LLaMA}}},
author = {Touvron, Hugo and Lavril, Thibaut and Izacard, Gautier and Martinet, Xavier and Lachaux, Marie-Anne and Lacroix, Timoth{\'e}e and Rozi{\`e}re, Baptiste and Goyal, Naman and Hambro, Eric and Azhar, Faisal and Rodriguez, Aurelien and Joulin, Armand and Grave, Edouard and Lample, Guillaume},
year = {2023},
month = feb,
number = {arXiv:2302.13971},
eprint = {2302.13971},
primaryclass = {cs},
publisher = {{arXiv}},
doi = {10.48550/arXiv.2302.13971},
urldate = {2023-10-19},
abstract = {We introduce LLaMA, a collection of foundation language models ranging from 7B to 65B parameters. We train our models on trillions of tokens, and show that it is possible to train state-of-the-art models using publicly available datasets exclusively, without resorting to proprietary and inaccessible datasets. In particular, LLaMA-13B outperforms GPT-3 (175B) on most benchmarks, and LLaMA-65B is competitive with the best models, Chinchilla-70B and PaLM-540B. We release all our models to the research community.},
archiveprefix = {arxiv},
keywords = {Computer Science - Computation and Language},
file = {/home/lisa/Zotero/storage/FTG9BFKS/Touvron et al. - 2023 - LLaMA Open and Efficient Foundation Language Mode.pdf;/home/lisa/Zotero/storage/YRIMIM2J/2302.html}
}
@article{tutubalina_russian_2021,
title = {The {{Russian Drug Reaction Corpus}} and Neural Models for Drug Reactions and Effectiveness Detection in User Reviews},
author = {Tutubalina, Elena and Alimova, Ilseyar and Miftahutdinov, Zulfat and Sakhovskiy, Andrey and Malykh, Valentin and Nikolenko, Sergey},
year = {2021},
month = apr,
journal = {Bioinformatics (Oxford, England)},
volume = {37},
number = {2},
pages = {243--249},
issn = {1367-4811},
doi = {10.1093/bioinformatics/btaa675},
abstract = {MOTIVATION: Drugs and diseases play a central role in many areas of biomedical research and healthcare. Aggregating knowledge about these entities across a broader range of domains and languages is critical for information extraction (IE) applications. To facilitate text mining methods for analysis and comparison of patient's health conditions and adverse drug reactions reported on the Internet with traditional sources such as drug labels, we present a new corpus of Russian language health reviews. RESULTS: The Russian Drug Reaction Corpus (RuDReC) is a new partially annotated corpus of consumer reviews in Russian about pharmaceutical products for the detection of health-related named entities and the effectiveness of pharmaceutical products. The corpus itself consists of two parts, the raw one and the labeled one. The raw part includes 1.4 million health-related user-generated texts collected from various Internet sources, including social media. The labeled part contains 500 consumer reviews about drug therapy with drug- and disease-related information. Labels for sentences include health-related issues or their absence. The sentences with one are additionally labeled at the expression level for identification of fine-grained subtypes such as drug classes and drug forms, drug indications and drug reactions. Further, we present a baseline model for named entity recognition (NER) and multilabel sentence classification tasks on this corpus. The macro F1 score of 74.85\% in the NER task was achieved by our RuDR-BERT model. For the sentence classification task, our model achieves the macro F1 score of 68.82\% gaining 7.47\% over the score of BERT model trained on Russian data. AVAILABILITY AND IMPLEMENTATION: We make the RuDReC corpus and pretrained weights of domain-specific BERT models freely available at https://github.com/cimm-kzn/RuDReC. SUPPLEMENTARY INFORMATION: Supplementary data are available at Bioinformatics online.},
langid = {english},
pmid = {32722774},
keywords = {Data Mining,Drug-Related Side Effects and Adverse Reactions,Humans,Language,Pharmaceutical Preparations,Russia},
file = {/home/lisa/Zotero/storage/GTHHJCBI/Tutubalina et al. - 2021 - The Russian Drug Reaction Corpus and neural models.pdf}
}
@inproceedings{vaswani_attention_2017-1,
ids = {vaswani_attention_nodate},
title = {Attention Is All You Need},
author = {Vaswani, Ashish and Shazeer, Noam and Parmar, Niki and Uszkoreit, Jakob and Jones, Llion and Gomez, Aidan N. and Kaiser, {\L}ukasz and Polosukhin, Illia},
year = {2017},
booktitle = {Advances in Neural Information Processing Systems 30 ({NIPS} 2017)},
pages = {5998--6008},
abstract = {The dominant sequence transduction models are based on complex recurrent or convolutional neural networks that include an encoder and a decoder. The best performing models also connect the encoder and decoder through an attention mechanism. We propose a new simple network architecture, the Transformer, based solely on attention mechanisms, dispensing with recurrence and convolutions entirely. Experiments on two machine translation tasks show these models to be superior in quality while being more parallelizable and requiring significantly less time to train. Our model achieves 28.4 BLEU on the WMT 2014 Englishto-German translation task, improving over the existing best results, including ensembles, by over 2 BLEU. On the WMT 2014 English-to-French translation task, our model establishes a new single-model state-of-the-art BLEU score of 41.0 after training for 3.5 days on eight GPUs, a small fraction of the training costs of the best models from the literature.},
langid = {english},
file = {/home/lisa/Zotero/storage/7UUQSVRK/Vaswani et al. - Attention is All you Need.pdf}
}
@inproceedings{weissenbacher_overview_2022,
title = {Overview of the {{Seventh Social Media Mining}} for {{Health Applications}} (\#{{SMM4H}}) {{Shared Tasks}} at {{COLING}} 2022},
booktitle = {Proceedings of {{The Seventh Workshop}} on {{Social Media Mining}} for {{Health Applications}}, {{Workshop}} \& {{Shared Task}}},
author = {Weissenbacher, Davy and Banda, Juan and Davydova, Vera and Estrada Zavala, Darryl and Gasco S{\'a}nchez, Luis and Ge, Yao and Guo, Yuting and Klein, Ari and Krallinger, Martin and Leddin, Mathias and Magge, Arjun and {Rodriguez-Esteban}, Raul and Sarker, Abeed and Schmidt, Lucia and Tutubalina, Elena and {Gonzalez-Hernandez}, Graciela},
year = {2022},
month = oct,
pages = {221--241},
publisher = {{Association for Computational Linguistics}},
address = {{Gyeongju, Republic of Korea}},
urldate = {2023-10-12},
abstract = {For the past seven years, the Social Media Mining for Health Applications (\#SMM4H) shared tasks have promoted the community-driven development and evaluation of advanced natural language processing systems to detect, extract, and normalize health-related information in public, user-generated content. This seventh iteration consists of ten tasks that include English and Spanish posts on Twitter, Reddit, and WebMD. Interest in the \#SMM4H shared tasks continues to grow, with 117 teams that registered and 54 teams that participated in at least one task\textemdash a 17.5\% and 35\% increase in registration and participation, respectively, over the last iteration. This paper provides an overview of the tasks and participants' systems. The data sets remain available upon request, and new systems can be evaluated through the post-evaluation phase on CodaLab.},
file = {/home/lisa/Zotero/storage/K3ZU7YRH/Weissenbacher et al. - 2022 - Overview of the Seventh Social Media Mining for He.pdf}
}
@inproceedings{wenzek_ccnet_2020,
title = {{{CCNet}}: {{Extracting High Quality Monolingual Datasets}} from {{Web Crawl Data}}},
shorttitle = {{{CCNet}}},
booktitle = {Proceedings of the {{Twelfth Language Resources}} and {{Evaluation Conference}}},
author = {Wenzek, Guillaume and Lachaux, Marie-Anne and Conneau, Alexis and Chaudhary, Vishrav and Guzm{\'a}n, Francisco and Joulin, Armand and Grave, Edouard},
year = {2020},
month = may,
pages = {4003--4012},
publisher = {{European Language Resources Association}},
address = {{Marseille, France}},
urldate = {2023-08-07},
abstract = {Pre-training text representations have led to significant improvements in many areas of natural language processing. The quality of these models benefits greatly from the size of the pretraining corpora as long as its quality is preserved. In this paper, we describe an automatic pipeline to extract massive high-quality monolingual datasets from Common Crawl for a variety of languages. Our pipeline follows the data processing introduced in fastText (Mikolov et al., 2017; Grave et al., 2018), that deduplicates documents and identifies their language. We augment this pipeline with a filtering step to select documents that are close to high quality corpora like Wikipedia.},
isbn = {979-10-95546-34-4},
langid = {english},
file = {/home/lisa/Zotero/storage/S39SMF7R/Wenzek et al. - 2020 - CCNet Extracting High Quality Monolingual Dataset.pdf}
}
@article{white_early_2016,
title = {Early Identification of Adverse Drug Reactions from Search Log Data},
author = {White, Ryen W. and Wang, Sheng and Pant, Apurv and Harpaz, Rave and Shukla, Pushpraj and Sun, Walter and DuMouchel, William and Horvitz, Eric},
year = {2016},
month = feb,
journal = {Journal of Biomedical Informatics},
volume = {59},
pages = {42--48},
issn = {1532-0464},
doi = {10.1016/j.jbi.2015.11.005},
urldate = {2023-10-10},
abstract = {The timely and accurate identification of adverse drug reactions (ADRs) following drug approval is a persistent and serious public health challenge. Aggregated data drawn from anonymized logs of Web searchers has been shown to be a useful source of evidence for detecting ADRs. However, prior studies have been based on the analysis of established ADRs, the existence of which may already be known publically. Awareness of these ADRs can inject existing knowledge about the known ADRs into online content and online behavior, and thus raise questions about the ability of the behavioral log-based methods to detect new ADRs. In contrast to previous studies, we investigate the use of search logs for the early detection of known ADRs. We use a large set of recently labeled ADRs and negative controls to evaluate the ability of search logs to accurately detect ADRs in advance of their publication. We leverage the Internet Archive to estimate when evidence of an ADR first appeared in the public domain and adjust the index date in a backdated analysis. Our results demonstrate how search logs can be used to detect new ADRs, the central challenge in pharmacovigilance.},
keywords = {Adverse drug reactions,Pharmacovigilance,Search log analysis},
file = {/home/lisa/Zotero/storage/Y4I2FUAT/White et al. - 2016 - Early identification of adverse drug reactions fro.pdf;/home/lisa/Zotero/storage/6FFMC4HL/S1532046415002427.html}
}
@article{yang_identification_2013,
title = {Identification of {{Consumer Adverse Drug Reaction Messages}} on {{Social Media}}},
author = {Yang, Ming and Wang, Xiaodi and Kiang, Melody},
year = {2013},
month = jun,
journal = {PACIS 2013 Proceedings},
file = {/home/lisa/Zotero/storage/VEJHBQTY/193.html}
}
@inproceedings{yang_social_2012,
title = {Social Media Mining for Drug Safety Signal Detection},
booktitle = {Proceedings of the 2012 International Workshop on {{Smart}} Health and Wellbeing},
author = {Yang, Christopher C. and Yang, Haodong and Jiang, Ling and Zhang, Mi},
year = {2012},
month = oct,
series = {{{SHB}} '12},
pages = {33--40},
publisher = {{Association for Computing Machinery}},
address = {{New York, NY, USA}},
doi = {10.1145/2389707.2389714},
urldate = {2023-06-05},
abstract = {Adverse Drug Reactions (ADRs) represent a serious problem all over the world. They may complicate a patient's medical conditions and increase the morbidity, even mortality. Drug safety currently depends heavily on post-marketing surveillance, because pre-marketing review process cannot identify all possible adverse drug reactions in that it is limited by scale and time span. However, current post-marketing surveillance is conducted through centralized volunteering reporting systems, and the reporting rate is low. Consequently, it is difficult to detect the adverse drug reactions signals in a timely manner. To solve this problem, many researchers have explored methods to detect ADRs in electronic health records. Nevertheless, we only have access to electronic health records form particular health units. Aggregating and integrating electronic health records from multiple sources is rather challenging. With the advance of Web 2.0 technologies and the popularity of social media, many health consumers are discussing and exchanging health-related information with their peers. Many of this online discussion involve adverse drug reactions. In this work, we propose to use association mining and Proportional Reporting Ratios to mine the associations between drugs and adverse reactions from the user contributed content in social media. We have conducted an experiment using ten drugs and five adverse drug reactions. The FDA alerts are used as the gold standard to test the performance of the proposed techniques. The result shows that the metrics leverage, lift, and PRR are all promising to detect the adverse drug reactions reported by FDA. However, PRR outperformed the other two metrics.},
isbn = {978-1-4503-1712-2},
keywords = {adverse drug reaction,association mining,online health community,proportional reporting ratios},
file = {/home/lisa/Zotero/storage/A7M2HZPN/Yang et al. - 2012 - Social media mining for drug safety signal detecti.pdf}
}
@inproceedings{yarmohammadi_everything_2021,
title = {Everything {{Is All It Takes}}: {{A Multipronged Strategy}} for {{Zero-Shot Cross-Lingual Information Extraction}}},
shorttitle = {Everything {{Is All It Takes}}},
booktitle = {Proceedings of the 2021 {{Conference}} on {{Empirical Methods}} in {{Natural Language Processing}}},
author = {Yarmohammadi, Mahsa and Wu, Shijie and Marone, Marc and Xu, Haoran and Ebner, Seth and Qin, Guanghui and Chen, Yunmo and Guo, Jialiang and Harman, Craig and Murray, Kenton and White, Aaron Steven and Dredze, Mark and Van Durme, Benjamin},
year = {2021},
month = nov,
pages = {1950--1967},
publisher = {{Association for Computational Linguistics}},
address = {{Online and Punta Cana, Dominican Republic}},
doi = {10.18653/v1/2021.emnlp-main.149},
urldate = {2023-07-14},
abstract = {Zero-shot cross-lingual information extraction (IE) describes the construction of an IE model for some target language, given existing annotations exclusively in some other language, typically English. While the advance of pretrained multilingual encoders suggests an easy optimism of ``train on English, run on any language'', we find through a thorough exploration and extension of techniques that a combination of approaches, both new and old, leads to better performance than any one cross-lingual strategy in particular. We explore techniques including data projection and self-training, and how different pretrained encoders impact them. We use English-to-Arabic IE as our initial example, demonstrating strong performance in this setting for event extraction, named entity recognition, part-of-speech tagging, and dependency parsing. We then apply data projection and self-training to three tasks across eight target languages. Because no single set of techniques performs the best across all tasks, we encourage practitioners to explore various configurations of the techniques described in this work when seeking to improve on zero-shot training.},
file = {/home/lisa/Zotero/storage/9PA8YLN9/Yarmohammadi et al. - 2021 - Everything Is All It Takes A Multipronged Strateg.pdf}
}
@article{zhang_automatic_2019,
title = {Automatic Discovery of Adverse Reactions through {{Chinese}} Social Media},
author = {Zhang, Mengxue and Zhang, Meizhuo and Ge, Chen and Liu, Quanyang and Wang, Jiemin and Wei, Jia and Zhu, Kenny Q.},
year = {2019},
month = jul,
journal = {Data Mining and Knowledge Discovery},
volume = {33},
number = {4},
pages = {848--870},
issn = {1573-756X},
doi = {10.1007/s10618-018-00610-2},
urldate = {2023-10-10},
abstract = {Despite tremendous efforts made before the release of every drug, some adverse drug reactions (ADRs) may go undetected and thus, cause harm to both the users and to the pharmaceutical companies. One plausible venue to collect evidence of such ADRs is online social media, where patients and doctors discuss medical conditions and their treatments. There is substantial previous research on ADRs extraction from English online forums. However, very limited research was done on Chinese data. In this paper, we try to use the posts from two popular Chinese social media as the original dataset. We propose a semi-supervised learning framework that detects mentions of medications and colloquial ADR terms and extracts lexicon-syntactic features from natural language text to recognize positive associations between drug use and ADRs. The key contribution is an automatic label generation algorithm, which requires very little manual annotation. This bootstrapping algorithm could also be further applied on English data. The research results indicate that our algorithm outperforms the hidden Markov model and conditional random fields. With this approach, we discovered a large number of side effects for a variety of popular medicines in real world scenarios.},
langid = {english},
keywords = {Adverse drug reaction,Chinese social media,Natural language processing},
file = {/home/lisa/Zotero/storage/WEXIIZVD/Zhang et al. - 2019 - Automatic discovery of adverse reactions through C.pdf}
}
@inproceedings{zhang_ernie_2019,
title = {{{ERNIE}}: {{Enhanced Language Representation}} with {{Informative Entities}}},
shorttitle = {{{ERNIE}}},
booktitle = {Proceedings of the 57th {{Annual Meeting}} of the {{Association}} for {{Computational Linguistics}}},
author = {Zhang, Zhengyan and Han, Xu and Liu, Zhiyuan and Jiang, Xin and Sun, Maosong and Liu, Qun},
year = {2019},
month = jul,
pages = {1441--1451},
publisher = {{Association for Computational Linguistics}},
address = {{Florence, Italy}},
doi = {10.18653/v1/P19-1139},
urldate = {2023-10-10},
abstract = {Neural language representation models such as BERT pre-trained on large-scale corpora can well capture rich semantic patterns from plain text, and be fine-tuned to consistently improve the performance of various NLP tasks. However, the existing pre-trained language models rarely consider incorporating knowledge graphs (KGs), which can provide rich structured knowledge facts for better language understanding. We argue that informative entities in KGs can enhance language representation with external knowledge. In this paper, we utilize both large-scale textual corpora and KGs to train an enhanced language representation model (ERNIE), which can take full advantage of lexical, syntactic, and knowledge information simultaneously. The experimental results have demonstrated that ERNIE achieves significant improvements on various knowledge-driven tasks, and meanwhile is comparable with the state-of-the-art model BERT on other common NLP tasks. The code and datasets will be available in the future.},
file = {/home/lisa/Zotero/storage/K4AQXX2U/Zhang et al. - 2019 - ERNIE Enhanced Language Representation with Infor.pdf}
}
@article{zhou_document-level_2021,
title = {Document-{{Level Relation Extraction}} with {{Adaptive Thresholding}} and {{Localized Context Pooling}}},
author = {Zhou, Wenxuan and Huang, Kevin and Ma, Tengyu and Huang, Jing},
year = {2021},
month = may,
journal = {Proceedings of the AAAI Conference on Artificial Intelligence},
volume = {35},
number = {16},
pages = {14612--14620},
issn = {2374-3468},
doi = {10.1609/aaai.v35i16.17717},
urldate = {2023-10-10},
abstract = {Document-level relation extraction (RE) poses new challenges compared to its sentence-level counterpart. One document commonly contains multiple entity pairs, and one entity pair occurs multiple times in the document associated with multiple possible relations. In this paper, we propose two novel techniques, adaptive thresholding and localized context pooling, to solve the multi-label and multi-entity problems. The adaptive thresholding replaces the global threshold for multi-label classification in the prior work with a learnable entities-dependent threshold. The localized context pooling directly transfers attention from pre-trained language models to locate relevant context that is useful to decide the relation. We experiment on three document-level RE benchmark datasets: DocRED, a recently released large-scale RE dataset, and two datasets CDRand GDA in the biomedical domain. Our ATLOP (Adaptive Thresholding and Localized cOntext Pooling) model achieves an F1 score of 63.4, and also significantly outperforms existing models on both CDR and GDA. We have released our code at https://github.com/wzhouad/ATLOP.},
copyright = {Copyright (c) 2021 Association for the Advancement of Artificial Intelligence},
langid = {english},
keywords = {Information Extraction},
file = {/home/lisa/Zotero/storage/LCV7393L/Zhou et al. - 2021 - Document-Level Relation Extraction with Adaptive T.pdf}
}
@inproceedings{zolnoori_development_2017,
title = {Development of an {{Adverse Drug Reaction Corpus}} from {{Consumer Health Posts}}},
booktitle = {{{SMM4H}}@{{AMIA}}},
author = {Zolnoori, Maryam and Patrick, Timothy and Fung, Kin and Fontelo, Paul and Faiola, Anthony and Wu, Shirley and Xu, Kelly and Zhu, Jiaxi and Eldredge, Christina},
year = {2017},
address = {{Washington, DC, USA}},
urldate = {2023-06-05},
file = {/home/lisa/Zotero/storage/WHQEXMZG/DevelopmentofanAdverseDrugReactionCorpusfromConsumerHealthPosts.html}
}
@article{zolnoori_systematic_2019,
title = {A Systematic Approach for Developing a Corpus of Patient Reported Adverse Drug Events: {{A}} Case Study for {{SSRI}} and {{SNRI}} Medications},
shorttitle = {A Systematic Approach for Developing a Corpus of Patient Reported Adverse Drug Events},
author = {Zolnoori, Maryam and Fung, Kin Wah and Patrick, Timothy B. and Fontelo, Paul and Kharrazi, Hadi and Faiola, Anthony and Wu, Yi Shuan Shirley and Eldredge, Christina E. and Luo, Jake and Conway, Mike and Zhu, Jiaxi and Park, Soo Kyung and Xu, Kelly and Moayyed, Hamideh and Goudarzvand, Somaieh},
year = {2019},
month = feb,
journal = {Journal of Biomedical Informatics},
volume = {90},