# Research Question 4: What can topic modeling on FY18 data tell us about grants data and opioids?

## RQ 4.1: What latent topics related to opioids exist in 10 years of project abstract text?

In [198]:
# adapted from Julia Lane course and https://stackabuse.com/python-for-nlp-topic-modeling/

import pandas as pd
import numpy as np
import nltk

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.decomposition import LatentDirichletAllocation
from nltk.corpus import stopwords

nltk.download('stopwords') #download the latest stopwords

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/bryant/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [199]:
file = 'opioidRQ3_constructedDataset.csv'
df = (pd.read_csv(file,skipinitialspace=True,encoding='utf-8'))

In [200]:
# subset to only 2018
modeling_df = df[df.FY == 2018]

In [201]:
# prepare stopwords
eng_stopwords = stopwords.words('english')

# we can add our own stopwords here, but max_df should handle it for us...
domain_stopwords = ['experiments','exploration','exploratory','explore','experiment','findings','financial',
                   'experimental','finally','far','five','find','extent']

modified_stopwords = eng_stopwords + domain_stopwords

In [202]:
'''
Before we can apply LDA, we need to create vocabulary of all the words in our data
We specify to only include those words that appear in less than 10% of the document 
and appear in at least 10 documents. 
docs: https://scikit-learn.org/stable/modules/generated/sklearn.feature_extraction.text.CountVectorizer.html
'''

count_vect = CountVectorizer(
    max_df=0.05, 
    min_df=10, 
    ngram_range = (0,2),
    max_features = 500,
    stop_words=modified_stopwords)

doc_term_matrix = count_vect.fit_transform(modeling_df['ABSTRACT'].values.astype('U'))

#doc_term_matrix # second number is total vocabulary

# Using below code, we can print out the many, many words excluded due to:
# - occurred in too many documents (max_df)
# - occurred in too few documents (min_df)
# - were cut off by feature selection (max_features)

# print(count_vect.stop_words_)

In [203]:
'''
Use LDA to create topics. n_components is the number of topics. Setting the seed
makes the result reproduceable.
'''

LDA = LatentDirichletAllocation(n_components=100, random_state=1)  
LDA.fit(doc_term_matrix) 

LatentDirichletAllocation(batch_size=128, doc_topic_prior=None,
             evaluate_every=-1, learning_decay=0.7,
             learning_method='batch', learning_offset=10.0,
             max_doc_update_iter=100, max_iter=10, mean_change_tol=0.001,
             n_components=100, n_jobs=None, n_topics=None, perp_tol=0.1,
             random_state=1, topic_word_prior=None,
             total_samples=1000000.0, verbose=0)

In [204]:
'''
by design, the first topic consists of all words in the vocabulary, along with
probability values. Here we print the 15 words with highest value in each topic.

'''
# initialize list for topics
topicList = []

for i,topic in enumerate(LDA.components_):  
    ithTopic = [count_vect.get_feature_names()[i] for i in topic.argsort()[-15:]]
#     print(f'Top 15 words for topic #{i}:')
#     print(ithTopic)
#     print('\n')
    topicList.append(ithTopic)
    
pd.DataFrame(topicList)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
0,link,syndrome,whole,physiological,lower,incidence,onset,prevalence,action,impaired,sensitivity,obesity,glucose,insulin,diabetes
1,determine whether,contributes,induce,plays,showed,promoting,molecular mechanisms,phenotype,regulating,expressed,promotes,regulates,macrophages,activated,pro
2,promising,way,require,able,requires,towards,action,optimize,effort,centers,must,step,stability,efficiency,optimal
3,record,sensitive,issues,per,controls,effort,therapeutics,track,must,degree,industry,compounds,molecule,assay,chemistry
4,rapidly,apply,therapeutics,lines,culture,validated,whole,molecule,validate,characterization,assay,screening,radiation,high throughput,throughput
5,biochemical,validated,promising,culture,series,adverse,caused,characterization,animal models,therapeutics,action,class,toxicity,compounds,pd
6,contributes,central hypothesis,caused,occurs,poor,animal models,united states,clinically,physiological,adverse,morbidity,fibrosis,failure,cardiac,heart
7,analyze,100,spectrum,performed,characterization,cases,controls,lines,genetics,sequence,phenotype,phenotypes,whole,variation,variants
8,treated,poorly,burden,epithelial,challenge,lower,poor,cancers,biomarker,resistant,lung,morbidity,severe,asthma,prostate
9,educational,health care,participation,research training,diversity,mentoring,reducing,engagement,communities,underrepresented,research projects,outreach,experiences,minority,disparities


In [228]:
'''
Assign the probability of all the topics to each document, then
add a column to the original data frame that will store the highest-scoring
topic for that abstract.
'''
topic_values = LDA.transform(doc_term_matrix)  
modeling_df['primeTopicId'] = topic_values.argmax(axis=1)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  import sys


In [229]:
# which topics are most common among projects tagged explicitly?
pd.DataFrame(modeling_df[modeling_df.opioid_num == 1].primeTopicId.value_counts())[:10]

Unnamed: 0,primeTopicId
19,226
93,116
38,74
90,71
45,52
66,45
53,42
50,36
27,36
1,31


In [207]:
# topic 19 is the most common, and its top 15 words are...
pd.DataFrame(topicList).iloc[19]

0           adverse
1          reported
2            burden
3          reducing
4     united states
5               100
6        depression
7        clinically
8           affects
9            severe
10         symptoms
11       peripheral
12      sensitivity
13          sensory
14             pain
Name: 19, dtype: object

In [230]:
# let's look at one of the abstracts tagged by explicit and wiki, falling into topic 19
modeling_df.ABSTRACT[(modeling_df.primeTopicId == 19) & (modeling_df.opioid_num == 1)].iloc[0]

# this makes sense -- any human reader would say this is relevant to opioids

'Pain management is critical throughout cancer care, from diagnosis through treatment and survivorship, and inadvanced illness and at the end of life. Improving cancer pain management is critical over the next decade,given (1) more number of cancer survivors with long-term consequences of cancer and its treatment, (2) longersurvival with advanced cancer, with more time with and complexity of pain, and (3) growing evidence thatchronic opioids are often ineffective and have many adverse effects and risks. Cancer pain is oftenchallenging; it interacts with many issues, including psychological and social concerns and other symptoms,especially fatigue and sleep disturbances. Severe cancer pain is still frequent in the United States; the qualityof pain management is often suboptimal; key effective nonpharmacologic strategies, such as rehabilitation, andpsychosocial approaches, such as patient education and support, are underused; and disparities inmanagement persist. A key solution, using a 

In [231]:
'''
But note that topic #19 is the prime topic for 599 projects, 
only 226 of which were tagged by researchers.

In other words, the topic might point to projects associated with
opoids that were not detected by the explicit or wiki methods.
'''

# how many projects share topic #19 as their prime topic? A: 599
print(modeling_df[modeling_df.primeTopicId == 19].shape)


(599, 55)


In [232]:
'''
Let's look at an abstract in topic 19 NOT tagged by researchers as opioid-related
'''

print(modeling_df.ABSTRACT[(modeling_df.primeTopicId == 19) & (modeling_df.opioid_num != 1)].iloc[0])


Project SummaryBackground: The gate theory of pain predicts that inhibitory interneurons in the dorsal horn act as “gatecontrol” elements to mediate interaction between innocuous and noxious stimuli. It has been postulated thatrelease of this inhibitory gate allows innocuous mechanical stimuli to access the nociceptive pathway resultingin tactile allodynia following nerve injury. Increasing evidence demonstrates an excitatory pathway involvingPKCγ-expressing interneurons that is held silent under nonpathological conditions via a strong feedforwardinhibitory gate by local interneurons. Failure of the inhibitory control on PKCγ-expressing interneurons isbelieved to underly tactile alloynia. In the neuropathic pain state failure of the inhibitory gate is due to a BDNF-dependent reduction in inhibitory tone mediated by a change in chloride homeostasis resulting from alteredexpression of chloride cotransporters. Current data demonstrates that the precursor protein VGF (non-acronymic) is upr

In [223]:
'''
Note that this project was:
- not flagged in the project-terms field as related to opioids
- does not contain 'opioid' in the abstract text

but it WAS captured by topic #19, AND, upon a human reading, DOES have something to do with opioids...
it concerns 'the gate theory of pain', 'nociceptive pathway', 'nerve injury', 'neuropathic pain',
    and other terms a medical professional would consider related to opioids

This is helpful in 1) capturing more projects to do with opioids and 2) adding terms to our 'wiki-tagging' approach
'''
print(modeling_df[modeling_df.primeTopicId == 19].shape)
print(modeling_df[modeling_df.primeTopicId == 93].shape)
print(modeling_df[modeling_df.primeTopicId == 38].shape)
print(modeling_df[modeling_df.primeTopicId == 90].shape)
print(modeling_df[modeling_df.primeTopicId == 45].shape)
print(modeling_df[modeling_df.primeTopicId == 66].shape)
print(modeling_df[modeling_df.primeTopicId == 53].shape)
print(modeling_df[modeling_df.primeTopicId == 50].shape)
print(modeling_df[modeling_df.primeTopicId == 27].shape)
print(modeling_df[modeling_df.primeTopicId == 1].shape)



(599, 54)
(2065, 54)
(1154, 54)
(1504, 54)
(1144, 54)
(649, 54)
(1534, 54)
(1305, 54)
(1424, 54)
(2807, 54)


In [217]:
'''
If we were to take the projects associated with topic #19 and nine other most opioid-centric topics,
we would have 14,185 projects to look into, rather than the union of wiki/explicit tags (2,715 projects for this FY)
'''

'\nIf we were to take the projects associated with topic #19 and nine other most opioid-centric topics,\nwe would have ??? projects to look into, rather than just the x (explicit), y (wiki), or z (either)\n'

In [233]:
# store the words associated with each topic in our evolving dataframe
modeling_df['primeTopicWords'] = ' '
for x in range(0, modeling_df.shape[0]):
    modeling_df.iat[x,modeling_df.columns.get_loc("primeTopicWords")] = topicList[modeling_df.iat[x,modeling_df.columns.get_loc("primeTopicId")]]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [216]:
# save in csv
modeling_df.to_csv('opioidTopics_FY18.csv')

In [234]:
with pd.option_context('display.max_colwidth', -1): display(modeling_df[7:9][['ABSTRACT','primeTopicId','primeTopicWords']])

Unnamed: 0,ABSTRACT,primeTopicId,primeTopicWords
836577,"PROJECT SUMMARY/ABSTRACTVascular hyperpermeability is a hallmark of inflammation Current therapy interferes withmechanisms involved in onset of hyperpermeability. We and other investigators are focusing onmechanisms that terminate hyperpermeability. Because many negative effects of hyper-permeability are due to its persistence beyond what is required for preserving organ function,we propose to elucidate mechanisms that terminate hyperpermeability and thereby restoremicrovascular barrier properties, while retaining its beneficial effects. Based on a) protein trafficmechanisms, b) our demonstration that eNOS translocation to cytosol is necessary for onset ofhyperpermeability, c) the demonstration that VASP (vasodilator stimulated phosphoprotein) isfundamental for cell adhesion and endothelial barrier properties, and d) our preliminary data thatselective stimulation of Epac (exchange protein activated by cAMP) returns eNOS to the cellmembrane, we will test the central hypothesis that the signaling that leads to endothelialhyperpermeability initiates a delayed increase in [cAMP], which causes VASP-mediatedtranslocation of eNOS and Epac to the cell membrane leading to inactivation ofhyperpermeability. We will test this hypothesis through 4 Specific Aims; in each Aimmeasurement of permeability will be an end-point in vivo or in vitro: SA1. To determine whetherduring hyperpermeability the activity of cytoplasmic and not plasma membrane eNOS causesthe increase in cAMP required for inactivation of hyperpermeability. We will assess inactivationof hyperpermeability and measure [cAMP] as a function of [NO] in cells with eNOS expressedeither in the cytosol or in the cell membrane, and in primary endothelial cells (EC). SA2. Todetermine whether a G-protein-coupled receptor (GPCR) mediates the increase in [cAMP]. Thisaim will study a plausible alternative mechanism to cytosolic [NO] as the cause for the increasein [cAMP]. SA3. To determine the role of VASP phosphorylation in contributing to eNOS andEpac translocation to cell membrane. In wild type and in VASP-KO endothelial cells and mice,we will test the cause-effect relation between VASP phosphorylation and inactivation ofhyperpermeability. We will stimulate eNOS translocation with 8-cPT-2-O-Me-cAMP, a selectiveactivator of Epac. SA4. To determine the signaling interactions between VASP phosphorylationand VASP S-nitrosation in the regulation of hyperpermeability. S-nitrosation is increasinglyrecognized as an important posttranslational regulatory modification in EC. Whether S-nitrosation promotes inactivation is unknown. The results of this conceptually innovativeproposal will influence current paradigms in microvascular permeability and will provide thebasis for developing new therapies for treating vascular inflammation.",81,"[induce, protective, promotes, plays, severe, determine whether, regulating, animal models, contributes, molecular mechanisms, central hypothesis, impaired, barrier, endothelial, vascular]"
836578,"ABSTRACT Atrial fibrillation (AF) is a common arrhythmia that can have devastating consequences, includingstroke and accelerated decline in cognitive function. Because AF is often asymptomatic and escapes clinicaldetection (subclinical AF), conventional methods for identifying AF have underestimated the populationburden of AF. African Americans have a lower risk of clinically-recognized AF than whites, yet they suffer morestroke and cognitive impairment. A high prevalence of subclinical AF and of another important arrhythmia,supraventricular ectopy (SVE), may help to explain this paradox. In addition, several emerging risk factorsfor AF and SVE, including long-term changes in blood pressure, hypokalemia, the use of certain medications,and psychosocial factors, are modifiable and represent potential targets for AF prevention. The use of sensitiveand unbiased methods to detect AF and SVE, and the discovery of novel modifiable risk factors for thesearrhythmias, may offer opportunities for improved cardiovascular disease (CVD) prevention among AfricanAmericans. Our project will use a non-invasive electrocardiographic (ECG) monitoring device that is both well-tolerated and sensitive for detecting subclinical arrhythmia to measure subclinical AF, AF type (paroxysmal vs.persistent), and SVE frequency. In this ancillary study, we propose to conduct 14-day continuous ECGmonitoring on 2,000 participants who return for Field Center Exam 4 of the Jackson Heart Study (JHS), a largeprospective cohort study of CVD risk factors in African Americans. We will also use data from anticipatedcontract funded components of the Field Center Exam, including cognitive assessments and brain magneticresonance imaging (MRI). Our goals are to assess the population burden of subclinical AF and SVE amongAfrican Americans, and to identify correlates of subclinical AF and SVE that may be modifiable risk factors orconsequences of these arrhythmias. There are four aims. Aim 1: Estimate the age- and sex-specificprevalence of subclinical AF and AF type (paroxysmal vs. persistent) among African Americans in the JHS,and compare with the prevalence of subclinical AF and AF type among white participants undergoing 14-daycontinuous ECG monitoring in the Multi-Ethnic Study of Atherosclerosis and the Atherosclerosis Risk inCommunities Study. Aim 2: Using data from JHS Exams 1-4, (a) evaluate whether traditional AF risk factorsare associated with subclinical AF and SVE, and (b) identify novel, modifiable risk factors for these arrhythmias.Aim 3: Evaluate the utility of a clinical AF risk score, NT-proBNP levels, and risk factors identified from Aim 2for the prediction of subclinical AF. Aim 4: In cross-sectional analyses, determine whether subclinical AF andSVE are associated with worse cognitive function and with brain MRI abnormalities. This project will generatenew knowledge about subclinical AF and SVE that will be relevant to patients, and that will inform preventionand treatment strategies to reduce the burden of AF and AF-related complications among African Americans.",41,"[link, measured, vascular, consequences, clinically, lower, united states, heart, burden, physiological, adverse, morbidity, risk factors, pressure, cardiovascular]"


In [227]:
modeling_df.tagCompare.value_counts()

neither         89398
wikiOnly         1392
both              956
explicitOnly      367
Name: tagCompare, dtype: int64

In [None]:
pd.DataFrame(modeling_df[(modeling_df.primeTopicId == 19) | 
                         (modeling_df.primeTopicId == 93) |
                         (modeling_df.primeTopicId == 38) |
                         (modeling_df.primeTopicId == 90) |
                         (modeling_df.primeTopicId == 45) |
                         (modeling_df.primeTopicId == 66) |
                         (modeling_df.primeTopicId == 53) |
                         (modeling_df.primeTopicId == 50) |
                         (modeling_df.primeTopicId == 27) |
                         (modeling_df.primeTopicId == 1)
                        ])

In [254]:
print('#19 is prime for 599 abstracts,226 of which have explicit tag' + '\n' + str(topicList[19]) + '\n')
print('#93, 2065 | 116' + '\n' + str(topicList[93]) + '\n')
print('#38, 1154 | 74' + '\n' + str(topicList[38]) + '\n')
print('#90, 1504 | 71' + '\n' + str(topicList[90]) + '\n')
print('#45, 1144 | 52' + '\n' + str(topicList[45]) + '\n')
print('#66, 649 | 45' + '\n' + str(topicList[66]) + '\n')
print('#53, 1534 | 42' + '\n' + str(topicList[53]) + '\n')
print('#50, 1305 | 36' + '\n' + str(topicList[50]) + '\n')
print('#27, 1424 | 36' + '\n' + str(topicList[27]) + '\n')
print('#1, 2807 | 31' + '\n' + str(topicList[1]) + '\n')

#19 is prime for 599 abstracts,226 of which have explicit tag
['adverse', 'reported', 'burden', 'reducing', 'united states', '100', 'depression', 'clinically', 'affects', 'severe', 'symptoms', 'peripheral', 'sensitivity', 'sensory', 'pain']

#93, 2065 | 116
['morbidity', 'setting', 'burden', 'infectious', 'treated', 'sex', 'prevalence', 'incidence', 'viral', 'sub', 'living', 'transmission', 'infections', 'infected', 'hiv']

#38, 1154 | 74
['severe', 'reported', 'reducing', 'symptoms', 'lower', 'compare', 'clinical trial', 'feasibility', 'follow', 'effectiveness', 'months', 'receive', 'secondary', 'veterans', 'randomized']

#90, 1504 | 71
['gap', 'reducing', 'feasibility', 'tool', 'benefits', 'burden', 'value', 'practice', 'healthcare', 'effectiveness', 'costs', 'health care', 'barriers', 'mental', 'decision']

#45, 1144 | 52
['suggests', 'consequences', 'hypotheses', 'selective', 'reveal', 'physiological', 'expressed', 'region', 'animals', 'determine whether', 'cortex', 'alterations', 

In [247]:
# which topics are most common among projects tagged explicitly?
pd.DataFrame(modeling_df[modeling_df.opioid_num == 1].primeTopicId.value_counts())[:10]

Unnamed: 0,primeTopicId
19,226
93,116
38,74
90,71
45,52
66,45
53,42
50,36
27,36
1,31


In [248]:
print(modeling_df[modeling_df.primeTopicId == 19].shape)
print(modeling_df[modeling_df.primeTopicId == 93].shape)
print(modeling_df[modeling_df.primeTopicId == 38].shape)
print(modeling_df[modeling_df.primeTopicId == 90].shape)
print(modeling_df[modeling_df.primeTopicId == 45].shape)
print(modeling_df[modeling_df.primeTopicId == 66].shape)
print(modeling_df[modeling_df.primeTopicId == 53].shape)
print(modeling_df[modeling_df.primeTopicId == 50].shape)
print(modeling_df[modeling_df.primeTopicId == 27].shape)
print(modeling_df[modeling_df.primeTopicId == 1].shape)


(599, 56)
(2065, 56)
(1154, 56)
(1504, 56)
(1144, 56)
(649, 56)
(1534, 56)
(1305, 56)
(1424, 56)
(2807, 56)


In [255]:
modeling_df[modeling_df.opioid_num == 1].shape

(1323, 56)

In [256]:
print(modeling_df.ABSTRACT[(modeling_df.primeTopicId == 19) & (modeling_df.opioid_num != 1)].iloc[0])

Project SummaryBackground: The gate theory of pain predicts that inhibitory interneurons in the dorsal horn act as “gatecontrol” elements to mediate interaction between innocuous and noxious stimuli. It has been postulated thatrelease of this inhibitory gate allows innocuous mechanical stimuli to access the nociceptive pathway resultingin tactile allodynia following nerve injury. Increasing evidence demonstrates an excitatory pathway involvingPKCγ-expressing interneurons that is held silent under nonpathological conditions via a strong feedforwardinhibitory gate by local interneurons. Failure of the inhibitory control on PKCγ-expressing interneurons isbelieved to underly tactile alloynia. In the neuropathic pain state failure of the inhibitory gate is due to a BDNF-dependent reduction in inhibitory tone mediated by a change in chloride homeostasis resulting from alteredexpression of chloride cotransporters. Current data demonstrates that the precursor protein VGF (non-acronymic) is upr

In [262]:
print(modeling_df.ABSTRACT[(modeling_df.primeTopicId == 19) & (modeling_df.opioid_num != 1) & (modeling_df.sumTermCounts < 5)].iloc[0])

Project SummaryBackground: The gate theory of pain predicts that inhibitory interneurons in the dorsal horn act as “gatecontrol” elements to mediate interaction between innocuous and noxious stimuli. It has been postulated thatrelease of this inhibitory gate allows innocuous mechanical stimuli to access the nociceptive pathway resultingin tactile allodynia following nerve injury. Increasing evidence demonstrates an excitatory pathway involvingPKCγ-expressing interneurons that is held silent under nonpathological conditions via a strong feedforwardinhibitory gate by local interneurons. Failure of the inhibitory control on PKCγ-expressing interneurons isbelieved to underly tactile alloynia. In the neuropathic pain state failure of the inhibitory gate is due to a BDNF-dependent reduction in inhibitory tone mediated by a change in chloride homeostasis resulting from alteredexpression of chloride cotransporters. Current data demonstrates that the precursor protein VGF (non-acronymic) is upr

In [259]:
modeling_df.head()

Unnamed: 0.1,Unnamed: 0,PROJECT_ID,PROJECT_TERMS,PROJECT_TITLE,DEPARTMENT,AGENCY,IC_CENTER,PROJECT_NUMBER,PROJECT_START_DATE,PROJECT_END_DATE,...,cocaine,codeine,pain,analgesics,sumTermCounts,tagCompare,strongestTopic,primeTopic,primeTopicId,primeTopicWords
836570,836570,983089,,IMPROVING DRUG SAFETY FOR PREGNANT WOMEN BY DE...,HHS,ALLCDC,NCBDD,2R44DD001127-02,9/30/2017,9/29/2019,...,0,0,0,0,0,neither,90,"[gap, reducing, feasibility, tool, benefits, b...",90,"[gap, reducing, feasibility, tool, benefits, b..."
836571,836571,1005000,Area; Biological Assay; Brain; Cell physiology...,LIGHT ADAPTATION AND CIRCADIAN MODULATION,HHS,NIH,NEI,5R01EY024567-06,9/1/2014,6/30/2019,...,0,0,0,0,0,neither,68,"[reveal, able, biochemical, nature, detailed, ...",68,"[reveal, able, biochemical, nature, detailed, ..."
836572,836572,1005001,3-Dimensional; Ablation; Affect; Aggressive be...,EVALUATION OF IRAK4 AS A NOVEL THERAPEUTIC TAR...,HHS,NIH,NCI,1R21CA223112-01,12/7/2017,11/30/2019,...,0,0,0,0,0,neither,71,"[culture, drive, mouse model, treated, therape...",71,"[culture, drive, mouse model, treated, therape..."
836573,836573,1005002,Acids; Acute; Address; Amish; analog; base; Bi...,NOVEL TREATMENT OPTIONS FOR GLUTARIC ACIDURIA,HHS,NIH,NICHD,5R21HD088775-02,9/1/2017,8/31/2019,...,0,0,0,0,0,neither,95,"[lines, activated, resistant, promising, compo...",95,"[lines, activated, resistant, promising, compo..."
836574,836574,1005003,4D Imaging; Address; Anaphase; Bacteria; Bacte...,ILLEGITIMATE RECOMBINATION BY DRUG RESISTANCE ...,HHS,NIH,NIGMS,5R01GM025326-38,7/1/1978,1/31/2020,...,0,0,0,0,0,neither,68,"[reveal, able, biochemical, nature, detailed, ...",68,"[reveal, able, biochemical, nature, detailed, ..."
