In [37]:
'''
For creating Word2Vec embedding-based text classification model

6/30/22 [Shiu] Saving major files that require substantial run time to help
        with reruns.
6/18/22 [Shiu] When getting bi and trigrams, min_count was hard coded to 5,
        instead of using the config file values. Rerun.
6/15/22 Created by Shin-Han Shiu.
'''

## for data
import argparse
import json
import pandas as pd
import numpy as np
import pickle
import sys
import itertools
from pathlib import Path

from sklearn import model_selection, metrics

## for word embedding with w2v
import gensim

## for deep learning
from tensorflow.keras import models, layers, callbacks, preprocessing

def read_configs(config_file):
  """Read a KEY=VALUE configuration file and return the required settings.

  Blank lines and lines whose first non-blank character is '#' are ignored.
  Every other line is split on the FIRST '=' only, so values may themselves
  contain '='. Whitespace around the key and the value is ignored. Keys that
  are not in the required set are skipped.

  Args:
    config_file (str or Path): path to the configuration file
  Return:
    config_dict (dict): required key -> evaluated value
  Raises:
    SystemExit: with status 1 if any required key is absent from the file
  """
  # Sentinel meaning "not seen in the file". Unlike the literal 0 it cannot
  # collide with a legitimate configured value such as rand_state=0.
  unset = object()

  # required
  required = ('lang_model', 'proj_dir', 'work_dir', 'corpus_combo_file',
              'rand_state', 'w2v_param')
  config_dict = {key: unset for key in required}

  # Read config file and fill in the dictionary
  with open(config_file, 'r') as f:
    for line in f:
      line = line.strip()
      if line == "" or line.startswith("#"):
        continue

      key, sep, value = line.partition("=")
      key = key.strip()
      if not sep or key not in config_dict:
        continue

      # SECURITY: eval() executes arbitrary Python. Only point this function
      # at configuration files you wrote yourself. Kept (rather than swapped
      # for a literal-only parser) so existing config files keep working.
      config_dict[key] = eval(value.strip())

  # Check if any config missing
  missing = 0
  for key, value in config_dict.items():
    if value is unset:
      print("  missing:", key)
      missing += 1
    else:
      print("  ", key, "=", value)

  if missing == 0:
    print("  all config available")
  else:
    print("  missing config, QUIT!")
    # Non-zero status: this is a failure, not a clean exit.
    sys.exit(1)

  return config_dict


def write_df_as_json(df, file_name):
  '''Write a dataframe to a json file in work_dir unless the file already exists
  Args:
    df (pandas dataframe): data to serialize
    file_name (str): output file name, resolved against the module-level
      work_dir
  Output:
    work_dir/file_name: the string returned by df.to_json(), itself written
      with json.dump (so the file holds a JSON-encoded string, which is what
      split_train_validate_test expects when reading the corpus back)
  '''
  target = work_dir / file_name

  # Existing output is kept as is; nothing is overwritten
  if target.is_file():
    return

  serialized = df.to_json()
  with target.open("w+") as handle:
    json.dump(serialized, handle)


def split_train_validate_test(corpus_combo_file, rand_state):
  '''Load data and split train, validation, test subsets for the cleaned texts

  The corpus is split 80/20 into train+valid/test, then the 80% is split
  75/25 into train/valid, i.e. 60/20/20 overall. Both splits are stratified
  on the 'label' column. The three subsets are also written to work_dir as
  corpus_train.json, corpus_valid.json and corpus_test.json (existing files
  are left untouched by write_df_as_json).

  Args:
    corpus_combo_file (Path): path to the json data file. The file must hold
      a JSON-encoded string that is itself the output of DataFrame.to_json().
    rand_state (int): for reproducibility
  Return:
    subsets (list): [X_train, X_valid, X_test, y_train, y_valid, y_test],
      where X_* are the 'txt_clean' series and y_* the 'label' series
    corpus_combo (pandas dataframe): the full, unsplit corpus
  '''
  # Local import: only needed to hand pandas a file-like object below
  from io import StringIO

  # Load json file. Read-only mode: the file is never modified here, so there
  # is no reason to require write permission on it.
  with corpus_combo_file.open("r") as f:
      corpus_combo_json = json.load(f)

  # Convert json back to dataframe. Wrapped in StringIO because passing a
  # literal JSON string to read_json is deprecated in newer pandas releases.
  corpus_combo = pd.read_json(StringIO(corpus_combo_json))

  # Cleaned corpus
  corpus = corpus_combo[['label','txt_clean']]

  # Split train test
  train, test = model_selection.train_test_split(corpus,
      test_size=0.2, stratify=corpus['label'], random_state=rand_state)

  # Split train validate
  train, valid = model_selection.train_test_split(train,
      test_size=0.25, stratify=train['label'], random_state=rand_state)

  # Output train, valid, and test sets as jsons
  print("  write train, valid, test data to json")
  write_df_as_json(train, "corpus_train.json")
  write_df_as_json(valid, "corpus_valid.json")
  write_df_as_json(test , "corpus_test.json")

  X_train = train['txt_clean']
  X_valid = valid['txt_clean']
  X_test  = test['txt_clean']
  y_train = train['label']
  y_valid = valid['label']
  y_test  = test['label']

  print(f"    size: train={X_train.shape}, valid={X_valid.shape}," +\
        f" test={X_test.shape}")

  return [X_train, X_valid, X_test, y_train, y_valid, y_test], corpus_combo
  
def get_hyperparameters(w2v_param):
  ''' Expand a dictionary of value lists into every hyperparameter combination
  Adopted from:
    https://stackoverflow.com/questions/38721847/how-to-generate-all-combination-from-values-in-dict-of-lists-in-python
  Args:
    w2v_param (dict): parameter name -> list of candidate values, as specified
      in the config file.
  Return:
    names (tuple): parameter names, in the dictionary's order
    combos (list): one tuple per combination; values within each tuple follow
      the order of names. Callers in this file unpack a combination as
      [min_count, window, ngram], so the dictionary must list its keys in
      that order.
  '''
  print(w2v_param)

  names, choices = zip(*w2v_param.items())
  combos = list(itertools.product(*choices))

  return names, combos

def get_unigram(corpus):
  '''Split each text on whitespace
  Args:
    corpus (iterable of str): texts to tokenize
  Return:
    a list with one list of words per text
  '''
  return [doc.split() for doc in corpus]

def get_ngram(X_corpus, ngram, min_count, subset, work_dir):
  '''Check if ngrams files exisit, if not get ngrams based on passed parameters
  Args:
    X_corpus (pandas series): texts to get ngrams from
    ngram (int): uni (1), bi (2), or tri (3) grams
    min_count (int): minmumal number of term occurence in corpus
    subset (str): train, valid, or test; for file name
    work_dir (Path): does not really need this for call within this script, but
      if called as module, this needs to be passed. So make this required.
  Output:
    ngram_file (pickle): model_cln_ngrams_{subset}_{min_count}-{ngram}
  Return:
    unigrams, bigrams, or trigrams: a list with one token list per text
  Raises:
    SystemExit: with status 1 if no cache file exists and ngram is not
      1, 2, or 3
  '''

  # Check if ngram file exist
  ngram_file = work_dir / f"model_cln_ngrams_{subset}_{min_count}-{ngram}"
  if ngram_file.is_file():
    print("    load ngrams")
    # NOTE: pickle.load can execute arbitrary code; only use a work_dir whose
    # contents you trust.
    with open(ngram_file, "rb") as f:
      return pickle.load(f)

  # Reject an unsupported setting before spending time on phrase detection
  if ngram not in (1, 2, 3):
    print('ERR: ngram must be 1, 2, or 3. QUIT!')
    sys.exit(1)

  # ngrams file does not exist, generate it
  print("    generate ngrams")
  ngrams = get_unigram(X_corpus)

  # Bigrams: merge frequently co-occurring neighbouring unigrams
  if ngram >= 2:
    bigrams_detector = gensim.models.phrases.Phraser(
        gensim.models.phrases.Phrases(
            ngrams, delimiter=" ", min_count=min_count, threshold=10))
    ngrams = list(bigrams_detector[ngrams])

  # Trigrams: run phrase detection a second time over the bigram output
  if ngram == 3:
    trigrams_detector = gensim.models.phrases.Phraser(
        gensim.models.phrases.Phrases(
            ngrams, delimiter=" ", min_count=min_count, threshold=10))
    ngrams = list(trigrams_detector[ngrams])

  # write ngram file; done for every ngram setting, unigrams included, so the
  # function always returns the token lists and always leaves a cache behind
  with open(ngram_file, "wb") as f:
    pickle.dump(ngrams, f)

  return ngrams

def get_w2v_model(X_train, X_valid, X_test, param, rand_state):
  '''Get ngram lists and w2v model

  ngram lists come from get_ngram for each subset. The Word2Vec model is fit
  on the training ngrams only, and is cached as a pickle in the module-level
  work_dir so reruns can load it instead of retraining.

  Args:
    X_train, X_valid, X_test (pandas series): cleaned texts of each subset
    param (list): [min_count, window, ngram]
    rand_state (int): seed passed to Word2Vec
  Output:
    work_dir/model_cln_w2v_{min_count}-{window}-{ngram} (pickle): w2v model
  Return:
    model_w2v (gensim.models.Word2Vec): loaded or newly trained model
    model_w2v_name (Path): path of the pickled model
    ngram_train, ngram_valid, ngram_test (list): ngram lists of each subset
  '''
  [min_count, window, ngram] = param

  print("    ngrams for training")
  ngram_train = get_ngram(X_train, ngram, min_count, "train", work_dir)
  print("    ngrams for validation")
  ngram_valid = get_ngram(X_valid, ngram, min_count, "valid", work_dir)
  print("    ngrams for testing")
  ngram_test  = get_ngram(X_test , ngram, min_count, "test", work_dir)

  # Check if w2v model is already generated
  model_w2v_name = work_dir / f"model_cln_w2v_{min_count}-{window}-{ngram}"

  if model_w2v_name.is_file():
    print("   load the w2v model")
    # model_w2v_name already starts with work_dir. Joining work_dir onto it
    # again only happens to work when work_dir is absolute; with a relative
    # work_dir it points at work_dir/work_dir/... and the open fails.
    # NOTE: pickle.load can execute arbitrary code; only use a work_dir whose
    # contents you trust.
    with open(model_w2v_name, "rb") as f:
      model_w2v = pickle.load(f)
  else:
    print("   geneate and save w2v model")
    # sg=1 selects skip-gram; vector_size sets the width of the embedding
    # matrix built downstream from this model.
    model_w2v = gensim.models.Word2Vec(ngram_train, vector_size=300,
                                      window=window, min_count=min_count,
                                      sg=1, epochs=30, seed=rand_state)

    with open(model_w2v_name, "wb") as f:
      pickle.dump(model_w2v, f)

  return model_w2v, model_w2v_name, ngram_train, ngram_valid, ngram_test


def train_tokenizer(corpus, param):
  '''Fit a keras tokenizer on the corpus and pickle it with its vocabulary
  Args:
    corpus (list): a nested list of word lists
    param (list): [min_count, window, ngram]; only used to build the tokenizer
      and vocab output file names
  Output:
    work_dir/model_cln_w2v_token_{min_count}-{window}-{ngram} (pickle)
    work_dir/model_cln_w2v_vocab_{min_count}-{window}-{ngram} (pickle)
    Neither file is rewritten if it already exists.
  Return:
    tokenizer (keras.preprocessing.text.Tokenizer): trained tokenizer
    dic_vocab_token (dict): token as key, index as value
  '''

  # See: https://www.tensorflow.org/api_docs/python/tf/keras/layers/TextVectorization
  # preprocessing.text.Tokenizer is superseded by
  # tf.keras.layers.TextVectorization
  tokenizer = preprocessing.text.Tokenizer(
      lower=True,
      split=' ',
      oov_token="NaN",
      filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n')
  tokenizer.fit_on_texts(corpus)

  # token -> index lookup learned during fitting
  dic_vocab_token = tokenizer.word_index

  min_count, window, ngram = param
  artifacts = (
      (work_dir / f"model_cln_w2v_token_{min_count}-{window}-{ngram}",
       tokenizer),
      (work_dir / f"model_cln_w2v_vocab_{min_count}-{window}-{ngram}",
       dic_vocab_token),
  )

  # Pickle each artifact, skipping any that is already on disk
  for out_path, payload in artifacts:
    if out_path.is_file():
      continue
    with open(out_path, "wb") as f:
      pickle.dump(payload, f)

  return tokenizer, dic_vocab_token


def get_embeddings(corpus, model_w2v, tokenizer, dic_vocab_token, maxlen=500):
  '''Convert texts to padded index sequences and build the embedding matrix
  Args:
    corpus (list): a nested list of word lists
    model_w2v: trained Word2Vec model; vectors are read from model_w2v.wv
    tokenizer: trained tokenizer providing texts_to_sequences
    dic_vocab_token (dict): token as key, index as value
    maxlen (int): length every sequence is padded or truncated to. Defaults
      to 500, the input length hard-coded in get_w2v_emb_model; change both
      together.
  Return:
    embeddings (numpy array): (len(dic_vocab_token)+1) x vector size. Row idx
      holds the w2v vector of the token with index idx. Row 0 and rows of
      tokens absent from the w2v model stay all 0s.
    X_w2v (numpy array): number of texts x maxlen, token indices
  '''

  # Transforms each text in texts to a sequence of integers.
  lst_text2seq = tokenizer.texts_to_sequences(corpus)

  # pad or trucate sequence
  X_w2v = preprocessing.sequence.pad_sequences(
                    lst_text2seq,      # List of sequences, each a list of ints
                    maxlen=maxlen,     # maximum length of all sequences
                    padding="post",    # 'pre' or 'post'
                    truncating="post") # remove values from sequences > maxlen

  ## start the matrix (length of vocabulary x vector size) with all 0s
  # Width comes from the model itself instead of a literal 300, so it cannot
  # drift from the vector_size the model was trained with.
  word_vectors = model_w2v.wv
  embeddings = np.zeros((len(dic_vocab_token)+1, word_vectors.vector_size))

  for word, idx in dic_vocab_token.items():
    ## update the row with vector
    try:
      embeddings[idx] = word_vectors[word]
    ## if word not in model then skip and the row stays all 0s
    except KeyError:
      continue

  return embeddings, X_w2v


def get_w2v_emb_model(embeddings):
  '''Build a deep learning model with Word2Vec embeddings

  Layers, in order: a frozen embedding lookup initialized from `embeddings`,
  an attention-style reweighting, two bidirectional LSTMs, a 64-unit relu
  dense layer, and a 2-unit softmax output. The model is compiled with
  sparse categorical crossentropy, the adam optimizer and accuracy as metric.

  Args:
    embeddings (numpy array): 2D matrix; embeddings.shape[0] is used as the
      embedding layer's input_dim and embeddings.shape[1] as its output_dim.
  Return:
    model (keras Model): compiled, untrained model taking inputs of length 500
  '''

  ## code attention layer
  def attention_layer(inputs, neurons):
    # Swap the last two axes, apply a softmax dense layer, swap back, then
    # multiply element-wise with the inputs to reweight them.
    x = layers.Permute((2,1))(inputs)
    x = layers.Dense(neurons, activation="softmax")(x)
    # Named so this layer can be looked up by name ("attention") later
    x = layers.Permute((2,1), name="attention")(x)
    x = layers.multiply([inputs, x])
    return x

  ## input
  # 500 is hard-coded here, below in input_length, and as neurons= in the
  # attention call; it must equal the maxlen the sequences are padded to.
  x_in = layers.Input(shape=(500,)) ## embedding
  # trainable=False keeps the supplied embedding weights fixed during training
  x = layers.Embedding(input_dim=embeddings.shape[0],  
                      output_dim=embeddings.shape[1], 
                      weights=[embeddings],
                      input_length=500, trainable=False)(x_in)

  ## apply attention
  x = attention_layer(x, neurons=500)

  ## 2 layers of bidirectional lstm
  # The first LSTM returns the full sequence so the second one can consume it
  x = layers.Bidirectional(layers.LSTM(units=15, dropout=0.2, 
                          return_sequences=True))(x)
  x = layers.Bidirectional(layers.LSTM(units=15, dropout=0.2))(x)

  ## final dense layers
  x = layers.Dense(64, activation='relu')(x)
  # Two output units: one probability per class
  y_out = layers.Dense(2, activation='softmax')(x)

  ## Initialize and compile model
  # The sparse loss takes integer class labels rather than one-hot vectors
  model = models.Model(x_in, y_out)
  model.compile(loss='sparse_categorical_crossentropy',
                optimizer='adam', 
                metrics=['accuracy'])

  return model

In [38]:

################################################################################


# Path of the run configuration, relative to the notebook's working directory
config_file = Path('config_w2v_run1.txt')

print("\nRead configuration file...")
config_dict = read_configs(config_file)

# Declare config parameters as global variables
#   NOTE: write_df_as_json, get_w2v_model and train_tokenizer read work_dir
#   from module scope, so this cell must run before any of them is called.
proj_dir          = Path(config_dict['proj_dir'])
work_dir          = proj_dir / config_dict['work_dir']
corpus_combo_file = work_dir / config_dict['corpus_combo_file']
lang_model        = config_dict['lang_model']
rand_state        = config_dict['rand_state']
w2v_param         = config_dict['w2v_param']

# Split train/validate/test for cleaned text
#   Will not focus on original due to issues with non-alphanumeric characters
#   and stop words.
#   subsets is [X_train, X_valid, X_test, y_train, y_valid, y_test];
#   corpus_combo is the full dataframe the subsets were drawn from.
print("\nRead file and split train/validate/test...")
subsets, corpus_combo = split_train_validate_test(corpus_combo_file, rand_state)



Read configuration file...
   lang_model = w2v
   proj_dir = /home/shinhan/projects/plant_sci_hist
   work_dir = 2_text_classify
   corpus_combo_file = corpus_combo
   rand_state = 20220609
   w2v_param = {'min_count': [20], 'window': [8], 'ngram': [3]}
  all config available

Read file and split train/validate/test...
  write train, valid, test data to json
    size: train=(51987,), valid=(17329,), test=(17330,)


In [39]:
##DEBUG
# Figure out how the subsets and the original corpus indices are corresponding
# to each other.
corpus_combo.sample(2)

Unnamed: 0,PMID,Date,Journal,Title,Abstract,QualifiedName,txt,label,txt_clean
1137623,28316606,2017-03-21,Frontiers in plant science,A Comprehensive Phenotypic Investigation of th...,Seed shattering in crops is a key domesticatio...,bean,A Comprehensive Phenotypic Investigation of th...,1,comprehensive phenotypic investigation podshat...
790768,22369516,2012-03-01,Plant biotechnology journal,Enhanced polyhydroxybutyrate production in tra...,Polyhydroxybutyrate (PHB) is a bacterial polye...,sugarcane,Enhanced polyhydroxybutyrate production in tra...,1,enhanced polyhydroxybutyrate production transg...


In [45]:
[X_train, X_valid, X_test, y_train, y_valid, y_test] = subsets
X_train.loc[1066736]


'perspective better understanding metabolic integration photorespiration within complex plant primary metabolism network photorespiration essential high flux metabolic pathway found oxygenproducing photosynthetic organism often viewed closed metabolic repair pathway serf detoxify 2phosphoglycolic acid recycle carbon fuel calvinbenson cycle however, view simplistic since photorespiratory cycle known interact several primary metabolic pathways, including photosynthesis, nitrate assimilation, amino acid metabolism, c1 metabolism krebs (tca) cycle review recent advance photorespiration research discus future priority better understand (i) metabolic integration photorespiratory cycle within complex network plant primary metabolism (ii) importance photorespiration response abiotic biotic stress author 2016 published oxford university press behalf society experimental biology right reserved permissions, please email journalspermissionsoupcom'

In [43]:
corpus_combo.loc[1066736]['txt']

'Perspectives for a better understanding of the metabolic integration of photorespiration within a complex plant primary metabolism network.. Photorespiration is an essential high flux metabolic pathway that is found in all oxygen-producing photosynthetic organisms. It is often viewed as a closed metabolic repair pathway that serves to detoxify 2-phosphoglycolic acid and to recycle carbon to fuel the Calvin-Benson cycle. However, this view is too simplistic since the photorespiratory cycle is known to interact with several primary metabolic pathways, including photosynthesis, nitrate assimilation, amino acid metabolism, C1 metabolism and the Krebs (TCA) cycle. Here we will review recent advances in photorespiration research and discuss future priorities to better understand (i) the metabolic integration of the photorespiratory cycle within the complex network of plant primary metabolism and (ii) the importance of photorespiration in response to abiotic and biotic stresses.© The Author 

In [None]:
# get w2v parameter list
#   [min_count, window, ngram]
#   param_keys holds the parameter names, param_list one tuple per combination
param_keys, param_list  = get_hyperparameters(w2v_param)

# iterate through different parameters
#   Opening with "w" truncates any existing score file, so each run of this
#   cell leaves only the header row behind.
with open(work_dir / f"scores_cln_w2v", "w") as f:
  f.write("run\ttxt_flag\tlang_model\tparameters\tvalidate_f1\t" +\
          "test_f1\tmodel_dir\n")
# NOTE(review): run_num is not used by any later cell visible here --
# presumably meant for the "run" column of the score file; confirm.
run_num = 0

# Assuming only one parameter for now
#   Only the first combination is used; any others in param_list are ignored.
param = param_list[0]
print(f"\n## param: {param}")

In [7]:
### def run_pipeline(param, subsets):
# The cells from here on are the unrolled body of a would-be
# run_pipeline(param, subsets) function.

# Same value the config cell assigned to rand_state
rand_state = config_dict['rand_state']

[X_train, X_valid, X_test, y_train, y_valid, y_test] = subsets

# Get list of ngrams and w2v model
#   model_w2v_name is the path of the pickled model; it is reused later to
#   derive the checkpoint directory of the classifier.
print("  get list of ngrams and w2v model")
model_w2v, model_w2v_name, ngram_train, ngram_valid, ngram_test = \
                    get_w2v_model(X_train, X_valid, X_test, param, rand_state)

  get list of ngrams and w2v model
    ngrams for training
    load ngrams
    ngrams for validation
    load ngrams
    ngrams for testing
    load ngrams
   load the w2v model


In [10]:
# Train tokenizer
#   Fit on the training ngrams only; the same tokenizer is later applied to
#   the validation and testing ngrams.
print("  train tokenizer")
tokenizer, dic_vocab_token = train_tokenizer(ngram_train, param)

  train tokenizer


In [16]:
## DEBUG
print(" ngram len:", len(ngram_train), len(ngram_valid), len(ngram_test))
print(ngram_train[0])

 ngram len: 51987 17329 17330
['vivo vitro', 'inhibition', 'catalase', 'leaf', 'nicotiana sylvestris', '3amino1,2,4triazole', 'seedling', 'tobacco (nicotiana', 'sylvestris)', 'treated', 'vivo', '003', '20', 'millimolar', '3amino1,2,4triazole', '(aminotriazole)', 'rapid', 'loss', 'catalase', '(ec', '11116)', 'activity', 'first', '5', 'hour', 'followed', 'slower', 'decrease', 'next', '4 hour', 'level', '15 20', 'initial', 'activity,', 'little', 'change', 'period', '3 day', 'fifty', 'percent', 'loss', 'catalase activity', 'occurred', '010', '015', 'millimolar', 'inhibitor', '(18hour', 'incubation)', 'isozymes', 'tobacco', 'catalase', 'differed', 'sensitivity', 'inhibitor', 'enhancedperoxidatic', 'catalase', '(epcat)', '(havir', 'ea,', 'mchale', 'na,', '1989', 'plant physiol', '91', '812815)', 'decreased', '35', 'condition', 'major', 'isozyme', 'decreased', '85', 'resistance', 'aminotriazole', 'inhibition', 'demonstrated', 'vivo', 'epcat', 'also observed', 'vitro', 'time', '50', 'inhibitio

In [17]:
# Get embeddings
#   get_embeddings returns (embedding matrix, padded index sequences). The
#   matrix is built from model_w2v and dic_vocab_token only, so it is kept
#   from the first call and discarded (_) for the validation and test calls.
print("  get embeddings")
embeddings, X_train_w2v = get_embeddings(
                          ngram_train, model_w2v, tokenizer, dic_vocab_token)
_, X_valid_w2v = get_embeddings(
                          ngram_valid, model_w2v, tokenizer, dic_vocab_token)
_, X_test_w2v  = get_embeddings(
                          ngram_test , model_w2v, tokenizer, dic_vocab_token)

  get embeddings


In [20]:
##DEBUG
X_train_w2v.shape, X_valid_w2v.shape, X_test_w2v.shape

((51987, 500), (17329, 500), (17330, 500))

In [21]:
# Model checkpoint path and output model file name
#   Derived from the w2v model path by appending "_dnn"
cp_filepath  = Path(str(model_w2v_name) + "_dnn")

# Load model if exists
#   The checkpoint is treated as a directory, so a previous run is detected
#   with is_dir() rather than is_file().
if cp_filepath.is_dir():
  print("  load model in:", cp_filepath)
  model_emb = models.load_model(cp_filepath)

# Train and save model if not
else:
  print("  train model")
  model_emb    = get_w2v_emb_model(embeddings)

  # setup check points
  #   Early stopping watches val_loss (stop after 5 epochs without
  #   improvement) while the checkpoint keeps the epoch with the highest
  #   val_accuracy, so the two callbacks track different metrics.
  callback_es  = callbacks.EarlyStopping(monitor='val_loss', patience=5)
  callback_mcp = callbacks.ModelCheckpoint(filepath=cp_filepath, mode='max', 
          save_weights_only=False, monitor='val_accuracy', save_best_only=True)

  # Train model
  #   NOTE: history is only defined in this branch; it does not exist when
  #   the model was loaded from cp_filepath above.
  history = model_emb.fit(x=X_train_w2v, y=y_train, batch_size=256, 
                          epochs=20, shuffle=True, verbose=1, 
                          validation_data=(X_valid_w2v, y_valid), 
                          callbacks=[callback_es, callback_mcp])

  load model in: /home/shinhan/projects/plant_sci_hist/2_text_classify/model_cln_w2v_20-8-3_dnn


2022-06-30 19:10:50.149129: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:922] could not open file to read NUMA node: /sys/bus/pci/devices/0000:08:00.0/numa_node
Your kernel may have been built without NUMA support.
2022-06-30 19:10:50.306489: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:922] could not open file to read NUMA node: /sys/bus/pci/devices/0000:08:00.0/numa_node
Your kernel may have been built without NUMA support.
2022-06-30 19:10:50.306732: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:922] could not open file to read NUMA node: /sys/bus/pci/devices/0000:08:00.0/numa_node
Your kernel may have been built without NUMA support.
2022-06-30 19:10:50.309105: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate

In [35]:
def predict_and_output(corpus_pred_file, X_w2v, X, y):
  '''Predict with the module-level model_emb and write the results as a table
  Args:
    corpus_pred_file (str or Path): tab-delimited output file
    X_w2v (numpy array): padded token index sequences to predict on
    X (pandas series): the texts, indexed like y
    y (pandas series): true labels; its index is reused for the predictions
  Output:
    corpus_pred_file: pred_df written with tab as separator
  Return:
    pred_df (pandas dataframe): columns y, y_pred, y_prob, X
  '''

  # prediction probability
  print("    get prediction probability")
  y_prob = model_emb.predict(X_w2v)

  # label mapping: position in the sorted unique labels of y -> label
  y_map = dict(enumerate(np.unique(y)))

  # prediction: label at the position of the highest probability in each row
  print("    get predictions")
  y_pred = [y_map[best] for best in map(np.argmax, y_prob)]

  # Put predictions and the probability of the 2nd column (class=1) on the
  # index of y so every column of the dataframe lines up row by row
  pred_df = pd.DataFrame({
      'y': y,
      'y_pred': pd.Series(y_pred, index=y.index),
      'y_prob': pd.Series(y_prob[:,1], index=y.index),
      'X': X})

  print("    write prediciton dataframe")
  pred_df.to_csv(corpus_pred_file, sep="\t")

  return pred_df

In [36]:
# Predict on the training set. predict_and_output writes the tab-delimited
# table to train_pred_file and also returns it as a dataframe.
print("  output predictions of training data")
train_pred_file = work_dir / "corpus_train_pred"
train_pred_df = predict_and_output(
                            train_pred_file, X_train_w2v, X_train, y_train)
print("     train_pred_df.shape:", train_pred_df.shape)


  output predictions of training data
    get prediction probability
    get predictions
    write prediciton dataframe
     train_pred_df.shape: (51987, 4)


## For working out how to output the prediction dataframe

In [24]:
##DEBUG: modified
#def predict_and_output(corpus_pred_file, X_w2v, X, y):
def predict_and_output(X_w2v, y):
  '''Debugging variant: predict with the module-level model_emb, write nothing

  Shares its name with the four-argument predict_and_output defined in
  another cell; whichever cell ran last is the definition in effect. The
  dataframe assembly and the TSV output are intentionally left out here so
  the raw predictions can be inspected.

  Args:
    X_w2v (numpy array): padded token index sequences to predict on
    y: true labels; only used to derive the label mapping
  Return:
    y_pred (list): predicted label of each row
    y_prob (numpy array): prediction probabilities, one column per class
  '''

  # prediction probability
  print("    get prediction probability")
  y_prob = model_emb.predict(X_w2v)

  # label mapping: position in the sorted unique labels of y -> label
  y_map = dict(enumerate(np.unique(y)))

  # prediction: label at the position of the highest probability in each row
  print("    get predictions")
  y_pred = [y_map[best] for best in map(np.argmax, y_prob)]

  return y_pred, y_prob

In [25]:
print("  output predictions of training data")
train_pred_file = work_dir / "corpus_train_pred"
#predict_and_output(train_pred_file, X_train_w2v, X_train, y_train)
y_pred, y_prob = predict_and_output(X_train_w2v, y_train)

  output predictions of training data
    get prediction probability


2022-06-30 19:15:55.149205: I tensorflow/stream_executor/cuda/cuda_blas.cc:1786] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.
2022-06-30 19:15:56.923758: I tensorflow/stream_executor/cuda/cuda_dnn.cc:368] Loaded cuDNN version 8201


    get predictions


In [27]:
##DEBUG
print(type(y_pred), type(y_prob), type(X_train), type(y_train))
print(len(y_pred), y_prob.shape, X_train.shape, y_train.shape)

<class 'list'> <class 'numpy.ndarray'> <class 'pandas.core.series.Series'> <class 'pandas.core.series.Series'>
51987 (51987, 2) (51987,) (51987,)


In [28]:
X_train[:3]

516651    vivo vitro inhibition catalase leaf nicotiana ...
521301    pathway glucose regulation monosaccharide tran...
65516     feasibility home treatment diarrhoea packaged ...
Name: txt_clean, dtype: object

In [29]:
y_train[:3]

516651    1
521301    1
65516     0
Name: label, dtype: int64

In [30]:
y_pred_series = pd.Series(y_pred, index=y_train.index)
y_pred_series[:3]

516651    1
521301    1
65516     0
dtype: int64

In [31]:
y_prob_series = pd.Series(y_prob[:,1], index=y_train.index)
y_prob_series[:3]

516651    0.982180
521301    0.993828
65516     0.001417
dtype: float32

In [32]:
pred_df = pd.DataFrame({'y': y_train, 'y_pred': y_pred_series, 
                        'y_prob': y_prob_series, 'X':X_train})

In [33]:
pred_df.shape

(51987, 4)

In [34]:
pred_df.head()

Unnamed: 0,y,y_pred,y_prob,X
516651,1,1,0.98218,vivo vitro inhibition catalase leaf nicotiana ...
521301,1,1,0.993828,pathway glucose regulation monosaccharide tran...
65516,0,0,0.001417,feasibility home treatment diarrhoea packaged ...
277058,1,1,0.990589,modulation phosphatidylcholine biosynthesis ce...
753225,1,1,0.921157,120yr period dr beals seed viability experimen...
