# Task 1: Create a Prescription Parser using CRF
This task tests your ability to build a Doctor Prescription Parser with the help of CRF model

Your job is to build a Prescription Parser that takes a prescription (sentence) as an input and find / label the words in that sentence with one of the already pre-defined labels

### Problem: SEQUENCE PREDICTION - Label words in a sentence
#### Input : Doctor Prescription in the form of a sentence split into tokens
- Ex: Take 2 tablets once a day for 10 days

#### Output : FHIR Labels
- ('Take', 'Method')
- ('2', 'Qty') 
- ('tablets', 'Form')
- ('once', 'Frequency')
- ('a', 'Period') 
- ('day', 'PeriodUnit')
- ('for', 'FOR')
- ('10', 'Duration')
- ('days', 'DurationUnit') 

### Major Steps
- Install necessary library
- Import the libraries
- Create training data with labels
    - Split the sentence into tokens
    - Compute POS tags
    - Create triples
- Extract features
- Split the data into training and testing set
- Create CRF model
- Save the CRF model
- Load the CRF model
- Predict on test data
- Accuracy

#### Install necesaary library

In [1]:
from nltk import pos_tag, word_tokenize
from sklearn.model_selection import train_test_split
from sklearn_crfsuite import CRF, metrics
# import re

#### Import the necessary libraries

### Input data (GIVEN)
#### Creating the inputs to the ML model in the following form:
- sigs --> ['take 3 tabs for 10 days']       INPUT SIG
- input_sigs --> [['take', '3', 'tabs', 'for', '10', 'days']]      TOKENS
- output_labels --> [['Method','Qty', 'Form', 'FOR', 'Duration', 'DurationUnit']]       LABELS

In [2]:
sigs = ["for 5 to 6 days", "inject 2 units", "x 2 weeks", "x 3 days", "every day", "every 2 weeks", "every 3 days", "every 1 to 2 months", "every 2 to 6 weeks", "every 4 to 6 days", "take two to four tabs", "take 2 to 4 tabs", "take 3 tabs orally bid for 10 days at bedtime", "swallow three capsules tid orally", "take 2 capsules po every 6 hours", "take 2 tabs po for 10 days", "take 100 caps by mouth tid for 10 weeks", "take 2 tabs after an hour", "2 tabs every 4-6 hours", "every 4 to 6 hours", "q46h", "q4-6h", "2 hours before breakfast", "before 30 mins at bedtime", "30 mins before bed", "and 100 tabs twice a month", "100 tabs twice a month", "100 tabs once a month", "100 tabs thrice a month", "3 tabs daily for 3 days then 1 tab per day at bed", "30 tabs 10 days tid", "take 30 tabs for 10 days three times a day", "qid q6h", "bid", "qid", "30 tabs before dinner and bedtime", "30 tabs before dinner & bedtime", "take 3 tabs at bedtime", "30 tabs thrice daily for 10 days ", "30 tabs for 10 days three times a day", "Take 2 tablets a day", "qid for 10 days", "every day", "take 2 caps at bedtime", "apply 3 drops before bedtime", "take three capsules daily", "swallow 3 pills once a day", "swallow three pills thrice a day", "apply daily", "apply three drops before bedtime", "every 6 hours", "before food", "after food", "for 20 days", "for twenty days", "with meals"]
input_sigs = [['for', '5', 'to', '6', 'days'], ['inject', '2', 'units'], ['x', '2', 'weeks'], ['x', '3', 'days'], ['every', 'day'], ['every', '2', 'weeks'], ['every', '3', 'days'], ['every', '1', 'to', '2', 'months'], ['every', '2', 'to', '6', 'weeks'], ['every', '4', 'to', '6', 'days'], ['take', 'two', 'to', 'four', 'tabs'], ['take', '2', 'to', '4', 'tabs'], ['take', '3', 'tabs', 'orally', 'bid', 'for', '10', 'days', 'at', 'bedtime'], ['swallow', 'three', 'capsules', 'tid', 'orally'], ['take', '2', 'capsules', 'po', 'every', '6', 'hours'], ['take', '2', 'tabs', 'po', 'for', '10', 'days'], ['take', '100', 'caps', 'by', 'mouth', 'tid', 'for', '10', 'weeks'], ['take', '2', 'tabs', 'after', 'an', 'hour'], ['2', 'tabs', 'every', '4-6', 'hours'], ['every', '4', 'to', '6', 'hours'], ['q46h'], ['q4-6h'], ['2', 'hours', 'before', 'breakfast'], ['before', '30', 'mins', 'at', 'bedtime'], ['30', 'mins', 'before', 'bed'], ['and', '100', 'tabs', 'twice', 'a', 'month'], ['100', 'tabs', 'twice', 'a', 'month'], ['100', 'tabs', 'once', 'a', 'month'], ['100', 'tabs', 'thrice', 'a', 'month'], ['3', 'tabs', 'daily', 'for', '3', 'days', 'then', '1', 'tab', 'per', 'day', 'at', 'bed'], ['30', 'tabs', '10', 'days', 'tid'], ['take', '30', 'tabs', 'for', '10', 'days', 'three', 'times', 'a', 'day'], ['qid', 'q6h'], ['bid'], ['qid'], ['30', 'tabs', 'before', 'dinner', 'and', 'bedtime'], ['30', 'tabs', 'before', 'dinner', '&', 'bedtime'], ['take', '3', 'tabs', 'at', 'bedtime'], ['30', 'tabs', 'thrice', 'daily', 'for', '10', 'days'], ['30', 'tabs', 'for', '10', 'days', 'three', 'times', 'a', 'day'], ['take', '2', 'tablets', 'a', 'day'], ['qid', 'for', '10', 'days'], ['every', 'day'], ['take', '2', 'caps', 'at', 'bedtime'], ['apply', '3', 'drops', 'before', 'bedtime'], ['take', 'three', 'capsules', 'daily'], ['swallow', '3', 'pills', 'once', 'a', 'day'], ['swallow', 'three', 'pills', 'thrice', 'a', 'day'], ['apply', 'daily'], ['apply', 'three', 'drops', 'before', 'bedtime'], ['every', '6', 'hours'], ['before', 'food'], ['after', 'food'], ['for', '20', 'days'], ['for', 'twenty', 'days'], ['with', 'meals']]
output_labels = [['FOR', 'Duration', 'TO', 'DurationMax', 'DurationUnit'], ['Method', 'Qty', 'Form'], ['FOR', 'Duration', 'DurationUnit'], ['FOR', 'Duration', 'DurationUnit'], ['EVERY', 'Period'], ['EVERY', 'Period', 'PeriodUnit'], ['EVERY', 'Period', 'PeriodUnit'], ['EVERY', 'Period', 'TO', 'PeriodMax', 'PeriodUnit'], ['EVERY', 'Period', 'TO', 'PeriodMax', 'PeriodUnit'], ['EVERY', 'Period', 'TO', 'PeriodMax', 'PeriodUnit'], ['Method', 'Qty', 'TO', 'Qty', 'Form'], ['Method', 'Qty', 'TO', 'Qty', 'Form'], ['Method', 'Qty', 'Form', 'PO', 'BID', 'FOR', 'Duration', 'DurationUnit', 'AT', 'WHEN'], ['Method', 'Qty', 'Form', 'TID', 'PO'], ['Method', 'Qty', 'Form', 'PO', 'EVERY', 'Period', 'PeriodUnit'], ['Method', 'Qty', 'Form', 'PO', 'FOR', 'Duration', 'DurationUnit'], ['Method', 'Qty', 'Form', 'BY', 'PO', 'TID', 'FOR', 'Duration', 'DurationUnit'], ['Method', 'Qty', 'Form', 'AFTER', 'Period', 'PeriodUnit'], ['Qty', 'Form', 'EVERY', 'Period', 'PeriodUnit'], ['EVERY', 'Period', 'TO', 'PeriodMax', 'PeriodUnit'], ['Q46H'], ['Q4-6H'], ['Qty', 'PeriodUnit', 'BEFORE', 'WHEN'], ['BEFORE', 'Qty', 'M', 'AT', 'WHEN'], ['Qty', 'M', 'BEFORE', 'WHEN'], ['AND', 'Qty', 'Form', 'Frequency', 'Period', 'PeriodUnit'], ['Qty', 'Form', 'Frequency', 'Period', 'PeriodUnit'], ['Qty', 'Form', 'Frequency', 'Period', 'PeriodUnit'], ['Qty', 'Form', 'Frequency', 'Period', 'PeriodUnit'], ['Qty', 'Form', 'Frequency', 'FOR', 'Duration', 'DurationUnit', 'THEN', 'Qty', 'Form', 'Frequency', 'PeriodUnit', 'AT', 'WHEN'], ['Qty', 'Form', 'Duration', 'DurationUnit', 'TID'], ['Method', 'Qty', 'Form', 'FOR', 'Duration', 'DurationUnit', 'Qty', 'TIMES', 'Period', 'PeriodUnit'], ['QID', 'Q6H'], ['BID'], ['QID'],['Qty', 'Form', 'BEFORE', 'WHEN', 'AND', 'WHEN'], ['Qty', 'Form', 'BEFORE', 'WHEN', 'AND', 'WHEN'], ['Method', 'Qty', 'Form', 'AT', 'WHEN'], ['Qty', 'Form', 'Frequency', 'DAILY', 'FOR', 'Duration', 'DurationUnit'], ['Qty', 'Form', 'FOR', 'Duration', 'DurationUnit', 'Frequency', 'TIMES', 'Period', 'PeriodUnit'], ['Method', 'Qty', 'Form', 'Period', 'PeriodUnit'], ['QID', 'FOR', 'Duration', 'DurationUnit'], ['EVERY', 'PeriodUnit'], ['Method', 'Qty', 'Form', 'AT', 'WHEN'], ['Method', 'Qty', 'Form', 'BEFORE', 'WHEN'], ['Method', 'Qty', 'Form', 'DAILY'], ['Method', 'Qty', 'Form', 'Frequency', 'Period', 'PeriodUnit'], ['Method', 'Qty', 'Form', 'Frequency', 'Period', 'PeriodUnit'], ['Method', 'DAILY'], ['Method', 'Qty', 'Form', 'BEFORE', 'WHEN'], ['EVERY', 'Period', 'PeriodUnit'], ['BEFORE', 'FOOD'], ['AFTER', 'FOOD'], ['FOR', 'Duration', 'DurationUnit'], ['FOR', 'Duration', 'DurationUnit'], ['WITH', 'FOOD']]

In [3]:
# p = 0
# for i in input_sigs:
#     for j in i:
#         p+=1
# print(p)

### Creating a Tuples Maker method
Create the tuples as given below by writing a function **tuples_maker(input_sigs, output_labels)** and returns **output** as given below

Input(s): 
- input_sigs
- output_lables

Output:

[[('for', 'FOR'),
  ('5', 'Duration'),
  ('to', 'TO'),
  ('6', 'DurationMax'),
  ('days', 'DurationUnit')], [second sentence], ...]

In [4]:
# def tuples_maker(inp, out):
#     sample_data = []
#     for i in range(len(inp)):
#         tmp = []
#         for j in range(len(inp[i])):
#             tmp.append((inp[i][j],out[i][j]))
#         sample_data.append(tmp)
#     return sample_data

# whole_data = tuples_maker(input_sigs,output_labels)
# whole_data

### Creating the triples_maker( ) for feature extraction
- input: tuples_maker_output
- output: 
[[('for', 'IN', 'FOR'),
  ('5', 'CD', 'Duration'),
  ('to', 'TO', 'TO'),
  ('6', 'CD', 'DurationMax'),
  ('days', 'NNS', 'DurationUnit')], [second sentence], ... ]

In [5]:
def triples_maker(inp, out):
    sample_data = []
    for i in range(len(inp)):
        tmp = []
        t1 = pos_tag(inp[i])
        for j in range(len(inp[i])):
            tmp.append((inp[i][j],t1[j][1],out[i][j]))
        sample_data.append(tmp)
    return sample_data

sample_data = triples_maker(input_sigs,output_labels)
sample_data



[[('for', 'IN', 'FOR'),
  ('5', 'CD', 'Duration'),
  ('to', 'TO', 'TO'),
  ('6', 'CD', 'DurationMax'),
  ('days', 'NNS', 'DurationUnit')],
 [('inject', 'JJ', 'Method'), ('2', 'CD', 'Qty'), ('units', 'NNS', 'Form')],
 [('x', 'RB', 'FOR'),
  ('2', 'CD', 'Duration'),
  ('weeks', 'NNS', 'DurationUnit')],
 [('x', 'RB', 'FOR'),
  ('3', 'CD', 'Duration'),
  ('days', 'NNS', 'DurationUnit')],
 [('every', 'DT', 'EVERY'), ('day', 'NN', 'Period')],
 [('every', 'DT', 'EVERY'),
  ('2', 'CD', 'Period'),
  ('weeks', 'NNS', 'PeriodUnit')],
 [('every', 'DT', 'EVERY'),
  ('3', 'CD', 'Period'),
  ('days', 'NNS', 'PeriodUnit')],
 [('every', 'DT', 'EVERY'),
  ('1', 'CD', 'Period'),
  ('to', 'TO', 'TO'),
  ('2', 'CD', 'PeriodMax'),
  ('months', 'NNS', 'PeriodUnit')],
 [('every', 'DT', 'EVERY'),
  ('2', 'CD', 'Period'),
  ('to', 'TO', 'TO'),
  ('6', 'CD', 'PeriodMax'),
  ('weeks', 'NNS', 'PeriodUnit')],
 [('every', 'DT', 'EVERY'),
  ('4', 'CD', 'Period'),
  ('to', 'TO', 'TO'),
  ('6', 'CD', 'PeriodMax'),
  ('

In [6]:
# def triples_maker(whole_data):
    
#     return sample_data

In [7]:
sample_data = triples_maker(input_sigs,output_labels)
sample_data

[[('for', 'IN', 'FOR'),
  ('5', 'CD', 'Duration'),
  ('to', 'TO', 'TO'),
  ('6', 'CD', 'DurationMax'),
  ('days', 'NNS', 'DurationUnit')],
 [('inject', 'JJ', 'Method'), ('2', 'CD', 'Qty'), ('units', 'NNS', 'Form')],
 [('x', 'RB', 'FOR'),
  ('2', 'CD', 'Duration'),
  ('weeks', 'NNS', 'DurationUnit')],
 [('x', 'RB', 'FOR'),
  ('3', 'CD', 'Duration'),
  ('days', 'NNS', 'DurationUnit')],
 [('every', 'DT', 'EVERY'), ('day', 'NN', 'Period')],
 [('every', 'DT', 'EVERY'),
  ('2', 'CD', 'Period'),
  ('weeks', 'NNS', 'PeriodUnit')],
 [('every', 'DT', 'EVERY'),
  ('3', 'CD', 'Period'),
  ('days', 'NNS', 'PeriodUnit')],
 [('every', 'DT', 'EVERY'),
  ('1', 'CD', 'Period'),
  ('to', 'TO', 'TO'),
  ('2', 'CD', 'PeriodMax'),
  ('months', 'NNS', 'PeriodUnit')],
 [('every', 'DT', 'EVERY'),
  ('2', 'CD', 'Period'),
  ('to', 'TO', 'TO'),
  ('6', 'CD', 'PeriodMax'),
  ('weeks', 'NNS', 'PeriodUnit')],
 [('every', 'DT', 'EVERY'),
  ('4', 'CD', 'Period'),
  ('to', 'TO', 'TO'),
  ('6', 'CD', 'PeriodMax'),
  ('

### Creating the features extractor method (GIVEN as a BASELINE)
#### The features used are:
- SOS, EOS, lowercase, uppercase, title, digit, postag, previous_tag, next_tag
#### Feel free to include more features

In [8]:
def token_to_features(doc, i):
    word = doc[i][0]
    postag = doc[i][1]

    # Common features for all words
    features = [
        'bias',
        'word.lower=' + word.lower(),
        'word[-3:]=' + word[-3:],
        'word[-2:]=' + word[-2:],
        'word.isupper=%s' % word.isupper(),
        'word.istitle=%s' % word.istitle(),
        'word.isdigit=%s' % word.isdigit(),
        'postag=' + postag
    ]

    # Features for words that are not
    # at the beginning of a document
    if i > 0:
        word1 = doc[i-1][0]
        postag1 = doc[i-1][1]
        features.extend([
            '-1:word.lower=' + word1.lower(),
            '-1:word.istitle=%s' % word1.istitle(),
            '-1:word.isupper=%s' % word1.isupper(),
            '-1:word.isdigit=%s' % word1.isdigit(),
            '-1:postag=' + postag1
        ])
    else:
        # Indicate that it is the 'beginning of a document'
        features.append('BOS')

    # Features for words that are not
    # at the end of a document
    if i < len(doc)-1:
        word1 = doc[i+1][0]
        postag1 = doc[i+1][1]
        features.extend([
            '+1:word.lower=' + word1.lower(),
            '+1:word.istitle=%s' % word1.istitle(),
            '+1:word.isupper=%s' % word1.isupper(),
            '+1:word.isdigit=%s' % word1.isdigit(),
            '+1:postag=' + postag1
        ])
    else:
        # Indicate that it is the 'end of a document'
        features.append('EOS')

    return features

In [9]:
def X_y_split(l1):
    X = []
    y = []
    # loop Sample data
    for i in l1:
        t1=[]
        t2=[]
        # Words in sample data: ('for', 'IN', 'FOR'), ('5', 'CD', 'Duration'), ('to', 'TO', 'TO'), ('6', 'CD', 'DurationMax'), ('days', 'NNS', 'DurationUnit')
        for j in range(len(i)):
            # print(i)
            t3=dict()
            feat = token_to_features(i,j)
            feat.remove("bias")
            feat.append("bias=1.0")

            if ('BOS' in feat):
                # feat.append('-1:word.lower=""')
                # feat.append('-1:word.istitle=False')
                # feat.append('-1:word.isupper=False')
                # feat.append('-1:word.isdigit=False')
                # feat.append('-1:postag="BOS"')
                feat.remove('BOS')
                feat.append('BOS=True')

            if ('EOS' in feat):
                # feat.append('+1:word.lower=""') ,
                # feat.append('+1:word.istitle=False') ,
                # feat.append('+1:word.isupper=False'),
                # feat.append('+1:word.isdigit=False') ,
                # feat.append('+1:postag=EOS')
                feat.remove('EOS')
                feat.append('EOS=True')
            for k in feat:
                # try:
                t4 = k[k.find('=')+1:]
                try:
                    t4 = eval(k[k.find('=')+1:])
                    if (type(t4)!= int and type(t4)!= bool and type(t4)!= float):
                        t4 = k[k.find('=')+1:]
                except:
                        t4 = k[k.find('=')+1:]
                t3[str(k[:k.find('=')])] = t4                    
                # except:
                # t3[str(k[:k.find('=')])] =  
            t1.append(t3)
            # print(t3)
            t2.append(i[j][2])
        X.append(t1)
        y.append(t2)
    # for i in range(len(X)):
        # print(X[i],y[i])

    return X,y 


In [10]:
# X,y = X_y_split(sample_data)
# # sample_data[0][0][1]
# # token_to_features(sample_data[0][0],2)
# X[0][0]
# for i in X:
#     for j in i:
#         print(j.values)

In [11]:
# # eval("True")
# # X.shape
# len(X),len(y)
# # X
# # for i in sample_data:
# #     for j in range(len(i)):
#         print(i[j][2])
# #     print("")
# a=7
# X[a],y[a]

In [12]:
# type(CRF)

### Running the feature extractor on the training data 
- Feature extraction
- Train-test-split

In [13]:
X,y = X_y_split(sample_data)
X_train,X_test,y_train,y_test = train_test_split(X , y, test_size = 0.3)


In [14]:
# for i in range(10):
#     print(X_train[i],y_train[i])
# len(X[0]),len(y[0])

In [15]:
# X[0]

In [16]:
# y[0][0]

In [17]:
# type(X_train)
# type(X_train[0])
# type(X_train[0][0])


In [18]:
# X_train,X_test,y_train,y_test

### Training the CRF model with the features extracted using the feature extractor method

In [19]:
crf = CRF(algorithm='lbfgs', c1=0.1, c2=10, max_iterations=50)
crf.fit(X_train,y_train)

labels = list(crf.classes_)
labels.sort()


In [20]:
# labels


In [21]:
# Submit training data to the trainer


# Set the parameters of the model


# Providing a file name as a parameter to the train function, such that
# the model will be saved to the file when training is finished


### Predicting the test data with the built model

In [22]:
y_pred = crf.predict(X_test)
print(f"F-Score = {metrics.flat_f1_score(y_test, y_pred,average='weighted', labels=labels)}")
print(metrics.flat_classification_report(y_test, y_pred, labels=labels, digits=3))


F-Score = 0.5865310365310366
              precision    recall  f1-score   support

         AND      0.000     0.000     0.000         2
          AT      0.000     0.000     0.000         2
      BEFORE      0.667     0.667     0.667         3
         BID      0.000     0.000     0.000         0
       DAILY      0.000     0.000     0.000         1
    Duration      1.000     0.250     0.400         4
 DurationMax      0.000     0.000     0.000         0
DurationUnit      1.000     0.250     0.400         4
       EVERY      1.000     1.000     1.000         1
        FOOD      0.000     0.000     0.000         1
         FOR      0.500     0.667     0.571         3
        Form      0.647     1.000     0.786        11
   Frequency      0.200     1.000     0.333         1
           M      0.000     0.000     0.000         1
      Method      0.833     0.714     0.769         7
          PO      0.000     0.000     0.000         2
      Period      0.444     1.000     0.615         

  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [23]:
# crf.predict(["take"],[2],["tabs"], ["every"], [6], ["hours"], ["x"], ["10"], ["days"])
# clf.predict

In [24]:
# X_test[0],y_pred[0],y_test[0]

### Putting all the prediction logic inside a predict method

In [25]:
def predict(sig):
    sig = word_tokenize(sig)
    sig = pos_tag(sig)
    ret = []
    for i in range(len(sig)):
        t = dict()
        feat = token_to_features(sig,i)
        feat.remove("bias")
        feat.append("bias=1.0")
        if ('BOS' in feat):
            feat.remove('BOS')
            feat.append('BOS=True')

        if ('EOS' in feat):
            feat.remove('EOS')
            feat.append('EOS=True')
        for k in feat:
            t4 = k[k.find('=')+1:]
            try:
                t4 = eval(k[k.find('=')+1:])
                if (type(t4)!= int and type(t4)!= bool and type(t4)!= float):
                    t4 = k[k.find('=')+1:]
            except:
                    t4 = k[k.find('=')+1:]
            t[str(k[:k.find('=')])] = t4 
        ret.append(t)
    #         t3=dict()
    #         feat = token_to_features(i,j)
    #   
    #                           
    #         t1.append(t3)
    #         t2.append(i[j][2])
    #     X.append(t1)
    #     y.append(t2)

    # return X,y 



    """
    predict(sig)
    Purpose: Labels the given sig into corresponding labels
    @param sig. A Sentence  # A medical prescription sig written by a doctor
    @return     A list      # A list with predicted labels (first level of labeling)
    >>> predict('2 tabs every 4 hours')
    [['Qty', 'Form', 'EVERY', 'Period', 'PeriodUnit']]
    >>> predict('2 tabs with food')
    [['Qty', 'Form', 'WITH', 'FOOD']]
    >>> predict('2 tabs qid x 30 days')
    [['Qty', 'Form', 'QID', 'FOR', 'Duration', 'DurationUnit']]
    """
    # print(ret)
    # return ret
    # print(crf.predict(sig))
    print(crf.predict([ret]))

In [26]:
# predict('2 tabs every 4 hours')
# predict(['take','2'])
# predict('take 2 tabs every 6 hours x 10 days')[0]

### Sample predictions

In [27]:
predictions = predict("take 2 tabs every 6 hours x 10 days")

[['Method', 'Qty', 'Form', 'EVERY', 'Qty', 'Form', 'FOR', 'Duration', 'DurationUnit']]


In [28]:
predictions = predict("2 capsu for 10 day at bed")

[['Qty', 'Form', 'FOR', 'Qty', 'Form', 'Period', 'PeriodUnit']]


In [29]:
predictions = predict("2 capsu for 10 days at bed")

[['Qty', 'Form', 'FOR', 'Duration', 'DurationUnit', 'AT', 'WHEN']]


In [30]:
predictions = predict("5 days 2 tabs at bed")

[['Qty', 'Form', 'Qty', 'Form', 'Frequency', 'WHEN']]


In [31]:
predictions = predict("3 tabs qid x 10 weeks")

[['Qty', 'Form', 'Frequency', 'FOR', 'Qty', 'Form']]


In [32]:
predictions = predict("x 30 days")

[['FOR', 'Duration', 'DurationUnit']]


In [33]:
predictions = predict("x 20 months")

[['FOR', 'Qty', 'Form']]


In [34]:
predictions = predict("take 2 tabs po tid for 10 days")

[['Method', 'Qty', 'Form', 'Period', 'PeriodUnit', 'FOR', 'Duration', 'DurationUnit']]


In [35]:
predictions = predict("take 2 capsules po every 6 hours")

[['Method', 'Qty', 'Form', 'Frequency', 'EVERY', 'Period', 'PeriodUnit']]


In [36]:
predictions = predict("inject 2 units pu tid")

[['Method', 'Qty', 'Form', 'Period', 'PeriodUnit']]


In [37]:
predictions = predict("swallow 3 caps tid by mouth")

[['Method', 'Qty', 'Form', 'Frequency', 'Period', 'PeriodUnit']]


In [38]:
predictions = predict("inject 3 units orally")

[['Method', 'Qty', 'Form', 'Frequency']]


In [39]:
predictions = predict("orally take 3 tabs tid")

[['Method', 'Method', 'Qty', 'Form', 'PeriodUnit']]


In [40]:
predictions = predict("by mouth take three caps")

[['Period', 'PeriodUnit', 'Method', 'Qty', 'Form']]


In [41]:
predictions = predict("take 3 tabs orally three times a day for 10 days at bedtime")

[['Method', 'Qty', 'Form', 'Method', 'Qty', 'Form', 'Period', 'PeriodUnit', 'FOR', 'Duration', 'DurationUnit', 'AT', 'WHEN']]


In [42]:
predictions = predict("take 3 tabs orally bid for 10 days at bedtime")

[['Method', 'Qty', 'Form', 'Frequency', 'Period', 'FOR', 'Duration', 'DurationUnit', 'AT', 'WHEN']]


In [43]:
predictions = predict("take 3 tabs bid orally at bed")

[['Method', 'Qty', 'Form', 'Frequency', 'Frequency', 'Period', 'PeriodUnit']]


In [44]:
predictions = predict("take 10 capsules by mouth qid")

[['Method', 'Qty', 'Form', 'Frequency', 'Period', 'PeriodUnit']]


In [45]:
predictions = predict("inject 10 units orally qid x 3 months")

[['FOR', 'Qty', 'Form', 'Frequency', 'Period', 'Method', 'Qty', 'Form']]


In [46]:
prediction = predict("please take 2 tablets per day for a month in the morning and evening each day")

[['Method', 'Method', 'Qty', 'Form', 'Period', 'PeriodUnit', 'FOR', 'Period', 'PeriodUnit', 'Frequency', 'Period', 'PeriodUnit', 'Period', 'PeriodUnit', 'Period', 'PeriodUnit']]


In [47]:
prediction = predict("Amoxcicillin QID 30 tablets")

[['Method', 'FOR', 'Qty', 'Form']]


In [48]:
prediction = predict("take 3 tabs TID for 90 days with food")

[['Method', 'Qty', 'Form', 'Frequency', 'FOR', 'Duration', 'Form', 'Period', 'PeriodUnit']]


In [49]:
prediction = predict("with food take 3 tablets per day for 90 days")

[['Period', 'PeriodUnit', 'Method', 'Qty', 'Form', 'Period', 'PeriodUnit', 'FOR', 'Duration', 'DurationUnit']]


In [50]:
prediction = predict("with food take 3 tablets per week for 90 weeks")

[['Period', 'PeriodUnit', 'Method', 'Qty', 'Form', 'Period', 'PeriodUnit', 'FOR', 'Duration', 'Form']]


In [51]:
prediction = predict("take 2-4 tabs")

[['Method', 'Qty', 'Form']]


In [52]:
prediction = predict("take 2 to 4 tabs")

[['Method', 'Qty', 'TO', 'Qty', 'Form']]


In [53]:
prediction = predict("take two to four tabs")

[['Method', 'Qty', 'TO', 'Qty', 'Form']]


In [54]:
prediction = predict("take 2-4 tabs for 8 to 9 days")

[['Method', 'Qty', 'Form', 'FOR', 'Duration', 'TO', 'Qty', 'Form']]


In [55]:
prediction = predict("take 20 tabs every 6 to 8 days")

[['FOR', 'Qty', 'Form', 'EVERY', 'Period', 'TO', 'Qty', 'Form']]


In [56]:
prediction = predict("take 2 tabs every 4 to 6 days")

[['Method', 'Qty', 'Form', 'EVERY', 'Period', 'TO', 'Qty', 'Form']]


In [57]:
prediction = predict("take 2 tabs every 2 to 10 weeks")

[['Method', 'Qty', 'Form', 'EVERY', 'Period', 'TO', 'Qty', 'Form']]


In [58]:
prediction = predict("take 2 tabs every 4 to 6 days")

[['Method', 'Qty', 'Form', 'EVERY', 'Period', 'TO', 'Qty', 'Form']]


In [59]:
prediction = predict("take 2 tabs every 2 to 10 months")

[['Method', 'Qty', 'Form', 'EVERY', 'Period', 'TO', 'Qty', 'Form']]


In [60]:
prediction = predict("every 60 mins")

[['AND', 'Qty', 'Form']]


In [61]:
prediction = predict("every 10 mins")

[['EVERY', 'Qty', 'Form']]


In [62]:
prediction = predict("every two to four months")

[['EVERY', 'Period', 'TO', 'Period', 'PeriodUnit']]


In [63]:
prediction = predict("take 2 tabs every 3 to 4 days")

[['Method', 'Qty', 'Form', 'EVERY', 'Period', 'TO', 'Qty', 'Form']]


In [64]:
prediction = predict("every 3 to 4 days take 20 tabs")

[['EVERY', 'Period', 'TO', 'Qty', 'Form', 'FOR', 'Qty', 'Form']]


In [65]:
prediction = predict("once in every 3 days take 3 tabs")

[['Frequency', 'Period', 'EVERY', 'Period', 'DurationUnit', 'Method', 'Qty', 'Form']]


In [66]:
prediction = predict("take 3 tabs once in every 3 days")

[['Method', 'Qty', 'Form', 'Frequency', 'Period', 'EVERY', 'Period', 'PeriodUnit']]


In [67]:
prediction = predict("orally take 20 tabs every 4-6 weeks")

[['Method', 'FOR', 'Qty', 'Form', 'EVERY', 'Period', 'PeriodUnit']]


In [68]:
prediction = predict("10 tabs x 2 days")

[['Qty', 'Form', 'Method', 'Qty', 'DurationUnit']]


In [69]:
prediction = predict("3 capsule x 15 days")

[['Qty', 'Form', 'FOR', 'Duration', 'DurationUnit']]


In [70]:
prediction = predict("10 tabs")

[['Qty', 'Form']]
