In [53]:
import os
import json
import random
import numpy as np
from six.moves import range

from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
import nltk

import torch
import torch.nn as nn
from torch.autograd import Variable
import torchvision

import lc_options
from utils import lc_utilities as utils
from rouge import Rouge
from similarity.normalized_levenshtein import NormalizedLevenshtein
from gensim.models import KeyedVectors
from scipy import spatial
word2vec = KeyedVectors.load_word2vec_format(
    'data/word2vec/GoogleNews-vectors-negative300.bin', binary=True)

In [109]:
params = {
    'inputJson': "data/processed_data/processed_data.json",
    'useGPU': False,
    # A-Bot checkpoint
    'startFrom': "./checkpoints/sl_1/abot_ep_50.vd",
    'qstartFrom': "./checkpoints/sl_1/qbot_ep_40.vd",
    'beamSize': 5,
}

params = {
    'inputJson': "data/processed_data/processed_data.json",
    'useGPU': False,
    # A-Bot checkpoint
    'startFrom': "./checkpoints/rl_leven/abot_ep_9.vd",
    'qstartFrom': "./checkpoints/rl_leven/qbot_ep_9.vd",
    'beamSize': 5,
}

params = {
    'inputJson': "data/processed_data/processed_data.json",
    'useGPU': False,
    # A-Bot checkpoint
    'startFrom': "./checkpoints/rl_rougel/abot_ep_19.vd",
    'qstartFrom': "./checkpoints/rl_rougel/qbot_ep_19.vd",
    'beamSize': 5,
}

# RNG seed
manualSeed = 1597
random.seed(manualSeed)
torch.manual_seed(manualSeed)
if params['useGPU']:
    torch.cuda.manual_seed_all(manualSeed)

print('Loading json file: ' + params['inputJson'])
with open(params['inputJson'], 'r') as fileId:
    info = json.load(fileId)

wordCount = len(info['word2ind'])
# Add <START> and <END> to vocabulary
info['word2ind']['<START>'] = wordCount + 1
info['word2ind']['<END>'] = wordCount + 2
startToken = info['word2ind']['<START>']
endToken = info['word2ind']['<END>']
# Padding token is at index 0
vocabSize = wordCount + 3
print('Vocab size with <START>, <END>: %d' % vocabSize)

# Construct the reverse map
info['ind2word'] = {
    int(ind): word
    for word, ind in info['word2ind'].items()
}

Loading json file: data/processed_data/processed_data.json
Vocab size with <START>, <END>: 4867


In [110]:
def loadModel(params, agent='abot'):
    # should be everything used in encoderParam, decoderParam below
    encoderOptions = [
        'encoder', 'vocabSize', 'embedSize', 'rnnHiddenSize', 'numLayers',
        'useHistory', 'numRounds', 'dropout', 'useSumm'
    ]
    decoderOptions = [
        'decoder', 'vocabSize', 'embedSize', 'rnnHiddenSize', 'numLayers',
        'dropout'
    ]
    modelOptions = encoderOptions + decoderOptions

    mdict = None
    gpuFlag = params['useGPU']
    startArg = 'startFrom' if agent == 'abot' else 'qstartFrom'
    assert params[startArg], "Need checkpoint for {}".format(agent)

    if params[startArg]:
        print('Loading model (weights and config) from {}'.format(
            params[startArg]))

        if gpuFlag:
            mdict = torch.load(params[startArg])
        else:
            mdict = torch.load(params[startArg],
                map_location=lambda storage, location: storage)

        # Model options is a union of standard model options defined
        # above and parameters loaded from checkpoint
        modelOptions = list(set(modelOptions).union(set(mdict['params'])))
        for opt in modelOptions:
            if opt not in params:
                params[opt] = mdict['params'][opt]

            elif params[opt] != mdict['params'][opt]:
                # Parameters are not overwritten from checkpoint
                pass

    # Initialize model class
    encoderParam = {k: params[k] for k in encoderOptions}
    decoderParam = {k: params[k] for k in decoderOptions}

    encoderParam['startToken'] = encoderParam['vocabSize'] - 2
    encoderParam['endToken'] = encoderParam['vocabSize'] - 1
    decoderParam['startToken'] = decoderParam['vocabSize'] - 2
    decoderParam['endToken'] = decoderParam['vocabSize'] - 1

    if agent == 'abot':
        encoderParam['type'] = params['encoder']
        decoderParam['type'] = params['decoder']
        encoderParam['isAnswerer'] = True
        from lc.models.lc_answerer import Answerer
        model = Answerer(encoderParam, decoderParam)

    elif agent == 'qbot':
        encoderParam['type'] = params['qencoder']
        decoderParam['type'] = params['qdecoder']
        encoderParam['isAnswerer'] = False
        encoderParam['useSumm'] = False
        from lc.models.lc_questioner import Questioner
        model = Questioner(
            encoderParam,
            decoderParam,
            summGenSize=60)

    if params['useGPU']:
        model.cuda()

    if mdict:
        model.load_state_dict(mdict['model'])
        
    print("Loaded agent {}".format(agent))
    return model

In [111]:
ind_map = lambda words: np.array([info['word2ind'].get(word, info['word2ind']['UNK']) 
                                  for word in words], dtype='int64')

tokenize = lambda string: ['<START>'] + word_tokenize(string) + ['<END>']

to_str_gt = lambda w: str(" ".join([info['ind2word'][x] for x in filter(
        lambda x:x>0,w.data.cpu().numpy())]))[8:-6]

to_str_pred = lambda w, l: str(" ".join([info['ind2word'][x] for x in list( filter(
        lambda x:x>0,w.data.cpu().numpy()))][:l.data.cpu()[0]]))[8:]

def var_map(tensor):
    return Variable(tensor.unsqueeze(0), volatile=True)

In [112]:
def string_conv (string):
    words = nltk.word_tokenize(string)
    words = [word.lower() for word in words if word.isalpha()]
    string = ' '.join(words)

    string_tokens = tokenize(string)
    string = ind_map(string_tokens)

    string_tensor = var_map(torch.from_numpy(string))
    string_lens = var_map(torch.LongTensor([len(string)]))
    
    return string_tensor, string_lens

In [113]:
doc_data = json.load(open('data/generated_data/gen_dataset.json'))['data']['dialogs']
summ_data = json.load(open('data/generated_data/summary_dataset.json'))

eval_data = []

for doc in doc_data:
    eval_data.append({
        'doc': doc['document'],
        'summ': summ_data[doc['summary']]
    })
    
eval_data = eval_data[::3]

In [114]:
print(len(eval_data))

52


In [30]:
# Data
summary = "asthma " * 100
document = "Burns Overview Burns can be minor medical problems or life-threatening emergencies. Many people die each year from fire-related burn injuries. Electricity and chemicals also cause severe burns. Scalding liquids are the most common cause of burns in children. Treatment of burns depends on the location and severity of the injury. Sunburns and small scalds can usually be treated at home. Deep or widespread burns need immediate medical attention. People with severe burns often require treatment at specialized burn centers. They may need skin grafts to cover large wounds or to minimize scarring with deep wounds. And they may need emotional support and months of follow-up care, such as physical therapy. Symptoms Burns don't affect the skin uniformly, so a single injury can reach varying depths. Distinguishing a minor burn from a more serious burn involves determining the extent of tissue damage. The following are three classifications of burns: - First-degree burn. This minor burn affects only the outer layer of the skin (epidermis). It may cause redness, swelling and pain. It usually heals with first-aid measures within several days to a week. Sunburn is a classic example. - Second-degree burn. This type of burn affects both the epidermis and the second layer of skin (dermis). It may cause red, white or splotchy skin, pain, and swelling. And the wound often looks wet or moist. Blisters may develop, and pain can be severe. Deep second-degree burns can cause scarring. - Third-degree burn. This burn reaches into the fat layer beneath the skin. Burned areas may be charred black or white. The skin may look waxy or leathery. Third-degree burns can destroy nerves, causing numbness. A person with this type of burn may also have difficulty breathing or experience smoke inhalation or carbon monoxide poisoning. When to see a doctor Seek emergency medical assistance for: - Burns that cover the hands, feet, face, groin, buttocks, a major joint or a large area of the body - Deep burns, which means burns affecting all layers of the skin and even other tissues - Burns caused by chemicals or electricity - Difficulty breathing or burns to the airway Minor burns can be cared for at home, but call your doctor if you experience: - Large blisters - Signs of infection, such as oozing from the wound, increased pain, redness and swelling - A burn or blister that doesn't heal in several weeks - New, unexplained symptoms - Significant scarring Causes Many things can cause burns, including: - Fire - Hot liquid or steam - Hot metal, glass or other objects - Electrical currents - Radiation from X-rays or radiation therapy to treat cancer - Sunlight or ultraviolet light from a sunlamp or tanning bed - Chemicals such as strong acids, lye, paint thinner or gasoline - Abuse Complications Deep or widespread burns can lead to many complications, including: - Infection. Burns can leave skin vulnerable to bacterial infection and increase your risk of sepsis. Sepsis is a life-threatening infection that travels through the bloodstream and affects your whole body. It progresses rapidly and can cause shock and organ failure. - Low blood volume. Burns can damage blood vessels and cause fluid loss. This may result in low blood volume (hypovolemia). Severe blood and fluid loss prevents the heart from pumping enough blood to the body. - Dangerously low body temperature. The skin helps control the body's temperature, so when a large portion of the skin is injured, you lose body heat. This increases your risk of a dangerously low body temperature (hypothermia). Hypothermia is a condition in which the body loses heat faster than it can produce heat. - Breathing problems. Breathing hot air or smoke can burn airways and cause breathing (respiratory) difficulties. Smoke inhalation damages the lungs and can cause respiratory failure. - Scarring. Burns can cause scars and ridged areas caused by an overgrowth of scar tissue (keloids). - Bone and joint problems. Deep burns can limit movement of the bones and joints. Scar tissue can form and cause shortening and tightening of skin, muscles or tendons (contractures). This condition may permanently pull joints out of position. Diagnosis During the physical exam, your doctor will examine your burned skin and determine what percentage of your total body surface area is involved. In general, an area of skin roughly equal to the size of your palm equals 1 percent of your total body surface area. For people ages 10 to 40, the American Burn Association defines a severe burn as one that involves 25 percent total body surface area or any burn involving the eyes, ears, face, hands, feet or groin. You'll also be examined for other injuries and to determine whether the burn has affected the rest of your body. You may need lab tests, X-rays or other diagnostic procedures. Treatment Treatment of burns depends on the type and extent of the injuries. Most minor burns can be treated at home using over-the-counter products or aloe. They usually heal within a few weeks. For serious burns, after appropriate first aid care and wound assessment, your treatment may involve medications, wound dressings, therapy and surgery. The goals of treatment are to control pain, remove dead tissue, prevent infection, reduce scarring, regain function and address emotional needs. You may need months of additional treatments and therapy. This may be done during a hospital stay, on an outpatient basis or at home. Factors affecting this choice include your wishes, other conditions and abilities, such as whether you're able to change bandages. Medications and wound healing products For major burns, various medications and products are used to encourage healing. - Water-based treatments. Your care team may use techniques such as ultrasound mist therapy to clean and stimulate the wound tissue. - Fluids to prevent dehydration. You may need intravenous (IV) fluids to prevent dehydration and organ failure. - Pain and anxiety medications. Healing burns can be incredibly painful. You may need morphine and anti-anxiety medications - particularly for dressing changes. - Burn creams and ointments. Your care team can select from a variety of topical products for wound healing. These help keep the wound moist, reduce pain, prevent infection and speed healing. - Dressings. Your care team may also use various specialty wound dressings. These create a moist environment that fights infection and helps the burn heal. - Drugs that fight infection. If you develop an infection, you may need IV antibiotics. - Tetanus shot. Your doctor might recommend a tetanus shot after a burn injury. Physical and occupational therapy If the burned area is large, especially if it covers any joints, you may need physical therapy exercises. These can help stretch the skin so the joints can remain flexible. Other types of exercises can improve muscle strength and coordination. And occupational therapy may help if you have difficulty doing your normal daily activities. Surgical and other procedures You may need one or more of the following procedures: - Breathing assistance. If you've been burned on the face or neck, your throat may swell shut. If that appears likely, your doctor may insert a tube down your windpipe (trachea) to keep oxygen supplied to your lungs. - Tube feeding. Your metabolism goes into overdrive when your body starts trying to heal your burns. To provide adequate nutrition for this task, you doctor may thread a feeding tube through your nose to your stomach. - Easing blood flow around the wound. If a burn scab (eschar) goes completely around a limb, it can tighten and cut off the blood circulation. A scab (eschar) that goes completely around the chest can make it difficult to breathe. Your doctor may cut the eschar in several places to relieve this pressure. This procedure is called decompression. - Skin grafts. A skin graft is a surgical procedure in which sections of your own healthy skin are used to replace the scar tissue caused by deep burns. Donor skin from cadavers or pigs can be used as a temporary solution. - Plastic surgery. Plastic surgery (reconstruction) can improve the appearance of burn scars and increase the flexibility of joints affected by scarring. Lifestyle and home remedies To treat minor burns, follow these steps: - Cool the burn. Run cool (not cold) tap water over the burn for 10 to 15 minutes or until the pain eases. Or apply a clean towel dampened with cool tap water. Don't use ice. Putting ice directly on a burn can cause further damage to the tissue. - Remove rings or other tight items from the burned area. Try to do this quickly and gently, before the area swells. - Don't break small blisters (no bigger than your little fingernail). If blisters break, gently clean the area with mild soap and water, apply an antibiotic ointment, and cover it with a nonstick gauze bandage. - Apply moisturizer or aloe vera lotion or gel. This may soothe the area and prevent dryness as the wound heals. - If needed, take an over-the-counter pain reliever. Nonprescription products include ibuprofen (Advil, Motrin IB, others), naproxen (Aleve) and acetaminophen (Tylenol, others). - Consider a tetanus shot. Make sure that your tetanus booster is up to date. Doctors recommend people get a tetanus shot at least every 10 years. Whether your burn was minor or serious, use sunscreen and moisturizer regularly once the wound is healed."

summary_tensor, summary_lens = string_conv(summary)
document_tensor, document_lens = string_conv(summary)

  return Variable(tensor.unsqueeze(0), volatile=True)


In [137]:
numRounds = 10
beamSize = 5

summs_eval = []
last_round_ans = []
last_round_ques = []

for eval in eval_data:
    # load data
    summary_tensor, summary_lens = string_conv(eval['summ'])
    document_tensor, document_lens = string_conv(eval['doc'])
    
    aBot = None
    qBot = None
    
    # load aBot
    if params['startFrom']:
        aBot = loadModel(params, 'abot')
        assert aBot.encoder.vocabSize == vocabSize, "Vocab size mismatch!"
        aBot.eval()
        
    # load qBot
    if params['qstartFrom']:
        qBot = loadModel(params, 'qbot')
        assert qBot.encoder.vocabSize == vocabSize, "Vocab size mismatch!"
        qBot.eval()


    # prepare for dialogue
    if aBot:
        aBot.eval(), aBot.reset()
        aBot.train(), aBot.reset()
        aBot.observe(-1, summary=summary_tensor, summaryLens=summary_lens, document=document_tensor,
                        documentLens=document_lens)

    if qBot:
        qBot.eval(), qBot.reset()
        qBot.observe(-1, document=document_tensor,
                        documentLens=document_lens)
    
    # dialogue before summ generation
    for round in range(numRounds):
        questions, quesLens = qBot.forwardDecode(
            beamSize=beamSize, inference='greedy')
        qBot.observe(round, ques=questions, quesLens=quesLens)
        aBot.observe(round, ques=questions, quesLens=quesLens)
        answers, ansLens = aBot.forwardDecode(
            beamSize=beamSize, inference='greedy')
        aBot.observe(round, ans=answers, ansLens=ansLens)
        qBot.observe(round, ans=answers, ansLens=ansLens)
        last_round_ans.append(to_str_pred(answers[0], ansLens))
        last_round_ques.append(to_str_pred(questions[0], quesLens))


        summ, summ_lens = qBot.predictSummary(inference='greedy')

    summs_eval.append({
        'gt': eval['summ'],
        'gen': to_str_pred(summ[0], summ_lens)
    })

  return Variable(tensor.unsqueeze(0), volatile=True)


Loading model (weights and config) from ./checkpoints/rl_rougel/abot_ep_19.vd
Encoder: hre-ques-lateim-hist
Decoder: gen
Loaded agent abot
Loading model (weights and config) from ./checkpoints/rl_rougel/qbot_ep_19.vd
Encoder: hre-ques-lateim-hist
Decoder: gen
Loaded agent qbot
Loading model (weights and config) from ./checkpoints/rl_rougel/abot_ep_19.vd
Encoder: hre-ques-lateim-hist
Decoder: gen
Loaded agent abot
Loading model (weights and config) from ./checkpoints/rl_rougel/qbot_ep_19.vd
Encoder: hre-ques-lateim-hist
Decoder: gen
Loaded agent qbot
Loading model (weights and config) from ./checkpoints/rl_rougel/abot_ep_19.vd
Encoder: hre-ques-lateim-hist
Decoder: gen
Loaded agent abot
Loading model (weights and config) from ./checkpoints/rl_rougel/qbot_ep_19.vd
Encoder: hre-ques-lateim-hist
Decoder: gen
Loaded agent qbot
Loading model (weights and config) from ./checkpoints/rl_rougel/abot_ep_19.vd
Encoder: hre-ques-lateim-hist
Decoder: gen
Loaded agent abot
Loading model (weights and 

In [138]:
print(len(set(last_round_ans)))
print(len(set(last_round_ques)))

329
18


In [116]:
def word2vec_transform(sequence):
    vectorized = []
    words = word_tokenize(sequence)
    seq_tokens = [word.lower() for word in words if word.isalpha()]
    
    for word in seq_tokens:
        try:
            vectorized.append(word2vec[word])
        except:
            vectorized.append(np.zeros(300,))
            
    return np.array(vectorized)


In [117]:
def similarity_cosine(vec1, vec2):
    cosine_distance = spatial.distance.cosine(vec1, vec2)
    return 1-cosine_distance

In [136]:
rouge_scores = []
levenshtein_similarities = []
word2vec_similarities = []


for pair in summs_eval:
    rouge_scores.append(Rouge().get_scores(pair['gt'], pair['gen'], avg=True))
    levenshtein_similarities.append(1 -  NormalizedLevenshtein().distance(pair['gt'], pair['gen']))
    word2vec_similarities.append(similarity_cosine(word2vec_transform(pair['gt']).mean(axis=0),word2vec_transform(pair['gen']).mean(axis=0)))
    
common_words = []
for pair in summs_eval:
    gt_summ = pair['gt'].lower()
    gen_summ = pair['gen'].lower()
    for word in pair['gen'].split():
        if word in pair['gt'].split():
            common_words.append(word)

set(common_words)

{'bipolar',
 'blood',
 'cancer',
 'common',
 'condition',
 'disorder',
 'doctor',
 'eventually',
 'exercising',
 'factors',
 'find',
 'first',
 'genetic',
 'growing',
 'help',
 'include',
 'main',
 'may',
 'pinpoint',
 'recommend',
 'sibling',
 'still',
 'trying',
 'uncertain'}

In [124]:
rouge_f1 = []
rouge_fl = []

for score in rouge_scores:
    rouge_f1.append(score['rouge-1']['f'])
    rouge_fl.append(score['rouge-l']['f'])
            

In [120]:
print(max(levenshtein_similarities))
print(min(levenshtein_similarities))
print(sum(levenshtein_similarities) / len(levenshtein_similarities))
print('------------------------------')
print(max(word2vec_similarities))
print(min(word2vec_similarities))
print(sum(word2vec_similarities) / len(word2vec_similarities))
print('------------------------------')
print(max(rouge_f1))
print(min(rouge_f1))
print(sum(rouge_f1) / len(rouge_f1))
print('------------------------------')
print(max(rouge_fl))
print(min(rouge_fl))
print(sum(rouge_fl) / len(rouge_fl))

0.32484848484848483
0.05728376327769347
0.20081246378301246
------------------------------
0.8500129744437055
0.260056192119841
0.626608367458359
------------------------------
0.2568807305108998
0.0
0.041572642321148864
------------------------------
0.2568807305108998
0.0
0.041572642321148864


In [140]:
aBot = None
qBot = None

summary_tensor, summary_lens = string_conv(eval_data[2]['summ'])
document_tensor, document_lens = string_conv(eval_data[3]['doc'])
    
# load aBot
if params['startFrom']:
    aBot = loadModel(params, 'abot')
    assert aBot.encoder.vocabSize == vocabSize, "Vocab size mismatch!"
    aBot.eval()
    
# load qBot
if params['qstartFrom']:
    qBot = loadModel(params, 'qbot')
    assert qBot.encoder.vocabSize == vocabSize, "Vocab size mismatch!"
    qBot.eval()



if aBot:
    aBot.eval(), aBot.reset()
    aBot.train(), aBot.reset()
    aBot.observe(-1, summary=summary_tensor, summaryLens=summary_lens, document=document_tensor,
                    documentLens=document_lens)

if qBot:
    qBot.eval(), qBot.reset()
    qBot.observe(-1, document=document_tensor,
                    documentLens=document_lens)

numRounds = 10
beamSize = 5
summ, summ_lens = qBot.predictSummary(inference='greedy')
print('-----------------------------------------------')
print(to_str_pred(summ[0], summ_lens))

print(Rouge().get_scores(eval_data[2]['summ'], to_str_pred(summ[0], summ_lens), avg=True))
print(1 -  NormalizedLevenshtein().distance(eval_data[2]['summ'], to_str_pred(summ[0], summ_lens)))
print(similarity_cosine(word2vec_transform(eval_data[2]['summ']).mean(axis=0),word2vec_transform(to_str_pred(summ[0], summ_lens)).mean(axis=0)))
print('*************************')
for round in range(numRounds):
    questions, quesLens = qBot.forwardDecode(
        beamSize=beamSize, inference='greedy')
    qBot.observe(round, ques=questions, quesLens=quesLens)
    aBot.observe(round, ques=questions, quesLens=quesLens)
    answers, ansLens = aBot.forwardDecode(
        beamSize=beamSize, inference='greedy')
    aBot.observe(round, ans=answers, ansLens=ansLens)
    qBot.observe(round, ans=answers, ansLens=ansLens)
    test.append(to_str_pred(questions[0], quesLens))
    # print("Q%d: "%(round+1), to_str_pred(questions[0], quesLens))
    # print("A%d: "%(round+1), to_str_pred(answers[0], ansLens))

summ, summ_lens = qBot.predictSummary(inference='greedy')
print('-----------------------------------------------')
print(to_str_pred(summ[0], summ_lens))

print(Rouge().get_scores(eval_data[2]['summ'], to_str_pred(summ[0], summ_lens), avg=True))
print(1 -  NormalizedLevenshtein().distance(eval_data[2]['summ'], to_str_pred(summ[0], summ_lens)))
print(similarity_cosine(word2vec_transform(eval_data[2]['summ']).mean(axis=0),word2vec_transform(to_str_pred(summ[0], summ_lens)).mean(axis=0)))

  return Variable(tensor.unsqueeze(0), volatile=True)


Loading model (weights and config) from ./checkpoints/rl_rougel/abot_ep_19.vd
Encoder: hre-ques-lateim-hist
Decoder: gen
Loaded agent abot
Loading model (weights and config) from ./checkpoints/rl_rougel/qbot_ep_19.vd
Encoder: hre-ques-lateim-hist
Decoder: gen
Loaded agent qbot
Q1:  what are the causes of pulmonary hypertension ?
A1:  the right side of the heart pumps blood through the lungs where it picks up oxygen blood returns to the left side of the left side of the left side of the left side of the left side of a
Q2:  what is the cause of the symptoms of the UNK of the body ?
A2:  the goals of UNK UNK in the blood sugar glucose level of UNK activity to help relax the risk of developing relax the child such as a relative such as a relative such as a relative such as the body
Q3:  what is the cause of the heart of UNK ?
A3:  most cases of symptoms that starts in the blood sugar glucose level of developing a role in the body when the left side of the left side of the left side of the 