In [1]:
import os
import time
import datetime

import pandas as pd
import seaborn as sns
import numpy as np
import random

import matplotlib.pyplot as plt


import torch
from torch.utils.data import Dataset, DataLoader, random_split, RandomSampler, SequentialSampler
torch.manual_seed(42)

from transformers import GPT2LMHeadModel,  GPT2Tokenizer, GPT2Config, GPT2LMHeadModel
from transformers import AdamW, get_linear_schedule_with_warmup


In [3]:
tokenizer = GPT2Tokenizer.from_pretrained('gpt2', bos_token='<|startoftext|>', eos_token='<|endoftext|>', pad_token='<|pad|>') #gpt2-medium


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [4]:
path = 'C:/Users/wjtay/Documents/GitHub/medicalbot/gpt_models/model_save_3'
model =  GPT2LMHeadModel.from_pretrained('C:/Users/wjtay/Documents/GitHub/medicalbot/gpt_models/model_save_3')
model.resize_token_embeddings(len(tokenizer))

Embedding(50259, 768)

In [9]:
model.eval()

q1 = "if i have trouble gaining weight because of my low appetite would a higher intake of the hormone leptin help? . i grew up always considered ""underweight"" for my age height and sex. people tell me it's just genes or i'm just a small eater but i think it's more than that. i'd like to gain more weight without being unhealthy about it. i have a low appetite and i just read an article on this website about leptin and leptin signals the body's energy-nourishment."
q2 = "i need a list of foods that i can eat and list of foods to avoid with pancreatitis"
q3 = "i made a mistake and i bought 9 to 24 months formula for my newborn. can he drink it?"
q4 = "i think i have bone cancer. what should i do?"
q5 = "i have covid-19 what should i do?"

prompt = f"<|startoftext|><|question|>{q5}<|answer|>"

generated = torch.tensor(tokenizer.encode(prompt)).unsqueeze(0)
# generated = generated.to(device)

print(generated)

sample_outputs = model.generate(
                                generated, 
                                #bos_token_id=random.randint(1,30000),
                                do_sample=True,   
                                top_k=50, 
                                max_length = 300,
                                top_p=0.95, 
                                num_return_sequences=3
                                )

for i, sample_output in enumerate(sample_outputs):
  print("{}: {}\n\n".format(i, tokenizer.decode(sample_output, skip_special_tokens=True)))

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


tensor([[50257,    27,    91, 25652,    91,    29,    72,   423, 39849,   312,
            12,  1129,   644,   815,  1312,   466,    30,    27,    91, 41484,
            91,    29]])
0: <|question|>i have covid-19 what should i do?<|answer|>do not adjust your diet prior to surgery. a few times a week not sooner than one week. eat a well nourishing diet that is rich in vitamins and minerals. you will need to work with your healthcare provider to make sure you are getting enough vitamins and minerals. if you become unsure before your surgery call your healthcare provider.


1: <|question|>i have covid-19 what should i do?<|answer|>if you are looking for medical assistance with your age and sex then yes you may well qualify. some insurance plans don't cover it and medicare plans cover it. your doctor will have to determine the plans you will need to cover. to determine what type of coverage you will qualify for he or she will ask you and you their questions.


2: <|question|>i have covid-

In [8]:
configuration = GPT2Config.from_pretrained('gpt2', output_hidden_states=False)
model2 = GPT2LMHeadModel.from_pretrained("gpt2", config=configuration)
model2.resize_token_embeddings(len(tokenizer))

Embedding(50259, 768)

In [14]:
model2.eval()

prompt = "<|startoftext|>my son has add and mild autism. he has been successfully on concerta for 6+ years. can you help with his weight loss?"

generated = torch.tensor(tokenizer.encode(prompt)).unsqueeze(0)
# generated = generated.to(device)

print(generated)

sample_outputs = model2.generate(
                                generated, 
                                #bos_token_id=random.randint(1,30000),
                                do_sample=True,   
                                top_k=50, 
                                max_length = 300,
                                top_p=0.95, 
                                num_return_sequences=3
                                )

for i, sample_output in enumerate(sample_outputs):
  print("{}: {}\n\n".format(i, tokenizer.decode(sample_output, skip_special_tokens=True)))

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


tensor([[50257,  1820,  3367,   468,   751,   290, 11607, 15998,    13,   339,
           468,   587,  7675,   319, 10010,    64,   329,   718,    10,   812,
            13,   460,   345,  1037,   351,   465,  3463,  2994,    30]])
0: my son has add and mild autism. he has been successfully on concerta for 6+ years. can you help with his weight loss?


1: my son has add and mild autism. he has been successfully on concerta for 6+ years. can you help with his weight loss?


2: my son has add and mild autism. he has been successfully on concerta for 6+ years. can you help with his weight loss?




In [2]:

import pandas as pd
import pickle
# with open('filename.pickle', 'wb') as handle:
#     pickle.dump(your_data, handle, protocol=pickle.HIGHEST_PROTOCOL)

# Load data (deserialize)
with open('gpu_data/MBERT_Lasse_FAISS_embeddings.pkl', 'rb') as handle:
    unserialized_data = pickle.load(handle)

training_df = pd.DataFrame(unserialized_data)


In [5]:
display(training_df.head())

Unnamed: 0,question,answer,question_embedding,answer_embedding
0,my 5 1/2-year-old son displays adhd symptoms f...,adhd and bipolar mood disorder (bmd) can coexi...,"[-0.413157, 0.05378814, -0.27316287, -0.008533...","[-0.20886117, -0.21162969, -0.19061936, -0.189..."
1,my son has add and mild autism. he has been su...,stimulants in general tend to decrease appetit...,"[-0.48283178, 0.02534586, 0.0103217205, -0.125...","[-0.1452014, 0.028581273, 0.019210964, -0.0560..."
2,my son is 13 and is depressed. he has been tak...,while any of the stimulant medications can inc...,"[-0.40460944, 0.017487632, -0.0425283, -0.1331...","[-0.3345335, 0.11818186, 0.08812475, -0.086428..."
3,my 17-year-old has stopped taking concerta aft...,seventy percent of teens diagnosed when they a...,"[-0.46335128, -0.007923237, 0.030742655, 0.043...","[-0.013330771, 0.08197601, 0.052254237, -0.010..."
4,i've been taking respa-ar for allergies. i can...,try claritin-d which is located behind the pha...,"[-0.21603523, 0.0058082193, -0.068554334, -0.0...","[-0.08699978, -0.19785826, 0.060930073, -0.063..."


In [5]:
print(training_df.iloc[-41]["question"])
print(training_df.iloc[-42]["question"])
print(training_df.iloc[-43]["question"])
print(training_df.iloc[-44]["question"])

why am i bleeding after taking contraceptive pill?
should i take some medicine to stop heavy bleeding after an i-pill?
can antibiotics interfere with the depo-provera birth control shot the way they do with oral contraceptives?
will antibiotics reduce the effect of oral contraceptive pills?


In [9]:
display(training_df[training_df["question"] == "should i take some medicine to stop heavy bleeding after an i-pill?"])

Unnamed: 0,question,answer,question_embedding,answer_embedding
29531,should i take some medicine to stop heavy blee...,the bleeding could be due to withdrawal effect...,"[-0.4998133, 0.21075934, -0.1710357, 0.1950991...","[-0.12213257, 0.26806858, 0.04654521, 0.139121..."
31743,should i take some medicine to stop heavy blee...,the extra bleeding could be due to the pill. h...,"[-0.4998133, 0.21075934, -0.1710357, 0.1950991...","[-0.2732135, -0.006451058, 0.19289777, 0.24179..."
31746,should i take some medicine to stop heavy blee...,the extra bleeding could be due to the pill. h...,"[-0.4232357, 0.27518612, -0.1223748, 0.1546501...","[-0.12213257, 0.26806858, 0.04654521, 0.139121..."


In [21]:
print(training_df.iloc[29531]["question_embedding"] == training_df.iloc[31743]["question_embedding"])

[ True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  T

In [19]:
print(training_df.iloc[29531]["question"])

should i take some medicine to stop heavy bleeding after an i-pill?
