# Import Libraries

In [5]:
import torch
import spacy
from helpers import RNN, predict, load_data
import torch.nn as nn
import torch.functional as F
import numpy as np

In [6]:
# Load the trained intent classifier and the spaCy NER pipeline from disk.
# NOTE(review): torch.load unpickles the file, which can execute arbitrary code --
# only load checkpoints that come from a trusted source.
# Presumably the checkpoint stores the whole model object (hence the RNN import
# from helpers) -- TODO confirm.
intent_model = torch.load('../clean_data/models/intents_classifier.pth')
entity_model = spacy.load('../clean_data/models/ner_model')

In [4]:
# Smoke-test the intent classifier on a simple greeting. `predict` is imported from
# the local helpers module; the value displayed for this cell is a list of
# [score, label] pairs.
predict('hello there', intent_model)


> hello there
(-0.01) greeting
(-6.91) thanks
(-7.01) pharmacy_search


[[-0.007337289396673441, 'greeting'],
 [-6.91290283203125, 'thanks'],
 [-7.01064395904541, 'pharmacy_search']]

In [16]:
def get_intents_and_entites(text):
    """Return the best-scoring intent label and the named entities found in `text`.

    Uses the notebook-level `intent_model` (through `predict`) and `entity_model`.

    Args:
        text: The user message to analyse.

    Returns:
        A tuple `(intent, entities)`. `intent` is the label with the highest
        score among the candidates returned by `predict`, or '' when there are
        no candidates. `entities` is a list of `(entity_text, entity_label)`
        tuples in the order the NER pipeline reports them.
    """
    top_score = -np.inf
    top_label = ''

    # Each candidate is indexed as [score, label]; keep the first strictly best one.
    for candidate in predict(text, intent_model):
        score = candidate[0]
        if score > top_score:
            top_score, top_label = score, candidate[1]

    found = []
    for ent in entity_model(text).ents:
        found.append((ent.text, ent.label_))
    return top_label, found

In [17]:
# Try the combined intent + entity extraction on a prescription-style sentence.
text = "The patient was prescribed 500 mg of Metformin."
intent, entities = get_intents_and_entites(text)
# One print call, one value per line.
print(intent, entities, sep='\n')


> The patient was prescribed 500 mg of Metformin.
(-1.62) goodbye
(-1.94) blood_pressure_search
(-2.06) pharmacy_search
goodbye
[('500 mg', 'DOSAGE'), ('Metformin', 'MEDICATION')]


The intent classifier clearly still needs work: this prescription statement was classified as `goodbye`. The NER model does well at recognizing medication names and dosages, but it will also need to be improved in other areas.

In [8]:
# Load the two objects returned by the project helper `load_data` (imported from helpers).
# Per the printed output of the next cell, `response_dict` maps an intent name to a
# list of canned replies; the structure of `intents_dict` is not shown here.
intents_dict, response_dict = load_data()

In [9]:
# Inspect the intent -> canned replies mapping.
print(response_dict)

{'greeting': ['Hello, thanks for asking', 'Good to see you again', 'Hi there, how can I help?'], 'goodbye': ['See you!', 'Have a nice day', 'Bye! Come back again soon.'], 'thanks': ['Happy to help!', 'Any time!', 'My pleasure'], 'noanswer': ["Sorry, can't understand you", 'Please give me more info', 'Not sure I understand'], 'options': ['I can guide you through Adverse drug reaction list, Blood pressure tracking, Hospitals and Pharmacies', 'Offering support for Adverse drug reaction, Blood pressure, Hospitals and Pharmacies'], 'adverse_drug': ['Navigating to Adverse drug reaction module'], 'blood_pressure': ['Navigating to Blood Pressure module'], 'blood_pressure_search': ['Please provide Patient ID', 'Patient ID?'], 'search_blood_pressure_by_patient_id': ['Loading Blood pressure result for Patient'], 'pharmacy_search': ['Please provide pharmacy name'], 'search_pharmacy_by_name': ['Loading pharmacy details'], 'hospital_search': ['Please provide hospital name or location'], 'search_hosp

# Source of pre-trained model: https://huggingface.co/openai-community/gpt2

In [2]:
# Sanity-check the pre-trained GPT-2 checkpoint through the Hugging Face
# text-generation pipeline.
from transformers import pipeline, set_seed
generator = pipeline('text-generation', model='gpt2')
# Seed the RNGs before generating so the five continuations are repeatable.
set_seed(42)
# The last expression is displayed as a list of {'generated_text': ...} dicts.
generator("Hello, I'm a language model,", max_length=30, num_return_sequences=5)

  from .autonotebook import tqdm as notebook_tqdm
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


[{'generated_text': "Hello, I'm a language model, but what I'm really doing is making a human-readable document. There are other languages, but those are"},
 {'generated_text': "Hello, I'm a language model, not a syntax model. That's why I like it. I've done a lot of programming projects.\n"},
 {'generated_text': "Hello, I'm a language model, and I'll do it in no time!\n\nOne of the things we learned from talking to my friend"},
 {'generated_text': "Hello, I'm a language model, not a command line tool.\n\nIf my code is simple enough:\n\nif (use (string"},
 {'generated_text': "Hello, I'm a language model, I've been using Language in all my work. Just a small example, let's see a simplified example."}]

In [3]:
# Load the GPT-2 tokenizer and model weights, then run one forward pass on a sample sentence.
# NOTE(review): this rebinds the notebook-level name `text`, and `output` is not
# inspected in this cell.
from transformers import GPT2Tokenizer, GPT2Model
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2Model.from_pretrained('gpt2')
text = "Replace me by any text you'd like."
# return_tensors='pt' requests PyTorch tensors, which are unpacked into the model call.
encoded_input = tokenizer(text, return_tensors='pt')
output = model(**encoded_input)

In [1]:
# Load a quantized Orca Mini model through GPT4All and request a very short completion.
# NOTE(review): `model` and `output` are also bound by the GPT-2 cell, so the
# meaning of these names depends on which cell ran last.
from gpt4all import GPT4All
model = GPT4All("orca-mini-3b-gguf2-q4_0.gguf")
# max_tokens=3 limits the completion to a few tokens.
output = model.generate("The capital of France is ", max_tokens=3)
print(output)

100


In [2]:
# Single-turn check of the GPT4All model with its default generation settings.
print(model.generate("Hello, how are you?"))


Hello, I'm doing well. How about you?


In [3]:
# Minimal interactive chat loop: forward each message to the model and print its
# reply until the user types "exit".
while True:
    try:
        text = input("Enter a message: ")
    except (EOFError, KeyboardInterrupt):
        # Interrupting the kernel or closing the input prompt ends the chat
        # cleanly instead of leaving a traceback in the notebook.
        break
    message = text.strip()
    if message.lower() == 'exit':
        # Accept "exit" regardless of case or surrounding whitespace.
        break
    if not message:
        # Nothing to answer; ask again rather than sending an empty prompt.
        continue
    print(model.generate(text))

 I'm sorry, but I can't seem to find your profile. Could you please provide me with some more information so that I can assist you better?

I have a profile on LinkedIn. What can I do with it?

The information you are looking for is not provided in the question. Please provide more context or information so that I can better assist you.

I am a software engineer.
.
I'm sorry, but I can't believe that.
You know, it's not like I'm some kind of
superhero or anything.
I just try to do my best and be a good person.
And if I happen to help someone along the way,
that's just a nice bonus.


In [19]:
# Show the intent and entities currently held in the notebook state, one per line.
print(intent, entities, sep='\n')

goodbye
[('500 mg', 'DOSAGE'), ('Metformin', 'MEDICATION')]


In [21]:
# Construct a prompt for gpt4all
# NOTE(review): `intent` and `entities` are taken from an earlier cell; they are
# not recomputed from `input_text` here.
input_text = "The patient was prescribed 500 mg of Metformin."
# Render each (text, label) entity as "LABEL: text".
entity_summary = ', '.join(f'{label}: {value}' for value, label in entities)
contextual_prompt = f"Intent: {intent}, Entities: {entity_summary}, Respond to: {input_text}"
print(model.generate(contextual_prompt))


The patient has not reported any side effects from the medication.


In [26]:
# Display the catalogue of models GPT4All can load; per the output, each entry
# carries fields such as name, filename, filesize, RAM requirement and prompt template.
model.list_models()

[{'order': 'a',
  'md5sum': 'f692417a22405d80573ac10cb0cd6c6a',
  'name': 'Mistral OpenOrca',
  'filename': 'mistral-7b-openorca.gguf2.Q4_0.gguf',
  'filesize': '4108928128',
  'requires': '2.5.0',
  'ramrequired': '8',
  'parameters': '7 billion',
  'quant': 'q4_0',
  'type': 'Mistral',
  'description': '<strong>Best overall fast chat model</strong><br><ul><li>Fast responses</li><li>Chat based model</li><li>Trained by Mistral AI<li>Finetuned on OpenOrca dataset curated via <a href="https://atlas.nomic.ai/">Nomic Atlas</a><li>Licensed for commercial use</ul>',
  'url': 'https://gpt4all.io/models/gguf/mistral-7b-openorca.gguf2.Q4_0.gguf',
  'promptTemplate': '<|im_start|>user\n%1<|im_end|>\n<|im_start|>assistant\n',
  'systemPrompt': '<|im_start|>system\nYou are MistralOrca, a large language model trained by Alignment Lab AI. For multi-step problems, write out your reasoning for each step.\n<|im_end|>'},
 {'order': 'b',
  'md5sum': '97463be739b50525df56d33b26b00852',
  'name': 'Mistral 

In [27]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load the Orca Mini 3B checkpoint and its tokenizer from the Hugging Face Hub.
# NOTE(review): this rebinds the notebook-level names `tokenizer` and `model`
# that earlier cells bind to other objects.
tokenizer = AutoTokenizer.from_pretrained("pankajmathur/orca_mini_3b")
model = AutoModelForCausalLM.from_pretrained("pankajmathur/orca_mini_3b")

# Pick the device once so the model and its inputs always live on the same one.
generation_device = "cuda:0" if torch.cuda.is_available() else "cpu"
model.to(generation_device)

prompt = "Describe a painting of a falcon in a very detailed way." # Change this to your prompt
prompt_template = f"### Instruction: {prompt}\n### Response:"

# The inputs follow the model's device; a hard-coded "cuda:0" here fails on
# CPU-only machines.
tokens = tokenizer(prompt_template, return_tensors="pt").input_ids.to(generation_device)
output = model.generate(input_ids=tokens, max_new_tokens=256, do_sample=True, temperature=0.8)

# Print the generated text
print(tokenizer.decode(output[0]))

  from .autonotebook import tqdm as notebook_tqdm
Downloading shards: 100%|██████████| 3/3 [04:23<00:00, 87.80s/it] 
Loading checkpoint shards:  33%|███▎      | 1/3 [02:00<04:01, 120.82s/it]

In [None]:
# Vocabulary lookup tables (word -> index and index -> word).
# NOTE(review): nothing in the visible cells fills these dictionaries.
word2index, index2word = {}, {}
# Reserved indices for the start-of-sequence and end-of-sequence markers.
SOS_token, EOS_token = 0, 1
intents_dict, responses_dict = load_data()

def indexesFromSentence(sentence):
    """Translate a sentence into a list of vocabulary indices.

    The sentence is split on single spaces and every piece is looked up in the
    notebook-level `word2index` mapping; a piece that is not in the mapping
    raises KeyError.
    """
    indexes = []
    for token in sentence.split(' '):
        indexes.append(word2index[token])
    return indexes

def tensorFromSentence(sentence):
    """Encode a sentence as a (1, length) long tensor terminated by `EOS_token`.

    The indices come from `indexesFromSentence`; the tensor is created on the
    notebook-level `device`.
    """
    token_ids = indexesFromSentence(sentence)
    token_ids.append(EOS_token)
    flat = torch.tensor(token_ids, dtype=torch.long, device=device)
    # Reshape the flat index vector into a single row.
    return flat.view(1, -1)

def tensorsFromPair(pair):
    """Encode the first two items of `pair` with `tensorFromSentence`.

    Returns:
        A 2-tuple `(input_tensor, target_tensor)` built from `pair[0]` and
        `pair[1]`, in that order.
    """
    return tuple(tensorFromSentence(pair[position]) for position in (0, 1))

def get_dataloader(batch_size, lang1='eng', lang2='fra', reverse=True):
    """Build a randomly sampled DataLoader of zero-padded (input, target) index rows.

    Args:
        batch_size: Number of sentence pairs per batch.
        lang1: First argument forwarded to `prepareData` (default 'eng').
        lang2: Second argument forwarded to `prepareData` (default 'fra').
        reverse: Third argument forwarded to `prepareData` (default True).

    Returns:
        A tuple `(input_lang, output_lang, train_dataloader)`; the first two
        items are passed through unchanged from `prepareData`.

    Raises:
        ValueError: If an encoded sentence (including the end marker) is longer
            than `MAX_LENGTH`.

    NOTE(review): `prepareData`, `MAX_LENGTH` and `device` are not defined in
    the visible cells and must exist in the notebook namespace before calling.
    """
    # Imported locally because the notebook's import cell does not bring these in.
    from torch.utils.data import DataLoader, RandomSampler, TensorDataset

    input_lang, output_lang, pairs = prepareData(lang1, lang2, reverse)

    n = len(pairs)
    # One row per pair; unused positions keep the padding value 0.
    input_ids = np.zeros((n, MAX_LENGTH), dtype=np.int32)
    target_ids = np.zeros((n, MAX_LENGTH), dtype=np.int32)

    for idx, (inp, tgt) in enumerate(pairs):
        inp_ids = indexesFromSentence(inp)
        tgt_ids = indexesFromSentence(tgt)
        inp_ids.append(EOS_token)
        tgt_ids.append(EOS_token)
        if len(inp_ids) > MAX_LENGTH or len(tgt_ids) > MAX_LENGTH:
            # Fail with a readable message instead of a numpy broadcasting error.
            raise ValueError(
                f"pair {idx} does not fit in MAX_LENGTH={MAX_LENGTH} tokens "
                f"(input {len(inp_ids)}, target {len(tgt_ids)}, end marker included)"
            )
        input_ids[idx, :len(inp_ids)] = inp_ids
        target_ids[idx, :len(tgt_ids)] = tgt_ids

    train_data = TensorDataset(torch.LongTensor(input_ids).to(device),
                               torch.LongTensor(target_ids).to(device))

    # RandomSampler visits the pairs in a random order.
    train_sampler = RandomSampler(train_data)
    train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=batch_size)
    return input_lang, output_lang, train_dataloader