In [3]:
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, \
    Pipeline, PreTrainedTokenizer
from typing import Any
import torch
from configparser import ConfigParser

class ModelPipelineLoader:
    """Load a causal-LM checkpoint and lazily build a text-generation pipeline.

    ``__init__`` reads the generation settings from an INI file and then loads
    the tokenizer and the model (with ``load_in_8bit=True``). The
    ``transformers`` pipeline is only built on the first ``get_pipeline()``
    call and is cached afterwards.
    """

    # INI section that holds the pipeline keyword arguments.
    _CONFIG_SECTION = 'pipeline.parameters'

    def __init__(self, model_id: str, config_path: str = 'conf.ini') -> None:
        """Read the pipeline settings, then load tokenizer and model.

        Args:
            model_id: Identifier or path handed to ``from_pretrained``.
            config_path: INI file containing a ``[pipeline.parameters]``
                section. Defaults to ``'conf.ini'``.

        Raises:
            configparser.Error: If the section or one of its options is missing.
            ValueError: If an option holds a value that cannot be parsed.
        """
        self._pipeline: Pipeline | None = None

        # Parse the settings first so a broken config fails before the costly
        # checkpoint load instead of after it.
        self._parameters: dict = self._load_parameters(config_path)

        self._tokenizer: PreTrainedTokenizer = AutoTokenizer.from_pretrained(
            model_id)
        self._model: Any = AutoModelForCausalLM.from_pretrained(
            model_id, load_in_8bit=True)

    @classmethod
    def _load_parameters(cls, config_path: str) -> dict:
        """Return the pipeline keyword arguments read from ``config_path``."""
        config = ConfigParser()
        config.read(config_path)
        section = cls._CONFIG_SECTION

        return {
            'torch_dtype': cls._resolve_torch_dtype(
                config.get(section, 'torch_dtype')),
            'device_map': config.get(section, 'device_map'),
            'max_new_tokens': config.getint(section, 'max_new_tokens'),
            'do_sample': config.getboolean(section, 'do_sample'),
            'top_k': config.getint(section, 'top_k'),
            'num_return_sequences': config.getint(
                section, 'num_return_sequences'),
        }

    @staticmethod
    def _resolve_torch_dtype(raw: str) -> Any:
        """Translate a config string into a ``torch.dtype`` without ``eval``.

        Accepts ``torch.float16`` / ``float16`` style names, ``auto`` and
        ``None`` (optionally quoted).

        Raises:
            ValueError: If ``raw`` does not name a ``torch.dtype``.
        """
        name = raw.strip().strip('\'"')
        if name.lower() == 'none':
            return None
        if name == 'auto':
            return 'auto'

        attr = name[len('torch.'):] if name.startswith('torch.') else name
        dtype = getattr(torch, attr, None)
        # Only genuine dtypes are accepted; arbitrary torch attributes or
        # expressions from the config file are rejected.
        if not isinstance(dtype, torch.dtype):
            raise ValueError(f'Unsupported torch_dtype in config: {raw!r}')
        return dtype

    def __repr__(self) -> str:
        return f'{self.model}, {self.tokenizer}'

    @property
    def model(self) -> Any:
        """The loaded model, or ``None`` after ``reset()``."""
        return self._model

    @property
    def tokenizer(self) -> "PreTrainedTokenizer | None":
        """The loaded tokenizer, or ``None`` after ``reset()``."""
        return self._tokenizer

    @property
    def pipeline(self) -> "Pipeline | None":
        """The cached pipeline, or ``None`` if it has not been built yet."""
        return self._pipeline

    @property
    def parameters(self) -> dict:
        """Keyword arguments read from the config file."""
        return self._parameters

    def get_pipeline(self) -> "Pipeline":
        """Return the text-generation pipeline, building it on first use.

        Raises:
            RuntimeError: If ``reset()`` has already released the model.
        """
        if self._pipeline is None:
            if self._model is None or self._tokenizer is None:
                raise RuntimeError(
                    'Model and tokenizer were released by reset(); '
                    'create a new ModelPipelineLoader.')

            params = self._parameters
            # Built once and cached; later calls reuse the same object.
            self._pipeline = pipeline(
                "text-generation",
                model=self._model,
                tokenizer=self._tokenizer,
                torch_dtype=params['torch_dtype'],
                device_map=params['device_map'],
                max_new_tokens=params['max_new_tokens'],
                do_sample=params['do_sample'],
                top_k=params['top_k'],
                num_return_sequences=params['num_return_sequences'],
                eos_token_id=self._tokenizer.eos_token_id,
            )

        return self._pipeline

    def _release(self) -> bool:
        """Drop every reference this loader holds and clear the CUDA cache.

        Returns:
            True if at least one of pipeline, model or tokenizer was held.
        """
        held = any(
            getattr(self, attr, None) is not None
            for attr in ('_pipeline', '_model', '_tokenizer')
        )
        # The cached pipeline references the model too, so it has to go as
        # well or the weights stay alive.
        self._pipeline = None
        self._model = None
        self._tokenizer = None
        torch.cuda.empty_cache()
        return held

    def reset(self) -> None:
        """Release model, tokenizer and pipeline; safe to call repeatedly."""
        if not self._release():
            print("Already destroyed.")

    def __del__(self) -> None:
        try:
            self._release()
        except Exception:
            # Finalizers may run while the interpreter is shutting down and
            # module globals are already gone; nothing useful can be done.
            pass


In [5]:
# Look up the checkpoint location for the chosen model key in conf.ini and
# load it through the pipeline loader.
config = ConfigParser()
config.read('conf.ini')

model_id = config.get('huggingface.models', 'mistral_7b_v0.2')

pipe = ModelPipelineLoader(model_id)


Loading checkpoint shards: 100%|██████████| 3/3 [00:03<00:00,  1.15s/it]


In [6]:
import json

# Inputs for the few-shot prompt: text_1 / json_1 are the worked example,
# text_2 is the text the model is asked to convert.
with open('text_8.txt', 'r') as f:
    text_1 = f.read()

# errors='ignore' silently drops any bytes that cannot be decoded.
# NOTE(review): no encoding is given, so the platform default is used -
# confirm the files' actual encoding.
with open('text_22.txt', 'r', errors='ignore') as f:
    text_2 = f.read()

with open('json8.json', 'r') as f:
    json_1 = json.load(f)

In [7]:
def to_str(json: dict):
    """Serialise ``json`` into a JSON string for embedding in a prompt.

    Args:
        json: A JSON-compatible dictionary (e.g. the result of ``json.load``).

    Returns:
        The JSON text. Apostrophes inside values are kept intact and
        booleans / ``None`` are written as ``true`` / ``false`` / ``null``.

    Raises:
        TypeError: If ``json`` contains objects that are not JSON-serialisable.
    """
    # The parameter shadows the stdlib module name, hence the aliased import.
    import json as json_lib

    return json_lib.dumps(json, ensure_ascii=False)

In [8]:
# Delimiters wrapped around the instruction and the system prompt.
# NOTE(review): these look like Llama-2 style chat markers - confirm they
# match the chat format expected by the model that is actually loaded.
B_INST = "[INST]"
E_INST = "[/INST]"
B_SYS = "<<SYS>>\n"
E_SYS = "\n<</SYS>>\n\n"

# System prompt used when the caller does not supply one.
DEFAULT_SYSTEM_PROMPT = """\
You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.

If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information."""


def get_prompt(instruction, new_system_prompt=DEFAULT_SYSTEM_PROMPT):
    """Build the full prompt string.

    The result is ``B_INST``, the system prompt enclosed in ``B_SYS`` /
    ``E_SYS``, the instruction, and finally ``E_INST``, concatenated in that
    order.

    Args:
        instruction: Text placed after the system block.
        new_system_prompt: System prompt to embed; defaults to
            ``DEFAULT_SYSTEM_PROMPT``.

    Returns:
        The assembled prompt string.
    """
    pieces = (B_INST, B_SYS, new_system_prompt, E_SYS, instruction, E_INST)
    return "".join(pieces)

In [9]:
# First variant of the text-to-JSON system prompt.
# NOTE(review): the wording contains typos ("text .JSON format"); it is left
# untouched here because the string is runtime text.
system_prompt = """You are a helpful assistant that converts the given text .JSON format, preserving most of the information such that keys of JSON are the major entities present in the text,
while the values of JSON are the attributes associated with those entities. 
Only output the JSON without any text. Please, make good relations instead of writing entire sentences as keys and their values as either true or false"""

# Second, more detailed variant of the text-to-JSON system prompt.
# NOTE(review): contains the typo "bracketes"; left untouched because the
# string is runtime text.
system_prompt2 = """Transform the following detailed text into a well-structured JSON format. Identify the key entities within the text and use them as JSON keys, 
and their detailed descriptions as values. Make sure that the JSON is a valid one and contains no syntax issue, and all the bracketes are aligned.
With the values, please also enlist the dates as well.
Ensure the JSON structure accurately reflects the hierarchical organization of the 
information in the text. Please be meticulous in filling out all values comprehensively and meaningfully, 
avoiding the use of boolean values (true or false) and empty fields. The JSON output should strictly 
contain relevant and accurate data from the text without any addition of extraneous or false information.

"""

# text = """
# CKD chronic labs drawn demonstrate SCr 1.64 --&gt; 1.57 (2/2019) --&gt; 1.54 (6/2019) --&gt; 1.78 (10/2019) --&gt; 1.49 (3/2020 Advised to increase fluid intake not currently taking any nephrotoxic agents monitor Discussed with patient. Due to chronic nature and progressive labs, think nephrology evaluation is prudent at this time. Also counseled on cocaine use This puts patient in CKD stage 3 No albuminuria Does not see kidney doctor Endorses drinking a lot of water daily Denies decreased urine output Eduated pt about current NSAID Use and to use tylenol instead Statesthat he has not been using any NSAIDs Does not use any illict drugs States that he has been drinking alot of water to keep his kidneys hydrated Drinks about 5-6 10oz glasses a day ---------------------------------------------------------------------------------------------------------------------------- ISOLATED ALK PHOS ELEVATIONS 144 --&gt; 141 (10/2019) --&gt; 192 (3/2020) Replete vit D PO. Liver enyzmes WNL on 12/29/2021 States that he was told that he had low vitamin D levels Plan Will check AST/ALT, and bilirubin levels Consider DEXA scan F/u in 1 week ------------------------------------------------------------------------------------------------------------------------- PREDM2 A1C 5.9 (10/2019) --&gt; 5.7 (4/2020) Educated pt on cutting back on sugar intake States that he has been cutting down on sweets as much as he can Reports that he has been so what minful of carbohydrrate intake ---------------------------------------------------------------------------------------------------------------------------------- ANEMIA, NORMOCYTIC Hgb 12.4, MCV 91 ? 2/2 CKD vs. malnutrition? 
He has a history of malnutrition Statesthat he has has some recent weight loss Does not take any iron tablets --------------------------------------------------------------------------------------------------------------------------------------- VITAMIN D DEF States that he has been drinking lots of milk and has been trying to get out side in the sun -------------------------------------------------------------------------------------------------------------------------------------- UNDERWEIGHT and calorie malnutrition likely from ostomy bag still not able to get much nutrition or absorb much nutrition also not much appetite on daily basis either cont. to encourage to eat more protein Lost 10 lbs since last visit. reports this is due to food insecurity given ongoing pandemic. has difficulty finding the food he wants in stock at the grocery store. Has a colostomy (hx of chron's disease) His weight today is 131lbs BMI 17.78 Plan Will discuss the need for nutritional supplement such as Nepro shake for patients with CKD ------------------------------------------------------------------------------------------------------------------ KNEE PAIN R&gt;&gt;L Chronic issues well controlled with pain meds cont. 
with this regiment for now a lot of knee stiffness --- Has been dealing with this for a long time with not much avail at this point with OTC meds Stretch exercises discussed Also discuss non-pharmacological approaches such as TENS UNIT, BACK BRACE, ACQUAPUNCTUER, ETC… States that he has been taking pain and OTC medications Pain has been tolerable Plan Continue with medications as prescribed ------------------------------------------------------------------------------------------------------------------- ABDOMINAL COLOSTOMY 30 years ago now used to it seen GI in past they are ok with his current situation 08/20/19 - States he has had Crohn's disease for 30 years, has an ileostomy - States the Sx are well controlled - Doesn't want to F/U with GI States that his ostomy is working fine Has no issues Stoma is red and beefy Denies any blood ---------------------------------------------------------------------------------------------------------------- PULMONARY EMBOLISM (Resolbved) was seeing heme onc DR. BERNARD was told his US legs are neg. he is off of COUMADIN now f/u q6months with them done with anti coags -------------------------------------------------------------------------------------------------------------- Vit D Deficiency Vitamin D Def. Recommend vitamin D DAILY Increase oral intake of dairy, Drink more milk if possible More sun if possible Plan More sun if possible. Increase dietary intake of vitamin D from sources such as: fatty fish such as salmon, tuna, and mackerel, beef liver, cheese, mushrooms, egg yolks, milk, breakfast cereals, orange juice, and other dairy products, such as yogurt, also soy drinks. Replete Vitamin D 50,000 IU weekly Consider DEXA, PTH assessment Denies feeling tiered Plan Will check B12 and Vit. D  4739
# """



In [10]:
# One-shot prompt body: a solved example (TEXT 1 / JSON 1) followed by the
# text the model has to convert (TEXT 2) and an open "JSON 2" slot.
text = "".join([
    "\n\nTEXT 1:\n\n", text_1,
    "\n\nJSON 1:\n\n", to_str(json_1),
    "\n\n\nTEXT 2:\n\n", text_2,
    "\n\nJSON 2 (your turn):\n\n",
])

In [11]:
from langchain import HuggingFacePipeline
from langchain import PromptTemplate, LLMChain

# Wrap the loader's cached transformers pipeline so LangChain can call it.
# NOTE(review): confirm that the temperature passed via `model_kwargs` is
# actually applied at generation time for an already-built pipeline.
llm = HuggingFacePipeline(pipeline = pipe.get_pipeline(), model_kwargs = {'temperature':0})
# The instruction part of the prompt is a single `{text}` placeholder.
instruction="{text}"

# Combine the placeholder with the system prompt and show the final template.
template=get_prompt(instruction, system_prompt2)
print(template)
prompt = PromptTemplate(template=template, input_variables=["text"])
llm_chain = LLMChain(prompt=prompt, llm=llm, verbose=True)

[INST]<<SYS>>
Transform the following detailed text into a well-structured JSON format. Identify the key entities within the text and use them as JSON keys, 
and their detailed descriptions as values. Make sure that the JSON is a valid one and contains no syntax issue, and all the bracketes are aligned.
With the values, please also enlist the dates as well.
Ensure the JSON structure accurately reflects the hierarchical organization of the 
information in the text. Please be meticulous in filling out all values comprehensively and meaningfully, 
avoiding the use of boolean values (true or false) and empty fields. The JSON output should strictly 
contain relevant and accurate data from the text without any addition of extraneous or false information.


<</SYS>>

{text}[/INST]


In [12]:
# Run the chain with `text` filling the `{text}` placeholder; the trailing
# expression displays the returned result.
llm_response = llm_chain({"text":text})
llm_response

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.




[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m[INST]<<SYS>>
Transform the following detailed text into a well-structured JSON format. Identify the key entities within the text and use them as JSON keys, 
and their detailed descriptions as values. Make sure that the JSON is a valid one and contains no syntax issue, and all the bracketes are aligned.
With the values, please also enlist the dates as well.
Ensure the JSON structure accurately reflects the hierarchical organization of the 
information in the text. Please be meticulous in filling out all values comprehensively and meaningfully, 
avoiding the use of boolean values (true or false) and empty fields. The JSON output should strictly 
contain relevant and accurate data from the text without any addition of extraneous or false information.


<</SYS>>



TEXT 1:

HPI:
A 57 year old male patient presents with Breast Lump or Lumps.  Since last visit, it shows no change. D/W DR KUMTHEKAR, DR gabriel a

{'text': ' {"Patient": {"Age": "70", "Sex": "Male", "Medical_History": {"Conditions": ["HTN", "DM", "Diabetic_Retinopathy", "Asthma", "Hyperlipidemia", "Arrhythmia", "CKD_2", "Laryngeal_Reflux_Disease", "Congenital_Heart_Disease"], "Previous_Surgeries": ["Heart_Surgery", "Eye_Prosthesis"], "Immunizations": ["pneumonia_and_flu_shot_fall2019", "COVID_1All_Shots_Done_Moderna_112121_Booster"], "Last_Colonoscopy": {"Year": "2021"}}, "Allergies": {"Penicillin": {"Type": "Drug", "Reactions": ["Rash"]}}, "Food_Allergies": {"Type": "Food", "Foods": ["Fish", "Sea_Food"], "Reactions": ["Rashes"]}, "Environmental_Allergies": {"Type": "General", "Allergens": ["Dust"], "Reactions": ["Rashes"]}, "Perfume_Allergies": {"Type": "Drug", "Reactions": ["Rashes"]}}, "Current_Medications": {"Lancets_30G": {"Dose": "1 Lancet(s)", "Frequency": "every 12 hours", "Start_Date": "06/10/2022", "Quantity": "3 Box(es)", "Duration": "90 Day(s)", "Refill": "5"}, "Entresto": {"Dose": "24-26 MG", "Frequency": "twice a da

In [15]:
# Keep only the generated string stored under the 'text' key.
res = llm_response['text']

In [13]:
# Show the raw generated string.
print(llm_response['text'])

 {"Patient": {"Age": "70", "Sex": "Male", "Medical_History": {"Conditions": ["HTN", "DM", "Diabetic_Retinopathy", "Asthma", "Hyperlipidemia", "Arrhythmia", "CKD_2", "Laryngeal_Reflux_Disease", "Congenital_Heart_Disease"], "Previous_Surgeries": ["Heart_Surgery", "Eye_Prosthesis"], "Immunizations": ["pneumonia_and_flu_shot_fall2019", "COVID_1All_Shots_Done_Moderna_112121_Booster"], "Last_Colonoscopy": {"Year": "2021"}}, "Allergies": {"Penicillin": {"Type": "Drug", "Reactions": ["Rash"]}}, "Food_Allergies": {"Type": "Food", "Foods": ["Fish", "Sea_Food"], "Reactions": ["Rashes"]}, "Environmental_Allergies": {"Type": "General", "Allergens": ["Dust"], "Reactions": ["Rashes"]}, "Perfume_Allergies": {"Type": "Drug", "Reactions": ["Rashes"]}}, "Current_Medications": {"Lancets_30G": {"Dose": "1 Lancet(s)", "Frequency": "every 12 hours", "Start_Date": "06/10/2022", "Quantity": "3 Box(es)", "Duration": "90 Day(s)", "Refill": "5"}, "Entresto": {"Dose": "24-26 MG", "Frequency": "twice a day", "Start

In [16]:
# res = ('{' + llm_response['text'].split('{', 1)[1])
# Discard whatever follows the final '}' so the string ends on a closing brace.
# NOTE(review): if `res` contains no '}', this appends one to the unmodified
# text instead of trimming anything.
res = res.rsplit('}', 1)[0] + '}'
print(res)

 {"Patient": {"Age": "70", "Sex": "Male", "Medical_History": {"Conditions": ["HTN", "DM", "Diabetic_Retinopathy", "Asthma", "Hyperlipidemia", "Arrhythmia", "CKD_2", "Laryngeal_Reflux_Disease", "Congenital_Heart_Disease"], "Previous_Surgeries": ["Heart_Surgery", "Eye_Prosthesis"], "Immunizations": ["pneumonia_and_flu_shot_fall2019", "COVID_1All_Shots_Done_Moderna_112121_Booster"], "Last_Colonoscopy": {"Year": "2021"}}, "Allergies": {"Penicillin": {"Type": "Drug", "Reactions": ["Rash"]}}, "Food_Allergies": {"Type": "Food", "Foods": ["Fish", "Sea_Food"], "Reactions": ["Rashes"]}, "Environmental_Allergies": {"Type": "General", "Allergens": ["Dust"], "Reactions": ["Rashes"]}, "Perfume_Allergies": {"Type": "Drug", "Reactions": ["Rashes"]}}, "Current_Medications": {"Lancets_30G": {"Dose": "1 Lancet(s)", "Frequency": "every 12 hours", "Start_Date": "06/10/2022", "Quantity": "3 Box(es)", "Duration": "90 Day(s)", "Refill": "5"}, "Entresto": {"Dose": "24-26 MG", "Frequency": "twice a day", "Start

In [17]:
# Persist the trimmed model output. UTF-8 is set explicitly so the written
# file does not depend on the platform's default encoding.
with open('test.json', 'w', encoding='utf-8') as f:
    f.write(res)


In [17]:
import json

def is_valid_json(json_string):
    """Report whether ``json_string`` parses as JSON.

    Args:
        json_string: The text to check.

    Returns:
        True if ``json.loads`` accepts the input, False if it raises
        ``json.JSONDecodeError``. Any other exception propagates.
    """
    try:
        json.loads(json_string)
    except json.JSONDecodeError:
        parsed_ok = False
    else:
        parsed_ok = True
    return parsed_ok

# Check whether the trimmed model output held in `res` parses as JSON.
print(f"The given json file is valid: {is_valid_json(res)}")


The given json file is valid: True


In [1]:
import clipboard

# Copy the cleaned JSON string to the system clipboard.
# Requires `res` from the earlier cells in the same kernel session; on a
# fresh kernel this line raises NameError.
clipboard.copy(res)

NameError: name 'res' is not defined