In [63]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers import pipeline

In [64]:
model_id = "/pure-mlo-scratch/make_project/trial-runs/meditron-7b-direct/hf_checkpoint_new/"

# load model and tokenizer
model = AutoModelForCausalLM.from_pretrained(model_id, use_cache=True, device_map="auto")
tokenizer = AutoTokenizer.from_pretrained(model_id, use_cache=False)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [5]:
len(tokenizer)

32019

In [6]:
# check the new special tokens in the tokenizer
print(tokenizer.special_tokens_map)

{'bos_token': '<|im_start|>', 'eos_token': '<|im_end|>', 'unk_token': '<unk>', 'sep_token': '<SEP>', 'pad_token': '<PAD>', 'cls_token': '<CLS>', 'mask_token': '<MASK>', 'additional_special_tokens': ['[bib_ref]', '[/bib_ref]', '[fig_ref]', '[/fig_ref]', '[bib]', '[/bib]', '[fig]', '[/fig]', '[table]', '[/table]', '[formula]', '[/formula]', '<|im_start|>', '<|im_end|>']}


In [7]:
# check the new special tokens in the model
print(model.config.vocab_size)

32019


In [8]:
# check the new special token idx in the tokenizer
print(tokenizer.convert_tokens_to_ids('<|im_end|>'))

32018


In [47]:
# check the last token idx in the tokenizer
print(tokenizer.convert_ids_to_tokens(13))

<0x0A>


In [10]:
print(model.config.vocab_size)

32019


In [11]:
tokenizer.eos_token_id

32018

In [60]:
pipe = pipeline("text-generation",
                model=model,
                tokenizer= tokenizer,    
                max_new_tokens = 2048,
                do_sample=True,
                top_k=10,
                num_return_sequences=1,
                eos_token_id=tokenizer.eos_token_id,
                return_full_text=True,
                )

In [61]:
prompt = '''<|im_start|>
Given the provided patient-doctor conversation, generate the corresponding clinical note as written by the physician.
Make sure to use all the information from the dialogue into the note, but do not add any new information.

Doctor: Good morning, how are you feeling today?
Patient: Not so well, doctor. I have been having severe pain in my lower abdomen for the past few days.
Doctor: I see. Can you describe the pain for me?
Patient: It's a constant pain and it's pretty severe. I would rate it an 8 on a scale of 1 to 10.
Doctor: Does the pain radiate anywhere else?
Patient: No, it doesn't.
Doctor: Have you had any other symptoms like fever, chills, nausea, or vomiting?
Patient: No, I haven't.
Doctor: How about urinary symptoms or vaginal discharge?
Patient: No, nothing like that.
Doctor: Have you been experiencing any bleeding?
Patient: No, I haven't.
Doctor: Can you tell me a little bit about your menstrual cycle? Are you taking any contraceptives?
Patient: I'm in the middle of my cycle and I'm not taking any contraceptives.
Doctor: I see. Have you reported any dysmenorrhea or painful intercourse in the past year?
Patient: Yes, I have. I also had a Chlamydia infection ten years ago.
Doctor: I understand. Can you tell me about your recent medical history?
Patient: Three days ago, I suddenly started experiencing severe pain in my right lower quadrant. I went to the emergency room and was treated for a suspected renal colic and urine infection. But the pain continued the next day.
Doctor: I see. And did you have any similar experiences in the past?
Patient: Yes, I was admitted to the surgical department for renal colic one year ago. During my hospitalization, I had a CT scan and they found a cystic mass on my right adnexa.
Doctor: Okay, I see. Let's do a physical examination now. Can you describe the pain when I press on your right lower quadrant?
Patient: It's pretty tender.
Doctor: Okay, I see. And what about your pelvic examination?
Patient: I have right ovarian tenderness, but no cervical motion tenderness.
Doctor: I see. We did some tests and your complete blood count and urinalysis are normal. But your CRP is 47. And your pregnancy test was negative.
Patient: Okay.
Doctor: We also did some endocervical swabs and they were negative.
Patient: Okay.
Doctor: We did a sonography examination and it showed a normal uterus and a normal left ovary. But there was a small amount of simple free fluid in the Douglas pouch.
Patient: Okay.
Doctor: And we found a cystic mass measuring 8 * 4 * 3.5 cm that is thought to arise from your right adnexa or ovary. The cystic structure is separated from your ovary and has two ecographic features. One is a fusiform, thick-walled cystic mass that resembles a hydrosalpinx. The other has the appearance of a cyst containing blood.
Patient: Hmm.
Doctor: The differential diagnosis includes isolated torsion of your Fallopian tube, paraovarian cyst, hydrosalpinx associated with a tube-ovarian abscess, or endometrioma. But the CT examination did not add any useful information.
Patient: Okay.
Doctor: Based on the results, we need to do further examination. I will refer you to a specialist for a more detailed examination.
Patient: Okay, doctor. Thank you.
Doctor: No problem. Please make sure to follow up with the specialist as soon as possible.
<|im_end|>

<|im_start|>
Now, generate the corresponding clinical note.'''


In [18]:
prompt_short = '''<|im_start|>
Given the provided patient-doctor conversation, generate the corresponding clinical note as written by the physician.
Make sure to use all the information from the dialogue into the note, but do not add any new information.

Doctor: Good morning, how are you feeling today?
Patient: Not so well, doctor. I have been having severe pain in my lower abdomen for the past few days.
Doctor: I see. Can you describe the pain for me?
Patient: It's a constant pain and it's pretty severe. I would rate it an 8 on a scale of 1 to 10.
Doctor: Does the pain radiate anywhere else?
Patient: No, it doesn't.
Doctor: Have you had any other symptoms like fever, chills, nausea, or vomiting?
Patient: No, I haven't.
<|im_end|>

<|im_start|>
Now, generate the corresponding clinical note.'''


In [65]:
# generate from prompt
generated = pipe(prompt)
print(generated)

Setting `pad_token_id` to `eos_token_id`:32018 for open-end generation.


[{'generated_text': "<|im_start|>\nGiven the provided patient-doctor conversation, generate the corresponding clinical note as written by the physician.\nMake sure to use all the information from the dialogue into the note, but do not add any new information.\n\nDoctor: Good morning, how are you feeling today?\nPatient: Not so well, doctor. I have been having severe pain in my lower abdomen for the past few days.\nDoctor: I see. Can you describe the pain for me?\nPatient: It's a constant pain and it's pretty severe. I would rate it an 8 on a scale of 1 to 10.\nDoctor: Does the pain radiate anywhere else?\nPatient: No, it doesn't.\nDoctor: Have you had any other symptoms like fever, chills, nausea, or vomiting?\nPatient: No, I haven't.\nDoctor: How about urinary symptoms or vaginal discharge?\nPatient: No, nothing like that.\nDoctor: Have you been experiencing any bleeding?\nPatient: No, I haven't.\nDoctor: Can you tell me a little bit about your menstrual cycle? Are you taking any cont

In [66]:
len(tokenizer.encode(generated[0]['generated_text']))

1642

In [26]:
len(generated[0]['generated_text'])

2076

In [27]:
len(generated[1]['generated_text'])

2203

In [22]:
text = ''' 
Given the provided patient-doctor conversation, generate the corresponding clinical note as written by the physician.
Make sure to use all the information from the dialogue into the note, but do not add any new information.

Doctor: Good morning, how are you feeling today?
Patient: Not so well, doctor. I have been having severe pain in my lower abdomen for the past few days.
Doctor: I see. Can you describe the pain for me?
Patient: It's a constant pain and it's pretty severe. I would rate it an 8 on a scale of 1 to 10.
Doctor: Does the pain radiate anywhere else?
Patient: No, it doesn't.
Doctor: Have you had any other symptoms like fever, chills, nausea, or vomiting?
Patient: No, I haven't.
<|im_end|>

<|im_start|>
Now, generate the corresponding clinical note.'''
text_encoded = tokenizer.encode(text, return_tensors='pt')

In [96]:
text_encoded

tensor([[32017,   259,    13, 29954,  5428,   278,  4944, 16500, 29899,  1867,
          2801, 14983, 29892,  5706,   278,  6590, 24899,   936,  4443,   408,
          3971,   491,   278,  4824,  8910, 29889,    13,  9984,  1854,   304,
           671,   599,   278,  2472,   515,   278,  7928,   434,   964,   278,
          4443, 29892,   541,   437,   451,   788,   738,   716,  2472, 29889,
            13,    13,  6132,  2801, 29901,  7197,  7250, 29892,   920,   526,
           366, 11223,  9826, 29973,    13, 11457,   993, 29901,  2216,   577,
          1532, 29892, 11619, 29889,   306,   505,  1063,  2534, 22261,  6788,
           297,   590,  5224,   633, 29881,  2770,   363,   278,  4940,  2846,
          3841, 29889,    13,  6132,  2801, 29901,   306,  1074, 29889,  1815,
           366,  8453,   278,  6788,   363,   592, 29973,    13, 11457,   993,
         29901,   739, 29915, 29879,   263,  4868,  6788,   322,   372, 29915,
         29879,  5051, 22261, 29889,   306,   723,  

In [97]:
len(text_encoded[0])

219

In [59]:
tokenizer.convert_ids_to_tokens(4868)

'▁constant'

In [79]:
# Define a function to perform inference and print logits scores
def generate_text_with_logits(prompt, max_length=1000, temperature=1.0):
    # Tokenize input prompt
    input_ids = tokenizer.encode(prompt, return_tensors="pt")

    # Generate text
    output = model.generate(
        input_ids,
        max_length=max_length,
        temperature=temperature,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id,
    )

    # Decode generated text
    generated_text = tokenizer.decode(output[0], skip_special_tokens=True)

    # Get logits scores
    # logits = output.logits

    return generated_text, output

In [80]:
generated_text, output = generate_text_with_logits(text, max_length=1000, temperature=1.0)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32018 for open-end generation.


In [81]:
generated_text

" \nGiven the provided patient-doctor conversation, generate the corresponding clinical note as written by the physician.\nMake sure to use all the information from the dialogue into the note, but do not add any new information.\n\nDoctor: Good morning, how are you feeling today?\nPatient: Not so well, doctor. I have been having severe pain in my lower abdomen for the past few days.\nDoctor: I see. Can you describe the pain for me?\nPatient: It's a constant pain and it's pretty severe. I would rate it an 8 on a scale of 1 to 10.\nDoctor: Does the pain radiate anywhere else?\nPatient: No, it doesn't.\nDoctor: Have you had any other symptoms like fever, chills, nausea, or vomiting?\nPatient: No, I haven't.\n \n\n \nNow, generate the corresponding clinical note.\nA 38-year-old female with a past medical history of hypertension and a surgical history of a cesarean section 4 years ago, presented to the emergency department with severe constant pain in the suprapubic region. The pain started

In [82]:
len(generated_text)

2543

In [84]:
output.shape

torch.Size([1, 691])

In [85]:
output

tensor([[32017,   259,    13, 29954,  5428,   278,  4944, 16500, 29899,  1867,
          2801, 14983, 29892,  5706,   278,  6590, 24899,   936,  4443,   408,
          3971,   491,   278,  4824,  8910, 29889,    13,  9984,  1854,   304,
           671,   599,   278,  2472,   515,   278,  7928,   434,   964,   278,
          4443, 29892,   541,   437,   451,   788,   738,   716,  2472, 29889,
            13,    13,  6132,  2801, 29901,  7197,  7250, 29892,   920,   526,
           366, 11223,  9826, 29973,    13, 11457,   993, 29901,  2216,   577,
          1532, 29892, 11619, 29889,   306,   505,  1063,  2534, 22261,  6788,
           297,   590,  5224,   633, 29881,  2770,   363,   278,  4940,  2846,
          3841, 29889,    13,  6132,  2801, 29901,   306,  1074, 29889,  1815,
           366,  8453,   278,  6788,   363,   592, 29973,    13, 11457,   993,
         29901,   739, 29915, 29879,   263,  4868,  6788,   322,   372, 29915,
         29879,  5051, 22261, 29889,   306,   723,  

In [89]:
len(text)

773

In [98]:
text_example1 = '''Given the provided patient-doctor conversation, generate the corresponding clinical note as written by the physician. \nMake sure to use all the information from the dialogue into the note, but do not add any new information. \n\nDoctor: Hello, I am Dr. Smith. How are you doing today?\nPatient: I'm okay, thanks.\nDoctor: I understand that you were admitted to our hospital with a progressing two-vessel coronary heart disease. Can you tell me more about your symptoms?\nPatient: Hmm, yes. I have been feeling short of breath and chest pain under physical exertion in my daily life.\nDoctor: I see. And I have also reviewed your medical history, which included a Non-ST-elevation myocardial infarction and a percutaneous transluminal coronary angioplasty with a stent implantation in the past.\nPatient: Yes, that's correct.\nDoctor: Great. Now, you have been scheduled for a bypass surgery. Do you have any questions or concerns about the surgery?\nPatient: Well, to be honest, I'm a bit uncertain about the whole thing. I don't know what might have caused the worsening of my disease and how to positively influence the progression of the disease.\nDoctor: I understand your concerns. That's why we have conducted a LAVA assessment for you. The assessment showed that you rated eight terms as very important for your life, and the three topics you were most dissatisfied with were mobility, contact to nature, and the ability to make decisions independently.\nPatient: Hmm, yes, that's right.\nDoctor: The negative rating of mobility and contact with nature were probably influenced by your current situation in the hospital and your disease. However, your dissatisfaction with your ability to make decisions independently could be attributed to your lack of knowledge concerning your disease and the expected improvement of your condition and mobility after the bypass surgery.\nPatient: Okay.\nDoctor: That's why we suggest having a patient consultation that focuses on a shared decision and understanding of the link between the treatment aims and your current negative experiences in life. This could help you to reduce your uncertainty and feel more empowered to actively participate in your treatment.\nPatient: I see.\nDoctor: The application of the LAVA tool led to the conclusion that you have a need for more information about your disease and the planned surgical intervention. This could help you to better understand your condition and make informed decisions.\nPatient: Yes, I would like to know more about it.\nDoctor: Of course. I'll arrange a consultation with our specialist who can provide you with more information and answer any questions you may have.\nPatient: Thank you, doctor.\nDoctor: You're welcome. And don't hesitate to let us know if you have any other concerns.\nPatient: Okay, I will.","gold":"This patient was a male in his early sixties. He was admitted to an integrative ward of a University Hospital, with a progressing two-vessel coronary heart disease. The patient was scheduled for bypass surgery. The medical history of the patient comprised a Non-ST-elevation myocardial infarction. A percutaneous transluminal coronary angioplasty and a stent implantation had been conducted in the past.\\nIn a short interview preceding the LAVA assessment the patient reported that he had noticed a worsening of his coronary disease under physical exertion in his daily life before hospitalization. Nevertheless, the extent of deterioration that was diagnosed during his hospitalization, surprised him and he described an uncertainty because he did not know what might have caused the worsening of his disease nor how to positively influence the progression of the disease (e.g., life style modifications).\\nFigure shows the LAVA self-assessment of the patient.\\nWhen using the LAVA-Tool the patient rated eight terms as very important for his life. The three topics the patient was most dissatisfied with were mobility, contact to nature, and the ability to make decisions independently. The negative rating of mobility and contact with nature were probably influenced by the patient\u2019s current situation in the hospital and by his disease. However, his dissatisfaction with his ability to make decisions independently could be attributed to his lack of knowledge concerning his disease and the expected improvement of his condition and mobility after the bypass surgery. A patient consultation that focused on a shared decision and understanding of the link between the treatment aims and his current negative experiences in life could therefore be helpful.\\nFor this patient, the application of the LAVA tool led to the conclusion that the patient had a need for information. In this case, more information about the disease and the planned surgical intervention could probably help the patient to reduce his uncertainty and to feel more empowered to actively participate in his treatment. In the preceding interview the importance of the information need was not fully captured by the interviewer. The LAVA tool \u2013 forcing the patient to weigh his satisfaction with important aspects in his life \u2013 made it easier to capture his priorities, thereby showing a potential to improve his situation with a simple measure (i.e. patient information). One could argue that more information and a better understanding of the planned intervention would not have changed the somatic state of the patient. However, and most importantly, for patients with multiple conditions an improvement in well-being and a reduction of fear and uncertainty could make an important difference to their quality of life."}'''
print(len(text_example1.split()))

876


In [91]:
text_example2 = '''Given the provided patient-doctor dialogue, write the corresponding patient information summary in JSON format.\nMake sure to extract all the information from the dialogue into the template, but do not add any new information. \nIf a field is not mentioned, simply write \"feature\": \"None\".\n\nDoctor: Hello, how are you feeling today?\nPatient: I'm feeling okay, thank you.\nDoctor: I see from your medical history that you have stage 1 hypertension. Can you tell me more about your history?\nPatient: Yes, I've been taking medication for it for a while now.\nDoctor: Okay, thank you for letting me know. You were admitted to the hospital post cardiac arrest. Can you tell me what happened?\nPatient: I was at home and my husband found me unresponsive in the middle of the night.\nDoctor: I'm sorry to hear that. I see that you have a significant family history of premature coronary artery disease. Can you tell me more about that?\nPatient: Yes, my father had it in his 40's and my mother had a pacemaker inserted. I also have two first degree relatives who were diagnosed with early cardiac disease.\nDoctor: Thank you for letting me know. Prior to the cardiac arrest, did you experience any chest pain, dyspnea, dizziness, lightheadedness or any other constitutional symptoms?\nPatient: No, I didn't have any symptoms before the arrest.\nDoctor: I see. During the arrest, you were intubated and defibrillated five times for ventricular fibrillation. You were given a bolus of amiodarone. Do you remember any of this?\nPatient: No, I don't remember anything during that time.\nDoctor: Okay, that's understandable. You were eventually transported to the hospital and admitted to the coronary care unit where you were placed on targeted temperature management and maintained on amiodarone drip. Do you remember any of that?\nPatient: No, I don't remember any of that either.\nDoctor: That's okay. You eventually had a full neurologic recovery and were extubated. You were scheduled for cardiac catheterization, but you had a witnessed ventricular fibrillation arrest on the floor. Advanced cardiopulmonary resuscitation was initiated and you were revived immediately. Do you remember any of that?\nPatient: No, I don't remember that either.\nDoctor: Okay, that's understandable. Is there anyone in your family that you would like me to speak with about your condition?\nPatient: Yes, my husband and children would like to know more about my condition.\nDoctor: Okay, I will make sure to speak with them. In the meantime, we will continue to monitor your condition and keep you updated on any changes.","gold":"{\n\"visit motivation\": \"Cardiac arrest\",\n\"admission\": [\n{\n\"reason\": \"Post cardiac arrest\",\n\"date\": \"None\",\n\"duration\": \"None\",\n\"care center details\": \"Coronary care unit\"\n}\n],\n\"patient information\": {\n\"age\": \"51\",\n\"sex\": \"Female\",\n\"ethnicity\": \"None\",\n\"weight\": \"None\",\n\"height\": \"None\",\n\"family medical history\": \"Father had coronary artery disease in his 40\u2019s, mother had a pacemaker inserted, two first degree relatives diagnosed with early cardiac disease\",\n\"recent travels\": \"None\",\n\"socio economic context\": \"None\",\n\"occupation\": \"None\"\n},\n\"patient medical history\": {\n\"physiological context\": \"Stage 1 hypertension\",\n\"psychological context\": \"None\",\n\"vaccination history\": \"None\",\n\"allergies\": \"None\",\n\"exercise frequency\": \"Functionally active\",\n\"nutrition\": \"None\",\n\"sexual history\": \"None\",\n\"alcohol consumption\": \"Nonalcoholic drinker\",\n\"drug usage\": \"Denied recreational drug use\",\n\"smoking status\": \"Nonsmoker\"\n},\n\"surgeries\": \"None\",\n\"symptoms\": [\n{\n\"name of symptom\": \"Unresponsiveness\",\n\"intensity of symptom\": \"None\",\n\"location\": \"None\",\n\"time\": \"In the middle of the night\",\n\"temporalisation\": \"None\",\n\"behaviours affecting the symptom\": \"None\",\n\"details\": \"Patient was moaning before becoming unresponsive\"\n}\n],\n\"medical examinations\": [\n{\n\"name\": \"Electrocardiogram (ECG)\",\n\"result\": \"Sinus rhythm\",\n\"details\": \"None\"\n},\n{\n\"name\": \"Transthoracic echocardiogram\",\n\"result\": \"No valvular abnormalities, normal left atrium and left ventricular cavity size, borderline concentric left ventricular hypertrophy, ejection fraction of 56%, subtle regional wall motion abnormalities\",\n\"details\": \"None\"\n}\n],\n\"diagnosis tests\": \"None\",\n\"treatments\": [\n{\n\"name\": \"Cardiopulmonary resuscitation\",\n\"related condition\": \"Cardiac arrest\",\n\"dosage\": \"None\",\n\"time\": \"None\",\n\"frequency\": \"None\",\n\"duration\": \"None\",\n\"reason for taking\": \"To revive the patient\",\n\"reaction to treatment\": \"Return of spontaneous circulation\",\n\"details\": \"Started by husband, taken over by EMS\"\n},\n{\n\"name\": \"Intubation\",\n\"related condition\": \"Cardiac arrest\",\n\"dosage\": \"None\",\n\"time\": \"None\",\n\"frequency\": \"None\",\n\"duration\": \"None\",\n\"reason for taking\": \"To secure airway post cardiac arrest\",\n\"reaction to treatment\": \"None\",\n\"details\": \"None\"\n},\n{\n\"name\": \"Defibrillation\",\n\"related condition\": \"Ventricular fibrillation\",\n\"dosage\": \"None\",\n\"time\": \"None\",\n\"frequency\": \"Five times\",\n\"duration\": \"None\",\n\"reason for taking\": \"To treat ventricular fibrillation\",\n\"reaction to treatment\": \"None\",\n\"details\": \"Performed by EMS\"\n},\n{\n\"name\": \"Amiodarone\",\n\"related condition\": \"Cardiac arrest\",\n\"dosage\": \"Bolus followed by drip\",\n\"time\": \"None\",\n\"frequency\": \"None\",\n\"duration\": \"None\",\n\"reason for taking\": \"To manage cardiac arrhythmias\",\n\"reaction to treatment\": \"Severe bradycardia, leading to cessation of amiodarone\",\n\"details\": \"None\"\n},\n{\n\"name\": \"Targeted temperature management\",\n\"related condition\": \"Post cardiac arrest\",\n\"dosage\": \"None\",\n\"time\": \"About 12 hours after initiating\",\n\"frequency\": \"None\",\n\"duration\": \"None\",\n\"reason for taking\": \"To improve neurological outcome post cardiac arrest\",\n\"reaction to treatment\": \"Full neurologic recovery\",\n\"details\": \"None\"\n}\n],\n\"discharge\": {\n\"reason\": \"None\",\n\"referral\": \"None\",\n\"follow up\": \"None\",\n\"discharge summary\": \"Patient was extubated, transferred to the floor, and scheduled for cardiac catheterization but experienced a witnessed ventricular fibrillation arrest on the floor and was revived immediately\"\n}\n}"}'''
print(len(text_example2))  


6055


In [101]:
text_example3 = '''Given the provided patient-doctor conversation, generate the corresponding clinical note as written by the physician. \nMake sure to use all the information from the dialogue into the note, but do not add any new information. \n\nDoctor: Good morning, what brings you here today?\nPatient: Good morning, I have been having recurrent episodes of rhabdomyolysis since childhood.\nDoctor: I see, can you tell me more about your symptoms?\nPatient: Yes, I have been having muscle pain and weakness for many years.\nDoctor: Hmm, I understand. Can you tell me when was the first episode of rhabdomyolysis you had?\nPatient: The first episode was when I was 22 months old after a respiratory tract infection.\nDoctor: Okay, I see. Can you tell me about your family history?\nPatient: Yes, my parents are well, but my two siblings had sudden death more than 20 years ago following a short infectious illness. They both had severe muscle pain and evidence of rhabdomyolysis and myoglobinuria.\nDoctor: I see, that's very important information. Can you tell me more about the tests that were performed on your siblings?\nPatient: Yes, they had skeletal muscle histology and electron microscopy studies after their death, but the results were normal. However, the cardiac evaluation showed dilated cardiomyopathy.\nDoctor: Okay, I understand. What was your first presentation like?\nPatient: On my first presentation, I had no hypoglycemia or ketosis during the acute illness, but I was initially treated as a possible long chain fatty acid oxidation defect due to my family history.\nDoctor: I see. Can you tell me more about your subsequent admissions with similar presentations?\nPatient: Yes, I have had numerous admissions with extremely elevated CK concentration and muscle pain, despite aggressive carbohydrate supplementation including nocturnal cornstarch.\nDoctor: I see. Can you tell me more about your diet?\nPatient: Yes, my fat intake from food was continually restricted to approximately 40 g\/d, and my total fat intake was supplemented with MCT Oil and essential fatty acids in the form of walnut oil.\nDoctor: I see. And what about your exercise routine?\nPatient: I was advised to limit my exercise to 20 minutes per day and have a high-calorie drink prior to any physical activity.\nDoctor: I understand. Were there any definite results from the tests performed over the years?\nPatient: No, there was no definite etiology found from the extensive investigations that were performed. The serial urine organic acid profile and the acylcarnitine profiles were normal.\nDoctor: I see. I understand that this has been a long journey for you. We will do our best to help you.\nPatient: Thank you, doctor.","gold":"A 25-year-old female Irish patient presented with recurrent episodes of rhabdomyolysis since childhood. The first episode occurred at the age of 22 months following a respiratory tract infection. At that time, her creatine kinase (CK) serum concentration was noted to be 250 000 U\/L.\\nThe family history indicated that both parents were well. However, two siblings (brother and sister) had sudden death at the age of 2 and 4 years more than 20 years ago following a short infectious illness with sudden deterioration over a period of hours. In both cases, the children became progressively weak with severe muscle pain and had evidence of rhabdomyolysis and myoglobinuria of uncertain etiology. Skeletal muscle histology and electron microscopy studies at postmortem evaluation were normal in both children, cardiac evaluation demonstrated dilated cardiomyopathy. Notes pertaining to their clinical episodes were not available. On first presentation, our patient had no hypoglycemia and no ketosis during the acute illness, but was initially treated as a possible long chain fatty acid oxidation defect due to the family history.\\nThe patient had numerous subsequent admissions with similar presentations of extremely elevated CK concentration (1 000 000 U\/L at the age of 9) associated with muscle pain. This was despite aggressive carbohydrate supplementation including nocturnal cornstarch. Her fat intake from food was continually restricted to approximately 40 g\/d. Her total fat intake was supplemented with the use of MCT Oil (medium chain triglycerides supplement) and essential fatty acids in the form of walnut oil. Coenzyme Q10 at a dose of 100-200 mg daily was provided on an ongoing basis. The CK concentration was also raised between the episodes (500-2000 U\/L). She was advised to limit her exercise to 20 minutes per day and have high-calorie drink prior to any physical activity.\\nExtensive investigations were performed over the presenting years with the lack of a definite etiology. Serial urine organic acid profile and the acylcarnitine profiles were normal. A fibroblast fatty acid oxidation study showed normal myristate and palmitate oxidation studies in fibroblasts. Genetic analysis for fatty acid oxidation defect (LCHAD, MCAD, CPT I, and CPT II) and McArdle disease was uninformative.\\nAt the age of 16 years, mitochondrial respiratory chain activity measured in a muscle biopsy was normal, however morphological findings, such as intramyocellular lipid, were compatible with lipin-1 deficiency (Figure ). Her echocardiogram and electrocardiogram did not show any abnormal findings.\\nAt the age of 19 years, DNA sequence analysis of the LPIN1 gene (all coding exons and flanking intron boundaries corresponding to the canonical transcript variant NM_145693.3) revealed the presence of a common pathogenic intragenic deletion within the LPIN1 gene (c.2295-866_2410-30del1763) encompassing exon 18 (HGMD accession: CG085181). However, a second mutation could not be identified.\\nSubsequently, the LPIN1 coding region was analyzed by reverse-transcriptase PCR (RT-PCR) from total RNA isolated from muscle tissue and conventional DNA sequencing (Figure A,B). In addition to transcripts lacking exon 18 or exons 18-19 (corresponding to the allele harboring the genomic exon 18 deletion), an alternative exon spliced in between exon 5 and exon 6 was detected in a significant proportion of transcripts (Figure C,D). Because this alternative exon, named exon 5a, corresponded to an alternatively spliced in-frame exon annotated only in an N-terminal LPIN1 transcript variant (NM_001261428.2), further targeted DNA-based sequencing was performed. Indeed, this identified a second heterozygous variant (NC_000002.11:g.11916284C>A), which was formally regarded a nonsense mutation introducing a premature stop-codon within exon 5a (which would correspond to NM_001261428.2:c.942C>A, NP_001248357.1:p.[Cys314*]; Figure ). RT-PCR of total RNA isolated from healthy skeletal muscle detected exon 5a also in transcripts containing the first (noncoding) exon of the canonical isoform (data not shown). However, because exon 5a is currently not annotated to be contained in this main reference transcript (NM_145693.3), further studies will be needed to clearly establish pathogenicity of the novel variant.\\nSegregation analysis in the patient's family revealed that her father and younger sister are heterozygous for the common pathogenic intragenic deletion (NM_145693.3:c.2295-866_2410-30del1763), while her mother was a heterozygous carrier of the variant within the alternative exon. Thus, the results of these molecular genetic analyses were formally consistent with the clinical diagnosis of LPIN1-associated rhabdomyolysis, due to compound heterozygosity for a known pathogenic deletion and potentially pathogenic novel mutation.\\nAt the age of 25 years, the patient presented with acute muscle pain and weakness following prolonged fasting and strenuous exercise. A rhabdomyolysis crisis was confirmed with a CK of 500 000 U\/L. She was admitted to intensive care unit (ICU) for a 2 week period over which period she lost a significant amount of weight. During her critical illness, she received an intravenous infusion of 10% dextrose at 1.5 times maintenance with added electrolytes, sodium bicarbonate, morphine, and dexamethasone. Intravenous carnitine was also provided as the patient had previously observed this to be clinically beneficial. She was treated symptomatically, with shortened fasting periods, corn-starch at night and an \u201cunwell dietary regimes\u201d with a mild restriction of fat (40 g per day) and supplementation with MCT oil and walnut oil for essential fatty acids and Liquigen 5 g daily. On day 11, her CK was monitored 4 hourly and was 1248 U\/L. On discharge from ICU, this patient had generalized muscle weakness, stiff lower limb muscles (gastrocnemius), and bilateral drop foot requiring orthotic splints. Muscle weakness gradually improved after months of rehabilitative physiotherapy. Her drop foot has improved somewhat (power from 0\/5 to 3\/5 for dorsiflexion and extensor hallicis longus over 1 year) with areas of altered sensation anterolaterally below the knees consistent with bilateral residual common peroneal neuropathies (CPN). Serial nerve conduction\/electromyography studies demonstrated bilateral CPN palsies and a background generalized myopathy. The most likely cause of this patient's weakness was a critical care neuromyopathy, which has improved with time, in addition to CPN palsies related to significant weight loss while critically ill. However, we cannot exclude the possibility that both defects were related to her lipin-1 deficiency. While myopathy has been reported in a few cases, CPN damage is not a known association of this rare metabolic disorder."}'''
print(len(text_example3.split()))

1422


In [93]:
text_example4 = '''Given the provided'''
print(len(text_example4))

18


In [102]:
result = tokenizer.encode(text_example3, return_tensors='pt')
result.shape

torch.Size([1, 2597])

In [57]:
model.eval()

input_ids = tokenizer.encode(prompt, return_tensors='pt')
outputs = model(input_ids)
next_token_logits = outputs.logits[:, -1, :]

In [52]:
next_token_logits.shape

torch.Size([1, 32019])

In [34]:
outputs.keys()

odict_keys(['logits', 'past_key_values'])

In [53]:
outputs['past_key_values'][0][0].shape

torch.Size([1, 32, 947, 128])

In [58]:
top_k_values, top_k_indices = torch.topk(next_token_logits, k=10)
print("Top 10 Values:", top_k_values)
print("Top 10 Indices:", top_k_indices)

Top 10 Values: tensor([[16.2903, 14.9668, 14.4440, 14.1974, 13.9825, 13.4229, 12.7937, 12.6502,
         12.5661, 12.2277]], grad_fn=<TopkBackward0>)
Top 10 Indices: tensor([[ 4868,  5224,  6788,   322, 29892,  1274,  8327,   633,  1661,  9126]])
