# Environment setup

### Catch up on the process here: https://colab.research.google.com/drive/1BsgOKowPKmHjhyxnuPh4O6X6LaeV8bkk?usp=sharing

In [None]:
!pip install -q -U transformers peft accelerate optimum evaluate
!pip install auto-gptq --extra-index-url https://huggingface.github.io/autogptq-index/whl/cu117/
!pip install langchain
!pip install einops

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.2/8.2 MB[0m [31m17.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m168.3/168.3 kB[0m [31m12.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m270.9/270.9 kB[0m [31m16.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m403.3/403.3 kB[0m [31m25.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m8.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m46.0/46.0 kB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m507.1/507.1 kB[0m [31m31.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m115.3/115.3 kB[0m [31m13.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━

In [None]:
!pip install rouge_score
!pip install bert_score

Collecting rouge_score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: rouge_score
  Building wheel for rouge_score (setup.py) ... [?25l[?25hdone
  Created wheel for rouge_score: filename=rouge_score-0.1.2-py3-none-any.whl size=24933 sha256=bc04e33b0894cebbd7bd8f834ef6d795f6fdaf135da4e559873b93100ca1d865
  Stored in directory: /root/.cache/pip/wheels/5f/dd/89/461065a73be61a532ff8599a28e9beef17985c9e9c31e541b4
Successfully built rouge_score
Installing collected packages: rouge_score
Successfully installed rouge_score-0.1.2


In [None]:
# !pip install huggingface-cli

# Environment settings

In [None]:
# !huggingface-cli login

## Import necessary libraries

In [1]:
import math
import os
import time

import numpy as np
from sklearn.metrics.pairwise import euclidean_distances, cosine_similarity

import torch
import transformers
from transformers import AutoTokenizer, AutoModelForCausalLM

from langchain import LLMChain, HuggingFacePipeline, PromptTemplate

## Configurations

In [11]:
# Central experiment configuration: candidate checkpoints, generation
# settings, datasets to evaluate on and the metrics to compute.
config = {
    # NOTE(review): "TheBloke/Llama-2-7B-AWQ" is listed twice (2nd and last
    # entry) — confirm whether one of them was meant to be a different model.
    "model_id": ["TheBloke/Llama-2-7b-Chat-GPTQ",
                 "TheBloke/Llama-2-7B-AWQ",
                 "TheBloke/Llama-2-7B-GGUF",
                 "TheBloke/Llama-2-7B-GGML",
                 "TheBloke/Llama-2-7B-fp16",
                 "TheBloke/Llama-2-7B-GPTQ",
                 "TheBloke/llama-2-7B-Guanaco-QLoRA-AWQ",
                 "TheBloke/Llama-2-7B-AWQ"],
    # SECURITY: never store access tokens in the notebook. The token is read
    # from the HF_TOKEN environment variable (None when unset). Any token that
    # was ever committed with this notebook must be revoked and rotated.
    "hf_token": os.environ.get("HF_TOKEN"),
    "model": {
        "temperature": 0.7, # [0, 0.7, 0.9, 1.1, 1.3]  Testing iteratively.
        "max_length": 3000,
        "top_k": 10,
        "num_return": 1
    },
    "dataset": [
        "billsum",
        "cnn_dailymail",
        "big_patent"
    ],
    "eval": ['rouge', "bertscore", 'meteor']
}


In [None]:
from huggingface_hub import snapshot_download

# Download the checkpoint into the exact directory that the model-loading cell
# reads from ("./models/v2/Llama-2-7b-Chat-GPTQ/"); snapshot_download places the
# repository files directly inside `local_dir`.
model_id = 'TheBloke/Llama-2-7b-Chat-GPTQ'
snapshot_download(repo_id=model_id,
                  local_dir='./models/v2/Llama-2-7b-Chat-GPTQ')

# Utils

In [5]:
def call_parameter(model):
  """Print a parameter summary for `model` and return the total count.

  Only tensors yielded by `model.parameters()` are counted; an entry is
  treated as trainable when its `requires_grad` flag is truthy.

  Args:
    model: Object exposing `parameters()`, whose items provide `numel()`
      and `requires_grad`.

  Returns:
    Total number of elements across all parameters.
  """
  total_count = 0
  trainable_count = 0
  for param in model.parameters():
    size = param.numel()
    total_count += size
    if param.requires_grad:
      trainable_count += size
  frozen_count = total_count - trainable_count

  report_lines = [
      f'Model {model.__class__.__name__} has {total_count} parameters in total',
      f'Trainable parameters: {trainable_count}',
      f'Untrainable parameters: {frozen_count}',
  ]
  print('\n'.join(report_lines))
  return total_count

# Dataset

In [None]:
from datasets import load_dataset

# Load only the "ca_test" split of the BillSum dataset; the next cell carves
# train/test subsets out of it.
billsum = load_dataset("billsum", split="ca_test")

In [None]:
# Hold out 20% of the rows for testing. A fixed seed makes the split identical
# on every run so evaluation numbers are comparable between sessions.
# NOTE: this rebinds `billsum` to the split result, so re-run the loading cell
# before re-running this one.
billsum = billsum.train_test_split(test_size=0.2, seed=42)

In [None]:
# Inspect the held-out split produced by train_test_split (features and row count).
billsum['test']

Dataset({
    features: ['text', 'summary', 'title'],
    num_rows: 248
})

# Model architecture

In [None]:
def generate_model(model_id, config):
  """Load a causal language model and its tokenizer via `from_pretrained`.

  Args:
    model_id: Repository id or local path handed to `from_pretrained`.
    config: Mapping; its optional 'hf_token' entry is forwarded as the
      access token (None when the key is absent).

  Returns:
    A `(model, tokenizer)` tuple.
  """
  print(f"Setting up model {model_id}")
  hf_token = config.get('hf_token')
  # SECURITY: trust_remote_code=True allows Python code shipped with the model
  # repository to run locally — only use it with repositories you trust.
  model = AutoModelForCausalLM.from_pretrained(model_id, token=hf_token, use_safetensors=True,
                            device_map='auto', trust_remote_code=True)
  # A tokenizer holds no weights, so it takes no device_map.
  tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token,
                                            trust_remote_code=True)
  return model, tokenizer

In [15]:
class Agent:
  """Simple holder that pairs a language model with its tokenizer."""

  def __init__(self, model, tokenizer):
    """Store the given `model` and `tokenizer` on the instance unchanged."""
    self.model, self.tokenizer = model, tokenizer

  def __repr__(self):
    """Return 'Model <ClassName>' using the class name of the wrapped model."""
    model_class_name = self.model.__class__.__name__
    return 'Model ' + model_class_name

In [12]:
# Load the locally downloaded GPTQ checkpoint; local_files_only=True restricts
# loading to files that are already on disk.
model_id = "./models/v2/Llama-2-7b-Chat-GPTQ/"
model = AutoModelForCausalLM.from_pretrained(model_id, use_safetensors=True, device_map='cuda', local_files_only=True)
# A tokenizer holds no weights, so it takes no device_map.
tokenizer = AutoTokenizer.from_pretrained(model_id, local_files_only=True)
# NOTE(review): the reported count (~262M) is far below 7B — presumably the
# GPTQ-packed weights are not exposed through model.parameters(); confirm.
no_params = call_parameter(model)
print("====="*5)
print(f"Model {config['model_id'][0]} has {no_params} parameters.")

Model LlamaForCausalLM has 262410240 parameters in total
Trainable parameters: 262410240
Untrainable parameters: 0
Model TheBloke/Llama-2-7b-Chat-GPTQ has 262410240 parameters.


# Experiments

## Implementation

In [13]:
class Generator:
  """Summarization front-end that runs an Agent's model through a LangChain chain.

  A Hugging Face "text-generation" pipeline is built from `agent.model` and
  `agent.tokenizer`, wrapped in `HuggingFacePipeline`, and combined with a
  `PromptTemplate` whose single input variable is `text`.
  """

  def __init__(self, config, agent, template):
    """Build the generation pipeline and the prompt chain.

    Args:
      config: Mapping with a 'model' entry providing 'temperature',
        'max_length', 'top_k' and 'num_return'.
      agent: Object exposing `.model` and `.tokenizer`.
      template: Prompt template string containing a `{text}` placeholder.
    """
    self.agent = agent
    gen_cfg = config['model']
    temperature = gen_cfg['temperature']

    # Sampling needs a strictly positive temperature; a temperature of 0 is
    # mapped to greedy decoding instead of an invalid sampling configuration.
    if temperature > 0:
      sampling_kwargs = {
          'do_sample': True,
          'top_k': gen_cfg['top_k'],
          'temperature': temperature,
      }
    else:
      sampling_kwargs = {'do_sample': False}

    pipeline = transformers.pipeline(
        "text-generation",
        model=self.agent.model,
        tokenizer=self.agent.tokenizer,
        torch_dtype=torch.bfloat16,
        trust_remote_code=True,
        device_map="auto",
        max_length=gen_cfg['max_length'],
        num_return_sequences=gen_cfg['num_return'],
        # Use the tokenizer that belongs to this agent, not a notebook global.
        pad_token_id=self.agent.tokenizer.eos_token_id,
        **sampling_kwargs,
    )
    # NOTE(review): HuggingFacePipeline appears not to forward `model_kwargs`
    # to generation when given a ready-made pipeline, which is why the
    # temperature is also set on the pipeline above — confirm.
    llm = HuggingFacePipeline(pipeline=pipeline, model_kwargs={'temperature': temperature})
    prompt = PromptTemplate(template=template, input_variables=["text"])
    self.llm_chain = LLMChain(prompt=prompt, llm=llm)

  def generate(self, text):
    """Run the prompt chain on `text` and return whatever `LLMChain.invoke` returns."""
    result = self.llm_chain.invoke(text)
    return result

In [14]:
# Prompt template for the summarization chain; `{text}` is replaced with the
# document to summarize. The leading whitespace on every line is part of the
# string and is therefore sent to the model as-is.
template = """
              Write a summary of the following text delimited by triple backticks.
              Return your response which covers the key points of the text.
              ```{text}```
              SUMMARY:
           """

In [16]:
agent = Agent(model, tokenizer)
llm_agent = Generator(config, agent, template)

In [17]:
# Sample input document (a patient health-assessment report) used as a smoke
# test for the summarization chain.
doc = """
Patient Name: John Doe
Date of Birth: January 15, 1975
Gender: Male
Medical Record Number: 123456789
Date of Assessment: August 18, 2023

I. Chief Complaint:
The patient presents with complaints of persistent fatigue, unexplained weight loss, and intermittent abdominal pain over the past few months. He reports a gradual decrease in appetite and occasional nausea. The patient is seeking medical evaluation to determine the underlying cause of his symptoms.

II. Medical History:
The patient has a history of hypertension managed with medication for the past five years. He underwent an appendectomy in his late twenties and had a hernia repair surgery a decade ago. The patient reports a family history of diabetes on his maternal side.

III. Review of Systems:

General: The patient reports fatigue, unexplained weight loss of approximately 10 pounds over three months, and a decreased appetite.

Gastrointestinal: The patient experiences intermittent abdominal pain, predominantly in the right upper quadrant, without a clear trigger. He reports occasional nausea, and denies vomiting, diarrhea, or changes in bowel habits.

Cardiovascular: The patient's blood pressure has been well controlled with medication. He denies chest pain, palpitations, or shortness of breath.

Respiratory: The patient denies cough, wheezing, or shortness of breath.

Musculoskeletal: No significant joint pain or limitations in mobility reported.

Neurological: The patient denies headaches, dizziness, or changes in vision.

Psychological: The patient mentions occasional stress due to work-related factors but denies symptoms of depression or anxiety.

IV. Physical Examination:

Vital Signs: Blood pressure is 130/80 mmHg, heart rate is 78 beats per minute, respiratory rate is 16 breaths per minute, and temperature is 98.6°F (37°C).

General: The patient appears fatigued but alert and oriented to person, place, and time. He appears to have lost weight since his last visit.

Abdominal Examination: There is tenderness on palpation in the right upper quadrant of the abdomen. No palpable masses or organomegaly noted. Bowel sounds are normal.

Cardiovascular Examination: Regular rate and rhythm with no murmurs or abnormal sounds.

Respiratory Examination: Clear breath sounds bilaterally, no wheezing or crackles noted.

Neurological Examination: No focal neurological deficits observed.

V. Diagnostic Investigations:

Complete Blood Count (CBC): Within normal limits.
Comprehensive Metabolic Panel (CMP): Slight decrease in albumin levels.
Liver Function Tests (LFTs): Mild elevation in liver enzymes (AST and ALT).
Abdominal Ultrasound: No evidence of gallstones or other abnormalities. Liver appears slightly enlarged.
CT Scan of the Abdomen: Reveals a mass in the liver, approximately 5 cm in diameter. The mass appears to be enhancing on contrast.
VI. Assessment and Plan:

Based on the patient's symptoms, physical examination, and diagnostic investigations, the following assessment and plan have been formulated:

Assessment:

Unexplained weight loss and fatigue.
Right upper quadrant abdominal pain.
Elevated liver enzymes and an enlarging liver mass.
Plan:

Further Evaluation: Given the presence of an enlarging liver mass and elevated liver enzymes, the patient will be referred to a gastroenterologist for further evaluation, including possible liver biopsy.
Oncology Consultation: Given the possibility of malignancy, an oncology consultation will be sought to determine the nature of the liver mass and develop an appropriate treatment plan.
Symptom Management: The patient's abdominal pain will be managed with pain relievers as needed. Anti-nausea medication will be prescribed to alleviate nausea symptoms.
Nutritional Support: The patient's decreased appetite and weight loss will be addressed with dietary counseling and nutritional support to ensure adequate calorie intake.
Psychological Support: Given the patient's stress and anxiety related to his symptoms, psychological support and counseling will be offered to address emotional well-being.
VII. Follow-Up:

The patient is scheduled for a follow-up appointment in two weeks to discuss the results of further investigations, consultations, and to address any ongoing symptoms. The patient is advised to monitor his symptoms and report any significant changes or new symptoms before the next visit.

VIII. Prognosis and Discussion:

The presence of an enlarging liver mass raises concerns about potential malignancy. Further diagnostic testing and consultations with specialists are essential to determine the nature of the mass and develop an appropriate treatment plan. The patient's overall prognosis will be influenced by the nature of the liver mass and the success of treatment interventions. Close collaboration between the patient, medical team, and specialists will guide the management of his condition and optimize his chances of recovery.

IX. Patient Education:

The patient and his family will receive comprehensive education about the diagnostic process, potential treatment options, and the importance of adhering to medical recommendations. Emphasis will be placed on the need for regular follow-up appointments and open communication with the medical team to ensure the best possible outcomes.

X. Conclusion:

This patient report outlines the comprehensive health assessment of John Doe, a male patient presenting with unexplained weight loss, fatigue, and abdominal pain. The assessment encompasses a thorough medical history, review of systems, physical examination findings, diagnostic investigations, assessment, and a detailed plan for further evaluation and management. The collaboration between healthcare professionals and specialists will be crucial in guiding the patient's journey toward an accurate diagnosis, effective treatment, and optimal health outcomes.
"""

In [18]:
# Summarize the sample document; the bare name on the last line displays the result.
# NOTE(review): the saved output of this cell is a KeyboardInterrupt, so this
# run did not finish — generation with max_length=3000 may be slow; confirm.
generated_sample = llm_agent.generate(doc)
generated_sample



KeyboardInterrupt: 

# Reference
+ https://colab.research.google.com/drive/1_TIrmuKOFhuRRiTWN94iLKUFu6ZX4ceb?usp=sharing#scrollTo=pDU1jZvWp7s5
+ https://huggingface.co/spaces/evaluate-metric/rouge
+ https://huggingface.co/spaces/evaluate-metric/bertscore
+ https://huggingface.co/spaces/evaluate-metric/meteor