# 部署训练好的 Llama 3 模型

In [1]:
from sagemaker.jumpstart.estimator import JumpStartEstimator

# Name of the training job to attach to, and the JumpStart model id passed alongside it.
training_job_name = "llama-3-mak-10-epochs"
model_id = "meta-textgeneration-llama-3-8b-instruct"

# Attach to the existing training job; the resulting estimator is bound to `model`
# and is what the next cell deploys. No model version is passed here, which is why
# the output warns about the wildcard version identifier '*'.
model = JumpStartEstimator.attach(training_job_name, model_id)
# NOTE(review): the commented-out call below uses the name `attached_estimator`,
# which is not defined in this notebook; the attached estimator is `model`.
# attached_estimator.logs()


sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml


Using model 'meta-textgeneration-llama-3-8b-instruct' with wildcard version identifier '*'. You can pin to version '2.0.2' for more stable results. Note that models may have different input/output signatures after a major version upgrade.



2024-07-29 18:24:56 Starting - Preparing the instances for training
2024-07-29 18:24:56 Downloading - Downloading the training image
2024-07-29 18:24:56 Training - Training image download completed. Training in progress.
2024-07-29 18:24:56 Uploading - Uploading generated training model
2024-07-29 18:24:56 Completed - Training job completed


In [2]:
# Instance type handed to deploy() for the inference endpoint.
instance_type="ml.g5.2xlarge"
# Deploy the attached estimator; `predictor` is the handle the later cells call
# predict() on.
# NOTE(review): nothing in the visible notebook deletes this endpoint afterwards —
# confirm it is cleaned up elsewhere.
predictor = model.deploy(instance_type=instance_type)

----------------!

In [3]:
# Read the whole sample letter from disk as UTF-8 text.
with open("letter.txt", "r", encoding="utf-8") as f:
    letter = f.read()
# print(letter)

# Send the letter as the prompt, limiting generation to 128 new tokens.
response = predictor.predict({'inputs': letter,
                             'parameters': {'max_new_tokens': 128}})
# predict() may hand back either a list or a single item; keep the first item of a list.
response = response[0] if isinstance(response, list) else response

# Show the generated text with surrounding whitespace removed.
print("Output:\n", response["generated_text"].strip(), end="\n\n\n")

Output:
 ### Response:
"is_Papilledema": False, "referral_content": "Despite adherence to current treatment protocols and lifestyle modifications, Mr. Clark's symptoms have progressively worsened. His recent spirometry results indicate a significant decline in pulmonary function, suggestive of potential chronic obstructive pulmonary disease (COPD) exacerbation."




In [5]:
!pip install nltk sacrebleu
!pip install rouge

Collecting sacrebleu
  Using cached sacrebleu-2.4.2-py3-none-any.whl.metadata (58 kB)
Collecting portalocker (from sacrebleu)
  Using cached portalocker-2.10.1-py3-none-any.whl.metadata (8.5 kB)
Using cached sacrebleu-2.4.2-py3-none-any.whl (106 kB)
Using cached portalocker-2.10.1-py3-none-any.whl (18 kB)
Installing collected packages: portalocker, sacrebleu
Successfully installed portalocker-2.10.1 sacrebleu-2.4.2
Collecting rouge
  Using cached rouge-1.0.1-py3-none-any.whl.metadata (4.1 kB)
Using cached rouge-1.0.1-py3-none-any.whl (13 kB)
Installing collected packages: rouge
Successfully installed rouge-1.0.1


In [6]:
import sacrebleu

# Reference text (string).
reference_text = "The patient is referred due to the rapid progression of the tumor despite initial treatment modalities. Further evaluation and management from a specialized neuro-oncology team are essential for this recurrent and aggressive tumor."

# Generated (candidate) text (string).
candidate_text = "The patient is referred due to the rapid progression of the tumor despite initial treatment modalities. Further evaluation and management from a specialized neuro-oncology team are essential for this recurrent and aggressive tumor."

# Corpus-level BLEU over a single hypothesis with a single reference stream.
# corpus_bleu is called with its default settings: it is given no n-gram weights
# (sacrebleu's corpus_bleu has no such parameter), so the score is the standard
# BLEU over 1- to 4-gram precisions, matching the four precisions printed below.
bleu = sacrebleu.corpus_bleu([candidate_text], [[reference_text]])
print(f"BLEU score (): {bleu.score}")
print(bleu)

BLEU score (): 100.00000000000004
BLEU = 100.00 100.0/100.0/100.0/100.0 (BP = 1.000 ratio = 1.000 hyp_len = 35 ref_len = 35)


In [7]:
from rouge import Rouge

# Compute ROUGE scores for the candidate against the reference.
rouge = Rouge()
scores = rouge.get_scores(candidate_text, reference_text, avg=True) # There is only one reference, so averaging does not change the values (NOTE(review): avg=True presumably still affects the return shape — a dict is printed below; confirm)
print(f"ROUGE scores: {scores}")

ROUGE scores: {'rouge-1': {'r': 1.0, 'p': 1.0, 'f': 0.999999995}, 'rouge-2': {'r': 1.0, 'p': 1.0, 'f': 0.999999995}, 'rouge-l': {'r': 1.0, 'p': 1.0, 'f': 0.999999995}}


In [19]:
import sacrebleu
from rouge import Rouge

import json
import pandas as pd

import re

def replace_output_prefix(input_str):
    """Remove a leading "Output:" label and the whitespace that follows it.

    The label is matched case-insensitively and only at the very start of the
    string; input without the label comes back unchanged.
    """
    prefix_pattern = re.compile(r'(?i)^Output:\s*')
    return prefix_pattern.sub('', input_str)


def extract_braced_content(input_str):
    """Return the first ``{...}`` span that has no braces nested inside it.

    The returned text keeps its surrounding braces. When the input holds no
    such span, an empty string is returned.
    """
    found = re.search(r'\{[^{}]*\}', input_str)
    return found.group(0) if found else ''


def extract_referral_content_original(data_str):
    """Extract the value that follows a ``"referral_content":`` key.

    The value may be the bare word ``null`` or a double-quoted string. ``null``
    and a missing key both yield an empty string; a quoted value is returned
    with its leading and trailing double quotes stripped.
    """
    found = re.search(r'"referral_content":\s*(null|".*?")', data_str)
    if found is None:
        return ''
    value = found.group(1)
    # A literal null is reported as empty text; otherwise drop the quotes.
    return "" if value == "null" else value.strip('"')


def extract_referral_content(data_str):
    """Extract the quoted string value of the ``"referral_content"`` key.

    The value is matched as a JSON-style string on a single line: a
    backslash-escaped character (for example ``\\"``) is treated as part of the
    value instead of ending it, and the matched text is then decoded so that
    escapes such as ``\\"`` come back as the characters they stand for.

    Args:
        data_str: text that may contain ``"referral_content": "..."``.

    Returns:
        The decoded value, or an empty string when the key is missing or its
        value is not a quoted string (for example ``null``). If the matched
        text is not a valid JSON string body, it is returned undecoded.
    """
    # Local import keeps this function self-contained.
    import json

    # The two alternatives start with different characters (a plain character
    # vs. a backslash), so the repetition cannot backtrack ambiguously.
    match = re.search(r'"referral_content":\s*"((?:[^"\\\n]|\\.)*)"', data_str)
    if match is None:
        return ''
    raw = match.group(1)
    try:
        # Re-wrap in quotes and let the JSON decoder resolve the escapes.
        return json.loads('"' + raw + '"', strict=False)
    except ValueError:
        # Not a well-formed JSON string body (e.g. an unknown escape): keep it verbatim.
        return raw
    

def _read_jsonl(path):
    """Parse a JSON Lines file into a list of records, ignoring blank lines."""
    records = []
    with open(path, 'r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            if line:  # a blank or trailing empty line is not a record
                records.append(json.loads(line))
    return records


def evaluate_jsonl_with_llama3(predictor, path, csv_file_path):
    """Run the predictor over a JSONL test set and score each answer with BLEU.

    Every record must provide the keys "instruction", "whole_letter" and
    "referral_content". For each record a prompt is built from the instruction
    and the letter, sent to ``predictor.predict`` with ``max_new_tokens`` of
    128, and the ``referral_content`` value is extracted from the generated
    text. The extracted text is scored against the record's reference with
    ``sacrebleu.corpus_bleu``. Progress is printed per record, and one row per
    record is written to a CSV file.

    Args:
        predictor: object whose ``predict(payload)`` returns a dict with a
            "generated_text" key, or a list whose first item is such a dict.
        path: path of the JSONL test file.
        csv_file_path: destination of the per-record results CSV.

    Returns:
        A tuple ``(evaluate_list, bleu_score_list, rouge_score_list)``: the
        extracted predictions, their BLEU scores, and a list that is always
        empty because ROUGE is not computed here (kept so existing callers
        that unpack three values keep working).
    """
    test_data_json = _read_jsonl(path)

    evaluate_list = []
    bleu_score_list = []
    rouge_score_list = []  # ROUGE scoring is disabled in this function
    csv_data = []

    for single_test in test_data_json:
        instruction = single_test["instruction"]
        whole_letter = single_test["whole_letter"]
        # A JSON null reference would break both the printout and BLEU scoring,
        # so it is scored as empty text.
        reference_text = single_test["referral_content"] or ""

        prompt = f"{instruction}\n\n###\n\n{whole_letter}\n\n###"
        response = predictor.predict({'inputs': prompt, 'parameters': {'max_new_tokens': 128}})
        print("----------------------------------------------------------------")

        response = response[0] if isinstance(response, list) else response

        try:
            generated = replace_output_prefix(response["generated_text"].strip())
            candidate_text = extract_referral_content(generated)
        except Exception as err:
            # Keep evaluating the remaining records, but make the failure visible.
            print(f"extract failure: {err!r}")
            candidate_text = "extract failure"

        # Scoring runs after the try block rather than in a `finally`, so an
        # interrupt raised during extraction propagates cleanly instead of
        # reaching this code with `candidate_text` unset.
        evaluate_list.append(candidate_text)
        print("predict: " + candidate_text)
        print("real: " + reference_text)

        bleu = sacrebleu.corpus_bleu([candidate_text], [[reference_text]])
        bleu_score_list.append(bleu.score)
        print(bleu.score)
        print()

        csv_data.append({
            "instruction": instruction,
            "whole_letter": whole_letter,
            "referral_content": single_test["referral_content"],
            "predict_referral_content": candidate_text,
            "bleu": bleu.score,
        })

    # One CSV row per test record, columns in the order built above.
    df = pd.DataFrame(csv_data)
    df.to_csv(csv_file_path, index=False, encoding='utf-8')

    print(f"CSV file has been saved to {csv_file_path}")

    return evaluate_list, bleu_score_list, rouge_score_list

In [20]:
# Evaluate the deployed predictor on the test JSONL file; per-record results are
# written to the CSV path given as the last argument.
test_evaluate_list, test_bleu_score_list, test_rouge_score_list = evaluate_jsonl_with_llama3(predictor, "test_data_735/test.jsonl", "llama3_funetune_test.csv")

----------------------------------------------------------------
predict: The reason for this referral is to further evaluate and manage Ms. Jane Doe's progressively worsening cardiopulmonary symptoms. Given her clinical presentation, further investigation is essential to rule out possible cardiac etiologies, including congestive heart failure or valvular disease.
real: The reason for this referral is to further evaluate and manage Ms. Jane Doe's progressively worsening cardiopulmonary symptoms. Given her clinical presentation, further investigation is essential to rule out possible cardiac etiologies, including congestive heart failure or valvular disease.
100.00000000000004

----------------------------------------------------------------
predict: Mr. Davis has presented with significant neck pain and dropped head syndrome without any neurological abnormalities. Despite physical therapy, including cervical spine traction and extensor muscle training, there has been no improvement. Im

In [21]:
def analyze_predict_data(bleu_score_list):
    """Print summary statistics for a list of BLEU scores.

    Reports how many scores are at least 100, how many are above 70, the share
    of each among all scores (printed as a fraction between 0 and 1, not
    multiplied by 100), and the mean score.

    Args:
        bleu_score_list: sequence of numeric BLEU scores.

    Returns:
        None. An empty list prints a notice instead of raising
        ZeroDivisionError.
    """
    total = len(bleu_score_list)
    if total == 0:
        print("分数列表为空，无法统计。")
        return

    # Scores of at least 100; an exact match can print as 100.00000000000004,
    # hence ">=" rather than "==".
    count_gt_100 = sum(1 for score in bleu_score_list if score >= 100)

    # Scores strictly above 70.
    count_gt_70 = sum(1 for score in bleu_score_list if score > 70)

    prob_gt_100 = count_gt_100 / total
    prob_gt_70 = count_gt_70 / total
    average_score = sum(bleu_score_list) / total

    print(f"分数大于100的个数：{count_gt_100}, 占所有数据的百分比为： {prob_gt_100}")
    print(f"分数大于70的个数：{count_gt_70}, 占所有数据的百分比为： {prob_gt_70}")
    print(f"bleu平均分数: {average_score}")

In [22]:
# Summarise the BLEU scores returned by the test-set evaluation.
analyze_predict_data(test_bleu_score_list)

分数大于100的个数：119, 占所有数据的百分比为： 0.85
分数大于70的个数：123, 占所有数据的百分比为： 0.8785714285714286
bleu平均分数: 89.56097533162449
