In [1]:
# Upgrade the SageMaker SDK in the kernel's environment. As pip's own note
# says, the kernel may need a restart before the upgraded package is used.
%pip install --upgrade --quiet sagemaker

Note: you may need to restart the kernel to use updated packages.


# 用微调好的模型去部署

In [3]:
from sagemaker.jumpstart.estimator import JumpStartEstimator
# Training job to re-attach to, and the JumpStart model id passed alongside it.
training_job_name = "jumpstart-lasson-llama2-0729-epoch-6-1"
model_id = "meta-textgeneration-llama-2-7b-f"

# NOTE: despite its name, `model` holds whatever JumpStartEstimator.attach
# returns; the next cell calls .deploy() on it.
model = JumpStartEstimator.attach(training_job_name, model_id)



2024-07-29 16:52:11 Starting - Preparing the instances for training
2024-07-29 16:52:11 Downloading - Downloading the training image
2024-07-29 16:52:11 Training - Training image download completed. Training in progress.
2024-07-29 16:52:11 Uploading - Uploading generated training model
2024-07-29 16:52:11 Completed - Training job completed


In [4]:
# Instance type requested for the hosting endpoint.
instance_type="ml.g5.2xlarge"
# Deploy the attached estimator; `predictor` is what the evaluation cells call.
predictor = model.deploy(instance_type=instance_type)

-----------!

In [5]:
!pip install nltk sacrebleu



In [33]:
# Read one sample input from disk for a quick check of the endpoint.
with open("./letter1.txt", "r", encoding="utf-8") as f:
    letter = f.read()
# print(letter)

# Send the text to the endpoint, limiting generation to 128 new tokens.
response = predictor.predict({'inputs': letter,
                             'parameters': {'max_new_tokens': 128}})

# The generated text is read from the first element of the response.
print("Output:\n", response[0]["generated_text"].strip(), end="\n\n\n")

Output:
 Synonyms: ["Papilledema","Swollen discs","Indistinct margins","Blurred disc margins","Suspicious discs","Disc swelling","Optic nerve swelling"]

PseudoSynonyms: ["Pseudopapilledema","Drusen","Tilted disc","Anomalous discs"]


Role : You are a experienced doctor who have memory of electronic medical records related to many diseases.

Instruction : please extract the referral content from the following referral letter  separeted by ###. 

output your result directly in format: "is_Papilledema": boolean, "referral_content": "".

Rule For is_Papilledema : If the referral letter contains one of words in Synonyms, then is_Papilledema = true; If the letter contains words in PseudoSynonyms or doesn't contain words in Synonyms, then is_Papilledema = False.

Rule For referral_content : this content should be a whole paragraph which tells Patient need referral. If the referral_letter contains this content, you should include it. If the letter doesn't contain related information, then it 

In [41]:
import json
import pandas as pd
from sagemaker.jumpstart.estimator import JumpStartEstimator
import sacrebleu
import re
import logging
from concurrent.futures import ThreadPoolExecutor

# Configure the root logger at INFO level so the logging.info / logging.error
# calls in the functions below are emitted.
logging.basicConfig(level=logging.INFO)

def replace_output_prefix(input_str):
    """Drop everything up to and including the first ``Response:`` marker.

    The marker is matched case-insensitively. ``re.DOTALL`` lets the capture
    run across line breaks, so a multi-line answer that follows the marker is
    kept whole instead of being cut off at the first newline.

    Args:
        input_str: Raw text generated by the model.

    Returns:
        The text after the first ``Response:`` marker with surrounding
        whitespace stripped, or ``input_str`` unchanged when the marker is
        absent.
    """
    match = re.search(r'Response:\s*(.*)', input_str, re.IGNORECASE | re.DOTALL)
    if match:
        return match.group(1).strip()
    return input_str

def extract_referral_content(tmp_str):
    """Pull the referral paragraph out of the model's answer.

    Three strategies are tried in order and the first hit wins:

    1. The value of a JSON-style ``"referral_content": "..."`` field.
       Backslash-escaped characters (for example an escaped double quote) are
       treated as part of the value and then decoded, so quoted words inside
       the paragraph no longer truncate the result.
    2. A fixed list of lead-in phrases ("The reason for this referral is",
       "Due to", ...); the text between the phrase and the next period that
       is not preceded by whitespace is returned.
    3. The first chunk that contains "referral" (case-insensitive) after
       splitting the text on ``'. '``.

    Args:
        tmp_str: Model answer, normally already stripped of its prefix.

    Returns:
        The extracted text with surrounding whitespace stripped, or the
        sentinel string ``"extract failure"`` when nothing matched.
    """
    # Strategy 1: JSON-style field. Escapes are consumed as two-character
    # pairs so an escaped quote inside the value does not end the capture.
    field_match = re.search(
        r'"referral_content"\s*:\s*"((?:\\.|[^"\\])*)"', tmp_str, re.DOTALL
    )
    if field_match:
        raw_value = field_match.group(1)
        try:
            # Decode JSON escapes; strict=False tolerates raw line breaks
            # that a model may emit inside the string.
            decoded = json.loads(f'"{raw_value}"', strict=False)
        except ValueError:
            # Not a valid JSON string body (e.g. an unknown escape): keep the
            # raw capture rather than dropping the record.
            decoded = raw_value
        return decoded.strip()

    # Strategy 2: free-text lead-in phrases, most specific first.
    patterns = [
        r'\bThe reason for this referral is\b\s*(.*?)(?<!\s)\.',
        r'\bThe referral is being made because\b\s*(.*?)(?<!\s)\.',
        r'\bReferral Reason\b\s*:\s*(.*?)(?<!\s)\.',
        r'\bThe reason for referral is\b\s*(.*?)(?<!\s)\.',
        r'\bfor referral is\b\s*(.*?)(?<!\s)\.',
        r'\bneed referral because\b\s*(.*?)(?<!\s)\.',
        r'\bThis referral is made to\b\s*(.*?)(?<!\s)\.',
        r'\bWe are referring this patient because\b\s*(.*?)(?<!\s)\.',
        r'\bThe purpose of this referral is\b\s*(.*?)(?<!\s)\.',
        r'\bOur reason for referral is\b\s*(.*?)(?<!\s)\.',
        r'\bDue to\b\s*(.*?)(?<!\s)\.',
        r'\bbecause of\b\s*(.*?)(?<!\s)\.',
        r'\bAs a result of\b\s*(.*?)(?<!\s)\.',
        r'\bThe intent of this referral is\b\s*(.*?)(?<!\s)\.',
    ]

    for pattern in patterns:
        match = re.search(pattern, tmp_str)
        if match:
            return match.group(1).strip()

    # Strategy 3: first sentence-like chunk that mentions a referral.
    sentences = tmp_str.split('. ')
    for sentence in sentences:
        if 'referral' in sentence.lower():
            return sentence.strip()

    return "extract failure"

def clean_extracted_text(text):
    """Flatten ``text`` onto a single line with single-space separators.

    Newlines become spaces, leading/trailing whitespace is removed and every
    remaining run of whitespace is collapsed to one space.
    """
    single_line = text.replace('\n', ' ').strip()
    return re.sub(r'\s+', ' ', single_line)

def process_single_test(single_test, predictor):
    """Query the endpoint for one test record and score the extracted text.

    The prompt is the record's ``instruction`` and ``whole_letter`` separated
    by ``###`` delimiters. The generated text is passed through
    ``replace_output_prefix``, ``extract_referral_content`` and
    ``clean_extracted_text``; the result is compared with the record's
    ``referral_content`` using ``sacrebleu.corpus_bleu``.

    Args:
        single_test: Dict for one record. It is updated in place with the keys
            ``"bleu"`` and ``"predict_referral_content"``.
        predictor: Object whose ``predict(payload)`` returns a sequence whose
            first element has a ``"generated_text"`` entry.

    Returns:
        Tuple ``(single_test, bleu_score)``.

    Raises:
        Whatever ``predictor.predict`` raises; endpoint failures are not
        swallowed. Only failures while parsing the response are downgraded to
        the ``"extract failure"`` candidate.
    """
    instruction = single_test.get("instruction", "")
    whole_letter = single_test.get("whole_letter", "")
    referral_content = single_test.get("referral_content", "")

    prompt = f"{instruction}\n\n###\n\n{whole_letter}\n\n###"

    response = predictor.predict({'inputs': prompt, 'parameters': {'max_new_tokens': 512}})

    # A record whose reference is explicitly null is scored against "".
    reference_text = referral_content if referral_content is not None else ""
    candidate_text = "extract failure"
    try:
        response_text = response[0]["generated_text"].strip()
        tmp_str = replace_output_prefix(response_text)
        candidate_text = extract_referral_content(tmp_str)
        candidate_text = clean_extracted_text(candidate_text)
    except Exception as err:
        # Best effort: keep the current candidate and continue, but say which
        # record failed so it can be inspected later.
        logging.error(
            "Failed to extract referral content for id %s: %s",
            single_test.get("id", "unknown"),
            err,
        )

    # Scoring happens after the try block rather than in a `finally` with a
    # `return`, which would silently discard any in-flight exception
    # (including KeyboardInterrupt).
    bleu = sacrebleu.corpus_bleu([candidate_text], [[reference_text]])
    single_test["bleu"] = bleu.score
    single_test["predict_referral_content"] = candidate_text
    return single_test, bleu.score

def evaluate_jsonl_with_llama2(predictor, path, csv_file_path, max_workers=10):
    """Evaluate every record of a JSONL file in parallel and save a CSV report.

    Each non-blank line of ``path`` is parsed as one JSON record and handed to
    ``process_single_test`` on a thread pool. The processed records are then
    written to ``csv_file_path``.

    Args:
        predictor: Passed through unchanged to ``process_single_test``.
        path: Input JSONL file (UTF-8), one JSON object per line. Blank lines
            are ignored.
        csv_file_path: Destination of the CSV report.
        max_workers: Size of the thread pool, i.e. the maximum number of
            concurrent ``process_single_test`` calls. Defaults to 10.

    Returns:
        Tuple ``(processed_data, bleu_scores)``: a tuple of processed records
        and a list of their BLEU scores, both in input order. For an input
        without records this is ``((), [])``.
    """
    with open(path, 'r', encoding='utf-8') as f:
        # Skip blank lines; json.loads("") would raise on them.
        test_data_json = [json.loads(line) for line in f if line.strip()]

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        # executor.map preserves input order.
        results = list(
            executor.map(lambda record: process_single_test(record, predictor), test_data_json)
        )

    # zip(*[]) yields nothing to unpack, so the empty case is handled explicitly.
    if results:
        processed_data, bleu_scores = zip(*results)
    else:
        processed_data, bleu_scores = (), ()

    df = pd.DataFrame(list(processed_data))
    df.to_csv(csv_file_path, index=False, encoding='utf-8')
    logging.info(f"CSV file has been saved to {csv_file_path}")

    return processed_data, list(bleu_scores)



In [42]:
# Evaluate the records in test.jsonl against the endpoint and write the
# per-record results to the given CSV file.
test_evaluate_list, test_bleu_score_list = evaluate_jsonl_with_llama2(predictor, 
                                        "test_data_735/test.jsonl", "lewis-deploy_llama2_7b_f_test.csv")

INFO:root:CSV file has been saved to lewis-deploy_llama2_7b_f_test.csv


In [22]:
def analyze_predict_data(bleu_score_list):
    """Print summary statistics for a list of BLEU scores.

    Reports how many scores are >= 100 and >= 70 (both thresholds inclusive,
    matching the printed labels), the corresponding fractions of the total,
    and the mean score.

    Args:
        bleu_score_list: Sequence of numeric BLEU scores.

    Returns:
        None. The summary is written to stdout. For an empty sequence a short
        notice is printed instead of dividing by zero.
    """
    total = len(bleu_score_list)
    if total == 0:
        print("No bleu scores to analyze.")
        return

    # bleu score >= 100
    count_gt_100 = sum(1 for score in bleu_score_list if score >= 100)

    # bleu score >= 70 (inclusive, as the printed label states)
    count_gt_70 = sum(1 for score in bleu_score_list if score >= 70)

    # These are fractions in [0, 1], not percentages.
    prob_gt_100 = count_gt_100 / total
    prob_gt_70 = count_gt_70 / total
    average_score = sum(bleu_score_list) / float(total)

    print(f"Count of bleu score >=100：{count_gt_100}, % of bleu scores >=100 ： {prob_gt_100}")
    print(f"Count of bleu score >=70：{count_gt_70}, % of bleu scores >=70： {prob_gt_70}")
    print(f"Average bleu score: {average_score}")

In [43]:
# Summarise the BLEU scores held in test_bleu_score_list.
analyze_predict_data(test_bleu_score_list)

Count of bleu score >=100：125, % of bleu scores >=100 ： 0.8928571428571429
Count of bleu score >=70：131, % of bleu scores >=70： 0.9357142857142857
Average bleu score: 94.21227555894221


In [20]:
import sacrebleu

import json
import pandas as pd


def evaluate_jsonl_with_llama2(predictor, path, output_jsonl_path, csv_file_path):
    """Evaluate a JSONL test file sequentially, expecting strict JSON answers.

    NOTE(review): this notebook contains another definition of
    ``evaluate_jsonl_with_llama2`` with a different signature; whichever cell
    ran last is the one in effect.

    For each record the prompt is ``instruction`` and ``whole_letter``
    separated by ``###`` delimiters. The generated text must parse as JSON and
    contain a string ``"referral_content"``; otherwise the record id and the
    raw answer are printed and the candidate becomes ``"extract failure"``.
    Each candidate is scored against the record's ``referral_content`` with
    ``sacrebleu.corpus_bleu``, and the records are written to a CSV file.

    Args:
        predictor: Object whose ``predict(payload)`` returns a sequence whose
            first element has a ``"generated_text"`` entry.
        path: Input JSONL file (UTF-8), one JSON object per line. Blank lines
            are ignored.
        output_jsonl_path: Accepted for interface compatibility; nothing is
            written to this path.
        csv_file_path: Destination of the CSV report.

    Returns:
        Tuple ``(evaluate_list, bleu_score_list)``: the candidate text and the
        BLEU score for each record, in input order.
    """
    test_data_json = []
    with open(path, 'r', encoding='utf-8') as f:
        for line in f:
            # Skip blank lines; json.loads("") would raise on them.
            if line.strip():
                test_data_json.append(json.loads(line))

    bleu_score_list = []
    evaluate_list = []

    for single_test in test_data_json:
        instruction = single_test["instruction"]
        whole_letter = single_test["whole_letter"]
        referral_content = single_test["referral_content"]
        prompt = f"{instruction}\n\n###\n\n{whole_letter}\n\n###"
        response = predictor.predict({'inputs': prompt,
                                      'parameters': {'max_new_tokens': 256}})
        # A null reference is scored as an empty string.
        reference_text = referral_content if referral_content is not None else ""

        generated_text = None
        try:
            generated_text = response[0]["generated_text"].strip()
            candidate_text = json.loads(generated_text)["referral_content"]
            if not isinstance(candidate_text, str):
                # BLEU needs text; treat any other JSON value as a failure.
                raise TypeError("referral_content is not a string")
        except Exception:
            # Report the failing record without indexing into the response
            # again, so the handler itself cannot raise.
            print(single_test.get("id", "unknown"))
            print(generated_text if generated_text is not None else response)
            print()
            candidate_text = "extract failure"

        evaluate_list.append(candidate_text)

        bleu = sacrebleu.corpus_bleu([candidate_text], [[reference_text]])
        bleu_score_list.append(bleu.score)
        single_test["bleu"] = bleu.score
        single_test["predict_referral_content"] = candidate_text

    # Build the CSV rows with a fixed column order.
    csv_columns = (
        "id",
        "name",
        "instruction",
        "whole_letter",
        "referral_content",
        "predict_referral_content",
        "bleu",
    )
    csv_data = [
        {column: single_test[column] for column in csv_columns}
        for single_test in test_data_json
    ]

    # Create the DataFrame
    df = pd.DataFrame(csv_data)

    # Save it as a CSV file
    df.to_csv(csv_file_path, index=False, encoding='utf-8')

    print(f"CSV file has been saved to {csv_file_path}")

    return evaluate_list, bleu_score_list

In [21]:
# The function defined in the cell above takes four arguments and returns two
# lists. The two output paths are placeholders; adjust them as needed.
test_evaluate_list, test_bleu_score_list = evaluate_jsonl_with_llama2(
    predictor,
    "./test_dir/test.jsonl",
    "./test_dir/test_predictions.jsonl",
    "./test_dir/test_predictions.csv",
)

0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0


In [22]:
# Show the candidate text collected for every record.
print(test_evaluate_list)

['extract failure', 'extract failure', 'extract failure', 'extract failure', 'extract failure', 'extract failure', 'extract failure', 'extract failure', 'extract failure', 'extract failure', 'extract failure', 'extract failure', 'extract failure', 'extract failure', 'extract failure', 'extract failure', 'extract failure', 'extract failure', 'extract failure', 'extract failure', 'extract failure']


In [35]:
# Delete the endpoint behind `predictor` once evaluation is finished.
predictor.delete_endpoint()

# 用原始llama2-7b-f去部署

In [33]:
from sagemaker.jumpstart.model import JumpStartModel

# Model id and version constraint for the non-fine-tuned baseline.
pretrain_model_id = "meta-textgeneration-llama-2-7b-f"
pretrain_model_version = "3.*"

pretrain_model = JumpStartModel(
    model_id=pretrain_model_id,
    model_version=pretrain_model_version,
    instance_type="ml.g5.2xlarge",
)

# Deploy the baseline; accept_eula=True is passed explicitly to deploy().
pretrain_predictor = pretrain_model.deploy(accept_eula=True)

ImportError: cannot import name 'HubContentType' from 'sagemaker.jumpstart.types' (/opt/conda/lib/python3.10/site-packages/sagemaker/jumpstart/types.py)