In [2]:
from sagemaker.jumpstart.estimator import JumpStartEstimator

training_job_name = "jumpstart-dft-huggingface-llm-gemma-20240728-235710"
model_id = "huggingface-llm-gemma-7b-instruct"

attached_estimator = JumpStartEstimator.attach(training_job_name, model_id)


sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /root/.config/sagemaker/config.yaml


Using model 'huggingface-llm-gemma-7b-instruct' with wildcard version identifier '*'. You can pin to version '1.2.0' for more stable results. Note that models may have different input/output signatures after a major version upgrade.



2024-07-29 01:07:36 Starting - Preparing the instances for training
2024-07-29 01:07:36 Downloading - Downloading the training image
2024-07-29 01:07:36 Training - Training image download completed. Training in progress.
2024-07-29 01:07:36 Uploading - Uploading generated training model
2024-07-29 01:07:36 Completed - Training job completed


In [3]:
instance_type = "ml.g5.12xlarge"

fine_tuned_model = attached_estimator.deploy(instance_type=instance_type)

----------!

In [13]:
import sacrebleu
from rouge import Rouge
import pandas as pd
import re
import json

def extract_referral_content(text):
    # 使用正则表达式匹配倒数第二个和最后一个双引号之间的内容
    pattern = r'"([^"]*)"\s*[^"]*$'
    match = re.search(pattern, text)
    # print(f"Extracting referral_content from text:\n{text}")  # 调试信息
    if match:
        return match.group(1)
    else:
        return "extract failure"
    
    
def extract_is_papilledema(prediction_text):
    try:
        # 使用正则表达式从生成的文本中提取最后一个 is_Papilledema 的值，不管中间有什么符号
        is_papilledema_matches = list(re.finditer(r'is_Papilledema.*?(True|False)', prediction_text, re.IGNORECASE))
        if is_papilledema_matches:
            last_match = is_papilledema_matches[-1]
            return "True" if last_match.group(1).lower() == 'true' else "False"
        else:
            return "extract failure"
    except Exception as err:
        return "extract failure"



    
def evaluate_testjsonl_with_gemma(reference_path, csv_file_path):
    
    test_data_json = []
    
    with open(reference_path, 'r', encoding='utf-8') as f:
        for line in f:
              test_data_json.append(json.loads(line.strip()))
    
    rouge_score_list = []
    bleu_score_list = []
              
    rouge = Rouge()
              
    prediction_list = []
              
    for single_test in test_data_json:
        instruction = single_test["instruction"]
        whole_letter = single_test["whole_letter"]
        referral_content = single_test["referral_content"]
        prompt = f"{instruction}\n\n###\n\n{whole_letter}\n\n###"
        response = fine_tuned_model.predict({'inputs': prompt, 'parameters': {'max_new_tokens': 256}})
        # response = origin_model.predict({prompt})

        reference_text = referral_content
        # print(response[0])
        try:
            # print("response: " + response)
            # response = reponse[0] if isinstance(response, list) else response
            # print("response: " + response)
            # prediction_dict = json.loads(response["generated_text"].strip())
            prediction_text = response[0]["generated_text"].strip()
            # print(prediction_text)
            
            extracted_referral_content = extract_referral_content(prediction_text)
            # print(extracted_referral_content)
            
            extracted_is_papilledema = extract_is_papilledema(prediction_text)
            # print(extracted_is_papilledema)
            
            
            single_test["predict_referral_content"] = extracted_referral_content
            single_test["predict_is_papilledema"] = extracted_is_papilledema
           
          
        except Exception as err:
            prediction_text = "extract failure"
        finally:
            # prediction_list.append(extracted_referral_content)
            print("is_papilledema_prediction: " + extracted_is_papilledema + "\n")
            print("referral_content_prediction: " + extracted_referral_content + "\n")
            print("ground_truth: " + reference_text)
            print("=============================")

        bleu_score = sacrebleu.corpus_bleu([extracted_referral_content], [[reference_text]])
        bleu_score_list.append(bleu_score.score)
        single_test["bleu"] = bleu_score.score

        rouge_score = rouge.get_scores(extracted_referral_content, reference_text)
        rouge_score_list.append(rouge_score)
        
        
        
    # 创建 CSV 文件
    csv_data = []

    for single_test in test_data_json:
        csv_data.append({
            # "id": single_test["id"],
            # "name": single_test["name"],
            "instruction": single_test["instruction"],
            "whole_letter": single_test["whole_letter"],
            "referral_content": single_test["referral_content"],
            "predict_referral_content": single_test["predict_referral_content"],
            "bleu": single_test["bleu"],
            "is_papilledema": single_test["predict_is_papilledema"],
        })

    # 创建 DataFrame
    df = pd.DataFrame(csv_data)

    # 保存为 CSV 文件
    df.to_csv(csv_file_path, index=False, encoding='utf-8')

    print(f"CSV file has been saved to {csv_file_path}")
        
    return bleu_score_list
              
    

In [14]:
bleu_score_list = evaluate_testjsonl_with_gemma("../test.jsonl", "./gemma_trainModel_result.csv")

is_papilledema_prediction: False

referral_content_prediction: The reason for this referral is to further evaluate and manage Ms. Jane Doe's progressively worsening cardiopulmonary symptoms. Given her clinical presentation, further investigation is essential to rule out possible cardiac etiologies, including congestive heart failure or valvular disease.

ground_truth: The reason for this referral is to further evaluate and manage Ms. Jane Doe's progressively worsening cardiopulmonary symptoms. Given her clinical presentation, further investigation is essential to rule out possible cardiac etiologies, including congestive heart failure or valvular disease.
is_papilledema_prediction: False

referral_content_prediction: Mr. Davis has presented with significant neck pain and dropped head syndrome without any neurological abnormalities. Despite physical therapy, including cervical spine traction and extensor muscle training, there has been no improvement.

ground_truth: Mr. Davis has presen

In [16]:
def analyze_predict_data(bleu_score_list):
    # 统计大于100的个数
    count_gt_100 = sum(1 for score in bleu_score_list if score >= 100)

    # 统计大于70的个数
    count_gt_70 = sum(1 for score in bleu_score_list if score > 70)

    prob_gt_100 = count_gt_100 / len(bleu_score_list)
    prob_gt_70 = count_gt_70 / len(bleu_score_list)
    average_score = sum(bleu_score_list) / float(len(bleu_score_list))

    print(f"分数大于100的个数：{count_gt_100}, 占所有数据的百分比为： {prob_gt_100}")
    print(f"分数大于70的个数：{count_gt_70}, 占所有数据的百分比为： {prob_gt_70}")
    print(f"bleu平均分数: {average_score}")

In [None]:
analyze_predict_data(bleu_score_list)

In [4]:
import sacrebleu
from rouge import Rouge
import pandas as pd
import re
import json

# def extract_content(response_text):
#     extract_array = response_text.split("###")
    
#     return extract_array[3]



    
def print_testjsonl_with_gemma(reference_path):
    
    test_data_json = []
    
    with open(reference_path, 'r', encoding='utf-8') as f:
        for line in f:
              test_data_json.append(json.loads(line.strip()))
    
    rouge_score_list = []
    bleu_score_list = []
              
    rouge = Rouge()
              
    prediction_list = []
              
    for single_test in test_data_json:
        instruction = single_test["instruction"]
        whole_letter = single_test["whole_letter"]
        referral_content = single_test["referral_content"]
        prompt = f"{instruction}\n\n###\n\n{whole_letter}\n\n###"
        response = fine_tuned_model.predict({'inputs': prompt, 'parameters': {'max_new_tokens': 256}})
        # response = origin_model.predict({prompt})

        reference_text = referral_content
        # print(response[0])
        try:
            # print("response: " + response)
            # response = reponse[0] if isinstance(response, list) else response
            # print("response: " + response)
            # prediction_dict = json.loads(response["generated_text"].strip())
            prediction_text = response[0]["generated_text"].strip()
            print(prediction_text + "\n")
            
            # extracted_referral_content = extract_content(prediction_text)
            # print(extracted_referral_content)
            
            # single_test["predict_referral_content"] = extracted_referral_content
           
          
        except Exception as err:
            prediction_text = "extract failure"
        finally:
            # prediction_list.append(extracted_referral_content)
            # print("prediction: " + extracted_referral_content + "\n")
            print("ground_truth: " + reference_text)
            print("=============================")

#         bleu_score = sacrebleu.corpus_bleu([extracted_referral_content], [[reference_text]])
#         bleu_score_list.append(bleu_score.score)
#         single_test["bleu"] = bleu_score.score

#         rouge_score = rouge.get_scores(extracted_referral_content, reference_text)
#         rouge_score_list.append(rouge_score)
        
        
        
#     # 创建 CSV 文件
#     csv_data = []

#     for single_test in test_data_json:
#         csv_data.append({
#             "id": single_test["id"],
#             "name": single_test["name"],
#             "instruction": single_test["instruction"],
#             "whole_letter": single_test["whole_letter"],
#             "referral_content": single_test["referral_content"],
#             "predict_referral_content": single_test["predict_referral_content"],
#             "bleu": single_test["bleu"],
#         })

#     # 创建 DataFrame
#     df = pd.DataFrame(csv_data)

#     # 保存为 CSV 文件
#     df.to_csv(csv_file_path, index=False, encoding='utf-8')

#     print(f"CSV file has been saved to {csv_file_path}")
        
#     return bleu_score_list
              
    

In [15]:
print_testjsonl_with_gemma("../test.jsonl")

Synonyms: ["Papilledema","Swollen discs","Indistinct margins","Blurred disc margins","Suspicious discs","Disc swelling","Optic nerve swelling"]

PseudoSynonyms: ["Pseudopapilledema","Drusen","Tilted disc","Anomalous discs"]


Role : You are a experienced doctor who have memory of electronic medical records related to many diseases.

Instruction : please extract the referral content from the following referral letter  separeted by ###. 

output your result directly in format: "is_Papilledema": boolean, "referral_content": "".

Rule For is_Papilledema : If the referral letter contains one of words in Synonyms, then is_Papilledema = true; If the letter contains words in PseudoSynonyms or doesn't contain words in Synonyms, then is_Papilledema = False.

Rule For referral_content : this content should be a whole paragraph which tells Patient need referral. If the referral_letter contains this content, you should include it. If the letter doesn't contain related information, then it should be

KeyboardInterrupt: 

In [None]:
# def analyze_predict_data(bleu_score_list):
#     # 统计大于100的个数
#     count_gt_100 = sum(1 for score in bleu_score_list if score >= 100)

#     # 统计大于70的个数
#     count_gt_70 = sum(1 for score in bleu_score_list if score > 70)

#     prob_gt_100 = count_gt_100 / len(bleu_score_list)
#     prob_gt_70 = count_gt_70 / len(bleu_score_list)
#     average_score = sum(bleu_score_list) / float(len(bleu_score_list))

#     print(f"分数大于100的个数：{count_gt_100}, 占所有数据的百分比为： {prob_gt_100}")
#     print(f"分数大于70的个数：{count_gt_70}, 占所有数据的百分比为： {prob_gt_70}")
#     print(f"bleu平均分数: {average_score}")

In [None]:
analyze_predict_data(bleu_score_list)

In [None]:
# import sacrebleu
# from rouge import Rouge


# import json

# def evaluate_testjsonl_with_gemma(path):
    
#     test_data_json = []
    
#     with open(path, 'r', encoding='utf-8') as f:
#         for line in f:
#               test_data_json.append(json.loads(line.strip()))
    
#     rouge_score_list = []
#     bleu_score_list = []
              
#     rouge = Rouge()
              
#     prediction_list = []
              
#     for single_test in test_data_json:
#         instruction = single_test["instruction"]
#         whole_letter = single_test["whole_letter"]
#         referral_content = single_test["referral_content"]
#         prompt = f"{instruction}\n\n###\n\n{whole_letter}\n\n###"
#         response = fine_tuned_model.predict({'inputs': prompt, 'parameters': {'max_new_tokens': 256}})
#         # response = fine_tuned_model.predict({prompt})

#         reference_text = referral_content
#         # print(response[0])
#         try:
#             # print("response: " + response)
#             # response = reponse[0] if isinstance(response, list) else response
#             # print("response: " + response)
#             # prediction_dict = json.loads(response["generated_text"].strip())
#             prediction_text = response[0]["generated_text"].strip()
#             # print(prediction_text)
#             # prediction_text = prediction_dict["referral_content"]
#         except Exception as err:
#             prediction_text = "extract failure"
#         finally:
#             prediction_list.append(prediction_text)
#             print("prediction: " + prediction_text)
#             print("ground_truth: " + reference_text)

#         bleu_score = sacrebleu.corpus_bleu([prediction_text], [[reference_text]])
#         bleu_score_list.append(bleu_score.score)

#         rouge_score = rouge.get_scores(prediction_text, reference_text)
#         rouge_score_list.append(rouge_score)
              
#     print(prediction_list)
#     print(rouge_score_list)
#     print(bleu_score_list)
    
    