In [None]:
# Mar 3rd: transformers 4.34.1 -> 4.36.0, flash_attn added
!pip install -q accelerate==0.21.0 peft==0.4.0 bitsandbytes==0.40.2 transformers==4.36.0 trl==0.4.7 flash_attn

In [None]:
!pip install --upgrade ipywidgets

In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, LlamaTokenizer, MixtralForCausalLM

# Smoke test: load Mixtral-8x7B-Instruct in 4-bit and let it continue a short chat.
model_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"
tokenizer = LlamaTokenizer.from_pretrained(model_id)

model = MixtralForCausalLM.from_pretrained(
    model_id,
    load_in_4bit=True,
    torch_dtype=torch.bfloat16,
    device_map="auto"
    )

messages = [
    {"role": "user", "content": "What is your favourite condiment?"},
    {"role": "assistant", "content": "Well, I'm quite partial to a good squeeze of fresh lemon juice. It adds just the right amount of zesty flavour to whatever I'm cooking up in the kitchen!"},
    {"role": "user", "content": "Do you have mayonnaise recipes?"}
]

# device_map="auto" decides where the model is placed, so send the prompt to
# model.device instead of assuming a device literally named "cuda".
input_ids = tokenizer.apply_chat_template(messages, return_tensors="pt").to(model.device)

outputs = model.generate(input_ids, max_new_tokens=20)
# outputs[0] holds the prompt tokens followed by the newly generated ones.
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

In [None]:
# (OPTIONAL)
# Pull one test prompt out of the Excel sheet of sample complaints.
import pandas

df = pandas.read_excel("起訴狀案例測試.xlsx")
# Scalar lookup: row label 9, column 'prompt-claude3-Opus'.
test_prompt = df.at[9, 'prompt-claude3-Opus']
print(test_prompt)

In [None]:
# Hand-written sample case used as the final user turn.
# (The original f-prefix was dropped: the text contains no placeholders.)
test_prompt = """請以中華民國律師的身分產生一份中華民國起訴書中請求「慰撫金」的內容，事件資料如下：
        原告: 陳○麗, 原告身分證明類型: 國民身份證, 原告證號:O778541223, 原告年齡: 22, 原告性別: 女, 原告現住地: 新北市中和區國凱街32之3號3樓, 原告電子郵件: a54@gmail.com,
        被告: 林O廉, 被告身分證明類型: 國民身份證, 被告證號:P224155693, 被告年齡: 30, 被告性別: 男, 被告現住地: 新北市板橋區龍泉街108巷9號2樓, 被告電子郵件: ac45@gmail.com,
        事件內容:
        醫療期間(天)：100
        日常生活影響：車禍致使原告腳部受傷，無法行走，生活難以自理
        身心健康影響：原告終日無神 經精神科鑑定為...
        家庭影響：事件導致被害人親屬離世
        離世被害人親屬：陳○○（父）
        事件經過描述：緣原告於民國（下同） 87年 8月 9日時遭被告駕駛車號QQQ-8888 汽車碰撞，致原告身體受傷及原告之親屬死亡。..."""
messages = [
    {"role": "user", "content": "你的中文能力如何?"},
    {"role": "assistant", "content": "我可以進行流暢的中文對話，請儘管發問!"},
    #{"role": "user", "content": "你知道一份法律起訴書是什麼樣的嗎？"},
    {"role": "user", "content": test_prompt}
]
# Send the prompt to wherever the model actually lives rather than a
# hard-coded "cuda" device.
input_ids = tokenizer.apply_chat_template(messages, return_tensors="pt").to(model.device)
outputs = model.generate(input_ids, max_new_tokens=500)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

In [None]:
# (OPTIONAL)
# Preprocess & use dataset from drive

import os
import json
import pandas as pd
from random import seed, shuffle
from datasets import load_dataset

SHUFFLE_SEED = 42      # fixed so the train/test split is the same on every run
TRAIN_FRACTION = 0.8   # 0.8 -> "80% train + 20% test"

finetune_data = []

file_path = 'dataset_for_training/indictment_json/'
# sorted(): os.listdir returns entries in arbitrary order, which would defeat
# the fixed shuffle seed below.
for file_name in sorted(name for name in os.listdir(file_path) if name.endswith('.json')):
    # Explicit encoding: the platform default (e.g. cp950 on Windows) cannot be
    # relied on for Chinese text. Assumes the files are UTF-8 -- TODO confirm.
    with open(os.path.join(file_path, file_name), "r", encoding="utf-8") as json_file:
        data = json.load(json_file)
    # NOTE(review): str(data) stores the Python repr of the parsed JSON as the prompt.
    finetune_data.append({"prompt":str(data),"response":"這是一份中華民國的起訴書"})

# Shuffle the combined data (seeded, so the split is reproducible)
seed(SHUFFLE_SEED)
shuffle(finetune_data)

# Split into train / test portions
split_index = int(TRAIN_FRACTION * len(finetune_data))

train_data = finetune_data[:split_index]
test_data = finetune_data[split_index:]


def _write_jsonl(path, records):
    """Write one JSON object per line to `path`."""
    with open(path, 'w', encoding="utf-8") as out_file:
        for item in records:
            out_file.write(json.dumps(item) + '\n')


_write_jsonl('train_dataset.jsonl', train_data)
_write_jsonl('test_dataset.jsonl', test_data)

######

# Read dataset
train_dataset = load_dataset('json', data_files='train_dataset.jsonl', split="train")  # training set from the JSONL file
valid_dataset = load_dataset('json', data_files='test_dataset.jsonl', split="train")  # validation set from the JSONL file


def _add_text_column(examples):
    """Batched map function: 'text' = prompt immediately followed by response."""
    return {'text': [prompt + response for prompt, response in zip(examples['prompt'], examples['response'])]}


# preprocess dataset by text-pairing prompt and response
train_dataset = train_dataset.map(_add_text_column, batched=True)
valid_dataset = valid_dataset.map(_add_text_column, batched=True)


######

for i in train_dataset:
  print(i)

In [None]:
import os
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    LlamaTokenizer, MixtralForCausalLM, # added on Feb 29th
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
)
from peft import LoraConfig, PeftModel
from trl import SFTTrainer

In [None]:
# ------------------------------------------------------------------------------
# Model / dataset selection
# ------------------------------------------------------------------------------
# Base checkpoint to fine-tune (HuggingFace hub id). Alternatives tried earlier:
#   "NousResearch/Llama-2-7b-chat-hf", "meta-llama/Llama-2-7b-chat-hf",
#   "TinyPixel/Llama-2-7B-bf16-sharded", "mistralai/Mixtral-8x7B-Instruct-v0.1"
model_name = "MediaTek-Research/Breeze-7B-Instruct-v1_0"

# To train on a hub dataset instead of the files from drive, enable:
#   dataset_name = "mlabonne/guanaco-llama2-1k"
#   train_dataset = load_dataset(dataset_name, split="train")

# Name given to the fine-tuned model
new_model = "Llama-2-7b-chat-finetune"

# ------------------------------------------------------------------------------
# QLoRA: attention dimension (rank), scaling alpha, dropout on the LoRA layers
# ------------------------------------------------------------------------------
lora_r, lora_alpha, lora_dropout = 2, 4, 0.1

# ------------------------------------------------------------------------------
# bitsandbytes
# ------------------------------------------------------------------------------
use_4bit = True                        # load the base model in 4-bit precision
bnb_4bit_compute_dtype = "float16"     # compute dtype for the 4-bit base model
bnb_4bit_quant_type = "nf4"            # quantization type: "fp4" or "nf4"
use_nested_quant = False               # nested (double) quantization

# ------------------------------------------------------------------------------
# TrainingArguments
# ------------------------------------------------------------------------------
output_dir = "./results"               # predictions and checkpoints go here
num_train_epochs = 1

# Mixed precision: fp16 or bf16 (set bf16 to True with an A100)
fp16, bf16 = True, False

# Per-GPU batch sizes for training and evaluation
per_device_train_batch_size = 4
per_device_eval_batch_size = 4

gradient_accumulation_steps = 1        # update steps to accumulate gradients over
gradient_checkpointing = True
max_grad_norm = 0.3                    # maximum gradient norm (gradient clipping)

learning_rate = 2e-4                   # initial learning rate (AdamW optimizer)
weight_decay = 0.001                   # applied to all layers except bias/LayerNorm weights
optim = "paged_adamw_32bit"
lr_scheduler_type = "cosine"

max_steps = -1                         # number of training steps; overrides num_train_epochs
warmup_ratio = 0.03                    # ratio of steps for linear warmup (0 -> learning rate)

# Batch together sequences of the same length
# (saves memory and speeds up training considerably)
group_by_length = True

save_steps = 0                         # save a checkpoint every X update steps
logging_steps = 25                     # log every X update steps

# ------------------------------------------------------------------------------
# SFT
# ------------------------------------------------------------------------------
max_seq_length = 2048                  # maximum sequence length to use
packing = False                        # pack several short examples into one input sequence
device_map = {"": 0}                   # load the entire model on GPU 0

In [None]:
import importlib.util

# Load tokenizer and model with QLoRA configuration
compute_dtype = getattr(torch, bnb_4bit_compute_dtype)

bnb_config = BitsAndBytesConfig(
    load_in_4bit=use_4bit,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=use_nested_quant,
)

# Check GPU compatibility with bfloat16
if compute_dtype == torch.float16 and use_4bit:
    major, _ = torch.cuda.get_device_capability()
    if major >= 8:
        print("=" * 80)
        print("Your GPU supports bfloat16: accelerate training with bf16=True")
        print("=" * 80)

# flash-attn is an optional, hard-to-build dependency: only request it when the
# package is importable, otherwise fall back to the default attention.
attn_kwargs = {}
if importlib.util.find_spec("flash_attn") is not None:
    attn_kwargs["attn_implementation"] = "flash_attention_2"

# Load base model.
# AutoModelForCausalLM picks the architecture recorded in the checkpoint's own
# config. Forcing MixtralForCausalLM onto a non-Mixtral checkpoint (model_name
# is currently Breeze-7B) builds a mismatched network.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    torch_dtype=compute_dtype,
    device_map=device_map,
    **attn_kwargs,
)
model.config.use_cache = False
model.config.pretraining_tp = 1

# Load the tokenizer that ships with the checkpoint
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right" # Fix weird overflow issue with fp16 training

# Load LoRA configuration
peft_config = LoraConfig(
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    r=lora_r,
    target_modules=[
      "q_proj",
      "k_proj",
      "v_proj",
      "o_proj",
      "gate_proj",
      "up_proj",
      "down_proj",
      "lm_head",
      ], # March 7th
    bias="none",
    task_type="CAUSAL_LM",
)

# Set training parameters
training_arguments = TrainingArguments(
    output_dir=output_dir,
    num_train_epochs=num_train_epochs,
    per_device_train_batch_size=per_device_train_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    optim=optim,
    save_steps=save_steps,
    logging_steps=logging_steps,
    learning_rate=learning_rate,
    weight_decay=weight_decay,
    fp16=fp16,
    bf16=bf16,
    max_grad_norm=max_grad_norm,
    max_steps=max_steps,
    warmup_ratio=warmup_ratio,
    group_by_length=group_by_length,
    lr_scheduler_type=lr_scheduler_type,
    report_to="tensorboard"
)

# Set supervised fine-tuning parameters
trainer = SFTTrainer(
    model=model,
    train_dataset=train_dataset,
    #test_dataset=test_dataset,
    peft_config=peft_config,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    tokenizer=tokenizer,
    args=training_arguments,
    packing=packing,
)

# Train model
trainer.train()

# Save trained model (the adapter is written under the name in `new_model`)
trainer.model.save_pretrained(new_model)

In [None]:
# (OPTIONAL)
# Visualize training: load the TensorBoard notebook extension, then open it on
# the results/runs directory (presumably where the trainer's tensorboard logs
# end up -- verify against the configured output directory).

%load_ext tensorboard
%tensorboard --logdir results/runs

In [None]:
# Only let CRITICAL messages through
logging.set_verbosity(logging.CRITICAL)

# Quick qualitative check: push one instruction-formatted prompt through a
# text-generation pipeline built on the in-memory model.
prompt = "請使用中文描述中華民國的起訴書之結構"
pipe = pipeline(
    model=model,
    tokenizer=tokenizer,
    task="text-generation",
    max_length=500,
)
result = pipe("[INST] " + prompt + " [/INST]")
print(result[0]["generated_text"])

In [None]:
# Empty VRAM
import gc

# Drop every notebook-level reference to the large objects ...
del model
del pipe
del trainer
# ... collect them ...
gc.collect()
gc.collect()
# ... and hand PyTorch's cached CUDA blocks back to the driver. Without this
# step the memory stays reserved by the caching allocator even after the
# tensors themselves are gone.
torch.cuda.empty_cache()

In [None]:
# Reload model in FP16 and merge it with LoRA weights.
# AutoModelForCausalLM resolves the architecture from the checkpoint's config;
# MixtralForCausalLM is only correct for Mixtral checkpoints, and model_name is
# currently a Breeze-7B checkpoint.
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    low_cpu_mem_usage=True,
    return_dict=True,
    torch_dtype=torch.float16,
    device_map=device_map,
)
# Attach the adapter saved under `new_model`, then fold it into the base weights.
model = PeftModel.from_pretrained(base_model, new_model)
model = model.merge_and_unload()

# Reload tokenizer to save it
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

In [None]:
# (OPTIONAL)
# Saving the fine-tuned model to Local file
model_save_path = '/app/my_model_directory'

# Create the save directory if it does not exist (exist_ok avoids the
# check-then-create race of a separate os.path.exists test)
os.makedirs(model_save_path, exist_ok=True)

# Save the model and the tokenizer
model.save_pretrained(model_save_path)
tokenizer.save_pretrained(model_save_path)

In [None]:
# (OPTIONAL)
# Load the saved model from drive
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, LlamaTokenizer, MixtralForCausalLM, pipeline # Feb 29th

from google.colab import drive
drive.mount('/content/drive')

model_path = "/content/drive/My Drive/my_model_directory"  # change to the path the model was saved to

# The saved checkpoint records its own architecture, so let the Auto classes
# resolve it instead of forcing the Mixtral class onto whatever was saved.
model = AutoModelForCausalLM.from_pretrained(model_path,
                         device_map="cuda",
                         offload_folder="offload",
                         torch_dtype=torch.float16)
tokenizer = AutoTokenizer.from_pretrained(model_path)

In [None]:
# (OPTIONAL)
# Upload the finetuned model to HuggingFace #Default

import locale
locale.getpreferredencoding = lambda: "UTF-8"

# `huggingface-cli login` waits for terminal input, which is known to not work
# well in Jupyter; notebook_login() shows a widget prompt instead.
from huggingface_hub import notebook_login
notebook_login()

# `check_pr` is not a push_to_hub argument and was dropped. If a pull request
# was intended, pass create_pr=True.
model.push_to_hub("Llamarider222/Llama-2-7b-chat-hf")

tokenizer.push_to_hub("Llamarider222/Llama-2-7b-chat-hf")

In [None]:
# Upload the finetuned model to HuggingFace
# Workaround 01

import os
from getpass import getpass

os.environ['HF_HOME'] = '/root/.cache/huggingface'  # Set cache directory if needed

# SECURITY: never commit an access token to a notebook. The token that used to
# be hard-coded here must be treated as leaked and revoked on the Hub.
# Take it from the environment, or prompt for it without echoing.
hf_token = (
    os.environ.get("HF_TOKEN")
    or os.environ.get("HUGGING_FACE_HUB_TOKEN")
    or getpass("Hugging Face access token: ")
)

# Pass the token explicitly (`token=` replaces the deprecated `use_auth_token=`).
model.push_to_hub("Llamarider222/llawma-2-7b-chat-hf", token=hf_token)
tokenizer.push_to_hub("Llamarider222/llawma-2-7b-chat-hf", token=hf_token)

In [None]:
# Upload the finetuned model to HuggingFace
# Workaround 02

from getpass import getpass

from huggingface_hub import HfFolder

# SECURITY: never commit an access token to a notebook. The token that used to
# be hard-coded here must be treated as leaked and revoked on the Hub.
# Prompt for it (no echo) and store it in the local Hugging Face token file.
HfFolder.save_token(getpass("Hugging Face access token: "))

# token=True means "use the locally stored token"
# (`token=` replaces the deprecated `use_auth_token=`).
model.push_to_hub("Llamarider222/llama-2-7b-chat-hf", token=True)
tokenizer.push_to_hub("Llamarider222/llama-2-7b-chat-hf", token=True)

In [None]:
# Upload the finetuned model to HuggingFace
# Workaround 03

!pip install --upgrade transformers

# Reload model in FP16 and merge it with LoRA weights
base_model = AutoModelForCausalLM.from_pretrained(
    #model_path,
    model_name,
    low_cpu_mem_usage=True,
    return_dict=True,
    torch_dtype=torch.float16,
    device_map=device_map,
)
model = PeftModel.from_pretrained(base_model, new_model)
model = model.merge_and_unload()

# Reload tokenizer to save it
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"


model.push_to_hub("Llamarider222/llama-2-7b-chat-hf", use_auth_token=True)
tokenizer.push_to_hub("Llamarider222/llama-2-7b-chat-hf", use_auth_token=True)

In [None]:
# (OPTIONAL)
# load the public model from huggingface

import locale
locale.getpreferredencoding = lambda: "UTF-8"

!pip install transformers accelerate
!pip install sentencepiece

import transformers
from transformers import LlamaTokenizer, MixtralForCausalLM, AutoModel, AutoTokenizer # Feb 29th
import torch

model = "Llamarider222/Llama-2-7b-chat-hf"
prompt = "請描述中華民國的起訴書的內容,包含'indictNO','indictment','org','date','type','reason','relatedIssues'等標籤。"

tokenizer = LlamaTokenizer.from_pretrained(model)
pipeline = transformers.pipeline(
    "text-generation",
    model=model,
    torch_dtype=torch.float16,
    device_map="auto",
)

sequences = pipeline(
    f'[INST] {prompt} [/INST]',
    do_sample=True,
    top_k=10,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
    max_length=200,
)
for seq in sequences:
    print(f"Result: {seq['generated_text']}")

__Test the Finetuned Model__

In [None]:
# Nth test-run -- after fine-tuning (postFT)

prompt = "請描述中華民國起訴書的內容，包含'indictNO','indictment','org','date','type','reason','relatedIssues'等標籤。"

# Build a text-generation pipeline around the current model/tokenizer and
# print the text generated for the prompt.
gen = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=200,
)
result = gen(prompt)
print(result[0]["generated_text"])

In [None]:
# Nth test-run -- before fine-tuning (preFT)

prompt = "請描述中華民國起訴書的內容，包含'indictNO','indictment','org','date','type','reason','relatedIssues'等標籤。"

# Same check as the postFT run, with a larger generation budget.
gen = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=2048,
)
result = gen(prompt)
print(result[0]["generated_text"])

__Gradio UI__

In [None]:
!pip install gradio

In [None]:
#CHAT UI v0.0.4(latest)
import os
import gradio as gr
from matplotlib.rcsetup import validate_fontsize_None
from pickle import FALSE

# Accounts / passwords.
# SECURITY: these are plaintext demo credentials kept only so the demo keeps
# working out of the box. For any shared deployment supply the real account
# list through the ACPW_ACCOUNTS_JSON environment variable instead, as a JSON
# list of {"ac": ..., "pw": ...} objects.
import json

_DEMO_ACCOUNTS = [{'ac': '2665', 'pw': '89944'}, {'ac': '2675', 'pw': '898s8s4'}, {'ac': 'lee', 'pw': '12345'}, {'ac': 'test', 'pw': '12345'}]
_accounts_json = os.environ.get("ACPW_ACCOUNTS_JSON")
# A list (despite the name); the registration handlers append to it at runtime.
acpw_dict = json.loads(_accounts_json) if _accounts_json else list(_DEMO_ACCOUNTS)
pw_show = False

with gr.Blocks() as acpw:
    # Login panel: account / password boxes, a read-only system-message box,
    # login / register / register-and-login buttons, and two buttons that
    # pre-fill the form with sample cases (murder, burglary).
    gr.Markdown("請輸入帳密")
    ac = gr.Textbox(label="帳號")
    pw = gr.Textbox(label="密碼", type="password")
    sysoutput = gr.Textbox(label="系統訊息", interactive=False)
    logi = gr.Button("登入")
    regi = gr.Button("註冊")
    logi_regi = gr.Button("註冊並登入")
    default_murder_input = gr.Button("預設資料(謀殺案)")
    default_burglary_input = gr.Button("預設資料(竊盜案)")
    # (disabled) show-password button:
    #showing_pw = gr.Button("密碼顯示")

    # Chat UI and case-data form; these columns start hidden and are made
    # visible by the login handlers.
    # Plaintiff data
    with gr.Column(visible=False) as plaintiff_ui:
      # Nine input fields describing the plaintiff
      Plaintiff_name_input = gr.Textbox(label="原告")
      Plaintiff_ID_type_input = gr.Dropdown(label="身份證明文件", choices=["國民身份證", "護照", "居留證", "工作證", "營利事業登記"])
      Plaintiff_ID_input = gr.Textbox(label="證號")
      Plaintiff_age_input = gr.Textbox(label="年齡")
      Plaintiff_birth_input = gr.Textbox(label="生日")
      Plaintiff_gender_input = gr.Dropdown(label="性別", choices=["男", "女", "其他"])
      Plaintiff_location_input = gr.Textbox(label="現住地")
      Plaintiff_telephone_input = gr.Textbox(label="電話")
      Plaintiff_email_input = gr.Textbox(label="電子郵件")

    # Defendant data (same nine fields as the plaintiff)
    with gr.Column(visible=False) as defendant_ui:
      Defendant_name_input = gr.Textbox(label="被告")
      Defendant_ID_type_input = gr.Dropdown(label="身份證明文件", choices=["國民身份證", "護照", "居留證", "工作證", "營利事業登記"])
      Defendant_ID_input = gr.Textbox(label="證號")
      Defendant_age_input = gr.Textbox(label="年齡")
      Defendant_birth_input = gr.Textbox(label="生日")
      Defendant_gender_input = gr.Dropdown(label="性別", choices=["男", "女", "其他"])
      Defendant_location_input = gr.Textbox(label="現住地")
      Defendant_telephone_input = gr.Textbox(label="電話")
      Defendant_email_input = gr.Textbox(label="電子郵件")

    # Incident description, prompt builder and chat area
    with gr.Column(visible=False) as detail_ui:
      detail_input = gr.Textbox(label="事件描述：")
      # Button to combine data
      combine_button = gr.Button("產生PROMPT")
      # (disabled) button that would build a prompt from default data:
      #combine_DEFAULT_button = gr.Button("產生預設PROMPT")
      # Chat Interface
      chatbot = gr.Chatbot()
      with gr.Row():
        # Message box (scale 10) next to a narrow Submit button (scale 1)
        msg = gr.Textbox(
            container=False,
            show_label=False,
            placeholder='Type a message...',
            scale=10,
        )
        submit_button = gr.Button('Submit',variant='primary',scale=1,min_width=0)


    def user(user_message, history):
      """Queue the user's message as a new chat turn with no reply yet.

      Returns an empty string (which clears the message box) and a new history
      list with [user_message, None] appended; `history` itself is not mutated.
      """
      pending_turn = [user_message, None]
      cleared_box = ""
      return cleared_box, history + [pending_turn]

    # Merge the form fields into one prompt
    def combine_data(Plaintiff_name, Plaintiff_id_type, Plaintiff_id, Plaintiff_age, Plaintiff_gender, Plaintiff_location, Plaintiff_email, Defendant_name, Defendant_id_type, Defendant_id, Defendant_age, Defendant_gender, Defendant_location, Defendant_email, detail):
      """Build the prompt text from the plaintiff, defendant and incident fields.

      The result has four lines: the instruction, one line of plaintiff data,
      one line of defendant data, and the incident description.
      """
      return f"""請以中華民國律師的身分產生一份中華民國起訴書
      原告: {Plaintiff_name}, 原告身分證明類型: {Plaintiff_id_type}, 原告證號:{Plaintiff_id}, 原告年齡: {Plaintiff_age}, 原告性別: {Plaintiff_gender}, 原告現住地: {Plaintiff_location}, 原告電子郵件: {Plaintiff_email},
      被告: {Defendant_name}, 被告身分證明類型: {Defendant_id_type}, 被告證號:{Defendant_id}, 被告年齡: {Defendant_age}, 被告性別: {Defendant_gender}, 被告現住地: {Defendant_location}, 被告電子郵件: {Defendant_email},
      事件內容: {detail}"""
    # Chat backend
    def bot(history):
      """Generate a reply to the latest user turn and reveal it one character at a time.

      Runs the newest user message through a text-generation pipeline, then
      fills history[-1][1] character by character, yielding the (mutated)
      history after each character.
      """
      generator = pipeline('text-generation', model=model, max_new_tokens= 256, tokenizer=tokenizer)
      latest_user_message = history[-1][0]
      reply = generator(latest_user_message)[0]['generated_text']
      history[-1][1] = ""
      for ch in reply:
          history[-1][1] = history[-1][1] + ch
          yield history

    # Murder case: preset form values
    def default_murder_input_change_tetbox():
      """Return one gr.update per preset value (13 in total), in form order.

      Order: six plaintiff values (name, ID type, ID number, age, address,
      e-mail), the same six for the defendant, then the incident description.
      No gender values are included.
      """
      preset_values = (
          "陳○麗",
          "國民身份證",
          "O778541223",
          "22",
          "新北市中和區國凱街32之3號3樓",
          "a54@gmail.com",
          "林學廉",
          "國民身份證",
          "P224155693",
          "30",
          "新北市板橋區龍泉街108巷9號2樓",
          "ac45@gmail.com",
          "林學廉與陳○麗原為同居之男女朋友，屬家庭暴力防治法所稱之家庭成員，因感情糾紛。於106年9月27日0時16分，在屋內林學廉因感情之事與陳○麗起口角，林學廉心生不滿，走出屋外持事先預備之西瓜刀，插在腰間走入屋內，見陳○麗坐在床上直接過去砍她。",
      )
      return tuple(gr.update(value=preset) for preset in preset_values)
      # Gender presets are intentionally left out:
      #gr.update(value="女"),
      #gr.update(value="男"),

    # Burglary case: preset form values
    def default_burglary_input_change_tetbox():
      """Return one gr.update per preset value (13 in total), in form order.

      Order: six plaintiff values (name, ID type, ID number, age, address,
      e-mail), the same six for the defendant, then the incident description.
      No gender values are included.
      """
      preset_values = (
          "許洋偉",
          "國民身份證",
          "O778541223",
          "22",
          "新北市中和區國凱街32之3號3樓",
          "a54@gmail.com",
          "黃奕凱",
          "國民身份證",
          "P224155693",
          "30",
          "新北市板橋區龍泉街108巷9號2樓",
          "ac45@gmail.com",
          "黃奕凱於107年6月5日凌晨1時48分左右，駕駛 自用小客車，行經國凱街旁邊空地，見四周無人，竟意圖為自己不法之所有，基於竊盜之犯意，以自備吸油工具插入許洋偉停放於該處之自用小貨車油箱內，將新臺幣300元之95無鉛汽油10公升抽出置入自備油桶，再加入其駕駛之自用小客車油箱內，旋即往國凱街方向逃逸。許洋偉發覺其車輛油箱蓋遭開啟且油表之油量減少，發覺遭竊，報警處理。",
      )
      return tuple(gr.update(value=preset) for preset in preset_values)
      # Gender presets are intentionally left out:
      #gr.update(value="女"),
      #gr.update(value="男"),







    #AI對話方法(留存備用)
    """
    def chat_with_ai(message, history):
      gen = pipeline('text-generation', model=model, max_new_tokens= 256, tokenizer=tokenizer)
      result = gen(message)
      yield result[0]['generated_text']
    """
    # Combine data: feed every prompt-relevant form field to combine_data and
    # put the resulting prompt into the message box.
    combine_inputs = [
        Plaintiff_name_input,
        Plaintiff_ID_type_input,
        Plaintiff_ID_input,
        Plaintiff_age_input,
        Plaintiff_gender_input,
        Plaintiff_location_input,
        Plaintiff_email_input,
        Defendant_name_input,
        Defendant_ID_type_input,
        Defendant_ID_input,
        Defendant_age_input,
        Defendant_gender_input,
        Defendant_location_input,
        Defendant_email_input,
        detail_input,
    ]
    combine_button.click(fn=combine_data, inputs=combine_inputs, outputs=[msg])

    # Components filled by the preset buttons. The gender dropdowns
    # (Plaintiff_gender_input / Defendant_gender_input) are left out.
    preset_outputs = [
        Plaintiff_name_input,
        Plaintiff_ID_type_input,
        Plaintiff_ID_input,
        Plaintiff_age_input,
        Plaintiff_location_input,
        Plaintiff_email_input,
        Defendant_name_input,
        Defendant_ID_type_input,
        Defendant_ID_input,
        Defendant_age_input,
        Defendant_location_input,
        Defendant_email_input,
        detail_input,
    ]

    # Murder case: fill in preset data
    default_murder_input.click(default_murder_input_change_tetbox, inputs=[], outputs=list(preset_outputs))
    # Burglary case: fill in preset data
    default_burglary_input.click(default_burglary_input_change_tetbox, inputs=[], outputs=list(preset_outputs))

    """
    #預設內容
    combine_DEFAULT_button.click(fn=combine_data, inputs=[
                    "陳○麗",
                    "國民身份證",
                    "O778541223",
                    "22",
                    "女",
                    "新北市中和區國凱街32之3號3樓",
                    "a5541105114@gmail.com",
                    "林學廉",
                    "國民身份證",
                    "P224155693",
                    "30",
                    "男",
                    "新北市板橋區龍泉街108巷9號2樓",
                    "c885445@gmail.com",
                    "林學廉與陳○麗原為同居之男女朋友，屬家庭暴力防治法所稱之家庭成員，前因感情糾紛。嗣於106年9月27日0時16分許，在上址屋內，林學廉酒後因 感情之事與陳○麗起口角，林學廉因而心生不滿，竟走出屋外持其事先預備之西瓜刀，插在腰間走入屋內，見陳○麗坐在床上直接過去砍她。"],
                    outputs=[msg] )
    """
    # Submit: pressing Enter in the message box and clicking the Submit button
    # run the same chain -- `user` first (unqueued), then `bot` on the chat history.
    for register_event in (msg.submit, submit_button.click):
      register_event(user, [msg, chatbot], [msg, chatbot], queue=False).then(fn=bot, inputs=chatbot, outputs=chatbot)

    # Length of the password typed so far
    def welcome(pw):
      """Return the text 'Password Length:' followed by the number of characters in `pw`."""
      return f"Password Length:{len(pw)}"

    # Login check
    def confirm_ac_pw(ac, pw):
      """Check the account/password pair against acpw_dict.

      On a match, returns updates that report success and reveal the three
      hidden form columns. Otherwise only the system message is updated.
      (The original printed a blank line for every non-matching account;
      that stray output is gone.)
      """
      authenticated = any(entry['ac'] == ac and entry['pw'] == pw for entry in acpw_dict)
      if authenticated:
          return {sysoutput:"登入成功", plaintiff_ui:gr.Column(visible=True), defendant_ui:gr.Column(visible=True), detail_ui:gr.Column(visible=True)}
      return {sysoutput:"登入失敗"}
    # Registration
    def register_ac_pw(ac, pw):
      """Add a new account to acpw_dict and return a status message.

      Empty credentials and account names that already exist are rejected.
      Without the second check, registering an existing name added another
      valid password for it.
      """
      if not ac or not pw:
          return "註冊失敗：帳號與密碼不可為空"
      if any(entry['ac'] == ac for entry in acpw_dict):
          return "註冊失敗：帳號已存在"
      acpw_dict.append({'ac': ac, 'pw': pw})
      return "註冊成功"


    # Register and log in (for testing)
    def logi_register_ac_pw(ac, pw):
      """Store the account/password pair and immediately reveal the form.

      Testing shortcut: always reports success. The pair is appended only when
      an identical record is not stored yet, so repeated clicks no longer grow
      acpw_dict with duplicates.
      """
      credentials = {'ac': ac, 'pw': pw}
      if credentials not in acpw_dict:
          acpw_dict.append(credentials)
      return {sysoutput:"註冊且登入成功", plaintiff_ui:gr.Column(visible=True), defendant_ui:gr.Column(visible=True), detail_ui:gr.Column(visible=True)}

    #登入介面功能
    logi.click(confirm_ac_pw, [ac, pw], [sysoutput, plaintiff_ui, defendant_ui, detail_ui])
    regi.click(register_ac_pw, [ac, pw], [sysoutput])
    logi_regi.click(logi_register_ac_pw, [ac, pw], [sysoutput, plaintiff_ui, defendant_ui, detail_ui])
    pw.change(welcome, pw , sysoutput)


acpw.queue()
if __name__ == "__main__":
  #啟動UI
  acpw.launch(show_api=False)

In [5]:
import torch

# Environment report, one value per line: torch version, whether CUDA is
# usable, the CUDA version torch was built with, and the cuDNN version.
for reported_value in (
    torch.__version__,
    torch.cuda.is_available(),
    torch.version.cuda,
    torch.backends.cudnn.version(),
):
    print(reported_value)

2.2.1+cpu
False
None
None


In [2]:
import tensorflow as tf

# Report the TensorFlow version, then the GPU devices it can see.
print(tf.__version__)
gpu_devices = tf.config.list_physical_devices('GPU')
print(gpu_devices)

2.12.0
[]


In [3]:
# Check the GPU in use and its specs by running the nvidia-smi command-line
# tool in a shell.
!nvidia-smi

Wed Mar 20 20:03:35 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 531.79                 Driver Version: 531.79       CUDA Version: 12.1     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                      TCC/WDDM | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf            Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA GeForce RTX 4090       WDDM | 00000000:01:00.0 Off |                  Off |
|  0%   40C    P8               14W / 450W|      0MiB / 24564MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [2]:
!pip install transformers torch accelerate



In [3]:
!apt-get update && apt-get install -y git

'apt-get' 不是內部或外部命令、可執行的程式或批次檔。


In [7]:
!pip install packaging ninja
!pip install flash-attn

Collecting ninja
  Obtaining dependency information for ninja from https://files.pythonhosted.org/packages/b6/2f/a3bc50fa63fc4fe9348e15b53dc8c87febfd4e0c660fcf250c4b19a3aa3b/ninja-1.11.1.1-py2.py3-none-win_amd64.whl.metadata
  Downloading ninja-1.11.1.1-py2.py3-none-win_amd64.whl.metadata (5.4 kB)
Downloading ninja-1.11.1.1-py2.py3-none-win_amd64.whl (312 kB)
   ---------------------------------------- 0.0/313.0 kB ? eta -:--:--
   --------- ------------------------------ 71.7/313.0 kB 2.0 MB/s eta 0:00:01
   ---------------------------------------- 313.0/313.0 kB 4.8 MB/s eta 0:00:00
Installing collected packages: ninja
Successfully installed ninja-1.11.1.1
Collecting flash-attn
  Downloading flash_attn-2.5.6.tar.gz (2.5 MB)
     ---------------------------------------- 0.0/2.5 MB ? eta -:--:--
      --------------------------------------- 0.0/2.5 MB 991.0 kB/s eta 0:00:03
     ---- ----------------------------------- 0.3/2.5 MB 3.8 MB/s eta 0:00:01
     ------------ -----------------

  error: subprocess-exited-with-error
  
  × python setup.py egg_info did not run successfully.
  │ exit code: 1
  ╰─> [22 lines of output]
      fatal: not a git repository (or any of the parent directories): .git
      
      
      torch.__version__  = 2.2.1+cpu
      
      
      Traceback (most recent call last):
        File "<string>", line 2, in <module>
        File "<pip-setuptools-caller>", line 34, in <module>
        File "C:\Users\Miz\AppData\Local\Temp\pip-install-28cma6uk\flash-attn_c792f56c13704d4d81c3d9d79928eb37\setup.py", line 133, in <module>
          CUDAExtension(
        File "C:\Users\Miz\anaconda3\Lib\site-packages\torch\utils\cpp_extension.py", line 1074, in CUDAExtension
          library_dirs += library_paths(cuda=True)
                          ^^^^^^^^^^^^^^^^^^^^^^^^
        File "C:\Users\Miz\anaconda3\Lib\site-packages\torch\utils\cpp_extension.py", line 1208, in library_paths
          paths.append(_join_cuda_home(lib_dir))
                       ^^

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Load exactly ONE checkpoint into `model`. The original loaded the Instruct
# model and then immediately replaced it with the Base model, paying for two
# loads and leaving the Base model behind even though the following cells
# build chat prompts with the Instruct tokenizer.

# Instruction Model
model = AutoModelForCausalLM.from_pretrained(
    "MediaTek-Research/Breeze-7B-Instruct-v1_0",
    device_map="auto",
    torch_dtype=torch.float16,
    # attn_implementation="flash_attention_2" # optional
)

# Basemodel (alternative -- enable INSTEAD of the Instruct model above)
# model = AutoModelForCausalLM.from_pretrained(
#     "MediaTek-Research/Breeze-7B-Base-v1_0",
#     device_map="auto",
#     torch_dtype=torch.float16,
#     # attn_implementation="flash_attention_2" # optional
# )

In [None]:
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("MediaTek-Research/Breeze-7B-Instruct-v1_0")

# Three-turn sample conversation: user, assistant, user.
chat = [
  dict(role="user", content="你好，請問你可以完成什麼任務？"),
  dict(role="assistant", content="你好，我可以幫助您解決各種問題、提供資訊和協助您完成許多不同的任務。例如：回答技術問題、提供建議、翻譯文字、尋找資料或協助您安排行程等。請告訴我如何能幫助您。"),
  dict(role="user", content="太棒了！"),
]
# Show the rendered prompt as text (tokenize=False) to inspect the template.
tokenizer.apply_chat_template(chat, tokenize=False)

In [None]:
# Tokenize the chat and move it to the model's device. The model was loaded
# with device_map="auto", while the tokenizer returns CPU tensors.
chat_input_ids = tokenizer.apply_chat_template(chat, return_tensors="pt").to(model.device)

outputs = model.generate(chat_input_ids,
                         # adjust below parameters if necessary
                         # (top_p / top_k / temperature only take effect when
                         # sampling is enabled with do_sample=True)
                         max_new_tokens=128,
                         top_p=0.01,
                         top_k=85,
                         repetition_penalty=1.1,
                         temperature=0.01)

print(tokenizer.decode(outputs[0]))

In [None]:
# (OPTIONAL)
# Pull one test prompt out of the gen_sample.xlsx sheet.
import pandas

df = pandas.read_excel("gen_sample.xlsx")
# Scalar lookup: row label 9, column 'prompt-claude3-Opus'.
test_prompt = df.at[9, 'prompt-claude3-Opus']
print(test_prompt)

In [None]:
# Inference based on the prompt read from the excel file
messages = [
    {"role": "user", "content": "你的中文能力如何?"},
    {"role": "assistant", "content": "我可以進行流暢的中文對話，請儘管發問!"},
    # The extra user turn below was missing its trailing comma (a SyntaxError)
    # and would also put two user turns back to back. It is disabled here, as
    # in the earlier hand-written inference cell.
    #{"role": "user", "content": "你知道一份法律起訴書是什麼樣的嗎？"},
    {"role": "user", "content": test_prompt}
]
# Follow the model's actual device instead of a hard-coded "cuda".
input_ids = tokenizer.apply_chat_template(messages, return_tensors="pt").to(model.device)
outputs = model.generate(input_ids, max_new_tokens=500)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))