In [8]:
import torch
# Ask PyTorch's CUDA caching allocator to release cached blocks it is not
# currently using. Tensors that are still referenced are not freed by this call.
torch.cuda.empty_cache()


In [1]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load the model and tokenizer.
# `os` is imported here so this cell stays self-contained.
import os

model_name = "meta-llama/Llama-3.1-8B"

# The repository is gated: access must be granted on the Hub and the request
# must be authenticated. The token is read from the environment so it is never
# hard-coded; when HF_TOKEN is unset, `None` falls back to a cached login.
hf_token = os.environ.get("HF_TOKEN")

# Half precision matches the other cells in this notebook and halves the
# memory needed compared with the default fp32 load.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    token=hf_token,
    torch_dtype=torch.float16,
)
tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_token)

# Move the model to the GPU and switch off training-only layers.
model.to("cuda")
model.eval()

# Prepare the input and move it to the GPU.
input_text = "Hello, how are you?"
encoded = tokenizer(input_text, return_tensors="pt")
input_tensor = encoded.input_ids.to("cuda")
attention_mask = encoded.attention_mask.to("cuda")

# Run inference. `generate` produces a continuation token by token; taking the
# argmax of a single forward pass only yields the next-token guess at each
# input position, which is not a generated reply.
MAX_NEW_TOKENS = 50
with torch.no_grad():
    output = model.generate(
        input_tensor,
        attention_mask=attention_mask,
        max_new_tokens=MAX_NEW_TOKENS,
        pad_token_id=tokenizer.eos_token_id,
    )

# Decode the generated sequence (prompt + continuation).
output_text = tokenizer.decode(output[0], skip_special_tokens=True)
print(output_text)


OSError: You are trying to access a gated repo.
Make sure to have access to it at https://huggingface.co/meta-llama/Llama-3.1-8B.
401 Client Error. (Request ID: Root=1-672a5474-3113fa440a865c2477bd13e0;160c74ba-87bc-412b-9f2a-c0e7da8d5ae7)

Cannot access gated repo for url https://huggingface.co/meta-llama/Llama-3.1-8B/resolve/main/config.json.
Access to model meta-llama/Llama-3.1-8B is restricted. You must have access to it and be authenticated to access it. Please log in.

In [6]:
import torch

# First line printed: True when CUDA is usable.
# Second line printed: the number of visible GPUs.
for probe in (torch.cuda.is_available, torch.cuda.device_count):
    print(probe())


True
1


In [4]:
import torch
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, get_linear_schedule_with_warmup
from peft import get_peft_model, LoraConfig, TaskType
from safetensors.torch import load_file
import os
from tqdm import tqdm
from torch.utils.data import DataLoader

def prepare_dataset(file_path, tokenizer, max_length=512):
    """Load a JSON dataset and tokenize it for causal-LM fine-tuning.

    Each record must provide ``prompt`` and ``completion`` fields. The prompt
    is wrapped as ``"Human: {prompt}\\nBart:"`` and encoded together with the
    completion as a text pair, padded/truncated to ``max_length``.

    Labels are a copy of ``input_ids`` where every position that should not
    contribute to the loss is set to ``-100``:

    * padding positions (``attention_mask == 0``), and
    * the prompt tokens, located relative to the first non-padding position
      so the mask is correct for both left- and right-padding tokenizers.

    When the tokenizer exposes an ``eos_token``, it is appended to every
    completion so that one real end-of-sequence token stays supervised after
    padding is masked out.
    NOTE(review): assumes the tokenizer maps its ``eos_token`` string to the
    EOS id when it appears in text - confirm for custom tokenizers.

    Args:
        file_path: Path passed to ``load_dataset('json', data_files=...)``.
        tokenizer: Callable tokenizer; must return ``input_ids`` and
            ``attention_mask``.
        max_length: Fixed sequence length after padding/truncation.

    Returns:
        The tokenized ``train`` split, formatted as torch tensors, with the
        original columns removed.

    Raises:
        KeyError: If a batch lacks the ``prompt`` or ``completion`` field.
    """
    dataset = load_dataset('json', data_files=file_path)['train']
    eos_text = getattr(tokenizer, "eos_token", None) or ""

    def tokenize_function(examples):
        if 'prompt' not in examples or 'completion' not in examples:
            raise KeyError("The dataset must have 'prompt' and 'completion' fields.")

        prompts = [f"Human: {q}\nBart:" for q in examples['prompt']]
        responses = [f"{answer}{eos_text}" for answer in examples['completion']]

        inputs = tokenizer(prompts, responses,
                           truncation=True, max_length=max_length,
                           padding="max_length", return_tensors="pt")
        attention_mask = inputs["attention_mask"]
        labels = inputs["input_ids"].clone()

        # Never train on padding.
        labels[attention_mask == 0] = -100

        # Count of leading pad positions per row: the running sum of the mask
        # stays at zero only until the first real token. Zero for right padding.
        leading_pad = (attention_mask.cumsum(dim=1) == 0).sum(dim=1).tolist()

        # Tokenize all prompts in one call (instead of one call per row) to
        # learn how many tokens each prompt occupies.
        prompt_lengths = [len(ids) for ids in tokenizer(prompts)["input_ids"]]

        for row, (start, length) in enumerate(zip(leading_pad, prompt_lengths)):
            labels[row, start:start + length] = -100

        inputs["labels"] = labels
        return inputs

    tokenized_dataset = dataset.map(tokenize_function, batched=True, remove_columns=dataset.column_names)

    # Return torch tensors when the dataset is indexed or batched.
    tokenized_dataset.set_format(type='torch')

    return tokenized_dataset

def load_model(model_name, peft_config, peft_model_path=None):
    """Load the base model in fp16 and wrap it with a LoRA/PEFT adapter.

    When ``peft_model_path`` contains ``adapter_model.safetensors`` the saved
    adapter weights are loaded into the freshly created adapter; otherwise the
    adapter keeps its initial weights.

    The weights are applied with PEFT's ``set_peft_model_state_dict`` rather
    than ``load_state_dict(..., strict=False)``: saved adapter keys do not
    carry the adapter name that the live model's keys contain, so a non-strict
    ``load_state_dict`` matches nothing and silently leaves the adapter
    untrained.

    Args:
        model_name: Name or path given to ``AutoModelForCausalLM.from_pretrained``.
        peft_config: PEFT configuration passed to ``get_peft_model``.
        peft_model_path: Optional directory holding a saved adapter.

    Returns:
        The PEFT-wrapped model.
    """
    # Local import keeps this function self-contained; `peft` is already a
    # dependency of this notebook.
    from peft import set_peft_model_state_dict

    base_model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.float16,
        device_map="auto"
    )
    model = get_peft_model(base_model, peft_config)

    adapter_file = (
        os.path.join(peft_model_path, "adapter_model.safetensors")
        if peft_model_path else None
    )
    if adapter_file and os.path.exists(adapter_file):
        print(f"Loading PEFT model from {peft_model_path}")
        adapter_weights = load_file(adapter_file)
        load_result = set_peft_model_state_dict(model, adapter_weights)
        # Surface mismatches instead of ignoring them: unexpected keys mean
        # part of the checkpoint was not applied to this adapter.
        unexpected = getattr(load_result, "unexpected_keys", None)
        if unexpected:
            print(f"Warning: {len(unexpected)} adapter weights did not match the model and were not loaded")
    else:
        print("Creating new PEFT model")

    model.print_trainable_parameters()
    return model

def fine_tune(model, dataset, output_dir, num_epochs, batch_size=1, learning_rate=1e-4, warmup_steps=100):
    """Run a plain AdamW fine-tuning loop and checkpoint after every epoch.

    Args:
        model: Model whose forward pass accepts the batch as keyword arguments
            and returns an object with a ``loss`` attribute; must also provide
            ``save_pretrained``.
        dataset: Dataset yielding dicts of tensors.
        output_dir: Directory under which ``checkpoint-epoch-N`` folders are
            written.
        num_epochs: Number of passes over ``dataset``.
        batch_size: Batch size for the ``DataLoader``.
        learning_rate: Peak learning rate for AdamW.
        warmup_steps: Linear warm-up steps for the scheduler (default matches
            the previously hard-coded value of 100).

    Returns:
        The trained model (same object that was passed in).

    Raises:
        ValueError: If ``dataset`` is empty.
    """
    if len(dataset) == 0:
        # Fail early with a clear message instead of a sampler error or a
        # division by zero when averaging the loss.
        raise ValueError("Cannot fine-tune on an empty dataset.")

    if getattr(model, "hf_device_map", None) is None:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        model.to(device)
    else:
        # NOTE(review): a model loaded with `device_map` has already been
        # placed; it is left where it is and batches follow its first
        # parameter - confirm this matches the intended placement.
        device = next(model.parameters()).device
    model.train()

    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
    steps_per_epoch = len(dataloader)

    # Only parameters that require gradients are handed to the optimizer.
    trainable_params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.AdamW(trainable_params, lr=learning_rate)
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=warmup_steps,
        num_training_steps=steps_per_epoch * num_epochs,
    )

    for epoch in range(num_epochs):
        total_loss = 0
        progress_bar = tqdm(dataloader, desc=f"Epoch {epoch+1}/{num_epochs}")
        for batch in progress_bar:
            # Move batch to the device the model runs on
            batch = {k: v.to(device) for k, v in batch.items()}

            loss = model(**batch).loss
            loss.backward()
            optimizer.step()
            scheduler.step()
            optimizer.zero_grad()

            # Read the scalar once; each `.item()` call copies it to the host.
            loss_value = loss.item()
            total_loss += loss_value
            progress_bar.set_postfix({"loss": loss_value})

        avg_loss = total_loss / steps_per_epoch
        print(f"Epoch {epoch+1}/{num_epochs}, Average Loss: {avg_loss:.4f}")

        # Save the model after each epoch
        model.save_pretrained(os.path.join(output_dir, f"checkpoint-epoch-{epoch+1}"))

    return model

def main():
    """Run the two-stage LoRA fine-tuning pipeline.

    Stage 1 trains an adapter on ``informal.jsonl`` and stores it in
    ``./informal_finetuned``; it is skipped when that directory already holds
    ``adapter_model.safetensors``. Stage 2 loads the stage-1 adapter, trains
    on ``bart.jsonl`` and stores the model and tokenizer in
    ``./bart_finetuned``.

    The base model location can be overridden with the ``BASE_MODEL_PATH``
    environment variable; the default is the original local path.
    """
    import gc

    model_name = os.environ.get("BASE_MODEL_PATH", "C:/Users/hoisaline/Meta-Llama-3.1-8B-bnb-4bit")
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    tokenizer.pad_token = tokenizer.eos_token

    peft_config = LoraConfig(
        task_type=TaskType.CAUSAL_LM,
        r=8,
        lora_alpha=32,
        target_modules=["q_proj", "v_proj"],
        lora_dropout=0.05,
    )

    # Stage 1: Informal language fine-tuning
    informal_model_path = "./informal_finetuned"
    if not os.path.exists(os.path.join(informal_model_path, "adapter_model.safetensors")):
        print("Stage 1: Informal language fine-tuning")
        informal_dataset = prepare_dataset('informal.jsonl', tokenizer)
        model = load_model(model_name, peft_config)
        model = fine_tune(model, informal_dataset, informal_model_path, num_epochs=3)

        # fine_tune only writes per-epoch checkpoint subfolders. Save the final
        # adapter at the top level too, so the skip check above and the
        # stage-2 load below can actually find it.
        model.save_pretrained(informal_model_path)

        # Drop the stage-1 model before stage 2 loads the base model again, so
        # two copies are not held in memory at the same time.
        del model
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
    else:
        print("Skipping Stage 1: Informal fine-tuned model already exists")

    # Stage 2: Bart Simpson-specific fine-tuning
    print("Stage 2: Bart Simpson-specific fine-tuning")
    bart_dataset = prepare_dataset('bart.jsonl', tokenizer)

    bart_model_path = "./bart_finetuned"
    model = load_model(model_name, peft_config, informal_model_path)
    model = fine_tune(model, bart_dataset, bart_model_path, num_epochs=5)

    # Save the final model
    model.save_pretrained(bart_model_path)
    tokenizer.save_pretrained(bart_model_path)

    print("Fine-tuning completed. Model saved.")

if __name__ == "__main__":
    main()

Stage 1: Informal language fine-tuning


Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.


RuntimeError: Failed to import transformers.integrations.bitsandbytes because of the following error (look up to see its traceback):
[WinError 193] %1 不是有效的 Win32 應用程式。

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel, PeftConfig
import os

def load_model_and_tokenizer(model_path):
    """Load a saved PEFT adapter on top of its base model, plus the tokenizer.

    The adapter configuration stored at ``model_path`` names the base model;
    that name is used for both the fp16 base model and the tokenizer.

    Args:
        model_path: Directory containing the saved PEFT adapter.

    Returns:
        A ``(model, tokenizer)`` tuple; the tokenizer's pad token is set to
        its EOS token.
    """
    # The adapter config records which base checkpoint it was trained on.
    adapter_config = PeftConfig.from_pretrained(model_path)

    # Base weights in half precision, placed automatically.
    load_options = {"torch_dtype": torch.float16, "device_map": "auto"}
    backbone = AutoModelForCausalLM.from_pretrained(
        adapter_config.base_model_name_or_path, **load_options
    )

    # Attach the fine-tuned adapter to the base model.
    adapted_model = PeftModel.from_pretrained(backbone, model_path)

    # Tokenizer of the base model, with EOS reused as the padding token.
    base_tokenizer = AutoTokenizer.from_pretrained(adapter_config.base_model_name_or_path)
    base_tokenizer.pad_token = base_tokenizer.eos_token

    return adapted_model, base_tokenizer

def generate_response(model, tokenizer, prompt, max_length=200):
    """Generate Bart's reply to a single user prompt.

    The prompt is wrapped as ``"Human: {prompt}\\nBart:"``, the model
    continues it, and only the newly generated tokens are decoded. Anything
    from a following ``"\\nHuman:"`` turn onward is discarded so that a single
    reply is returned.

    Args:
        model: Model exposing ``generate`` and ``device``.
        tokenizer: Tokenizer exposing ``encode``, ``decode`` and
            ``pad_token_id``.
        prompt: The user's message.
        max_length: Upper bound passed to ``generate`` as ``max_length``
            (prompt tokens count towards it).

    Returns:
        The reply text with surrounding whitespace stripped.
    """
    # Prepare the input
    input_ids = tokenizer.encode(f"Human: {prompt}\nBart:", return_tensors="pt").to(model.device)
    # A single, unpadded sequence: every position is a real token.
    attention_mask = torch.ones_like(input_ids)

    # Generate the response
    with torch.no_grad():
        output = model.generate(
            input_ids,
            attention_mask=attention_mask,
            max_length=max_length,
            num_return_sequences=1,
            no_repeat_ngram_size=2,
            # `temperature` only takes effect when sampling is enabled.
            do_sample=True,
            temperature=0.7,
            pad_token_id=getattr(tokenizer, "pad_token_id", None),
        )

    # Decode only what the model added after the prompt. Splitting the full
    # text on "Bart:" picks the wrong segment when the model writes further
    # "Human:/Bart:" turns.
    generated_ids = output[0][input_ids.shape[-1]:]
    response = tokenizer.decode(generated_ids, skip_special_tokens=True)

    # Keep the first reply only.
    response = response.split("\nHuman:")[0].strip()

    return response

def main():
    """Interactive chat loop against the fine-tuned model.

    Loads the model from ``./bart_finetuned`` and answers each line the user
    types until they enter ``exit`` (case-insensitive, surrounding whitespace
    ignored) or close/interrupt the input. Blank lines are skipped.
    """
    # Path to your fine-tuned model
    model_path = "./bart_finetuned"

    # Load the model and tokenizer
    model, tokenizer = load_model_and_tokenizer(model_path)

    print("Model loaded. You can now ask Docker-related questions. Type 'exit' to quit.")

    while True:
        try:
            user_input = input("\nYou: ").strip()
        except (EOFError, KeyboardInterrupt):
            # End the session quietly when input is closed or interrupted.
            print()
            break

        if not user_input:
            # Nothing to ask; prompt again instead of querying the model.
            continue
        if user_input.lower() == 'exit':
            break

        response = generate_response(model, tokenizer, user_input)
        print(f"Bart: {response}")

if __name__ == "__main__":
    main()

Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.


Model loaded. You can now ask Docker-related questions. Type 'exit' to quit.



You:  what is Docker


Bart: "Docker? It's like a box that stuff lives in, but it's virtual. You can move it around and stuff. It sounds dangerous, though."



You:  What's a Docker container, dude?


Bart: It's like a little boat, man. You put stuff in it, and it floats around in the big Docker ocean.



You:  How do you start a Docker image?


Bart: You gotta use the Dockerfile, man. It's like your recipe for building the image. You tell Docker what's in there, and it's magic!



You:  Can you explain Docker volumes to me?


Bart: Sure thing, mate. Docker Volumes are like secret storage spaces where you can keep your important files safe. Think of them as a locker in a busy mall, where all your stuff is hidden from prying eyes. You can store your files in the volume, and Docker will take care of keeping them safe and sound.



You:  What's the difference between Docker and a virtual machine?


Bart: Umm... virtual machines are like, the old-school way of doing things, man. Docker is the new, cooler way to do containerization. It's like... having your own private spaceship, but you can fly it on the Internet!



You:  How do you delete a Docker container?


Bart: Hold up, mate. You want to delete something?
