In [None]:
!pip install transformers
!pip install torch
!pip install huggingface_hub

In [None]:
!pip install --upgrade transformers
!pip install --upgrade torch

In [None]:
# Prompt to enter your Hugging Face token securely
import os
os.environ["HF_TOKEN"] = input("Enter your Hugging Face token: ")

In [None]:
from huggingface_hub import login
login(token=os.getenv("HF_TOKEN"), add_to_git_credential=True)

In [None]:
import torch
import json
from transformers import AutoTokenizer, AutoModelForCausalLM

# ===== Model Initialization =====
model_id = "meta-llama/Llama-3.2-3B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16, device_map="auto")

# Set padding token
tokenizer.pad_token = tokenizer.eos_token

# ===== Global Variables =====
order_session = []
conversation_history = []
order_confirmed = False

# ===== Helper Functions =====

# Generate a prompt with dynamic context
def generate_prompt(user_input):
    formatted_history = "\n".join(conversation_history[-5:])  # Use last 5 entries for context
    prompt = (
        f"현재 대화 기록:\n{formatted_history}\n"
        f"고객의 새로운 입력: '{user_input}'\n"
        "주문을 처리하고, 수정, 추가 요청을 반영하세요. "
        "JSON 형식으로 출력하고, 사용자에게 한국어로 자연스러운 응답을 생성하세요."
    )
    return prompt

# Model inference to generate response
def model_inference(prompt):
    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True)
    inputs = inputs.to("cuda" if torch.cuda.is_available() else "cpu")

    with torch.no_grad():
        output = model.generate(
            inputs.input_ids,
            attention_mask=inputs.attention_mask,
            max_length=300,
            temperature=0.7,
            pad_token_id=tokenizer.eos_token_id
        )
    response = tokenizer.decode(output[0], skip_special_tokens=True)
    return response.strip()

# Extract JSON data from model output
def extract_json_response(model_response):
    try:
        response_data = model_response.split("\n")[-1]  # Assume JSON is the last line of response
        return json.loads(response_data)
    except (json.JSONDecodeError, IndexError):
        return None

# Update order session based on extracted JSON
def update_order_session(json_data):
    global order_session
    action = json_data.get("action")

    if action == "create_order" or action == "add_item":
        order_session.extend(json_data["order_items"])
        return "새로운 주문이 추가되었습니다."

    elif action == "modify_order":
        for item in order_session:
            if item["drink"] == json_data["old_drink"]:
                item.update({
                    "drink": json_data["new_drink"],
                    "size": json_data["size"],
                    "temperature": json_data["temperature"],
                    "quantity": json_data["quantity"],
                    "add_ons": json_data.get("add_ons", []),
                    "extra_shots": json_data.get("extra_shots", 0)
                })
        return "주문이 수정되었습니다."

    elif action == "cancel_order":
        order_session = []
        return "주문이 취소되었습니다."

    elif action == "recommend_closest_item":
        return f"죄송합니다, '{json_data['requested_item']}'은(는) 메뉴에 없습니다. 대신 '{json_data['recommended_item']}'을 추천드립니다."

    elif action == "show_order_summary":
        return summarize_order()

    elif action == "complete_order":
        return "주문이 완료되었습니다. 결제는 카드리더기를 사용해주세요. 감사합니다."

    return "알 수 없는 요청입니다."

# Summarize the current order session
def summarize_order():
    if not order_session:
        return "현재 주문 내역이 없습니다."

    summary = "지금까지 주문하신 내용은 다음과 같습니다:\n"
    for idx, item in enumerate(order_session, start=1):
        summary += (
            f"{idx}. {item['drink']} ({item['temperature']}, {item['size']}) "
            f"{item['quantity']}잔"
        )
        if item.get("add_ons"):
            summary += f" - 추가 옵션: {', '.join(item['add_ons'])}"
        if item.get("extra_shots"):
            summary += f" - 샷 추가: {item['extra_shots']}샷"
        summary += "\n"

    summary += "추가 주문이 있으면 말씀해주세요, 아니면 결제를 진행할까요?"
    return summary

# Main function to handle user input
def handle_order(user_input):
    global conversation_history
    conversation_history.append(f"사용자: {user_input}")

    prompt = generate_prompt(user_input)
    model_response = model_inference(prompt)
    json_data = extract_json_response(model_response)

    if json_data:
        response_message = update_order_session(json_data)
        conversation_history.append(f"키오스크: {response_message}")
        return response_message

    return "죄송합니다, 요청을 이해하지 못했습니다. 다시 말씀해주세요."

# Main application loop
def main():
    print("가상 커피 키오스크에 오신 것을 환영합니다!")
    print("원하시는 주문을 말씀해주세요.")

    while not order_confirmed:
        user_input = input("고객: ")
        response = handle_order(user_input)
        print("키오스크:", response)

    print("주문이 완료되었습니다. 결제는 카드리더기를 사용해주세요. 감사합니다!")

# Execute the main application
if __name__ == "__main__":
    main()


In [None]:
# ===== Step 1: Install Required Libraries =====
!pip install torch transformers datasets accelerate bitsandbytes
!pip install transformers accelerate datasets huggingface_hub

In [3]:
# ===== Step 1: Install Required Libraries =====
!pip install transformers datasets accelerate huggingface_hub --upgrade



In [8]:
# ===== Step 2: Import Libraries =====
from transformers import LlamaForCausalLM, AutoTokenizer, Trainer, TrainingArguments
from datasets import load_dataset
from huggingface_hub import login
import pandas as pd
import torch

# ===== Step 3: Login to Hugging Face =====
login(token="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", add_to_git_credential=True)

# ===== Step 4: Load and Preprocess Dataset =====
data_path = "/content/dataset.txt"

# Read the dataset
with open(data_path, "r", encoding="utf-8") as f:
    lines = f.readlines()

# Convert to a DataFrame
data = pd.DataFrame(lines, columns=["text"])

# Convert to Hugging Face Dataset
dataset = load_dataset("text", data_files=data_path)

# Use AutoTokenizer for compatibility
model_name = "meta-llama/Llama-3.2-3B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Set padding token if not defined
if tokenizer.pad_token is None:
    tokenizer.add_special_tokens({"pad_token": "[PAD]"})

# Tokenize the dataset
def tokenize_data(example):
    tokenized = tokenizer(example["text"], truncation=True, padding="max_length", max_length=128)
    tokenized["labels"] = tokenized["input_ids"].copy()  # Add labels for computing the loss
    return tokenized

tokenized_dataset = dataset.map(tokenize_data, batched=True)

# Split the dataset into train and evaluation sets
train_dataset = tokenized_dataset["train"].shuffle(seed=42).select(range(int(len(tokenized_dataset["train"]) * 0.8)))
eval_dataset = tokenized_dataset["train"].shuffle(seed=42).select(range(int(len(tokenized_dataset["train"]) * 0.8), len(tokenized_dataset["train"])))

# ===== Step 5: Initialize Model and Training Arguments =====
model = LlamaForCausalLM.from_pretrained(model_name)

# Resize model embeddings to accommodate new special tokens
model.resize_token_embeddings(len(tokenizer))

# Define training arguments
training_args = TrainingArguments(
    output_dir="/content/llama-fine-tuned",
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    learning_rate=1e-5,
    num_train_epochs=3,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    logging_dir="/content/logs",
    push_to_hub=True,
    hub_model_id="nishthalath/llama-3b-fine-tuned-kiosk",
    hub_token="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",
)

# Initialize the Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    tokenizer=tokenizer,
)

# ===== Step 6: Fine-Tune the Model =====
print("Starting model fine-tuning...")
trainer.train()

# ===== Step 7: Save and Push the Model to Hugging Face Hub =====
print("Uploading the model to Hugging Face Hub...")

# Push the model and tokenizer to the Hugging Face Hub
model.push_to_hub("nishthalath/llama-3b-fine-tuned-kiosk", use_auth_token="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")
tokenizer.push_to_hub("nishthalath/llama-3b-fine-tuned-kiosk", use_auth_token="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")

print("Model uploaded successfully to Hugging Face Hub!")


Map:   0%|          | 0/319 [00:00<?, ? examples/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

  trainer = Trainer(


Starting model fine-tuning...


OutOfMemoryError: CUDA out of memory. Tried to allocate 96.00 MiB. GPU 0 has a total capacity of 39.56 GiB of which 42.81 MiB is free. Process 146371 has 39.51 GiB memory in use. Of the allocated memory 38.57 GiB is allocated by PyTorch, and 456.88 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)