In [1]:
!pip install -q bitsandbytes accelerate transformers datasets peft trl


[notice] A new release of pip is available: 25.1 -> 25.1.1
[notice] To update, run: C:\Users\tmdgn\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.12_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip


In [1]:
from huggingface_hub import login
login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [2]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, TrainingArguments, Trainer
from peft import prepare_model_for_kbit_training, LoraConfig, get_peft_model
from datasets import load_dataset
import torch

MODEL_ID = "capston-team-5/finma-7b-4bit-quantized"
DATA_URL = ["https://raw.githubusercontent.com/Stock-XAI/Data/refs/heads/main/regression/kospi_10_days_reg_output_.jsonl",
            "https://raw.githubusercontent.com/Stock-XAI/Data/refs/heads/main/regression/nasdaq_10_days_reg_output.jsonl"]

In [4]:
dataset = load_dataset("json", data_files=DATA_URL, split="train")

# 간단한 포맷 확인
print(dataset[0])

# 텍스트 필드를 'prompt'로 추정 (예시로 "instruction", "output" 등의 필드를 하나로 합침)
def format_prompt(example):
    return {
        "text": f"### Input:\n{example['instruction']}\n\n### Output:\n{example['output']}"
    }

dataset = dataset.map(format_prompt)

{'instruction': 'Using the context below, estimate the rate of change in the closing price of 삼성전자 on 2022-03-20.\n    Return the expected value of change as a decimal.\n    \n    Context: date, open, high, low, close, volume, change.\n    2022-01-09, 79400, 79800, 76400, 78300, 79495879, -0.012771\n2022-01-16, 78100, 79600, 77100, 77300, 58155173, -0.021992\n2022-01-23, 77600, 77800, 74700, 75600, 54459256, -0.030423\n2022-01-30, 75400, 75800, 71200, 73300, 88076792, 0.00955\n2022-02-06, 74900, 74900, 73300, 74000, 30474755, 0.012162\n2022-02-13, 73500, 75800, 72400, 74900, 65628066, -0.008011\n2022-02-20, 74400, 75600, 73100, 74300, 57808614, -0.032301\n2022-02-27, 73200, 74300, 71300, 71900, 61507917, -0.005563\n2022-03-06, 71100, 73100, 71000, 71500, 56905485, -0.020979\n2022-03-13, 70000, 71200, 68700, 70000, 71392310, 0.01\n    Answer:', 'output': -0.01273}


In [18]:
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16
)

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, use_fast=True)
tokenizer.pad_token = tokenizer.eos_token  # Padding 설정

model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True
)

model = prepare_model_for_kbit_training(model)

ValueError: Some modules are dispatched on the CPU or the disk. Make sure you have enough GPU RAM to fit the quantized model. If you want to dispatch the model on the CPU or the disk while keeping these modules in 32-bit, you need to set `llm_int8_enable_fp32_cpu_offload=True` and pass a custom `device_map` to `from_pretrained`. Check https://huggingface.co/docs/transformers/main/en/main_classes/quantization#offload-between-cpu-and-gpu for more details. 

In [9]:
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel, PeftConfig

# 양자화 설정
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16
)

# 토크나이저 불러오기
tokenizer = AutoTokenizer.from_pretrained("./finma-7b-lora", use_fast=True)
tokenizer.pad_token = tokenizer.eos_token

# LoRA base 모델 로드
config = PeftConfig.from_pretrained("./finma-7b-lora")
base_model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True
)

# LoRA 가중치 적용
model = PeftModel.from_pretrained(base_model, "./finma-7b-lora")

model = prepare_model_for_kbit_training(model)

In [10]:
lora_config = LoraConfig(
    r=32,
    lora_alpha=64,
    target_modules=["q_proj", "v_proj", "k_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

trainable params: 79,953,920 || all params: 6,818,369,536 || trainable%: 1.1726


In [11]:
def tokenize_fn(example):
    return tokenizer(example["text"], truncation=True, padding="max_length", max_length=512)

tokenized_dataset = dataset.map(tokenize_fn, remove_columns=["text"])

from transformers import DataCollatorForLanguageModeling
data_collator = DataCollatorForLanguageModeling(tokenizer, mlm=False)

In [12]:
training_args = TrainingArguments(
    output_dir="./finma-7b-lora",
    per_device_train_batch_size=2,
    gradient_accumulation_steps=8,
    logging_steps=100,
    num_train_epochs=3,
    learning_rate=2e-4,
    lr_scheduler_type="cosine",
    warmup_ratio=0.1,
    fp16=True,
    gradient_checkpointing=True,
    save_strategy="epoch",
    save_total_limit=2,
    save_safetensors=True,
    report_to="none",
    optim="paged_adamw_8bit",
    torch_compile=False
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    tokenizer=tokenizer,
    data_collator=data_collator
)

trainer.train()

  trainer = Trainer(
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Step,Training Loss
100,1.2131
200,0.8442
300,0.8211
400,0.7927
500,0.7883
600,0.7614
700,0.7666
800,0.7626
900,0.7447
1000,0.7543


TrainOutput(global_step=3222, training_loss=0.4983611863862647, metrics={'train_runtime': 49868.4175, 'train_samples_per_second': 1.035, 'train_steps_per_second': 0.065, 'total_flos': 1.0594630917936906e+18, 'train_loss': 0.4983611863862647, 'epoch': 2.9991859518548667})

In [3]:
DATA_URL = ["https://raw.githubusercontent.com/Stock-XAI/Data/refs/heads/main/regression/kospi_10_weeks_reg_output_.jsonl",
            "https://raw.githubusercontent.com/Stock-XAI/Data/refs/heads/main/regression/nasdaq_10_weeks_reg_output.jsonl",
            "https://raw.githubusercontent.com/Stock-XAI/Data/refs/heads/main/regression/kospi_10_months_reg_output_.jsonl",
            "https://raw.githubusercontent.com/Stock-XAI/Data/refs/heads/main/regression/nasdaq_10_months_reg_output.jsonl"]

In [None]:
DATA_URL = ["https://raw.githubusercontent.com/Stock-XAI/Data/refs/heads/main/regression/kospi_10_months_reg_output_.jsonl",
            "https://raw.githubusercontent.com/Stock-XAI/Data/refs/heads/main/regression/nasdaq_10_months_reg_output.jsonl"]

In [13]:
model.save_pretrained("./finma-7b-lora")
tokenizer.save_pretrained("./finma-7b-lora")

('./finma-7b-lora\\tokenizer_config.json',
 './finma-7b-lora\\special_tokens_map.json',
 './finma-7b-lora\\tokenizer.model',
 './finma-7b-lora\\added_tokens.json',
 './finma-7b-lora\\tokenizer.json')

In [None]:
!pip install -q finance-datareader

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/48.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m48.2/48.2 kB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
[?25h

In [15]:
import FinanceDataReader as fdr
from datetime import datetime, timedelta

# 날짜 설정
today = datetime.today()
start_date = (today - timedelta(days=14)).strftime('%Y-%m-%d')

# 삼성전자 데이터 로딩
# df = fdr.DataReader("005930", start=start_date)
df = fdr.DataReader("000660", start=start_date)

# 수익률 계산 (전일 대비 종가 변화율)
df['Change'] = df['Close'].pct_change().fillna(0) * 100

# 최근 10일 데이터만 추출
last_10 = df.tail(10)

# context 문자열 생성
context = "\n".join([
    f"{idx.strftime('%Y-%m-%d')}, {int(row['Open'])}, {int(row['High'])}, {int(row['Low'])}, "
    f"{int(row['Close'])}, {int(row['Volume'])}, {row['Change']:.2f}"
    for idx, row in last_10.iterrows()
])

In [16]:
prompt = (
    "Using the context below, estimate the rate of change in the closing price of SK하이닉스 on 2025-05-24.\n"
    "Return the expected value of change as a decimal.\n\n"
    "Context: date, open, high, low, close, volume, change.\n"
    f"{context}\n\nAnswer:"
)
print(prompt)

Using the context below, estimate the rate of change in the closing price of SK하이닉스 on 2025-05-24.
Return the expected value of change as a decimal.

Context: date, open, high, low, close, volume, change.
2025-05-27, 200500, 203500, 200000, 202500, 1424916, 0.00
2025-05-28, 206500, 209500, 206000, 208000, 3161188, 2.72
2025-05-29, 214500, 214500, 207500, 212000, 3827689, 1.92
2025-05-30, 210000, 210000, 203000, 204500, 4826628, -3.54
2025-06-02, 205000, 208500, 203000, 207500, 1775752, 1.47
2025-06-04, 218000, 223000, 216500, 217500, 5108674, 4.82
2025-06-05, 225500, 230000, 222000, 224500, 5768506, 3.22
2025-06-09, 233000, 233500, 228000, 229000, 3666084, 2.00
2025-06-10, 231000, 232500, 227500, 230500, 3217557, 0.66

Answer:


In [17]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel
import torch

base_model_path = "capston-team-5/finma-7b-4bit-quantized"  # 4bit 양자화 기반 모델
lora_model_path = "./finma-7b-lora"  # fine-tuned LoRA 모델 경로

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16
)

# tokenizer & base model
tokenizer = AutoTokenizer.from_pretrained(base_model_path)
model = AutoModelForCausalLM.from_pretrained(
    base_model_path,
    quantization_config=bnb_config,
    device_map="auto"
)

# LoRA 적용
model = PeftModel.from_pretrained(model, lora_model_path)
model.eval()



ValueError: Some modules are dispatched on the CPU or the disk. Make sure you have enough GPU RAM to fit the quantized model. If you want to dispatch the model on the CPU or the disk while keeping these modules in 32-bit, you need to set `llm_int8_enable_fp32_cpu_offload=True` and pass a custom `device_map` to `from_pretrained`. Check https://huggingface.co/docs/transformers/main/en/main_classes/quantization#offload-between-cpu-and-gpu for more details. 

In [None]:
inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
with torch.no_grad():
    output = model.generate(
        **inputs,
        max_new_tokens=50,
        do_sample=True,
        temperature=0.7,
        top_p=0.95,
        eos_token_id=tokenizer.eos_token_id
    )

generated = tokenizer.decode(output[0], skip_special_tokens=True)
print("\n===== Model Output =====\n")
print(generated.replace(prompt, "").strip())


===== Model Output =====

-0.013746
2025-05-24, 199800, 204000, 199800, 1971


In [14]:
from huggingface_hub import HfApi, HfFolder
from transformers import AutoTokenizer
from peft import PeftModel

# 업로드할 디렉토리 (LoRA 가중치가 저장된 경로)
lora_output_dir = "./finma-7b-lora"

# Hugging Face에 생성할 모델 이름
repo_name = "finma-7b-lora-regression-v2"  # 원하는 이름으로 변경 가능
hf_username = "capston-team-5"     # 본인의 HF 유저 이름으로 변경

# ✅ 토크나이저 저장 (선택적)
tokenizer.save_pretrained(lora_output_dir)

# ✅ 허깅페이스에 업로드
api = HfApi()

# 리포지토리 생성 (이미 존재하면 생략 가능)
api.create_repo(
    repo_id=f"{hf_username}/{repo_name}",
    private=True  # public으로 만들고 싶다면 False로 설정
)

# 업로드
from huggingface_hub import upload_folder

upload_folder(
    repo_id=f"{hf_username}/{repo_name}",
    folder_path=lora_output_dir,
    path_in_repo=".",  # 루트에 업로드
    commit_message="Upload LoRA 4bit adapter and tokenizer",
    # ignore_patterns=["*.ipynb", "*.pt", "*.bin"],  # 불필요한 파일 제외
)

Upload 16 LFS files:   0%|          | 0/16 [00:00<?, ?it/s]

adapter_model.safetensors:   0%|          | 0.00/320M [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/320M [00:00<?, ?B/s]

optimizer.pt:   0%|          | 0.00/163M [00:00<?, ?B/s]

scaler.pt:   0%|          | 0.00/988 [00:00<?, ?B/s]

rng_state.pth:   0%|          | 0.00/14.2k [00:00<?, ?B/s]

scheduler.pt:   0%|          | 0.00/1.06k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

training_args.bin:   0%|          | 0.00/5.30k [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/320M [00:00<?, ?B/s]

optimizer.pt:   0%|          | 0.00/163M [00:00<?, ?B/s]

rng_state.pth:   0%|          | 0.00/14.2k [00:00<?, ?B/s]

scaler.pt:   0%|          | 0.00/988 [00:00<?, ?B/s]

scheduler.pt:   0%|          | 0.00/1.06k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

training_args.bin:   0%|          | 0.00/5.30k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/capston-team-5/finma-7b-lora-regression-v2/commit/d1659fd9e7fa801c7804cacfdea42875e56005f4', commit_message='Upload LoRA 4bit adapter and tokenizer', commit_description='', oid='d1659fd9e7fa801c7804cacfdea42875e56005f4', pr_url=None, repo_url=RepoUrl('https://huggingface.co/capston-team-5/finma-7b-lora-regression-v2', endpoint='https://huggingface.co', repo_type='model', repo_id='capston-team-5/finma-7b-lora-regression-v2'), pr_revision=None, pr_num=None)