In [1]:
!pip install -q -U torch datasets transformers tensorflow langchain playwright html2text sentence_transformers faiss-cpu
!pip install -q accelerate==0.21.0 peft==0.4.0 bitsandbytes==0.40.2 trl==0.4.7

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m755.5/755.5 MB[0m [31m1.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m536.6/536.6 kB[0m [31m42.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.4/8.4 MB[0m [31m41.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m475.2/475.2 MB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m815.9/815.9 kB[0m [31m46.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m37.4/37.4 MB[0m [31m35.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m132.8/132.8 kB[0m [31m16.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.6/17.6 MB[0m [31m35.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━

In [2]:
import os
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    pipeline
)
from datasets import load_dataset
from peft import LoraConfig, PeftModel

from langchain.text_splitter import CharacterTextSplitter
from langchain.document_transformers import Html2TextTransformer
from langchain.document_loaders import AsyncChromiumLoader

from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

from langchain.prompts import PromptTemplate
from langchain.schema.runnable import RunnablePassthrough
from langchain.llms import HuggingFacePipeline
from langchain.chains import LLMChain

In [15]:
#################################################################
# Tokenizer
#################################################################

model_name='maywell/Synatra-7B-v0.3-base'

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

#################################################################
# bitsandbytes parameters
#################################################################

# Activate 4-bit precision base model loading
use_4bit = True

# Compute dtype for 4-bit base models
bnb_4bit_compute_dtype = "float16"

# Quantization type (fp4 or nf4)
bnb_4bit_quant_type = "nf4"

# Activate nested quantization for 4-bit base models (double quantization)
use_nested_quant = False

#################################################################
# Set up quantization config
#################################################################
compute_dtype = getattr(torch, bnb_4bit_compute_dtype)

bnb_config = BitsAndBytesConfig(
    load_in_4bit=use_4bit,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=use_nested_quant,
)

# Check GPU compatibility with bfloat16
if compute_dtype == torch.float16 and use_4bit:
    major, _ = torch.cuda.get_device_capability()
    if major >= 8:
        print("=" * 80)
        print("Your GPU supports bfloat16: accelerate training with bf16=True")
        print("=" * 80)

#################################################################
# Load pre-trained config
#################################################################
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
)

tokenizer_config.json:   0%|          | 0.00/1.64k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/174 [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


config.json:   0%|          | 0.00/622 [00:00<?, ?B/s]

pytorch_model.bin.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

pytorch_model-00001-of-00002.bin:   0%|          | 0.00/9.94G [00:00<?, ?B/s]

pytorch_model-00002-of-00002.bin:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/137 [00:00<?, ?B/s]

In [16]:
def print_number_of_trainable_model_parameters(model):
    trainable_model_params = 0
    all_model_params = 0
    for _, param in model.named_parameters():
        all_model_params += param.numel()
        if param.requires_grad:
            trainable_model_params += param.numel()
    return f"trainable model parameters: {trainable_model_params}\nall model parameters: {all_model_params}\npercentage of trainable model parameters: {100 * trainable_model_params / all_model_params:.2f}%"

print(print_number_of_trainable_model_parameters(model))

trainable model parameters: 262426624
all model parameters: 3752087552
percentage of trainable model parameters: 6.99%


In [17]:
text_generation_pipeline = pipeline(
    model=model,
    tokenizer=tokenizer,
    task="text-generation",
    temperature=0.2,
    repetition_penalty=1.1,
    return_full_text=True,
    max_new_tokens=1000,
)

In [18]:
ko_llm = HuggingFacePipeline(pipeline=text_generation_pipeline)

In [7]:
!playwright install
!playwright install-deps

Downloading Chromium 121.0.6167.57 (playwright build v1097)[2m from https://playwright.azureedge.net/builds/chromium/1097/chromium-linux.zip[22m
[1G152.8 MiB [] 0% 10.2s[0K[1G152.8 MiB [] 0% 49.3s[0K[1G152.8 MiB [] 0% 32.1s[0K[1G152.8 MiB [] 0% 19.8s[0K[1G152.8 MiB [] 0% 14.6s[0K[1G152.8 MiB [] 0% 12.5s[0K[1G152.8 MiB [] 1% 11.0s[0K[1G152.8 MiB [] 1% 8.9s[0K[1G152.8 MiB [] 1% 7.9s[0K[1G152.8 MiB [] 2% 7.1s[0K[1G152.8 MiB [] 2% 6.8s[0K[1G152.8 MiB [] 2% 6.9s[0K[1G152.8 MiB [] 3% 6.7s[0K[1G152.8 MiB [] 3% 6.4s[0K[1G152.8 MiB [] 4% 5.9s[0K[1G152.8 MiB [] 5% 5.5s[0K[1G152.8 MiB [] 6% 5.4s[0K[1G152.8 MiB [] 6% 5.2s[0K[1G152.8 MiB [] 7% 4.9s[0K[1G152.8 MiB [] 7% 4.8s[0K[1G152.8 MiB [] 8% 4.6s[0K[1G152.8 MiB [] 8% 4.5s[0K[1G152.8 MiB [] 9% 4.4s[0K[1G152.8 MiB [] 10% 4.2s[0K[1G152.8 MiB [] 11% 4.1s[0K[1G152.8 MiB [] 11% 3.9s[0K[1G152.8 MiB [] 12% 3.8s[0K[1G152.8 MiB [] 13% 3.7s[0K[1G152.8 MiB [] 14% 3.6s[0K[1G152.8 MiB [] 15% 3.5s[0

In [19]:
import nest_asyncio
nest_asyncio.apply()

# Articles to index
articles = ["https://www.motivewith.com/ko/blog/types-and-characteristics-of-seismic-isolation-devices",
            "https://namu.wiki/w/%EB%82%B4%EC%A7%84%EC%84%A4%EA%B3%84",
            "https://ko.wikipedia.org/wiki/%EC%B2%A0%EA%B3%A8_%EA%B5%AC%EC%A1%B0",]
# Scrapes the blogs above
loader = AsyncChromiumLoader(articles)
docs = loader.load()

In [20]:
# Converts HTML to plain text
html2text = Html2TextTransformer()
docs_transformed = html2text.transform_documents(docs)

# Chunk text
text_splitter = CharacterTextSplitter(chunk_size=100,
                                      chunk_overlap=0)
chunked_documents = text_splitter.split_documents(docs_transformed)

# Load chunked documents into the FAISS index
db = FAISS.from_documents(chunked_documents,
                          HuggingFaceEmbeddings(model_name='sentence-transformers/all-mpnet-base-v2'))

retriever = db.as_retriever()



In [21]:
prompt_template = """
### [INST] Instruction: 건축 및 도배에 대한 지식을 기반으로 아래의 질문에 대답하세요. 다음에 주어진 컨텍스트를 참고해보세요.:

{context}

### QUESTION:
{question} [/INST]
 """

# Create prompt from prompt template
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

# Create llm chain
llm_chain = LLMChain(llm=ko_llm, prompt=prompt)

In [26]:
llm_chain.invoke({"context": "", "question": "면진 장치가 뭐야?"})

Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.


{'context': '',
 'question': '면진 장치가 뭐야?',
 'text': '\nA. 건물 내부에서 사용되는 조명 장치\nB. 건물 외부에서 사용되는 조명 장치\nC. 건물 내부와 외부에서 모두 사용되는 조명 장치\nD. 건물 내부나 외부에서 사용되지 않는 조명 장치\n\n정답은? C. 건물 내부와 외부에서 모두 사용되는 조명 장치'}

In [27]:
rag_chain = (
 {"context": retriever, "question": RunnablePassthrough()}
    | llm_chain
)

result = rag_chain.invoke("면진 장치가 뭐야?")

Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.


In [28]:
result['context']

[Document(page_content='그러면 일반적인 고무제품과 적층형 고무받침의 차이점에는 어떤 것이 있을까?\n\n가장 중요한 차이점은 크기이며 볼륨이다.', metadata={'source': 'https://www.motivewith.com/ko/blog/types-and-characteristics-of-seismic-isolation-devices'}),
 Document(page_content='우리나라에서 디스크 받침으로 알려지고 널리 보급된 면진제품도 있다.', metadata={'source': 'https://www.motivewith.com/ko/blog/types-and-characteristics-of-seismic-isolation-devices'}),
 Document(page_content='인장강도, 기계적 강도, 경도가 매우 높으나 취성파괴가 있어 펜들럼의 마찰재로서는 적당하지 않다고 규명되었다.', metadata={'source': 'https://www.motivewith.com/ko/blog/types-and-characteristics-of-seismic-isolation-devices'}),
 Document(page_content='이 위키백과에서 언어 링크는 문서 제목 건너편의 문서 최상단에 있습니다. 최상단으로 이동합니다.\n\n검색\n\n검색\n\n  * 계정 만들기\n  * 로그인\n\n개인 도구', metadata={'source': 'https://ko.wikipedia.org/wiki/%EC%B2%A0%EA%B3%A8_%EA%B5%AC%EC%A1%B0'})]

In [29]:
print(result['text'])


면진 장치는 지진 등 진동을 줄여주는 역할을 하는 고무 또는 플라스틱 장치입니다. 이 장치는 건물이나 기타 구조물에 설치되어 지진 발생 시 진동을 완화하는데 사용됩니다. 면진 장치는 일반적으로 고무 또는 플라스틱으로 만들어져 있으며, 이는 진동을 흡수하고 전달하는 능력이 좋기 때문에 효과적입니다.
