In [1]:
import os
import sys
import numpy as np

import utils
import config

from llm import llm_wrapper
from embedding import embedder
from chain import rag_chain

import torch
from transformers import pipeline
from langchain_community.llms import HuggingFacePipeline

from matplotlib import pyplot as plt

In [9]:
import importlib
import warnings

# Re-execute the project modules so that edits saved to disk after the first
# import are picked up without restarting the kernel. The reload order is the
# same as the order in which the modules were originally imported.
for _project_module in (utils, config, llm_wrapper, embedder, rag_chain):
    importlib.reload(_project_module)

# Suppress every Python warning for the remainder of the session.
# NOTE(review): this is a blanket filter — it also hides deprecation and
# numerical warnings that may matter while debugging.
warnings.filterwarnings('ignore')

In [3]:
## Initialization of sub-components
# Generation below samples (do_sample=True), so fix torch's global RNG once up
# front; otherwise a Restart & Run All cannot reproduce the printed answers.
SEED = 42
torch.manual_seed(SEED)

# Use the GPU when torch can see one, otherwise the CPU.
# NOTE(review): `my_device` is not handed to any component in this cell —
# confirm whether the wrappers pick their device on their own.
my_device = "cuda" if torch.cuda.is_available() else "cpu"

## Dataset Loader
embedder_name = config.EMBED_MODEL
data_dir = config.DATA_DIR
documents = config.DOCUMENTS

# Embedder object used below to load the PDF, split it into chunks and build
# the vector database / retriever.
my_embedder = embedder.Embedder(
    embedder_name,
    data_dir,
    documents,
    chunk_length=512,
    overlap=32,
    save_vec_db=True,
)

doc_index = 0  # the index of the document that we want to do RAG
loaded_doc = my_embedder.pdf_data_loader(doc_index)
cleaned_docs = utils.clean_business_conduct_policy(
    loaded_doc,
    n_remove_first_lines=3,
    n_discard_pages=[1, 2, 20],
)
chunks = my_embedder.text_splitter(cleaned_docs)
vector_db, retriever = my_embedder.create_vector_database(chunks, top_k_doc=5)

## LLM Loader
# Single source of truth for the sampling settings. They were previously spelled
# out twice (wrapper and pipeline), which lets the two silently drift apart.
# NOTE(review): the first RAG query in this notebook logs "Asking to truncate to
# max_length but no maximum length is provided ..."; presumably `truncation=True`
# is the trigger — confirm before dropping it.
generation_kwargs = dict(
    temperature=0.3,
    top_p=0.9,
    top_k=50,
    repetition_penalty=1.2,
    do_sample=True,
    truncation=True,
)
# Token budget shared by both consumers; note the wrapper receives it as
# `max_length` while the pipeline receives it as `max_new_tokens`.
generation_token_budget = 256

base_model_id = config.LLM_MODEL_ID
my_llm_wrapper = llm_wrapper.SmolLLMWrapper(
    base_model_id,
    max_length=generation_token_budget,
    **generation_kwargs,
)
llm, llm_tokenizer = my_llm_wrapper.get_llm()

# Wrap the loaded model/tokenizer in a HF text-generation pipeline and expose it
# to LangChain through HuggingFacePipeline.
pipe = pipeline(
    "text-generation",
    model=llm,
    tokenizer=llm_tokenizer,
    max_new_tokens=generation_token_budget,
    **generation_kwargs,
)
llm_hg_pipeline = HuggingFacePipeline(pipeline=pipe)

## RAG Chain Loader
# Combine the LangChain-wrapped LLM with the retriever built above.
rag_chain_train = rag_chain.RAGChainBuilder(llm_hg_pipeline, retriever)
qa_bot_chain = rag_chain_train.build_chain()

Initializing Embedding Model
--------------------
Initializing LLM Model
--------------------


`config.hidden_act` is ignored, you should use `config.hidden_activation` instead.
Gemma's activation function will be set to `gelu_pytorch_tanh`. Please, use
`config.hidden_activation` if you want to override this behaviour.
See https://github.com/huggingface/transformers/pull/29402 for more details.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Initializing Retrieval Chain
--------------------


In [4]:
# Sample Test Question with RAG - LangChain Pipeline
question = (
    "What should an employee do if they suspect a policy violation?"
)

# Send the question through the retrieval-augmented QA chain.
# NOTE(review): `Chain.run` is deprecated in recent LangChain releases in favour
# of `invoke`; `invoke` returns a different shape, so confirm what
# `build_chain()` returns before migrating.
response = qa_bot_chain.run(question)

# Show whatever the chain returned.
print(response)

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.



        ### Context:
        •  Speak up. If you see or hear of any violation of Apple’s Business Conduct Policy, other Apple policies, or legal or 
regulatory requirements, you must notify either your manager, People Team, Legal, or Business Conduct. 
•  Use good judgment and ask questions. Apply Apple’s principles of business conduct, and review our policies and legal 
requirements. When in doubt about how to proceed, discuss it with your manager, your People Business Partner, Legal, or 
Business Conduct.

•  Reported or participated in the investigation of a potential violation of our policies or the law; or
• Engaged in legally protected activity, including related to leaves of absence or job accommodations, or forming or joining 
(or refraining from joining) labor organizations of an employee’s choice in a lawful manner.

Everything we do is a reflection of Apple. We expect you to:
• Follow the Policy and exhibit appropriate workplace behavior. Comply with the letter and spirit o

In [12]:
# Sample test question answered by the RAW LLM.
# The output here comes solely from the model itself, WITHOUT the RAG / LangChain pipeline.
question = (
    "What should an employee do if they suspect a policy violation?"
)

# Build the prompt for the bare LLM (requested without a system entry) and show it.
raw_llm_templaate = my_llm_wrapper.llm_template(
    question,
    include_system=False,
)
print(f"Question Template (Input of LLM):\n{raw_llm_templaate}")
print("-" * 20)

# Generate with the bare model, then pass the raw text and the question through
# the project's Gemma response clean-up helper before printing.
raw_llm_output = my_llm_wrapper.llm_generate_output(raw_llm_templaate)
cleaned_raw_llm_output = utils.clean_gemma_response(
    raw_llm_output,
    question,
)
print(f"LLM Response:\n{cleaned_raw_llm_output}")

Question Template (Input of LLM):
[{'role': 'user', 'content': 'What should an employee do if they suspect a policy violation?'}]
--------------------
LLM Response:
* **Report the violation to their supervisor or manager.**
* **Document the incident.**
* **Gather evidence.**
* **Talk to the person involved.**
* **Follow up with the supervisor or manager.**

**Additional tips for reporting a policy violation:**

* **Be objective and factual in your report.**
* **Provide as much detail as possible.**
* **Be aware of your company's reporting procedures.**
* **Do not make any promises or guarantees.**
* **Be respectful and professional in your communication.**
