In [1]:
from dotenv import load_dotenv
import os
from transformers import AutoModel
import torch
from huggingface_hub import notebook_login, login
from transformers import AutoTokenizer, AutoModelForCausalLM, StoppingCriteria

load_dotenv()
login(token=os.getenv("HF_TOKEN"))
local_dir = "models/transformers/"
#device = "cuda" if torch.cuda.is_available() else "cpu"
#torch.device(device)

model = AutoModel.from_pretrained("jinaai/jina-embeddings-v4", cache_dir=local_dir , trust_remote_code=True, dtype = torch.float16, device_map = "cuda", offload_buffers=True)


model_name = "VietnamAIHub/Vietnamese_llama2_7B_8K_SFT_General_domain"

## Loading Base LLaMa model weight and Merge with Adapter Weight wiht the base model
m = AutoModelForCausalLM.from_pretrained(
    model_name,
    load_in_8bit=True,
    llm_int8_enable_fp32_cpu_offload=True,
    torch_dtype=torch.float16,  
    pretraining_tp=1,
    # use_auth_token=True,
    # trust_remote_code=True,
    offload_folder="offload",
    cache_dir=local_dir,
    device_map="auto",
    offload_buffers=True,
)

tok = AutoTokenizer.from_pretrained(
    model_name,
    cache_dir=local_dir,
    padding_side="right",
    use_fast=False,
)
tok.bos_token_id = 1
stop_token_ids = [0]

class StopOnTokens(StoppingCriteria):
    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        for stop_id in stop_token_ids:
            if input_ids[0][-1] == stop_id:
                return True
        return False

generation_config = dict(
        temperature=0.2,
        top_k=20,
        top_p=0.9,
        do_sample=True,
        num_beams=1,
        repetition_penalty=1.2,
        max_new_tokens=200,
        early_stopping=True,

    )

Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.
`torch_dtype` is deprecated! Use `dtype` instead!


Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

`torch_dtype` is deprecated! Use `dtype` instead!
The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
Some parameters are on the meta device because they were offloaded to the cpu.


tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


added_tokens.json:   0%|          | 0.00/21.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/322 [00:00<?, ?B/s]

In [2]:
#!/usr/bin/python
from configparser import ConfigParser


def config(filename='database.ini', section='postgresql'):
    # create a parser
    parser = ConfigParser()
    # read config file
    parser.read(filename)

    # get section, default to postgresql
    db = {}
    if parser.has_section(section):
        params = parser.items(section)
        for param in params:
            db[param[0]] = param[1]
    else:
        raise Exception('Section {0} not found in the {1} file'.format(section, filename))

    return db

In [3]:
#!/usr/bin/python
import psycopg2
def connect():
    """ Connect to the PostgreSQL database server """
    conn = None
    try:
        # read connection parameters
        params = config()

        # connect to the PostgreSQL server
        print('Connecting to the PostgreSQL database...')
        conn = psycopg2.connect(**params)

        # create a cursor
        cur = conn.cursor()

        return cur, conn
    except (Exception, psycopg2.DatabaseError) as error:
        print(error)

if __name__ == '__main__':
    cur, conn = connect()

Connecting to the PostgreSQL database...


In [4]:
import numpy as np
import psycopg2
from master.config import config
from langchain_classic.chains import create_retrieval_chain


def read_column(cur):
    cur.execute('select id, name, description, price, image_url from Flower where vector IS NULL')
    if cur is None:
        print('No data found')
    return cur

read_column(cur)

def embedded_column(cur, conn):
    try:
        rows = cur.fetchall()
        print(f"ðŸ§© Found {len(rows)} rows to embed...")
        for row in rows:
            flower_id, name, description, price, image = row

            text = text = f"{name}. {description or ''}. Price: ${price:.2f}"

            text_embedding = model.encode_text(
                texts = text,
                task = "retrieval",
                return_numpy = True,
            )
            text_embedding = text_embedding.squeeze().astype(np.float16).tolist()

            cur.execute('Update Flower Set vector = %s where id = %s', (text_embedding, flower_id))
    except(Exception, psycopg2.DatabaseError) as error:
        print(error)
    finally:
        conn.commit()
        conn.close()
        cur.close()
        print('Database connection closed.')
    return 0

def cosine_similarity(a, b):
    a = np.array(a)
    b = np.array(b)
    return np.dot(a, b.T) / (np.linalg.norm(a, axis=1)[:, None] * np.linalg.norm(b, axis=1))

embedded_column(cur, conn)

ðŸ§© Found 0 rows to embed...
Database connection closed.


0

In [5]:
import ast
def retrieval_vector(query):
    try:
        cur, conn = connect()
        query_embedding = np.array(model.encode_text(
            texts = query,
            task = "retrieval",
            return_numpy = True,
        )).reshape(1, -1)

        cur.execute("select vector from Flower where vector IS NOT NULL")

        rows = cur.fetchall()

        flower_vectors = []
        for row in rows:
            vector = np.array(ast.literal_eval(row[0])).reshape(-1)
            flower_vectors.append(vector)

        flower_vectors = np.array(flower_vectors)

        top_3_index = np.array(np.argpartition(np.array(cosine_similarity(query_embedding, flower_vectors)).flatten(), -3)[-3:]) + 1
        top_3_index = top_3_index[::1]

        result = []
        for index in top_3_index:
            cur.execute("select name, description, price, image_url from Flower where id = %s", (int(index),))
            result.append(cur.fetchone())

        return result
    except(Exception, psycopg2.DatabaseError) as error:
        print(error)
    finally:
        conn.commit()
        conn.close()
        cur.close()
        print('Database connection closed.')

    return 0

In [6]:

# --- Build the context ---
query = "I want to buy some flowers for Thanksgiving Day's with less than 5$"
product_list = retrieval_vector(query)

model.to("cpu")
model_name.to("cuda")
tokenizer = AutoTokenizer.from_pretrained(model_name)
def build_context(products):
    context = "\n".join([
        f"- {p[0]} (${p[2]}): {p[1]}"
        for p in products
    ])
    return context



context = build_context(product_list)
messages = [
    {"role": "system", "content": "You are a helpful flower shop assistant."
                "Provide one Answer ONLY the following query based on the context provided below. "
                "Do not generate or answer any other questions. "
                "Do not make up or infer any information that is not directly stated in the context. "
                "Provide a concise answer."
                f"{context}"},
    {"role": "user", "content": query},
]

response = pipe(messages, max_new_tokens=128)[-1]["generated_text"][-1]["content"]
print(f"Query: \n\t{query}")
print(f"Context: \n\t{context}")
print(f"Answer: \n\t{response}")

Connecting to the PostgreSQL database...


Encoding texts...: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 1/1 [00:10<00:00, 10.50s/it]


Database connection closed.


AttributeError: 'str' object has no attribute 'to'