In [1]:
import requests

import os

# Local completions endpoint the (currently commented-out) request below targets.
api_url = "http://127.0.0.1:5000/v1/completions"

# Never commit credentials to a notebook: read the key from the environment.
# Export LLM_API_KEY before starting the kernel. The key that used to be
# hardcoded here should be treated as leaked and rotated.
api_key = os.environ.get("LLM_API_KEY", "")

# Set up headers
headers = {
    "Content-Type": "application/json",
    "Authorization": f"Bearer {api_key}"
}

# Sample business web page (HTML) that the model is asked to extract records from
html_content = """<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Innovatech Solutions Inc.</title>
</head>
<body>
    <header>
        <h1>Innovatech Solutions Inc.</h1>
        <p><strong>Category:</strong> Technology & Software Development</p>
        <p><strong>Country of Origin:</strong> Canada</p>
    </header>

    <section id="description">
        <h2>About Us</h2>
        <p>Innovatech Solutions specializes in custom software, AI integration, cloud solutions, and digital transformation services tailored for businesses aiming for accelerated growth and operational efficiency.</p>
    </section>

    <section id="office-locations">
        <h2>Our Offices</h2>
        <ul>
            <li>Toronto, Canada</li>
            <li>Vancouver, Canada</li>
            <li>Berlin, Germany</li>
        </ul>
    </section>

    <section id="staff">
        <h2>Meet Our Team</h2>
        
        <div class="staff-member">
            <h3>Dr. Emily Porter</h3>
            <p><strong>Position:</strong> Chief Technology Officer (CTO)</p>
            <p><strong>Email:</strong> eporter@innovatech.ca</p>
            <p><strong>Phone:</strong> +1-416-555-0145</p>
            <p><strong>Social Media:</strong>
                <a href="https://linkedin.com/in/emilyporter">LinkedIn</a>,
                <a href="https://twitter.com/emilyporter">Twitter</a>
            </p>
        </div>

        <div class="staff-member">
            <h3>Mark Reynolds</h3>
            <p><strong>Position:</strong> Lead AI Engineer</p>
            <p><strong>Email:</strong> mreynolds@innovatech.ca</p>
            <p><strong>Phone:</strong> +1-604-555-0221</p>
            <p><strong>Social Media:</strong>
                <a href="https://linkedin.com/in/markreynolds">LinkedIn</a>
            </p>
        </div>

        <div class="staff-member">
            <h3>Sophia Khan</h3>
            <p><strong>Position:</strong> Head of Digital Transformation</p>
            <p><strong>Email:</strong> skhan@innovatech.ca</p>
            <p><strong>Phone:</strong> +49-30-555-0877</p>
            <p><strong>Social Media:</strong>
                <a href="https://linkedin.com/in/sophiakhan">LinkedIn</a>,
                <a href="https://twitter.com/sophiakhan">Twitter</a>,
                <a href="https://instagram.com/sophiakhan">Instagram</a>
            </p>
        </div>
    </section>
</body>
</html>
"""
# Target PostgreSQL schema, interpolated verbatim into the prompt so the model
# knows the table and column names. Four tables: businesses, their office
# locations, their staff, and each staff member's social-media links; the child
# tables point at their parent through business_id / staff_id foreign keys.
sql_schema = """CREATE TABLE businesses (
    business_id SERIAL PRIMARY KEY,
    name VARCHAR(255) UNIQUE NOT NULL,
    description TEXT,
    category VARCHAR(100),
    country_of_origin VARCHAR(100)
);

CREATE TABLE office_locations (
    location_id SERIAL PRIMARY KEY,
    business_id INTEGER REFERENCES businesses(business_id),
    city VARCHAR(100),
    country VARCHAR(100)
);

CREATE TABLE staff (
    staff_id SERIAL PRIMARY KEY,
    business_id INTEGER REFERENCES businesses(business_id),
    full_name VARCHAR(100),
    position VARCHAR(100),
    email VARCHAR(100),
    phone VARCHAR(30)
);

CREATE TABLE staff_social_media (
    social_id SERIAL PRIMARY KEY,
    staff_id INTEGER REFERENCES staff(staff_id),
    platform VARCHAR(50),
    url VARCHAR(255)
);
"""

# prompt = f"""You are tasked to meticulously analyze the provided HTML webpage content, identifying and extracting relevant business and professional staff details to create valid SQL insert commands.\n
# Extract precisely:\n
# Business name, description, category, country of origin.\n
# Each office location city and country.\n
# Each staff member’s name, position, email, phone, and social media accounts.\n
# Follow the given SQL schema strictly and only provide a ready-to-execute PostgreSQL command. Ensure accuracy and consistency; the command should execute correctly without modifications from the first attempt.\n
# Please provide only one executable PostgreSQL command. Nothing else.\n
# HTML Content:\n
# {html_content}\n
# SQL Schema:\n
# {sql_schema}"""

# Active prompt: extraction instructions, then the schema, then the page HTML.
# It asks for a single BEGIN; ... COMMIT; transaction made only of INSERTs.
# NOTE(review): plain SQL has no variables to hold a RETURNING value between
# statements, so the model would have to chain the inserts with data-modifying
# CTEs (WITH ... INSERT ... RETURNING) or a DO block — consider stating that
# explicitly in the prompt.
prompt = f"""
You are given HTML webpage content. Extract the details precisely:

- Business name, description, category, country of origin.
- Each office location city and country.
- Staff member details: name, position, email, phone, and social media accounts.

Assume the PostgreSQL tables (`businesses`, `office_locations`, `staff`, `staff_social_media`) are already created with the following schema:

{sql_schema}

Do NOT include any CREATE TABLE commands. Assume `business_id` and `staff_id` are SERIAL PRIMARY KEYS, retrieved using RETURNING clauses in PostgreSQL.

Provide exactly ONE executable PostgreSQL transaction (wrapped in BEGIN; ... COMMIT;) containing only valid INSERT commands, precisely formatted and ready to run through a Python script executing SQL commands. No additional explanations or formatting outside the SQL code.

HTML Content:
{html_content}
"""

# # print(prompt)
# data = {
#     "prompt": prompt,
#     "max_tokens": 2048,
#     "temperature": 0.1,
#     "top_p": 0.8,
#     "top_k": 20,
#     "frequency_penalty": 0.05,
#     "presence_penalty": 0.05,
#     "stream": False
# }

# # Send POST request to the API
# response = requests.post(api_url, headers=headers, json=data)

# # Check and print the response
# if response.status_code == 200:
#     result = response.json()
#     generated_text = result['choices'][0]['text']
#     print("Generated Text:\n", generated_text)
# else:
#     print("Error:", response.status_code, response.text)


In [2]:
import os
import json
import requests
from pydantic import BaseModel

# Metaclass used to make the wrapper classes below process-wide singletons.
class Singleton(type):
    """Metaclass that builds each class once and then reuses that instance.

    The first call ``SomeClass(*args, **kwargs)`` constructs the object and
    stores it in ``_instances`` keyed by the class; every later call returns
    the stored object and ignores the arguments it was given.
    """

    _instances = {}

    def __call__(cls, *args, **kwargs):
        registry = cls._instances
        if cls in registry:
            return registry[cls]
        # First request for this class: run the normal construction path.
        registry[cls] = super().__call__(*args, **kwargs)
        return registry[cls]

# Define your class for handling LLM interactions
class LLMWrapper(metaclass=Singleton):
    """Small HTTP client for an LLM server described by a JSON config file.

    The config must provide ``LLM`` (model name), ``LOAD_MODEL_API_PATH``,
    ``CHAT_API_PATH``, ``GENERATE_API_PATH`` and ``TEMPERATURE``.

    Because of the ``Singleton`` metaclass the object is built once; later
    ``LLMWrapper(...)`` calls return that same object and their ``config_file``
    argument is ignored.
    """

    def __init__(self, config_file="ollama_config.json"):
        """Read the config and ask the server to load the configured model.

        ``self.loaded`` becomes True only when the load request answers 200.
        """
        self.loaded = False
        self.config_data = self.load_config(config_file)
        self.load_response = self.load_model(self.config_data["LLM"])
        if self.load_response.status_code == 200:
            self.loaded = True

    def load_config(self, config_file):
        """Parse ``config_file`` as JSON, mirror it into ``os.environ``, return it.

        Every top-level key is exported as an environment variable with its
        value converted via ``str``.
        """
        # JSON text is UTF-8; do not depend on the platform default encoding.
        with open(config_file, 'r', encoding="utf-8") as f:
            config_data = json.load(f)
        for key, value in config_data.items():
            os.environ[key] = str(value)
        return config_data

    def load_model(self, model_name: str):
        """POST ``{"model": model_name}`` to the load endpoint; return the response."""
        url = self.config_data["LOAD_MODEL_API_PATH"]
        payload = {"model": model_name}
        response = requests.post(url, json=payload)
        return response

    def run(self, prompt: str, chat=True):
        """Send ``prompt`` to the model and return the decoded JSON reply.

        Args:
            prompt: User text to send.
            chat: True posts to ``CHAT_API_PATH`` with a one-message
                conversation; False posts to ``GENERATE_API_PATH`` with a
                plain ``prompt`` field.

        Returns:
            The parsed JSON body on success, or ``None`` when the model is not
            loaded, the request cannot be sent, or the server answers with an
            error status (a diagnostic line is printed in each case).
        """
        if not self.loaded:
            print("Model not loaded.")
            return None

        payload = {
            "model": self.config_data["LLM"],
            "stream": False,
            "options": {"temperature": self.config_data["TEMPERATURE"]}
        }
        if chat:
            url = self.config_data["CHAT_API_PATH"]
            payload["messages"] = [{"role": "user", "content": prompt}]
        else:
            # The generate endpoint takes the text under "prompt", not a
            # "messages" list (assumes an Ollama-style API, as the config
            # file name suggests — TODO confirm against the server in use).
            url = self.config_data["GENERATE_API_PATH"]
            payload["prompt"] = prompt

        try:
            response = requests.post(url, json=payload)
        except requests.RequestException as exc:
            # Report transport failures the same way as HTTP failures below.
            print(f"Request failed: {exc}")
            return None

        if response.ok:
            return response.json()
        print(f"Request failed: {response.status_code} - {response.text}")
        return None

# Demo: obtain the shared wrapper, send the extraction prompt, print the reply.
llm = LLMWrapper()

print(f"Prompt: {prompt}")
response = llm.run(prompt)
if not response:
    # run() returned None (or an empty body): nothing to extract.
    print("Failed to get response from LLM.")
else:
    generated_sql = response['message']['content']
    print(f"Generated SQL: {generated_sql}")

Prompt: 
You are given HTML webpage content. Extract the details precisely:

- Business name, description, category, country of origin.
- Each office location city and country.
- Staff member details: name, position, email, phone, and social media accounts.

Assume the PostgreSQL tables (`businesses`, `office_locations`, `staff`, `staff_social_media`) are already created with the following schema:

CREATE TABLE businesses (
    business_id SERIAL PRIMARY KEY,
    name VARCHAR(255) UNIQUE NOT NULL,
    description TEXT,
    category VARCHAR(100),
    country_of_origin VARCHAR(100)
);

CREATE TABLE office_locations (
    location_id SERIAL PRIMARY KEY,
    business_id INTEGER REFERENCES businesses(business_id),
    city VARCHAR(100),
    country VARCHAR(100)
);

CREATE TABLE staff (
    staff_id SERIAL PRIMARY KEY,
    business_id INTEGER REFERENCES businesses(business_id),
    full_name VARCHAR(100),
    position VARCHAR(100),
    email VARCHAR(100),
    phone VARCHAR(30)
);

CREATE TAB

In [None]:
# prompt = f"""You are tasked to meticulously analyze the provided HTML webpage content, identifying and extracting relevant business and professional staff details to create valid SQL insert commands.\n
# Extract precisely:\n
# Business name, description, category, country of origin.\n
# Each office location city and country.\n
# Each staff member’s name, position, email, phone, and social media accounts.\n
# Follow the given SQL schema strictly and only provide a ready-to-execute PostgreSQL command. Ensure accuracy and consistency; the command should execute correctly without modifications from the first attempt.\n
# Please provide only one executable PostgreSQL command. Nothing else.\n
# HTML Content:\n
# {html_content}\n
# SQL Schema:\n
# {sql_schema}"""

# prompt = f"""
# You are given HTML webpage content. Extract the details precisely:

# - Business name, description, category, country of origin.
# - Each office location city and country.
# - Staff member details: name, position, email, phone, and social media accounts.

# Assume the PostgreSQL tables (`businesses`, `office_locations`, `staff`, `staff_social_media`) are already created with the following schema:

# {sql_schema}

# Do NOT include any CREATE TABLE commands. Assume `business_id` and `staff_id` are SERIAL PRIMARY KEYS, retrieved using RETURNING clauses in PostgreSQL.

# Provide exactly ONE executable PostgreSQL transaction (wrapped in BEGIN; ... COMMIT;) containing only valid INSERT commands, precisely formatted and ready to run through a Python script executing SQL commands. No additional explanations or formatting outside the SQL code.

# HTML Content:
# {html_content}
# """

# prompt = f"""
# You are given HTML content of a business webpage. Extract ONLY the following information precisely:

# - Business Name
# - Description
# - Category
# - Country of Origin
# - Office Locations (City, Country)
# - Staff Members:
#     - Full Name
#     - Position
#     - Email
#     - Phone
#     - Social Media (platform and URLs)

# Assume the PostgreSQL tables below are ALREADY CREATED, and DO NOT attempt to create them again:

# {sql_schema}

# Produce EXACTLY ONE PostgreSQL transaction (use BEGIN; ... COMMIT;) containing ONLY valid INSERT commands ready to execute directly in PostgreSQL. 

# Use PostgreSQL's RETURNING clause properly to retrieve generated primary keys for foreign key relations. Assume the use of variables (`business_id`, `staff_id`) for referencing foreign keys clearly.

# Do NOT include Python, explanations, analysis, or any text except the executable SQL commands.

# HTML Content:
# {html_content}
# """

# prompt = f"""
# Extract the following from the given HTML:

# - Business (name, description, category, country_of_origin)
# - Office locations (city, country)
# - Staff (full_name, position, email, phone)
# - Staff social media (platform, url)

# Generate ONLY PostgreSQL INSERT statements wrapped strictly within one transaction (BEGIN; ... COMMIT;) assuming the following schema already exists:

# {sql_schema}

# Strict instructions:
# - NEVER write CREATE TABLE commands.
# - Use RETURNING to get foreign keys.
# - No placeholders; assume correct PostgreSQL syntax with variables.
# - No explanations, code samples, HTML, Python, or analysis—ONLY SQL commands.

# HTML content:
# {html_content}
# """

In [None]:
from vllm import LLM

class SingletonLLM:
    """Factory that creates one ``LLM`` engine and hands it back on every call.

    ``SingletonLLM(...)`` does not produce a ``SingletonLLM`` object: the first
    call forwards its arguments to ``LLM(...)`` and caches the result, and
    every later call returns that cached object, ignoring its own arguments.
    """

    _instance = None

    def __new__(cls, *args, **kwargs):
        cached = cls._instance
        if cached is not None:
            return cached
        # Nothing cached yet: build the engine once and remember it.
        cls._instance = LLM(*args, **kwargs)
        return cls._instance

# Build the vLLM engine (SingletonLLM returns the cached engine on repeat calls,
# so re-running this cell does not load the model a second time).
# NOTE: this rebinds `llm`, which an earlier cell assigns to an LLMWrapper instance.
llm = SingletonLLM(
    model="mistralai/Mistral-7B-v0.1",
    gpu_memory_utilization=0.97,
    max_model_len=16384, # context length in tokens; about 12,288 words assuming ~0.75 words per token
    max_num_seqs=2,
    cpu_offload_gb=4,
    swap_space=8
)


  from .autonotebook import tqdm as notebook_tqdm


INFO 05-03 15:55:19 [__init__.py:239] Automatically detected platform cuda.


2025-05-03 15:55:20,826	INFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.


INFO 05-03 15:55:25 [config.py:717] This model supports multiple tasks: {'score', 'embed', 'reward', 'generate', 'classify'}. Defaulting to 'generate'.
INFO 05-03 15:55:25 [config.py:2003] Chunked prefill is enabled with max_num_batched_tokens=8192.


  self.tokenizer = get_tokenizer(self.tokenizer_id, **tokenizer_config)


INFO 05-03 15:55:26 [core.py:58] Initializing a V1 LLM engine (v0.8.5) with config: model='mistralai/Mistral-7B-v0.1', speculative_config=None, tokenizer='mistralai/Mistral-7B-v0.1', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.bfloat16, max_seq_len=16384, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, kv_cache_dtype=auto,  device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='auto', reasoning_backend=None), observability_config=ObservabilityConfig(show_hidden_metrics=False, otlp_traces_endpoint=None, collect_model_forward_time=False, collect_model_execute_time=False), seed=None, served_model_name=mistralai/Mistral-7B-v0.1, num_scheduler_steps=1, multi_step_stream_outputs=True, enable_prefix_caching=True, chunked_prefill_enabled=True, 

Loading safetensors checkpoint shards:   0% Completed | 0/2 [00:00<?, ?it/s]
Loading safetensors checkpoint shards:  50% Completed | 1/2 [00:02<00:02,  2.80s/it]
Loading safetensors checkpoint shards: 100% Completed | 2/2 [00:06<00:00,  3.52s/it]
Loading safetensors checkpoint shards: 100% Completed | 2/2 [00:06<00:00,  3.41s/it]



INFO 05-03 16:00:31 [loader.py:458] Loading weights took 6.94 seconds
INFO 05-03 16:00:31 [gpu_model_runner.py:1347] Model loading took 9.4341 GiB and 304.282915 seconds
INFO 05-03 16:00:35 [backends.py:420] Using cache directory: /home/mohammed/.cache/vllm/torch_compile_cache/f9c33428ba/rank_0_0 for vLLM's torch.compile
INFO 05-03 16:00:35 [backends.py:430] Dynamo bytecode transform time: 3.25 s
INFO 05-03 16:00:37 [backends.py:118] Directly load the compiled graph(s) for shape None from the cache, took 1.842 s
INFO 05-03 16:00:37 [monitor.py:33] torch.compile takes 3.25 s in total
INFO 05-03 16:03:52 [kv_cache_utils.py:634] GPU KV cache size: 37,152 tokens
INFO 05-03 16:03:52 [kv_cache_utils.py:637] Maximum concurrency for 16,384 tokens per request: 2.27x
INFO 05-03 16:12:10 [gpu_model_runner.py:1686] Graph capturing finished in 498 secs, took 0.51 GiB
INFO 05-03 16:12:13 [core.py:159] init engine (profile, create kv cache, warmup model) took 701.77 seconds
INFO 05-03 16:12:13 [core_

In [2]:
# Sanity check: show the concrete type and the repr of whatever `llm` is bound to.
print(type(llm))

print(llm)

<class 'vllm.entrypoints.llm.LLM'>
<vllm.entrypoints.llm.LLM object at 0x7ed859199110>


In [None]:
import os
import json
import requests
from pydantic import BaseModel

class LLMOutput(BaseModel):
    # Pydantic model with one text field and one integer field.
    # Presumably the schema for structured model replies (see the
    # `output_schema` notes in llm_wrap) — TODO confirm what `stage` encodes.
    # Deliberately documented with comments rather than a docstring: a
    # docstring would be emitted as the schema's "description".
    content: str
    stage: int

class Singleton(type):
    """Metaclass that builds each class once and then reuses that instance.

    The first call ``SomeClass(*args, **kwargs)`` constructs the object and
    stores it in ``_instances`` keyed by the class; every later call returns
    the stored object and ignores the arguments it was given.
    """

    _instances = {}

    def __call__(cls, *args, **kwargs):
        registry = cls._instances
        if cls in registry:
            return registry[cls]
        # First request for this class: run the normal construction path.
        registry[cls] = super().__call__(*args, **kwargs)
        return registry[cls]
        
class llm_wrap(metaclass=Singleton):
    """HTTP client for an LLM server that returns the raw ``requests`` response.

    The JSON config must provide ``LLM`` (model name), ``LOAD_MODEL_API_PATH``,
    ``CHAT_API_PATH``, ``GENERATE_API_PATH`` and ``TEMPERATURE``.

    Because of the ``Singleton`` metaclass the object is built once; later
    ``llm_wrap(...)`` calls return that same object and their ``config_file``
    argument is ignored.
    """

    def __init__(self, config_file="ollama_config.json"):
        """Read the config and ask the server to load the configured model.

        ``self.loaded`` becomes True only when the load request answers 200.
        """
        self.loaded = False
        self.config_data = self.load_config(config_file)
        self.load_response = self.load_model(self.config_data["LLM"])
        if self.load_response.status_code == 200:
            self.loaded = True
        # TODO: structured output is not wired up yet. The intended hook is
        # self.output_schema = LLMOutput.model_json_schema(), passed to the
        # server as the request's "format" field.

    def load_config(self, config_file):
        """Parse ``config_file`` as JSON, mirror it into ``os.environ``, return it.

        Every top-level key is exported as an environment variable with its
        value converted via ``str``.
        """
        # JSON text is UTF-8; do not depend on the platform default encoding.
        with open(config_file, 'r', encoding="utf-8") as f:
            config_data = json.load(f)
        for key, value in config_data.items():
            os.environ[key] = str(value)
        return config_data

    def load_model(self, model_name: str):
        """POST the model name to the load endpoint and return the response."""
        url = self.config_data["LOAD_MODEL_API_PATH"]
        payload = {
            "model": model_name,
            "stream": True
        }

        response = requests.post(url, json=payload)

        return response

    def run(self, prompt: str, chat = True):
        """Public entry point; forwards to :meth:`process_prompt`."""
        return self.process_prompt(prompt, chat)

    def process_prompt(self, prompt: str, chat = True):
        """Send ``prompt`` to the chat or generate endpoint.

        Args:
            prompt: User text. For backward compatibility a pre-built list of
                ``{"role": ..., "content": ...}`` messages is also accepted in
                chat mode and sent unchanged.
            chat: True posts to ``CHAT_API_PATH``; False posts to
                ``GENERATE_API_PATH``.

        Returns:
            The ``requests.Response`` from the server, or ``False`` when the
            model was never loaded.
        """
        if not self.loaded:
            print("Failed to load the model")
            return False

        payload = {
            "model": self.config_data["LLM"],
            "stream": False,
            "options": {
                "temperature": self.config_data["TEMPERATURE"]
            },
        }
        if chat:
            url = self.config_data["CHAT_API_PATH"]
            # A chat request carries a list of role/content messages; a bare
            # string is wrapped as a single user turn.
            if isinstance(prompt, str):
                payload["messages"] = [{"role": "user", "content": prompt}]
            else:
                payload["messages"] = prompt
        else:
            # The generate endpoint takes the text under "prompt" (assumes an
            # Ollama-style API, as the config file name suggests — TODO confirm).
            url = self.config_data["GENERATE_API_PATH"]
            payload["prompt"] = prompt

        response = requests.post(url, json=payload)

        return response

    # def process_prompt(self, prompt: str, chat = True):
    #     if self.loaded:
    #         url = self.config_data["CHAT_API_PATH"] if chat else self.config_data["GENERATE_API_PATH"]
    #         payload = {
    #                     "model": self.config_data["LLM"],
    #                     "messages": prompt,
    #                     # "messages": [
    #                     #     {
    #                     #     "role": "user",
    #                     #     "content": prompt
    #                     #     }
    #                     # ],
    #                     "stream": True,
    #                     "options": {
    #                                 "temperature": self.config_data["TEMPERATURE"]
    #                                 },
    #                     # "format": self.output_schema
    #                     }
    #         response = requests.post(url, json=payload, stream=True)
    #         complete_message = ""
    #         for line in response.iter_lines(decode_unicode=True):
    #             if line:  
    #                 try:
    #                     chunk = json.loads(line)
    #                     complete_message += chunk["message"]["content"]
    #                     yield complete_message
    #                 except json.JSONDecodeError as e:
    #                     print("Could not decode chunk:", line)
    #                     return False

    #         # return response
    #     else:
    #         print("Failed to load the model")
    #         return False

In [None]:
# import numpy as np


# class Singleton(type):
#     _instances = {}
#     def __call__(cls, *args, **kwargs):
#         if cls not in cls._instances:
#             cls._instances[cls] = super(Singleton, cls).__call__(*args, **kwargs)
#         return cls._instances[cls]

# class handle_prompt(metaclass=Singleton):
#     def __init__(self, cache_size = 512):
#         self.llm = llm_wrap()
#         self.cache_size = cache_size

#     def get_llm_output(self, prompt, chat = True):
#         if len(self.cache) > self.cache_size:
#             self.cache.popitem(last=False)

#         self.embedding = embedding_wrap().run(prompt)
#         if self.embedding: self.embedding = np.array(self.embedding)
#         else: self.llm.run(prompt, chat = chat)

#         self.embedding = self.process_embedding(self.embedding)
#         if self.embedding in self.cache:
#             self.cache.move_to_end(self.embedding)
#             return [self.cache[self.embedding]]
        
#         return self.llm.run(prompt, chat = chat)
    

In [None]:
# with open(".env", "r") as f:
#     for line in f:
#         key, value = line.strip().split("=")
#         os.environ[key] = value

# llm_model = os.environ["LLM_VERSION"]

In [None]:
def search_serper(search_query, gl="is", num=1, tbs="qdr:d", timeout=30):
    """Run a web search through the Serper API and return its organic hits.

    Args:
        search_query: Text to search for.
        gl: Value for Serper's ``gl`` field (presumably a country code;
            the default keeps the previously hardcoded ``"is"``).
        num: Number of results requested.
        tbs: Value for Serper's ``tbs`` field (``"qdr:d"`` presumably limits
            results to the past day — TODO confirm).
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A list of dicts with keys ``title``, ``link``, ``snippet``,
        ``search_term`` and ``id`` (1-based position). The list is empty when
        the response contains no ``organic`` section.

    Raises:
        KeyError: If ``SERPER_API_KEY`` is not set in the environment.
        requests.HTTPError: If the API answers with an error status.
        requests.RequestException: On connection problems or timeout.
    """
    url = "https://google.serper.dev/search"

    payload = json.dumps({
        "q": search_query,
        "gl": gl,
        "num": num,
        "tbs": tbs
    })

    headers = {
        'X-API-KEY': os.environ["SERPER_API_KEY"],
        'Content-Type': 'application/json'
    }

    response = requests.post(url, headers=headers, data=payload, timeout=timeout)
    # Surface auth/quota/server errors directly instead of failing later with
    # an unrelated KeyError while reading the body.
    response.raise_for_status()

    # A narrow query (one result, tight time filter) can legitimately come
    # back without an "organic" section; treat that as "no results".
    organic_hits = response.json().get('organic', [])

    return [
        {
            'title': hit['title'],
            'link': hit['link'],
            # Tolerate hits that carry no snippet text.
            'snippet': hit.get('snippet', ''),
            'search_term': search_query,
            'id': rank
        }
        for rank, hit in enumerate(organic_hits, 1)
    ]
