In [None]:
# !pip install -q nemollm langchain langchain_openai "openai~=1.9.0" tritonclient[all]
# !cp completions.py /usr/local/lib/python3.10/dist-packages/openai/resources/completions.py

In [3]:
import os

api_model_name = os.environ["OPENAI_MODEL"]
open_ai_base_url = os.environ["OPEN_AI_BASE_URL"]
nemo_llm_base_url = os.environ["NEMO_LLM_BASE_URL"]
max_tokens = 512
temperature = 0.3
repetition_penalty = 1
top_p=1.0
top_k=2
length_penalty=1.
beam_search_diversity_rate=0.
seed = 42
stop = "</s>"

question = "Do mutations causing low HDL-C promote increased carotid intima-media thickness?"
context = "Although observational data support an inverse relationship between high-density lipoprotein (HDL) cholesterol and coronary heart " \
    "disease (CHD), genetic HDL deficiency states often do not correlate with premature CHD. Carotid intima-media thickness (cIMT) " \
    "measurements were obtained in cases comprising 10 different mutations in LCAT, ABCA1 and APOA1 to further evaluate the " \
    "relationship between low HDL resulting from genetic variation and early atherosclerosis. In a 1:2 case-control study of " \
    "sex and age-related (+/-5 y) subjects (n=114), cIMT was nearly identical between cases (0.66+/-0.17 cm) and controls (0.65+/-0.18 cm) " \
    "despite significantly lower HDL cholesterol (0.67 vs. 1.58 mmol/l) and apolipoprotein A-I levels (96.7 vs. 151.4 mg/dl) (P<0.05)."

prompt = f"Context: {context} Question: {question} Answer:"
messages = [{"role": "system", "content": f"Use the context provided to answer the researcher's question."}, 
            {"role": "user", "content": f"question: {question} context: {context}"}]

### NeMo LLM Completion API

Supported Parameters
- prompt
- tokens_to_generate
- temperature
- top_p
- top_k
- stop
- random_seed
- repetition_penalty
- beam_search_diversity_rate
- beam_width
- length_penalty
- logprobs

In [7]:
from nemollm.api import NemoLLM
import json

API_HOST = f"{nemo_llm_base_url}/v1"
API_KEY = "<key>"
ORG_ID = "<org>"

conn = NemoLLM(
  api_key=API_KEY, 
  org_id=ORG_ID, 
  api_host=API_HOST
)

completion_response = conn.generate(
        model=api_model_name,
        prompt=prompt,
        tokens_to_generate=max_tokens,
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        stop=[stop],
        repetition_penalty=repetition_penalty,
        beam_search_diversity_rate=beam_search_diversity_rate,
        beam_width=1,
        length_penalty=length_penalty,
    )
completion_response=json.loads(completion_response)
print(completion_response["text"].strip())


No.

This study found that genetic mutations that cause low HDL-C do not lead to increased carotid intima-media thickness (cIMT), which is a marker of early atherosclerosis. Despite having significantly lower HDL-C and apolipoprotein A-I levels, the cases and controls had similar cIMT measurements. This suggests that low HDL-C may not be a reliable indicator of early atherosclerosis in all individuals, and that other factors may be more important in determining an individual's risk for atherosclerosis.


### NeMo LLM Chat API

Supported Parameters
- chat_context
- tokens_to_generate
- temperature
- top_p
- top_k
- stop
- random_seed
- repetition_penalty
- beam_search_diversity_rate
- beam_width
- length_penalty
- logprobs

In [10]:
import json
chat_response = conn.generate_chat(
    model=api_model_name,
    chat_context=messages,
    tokens_to_generate=max_tokens,
    temperature=temperature,
    top_p=top_p,
    top_k=top_k,
    stop=[stop],
    repetition_penalty=repetition_penalty,
    beam_search_diversity_rate=beam_search_diversity_rate,
    beam_width=1,
    length_penalty=length_penalty,
    )
chat_response=json.loads(chat_response)
print(chat_response["chat_context"][0]["content"])

Use the context provided to answer the researcher's question.


### OpenAI Completion API

Supported Parameters
- model
- prompt
- frequency_penalty
- max_tokens
- n = 1
- stop
- stream
- temperature
- top_p
- logprobs

In [11]:
import os
from langchain.prompts import PromptTemplate
from langchain_openai import OpenAI
from langchain.chains import LLMChain
from langchain.callbacks import streaming_stdout

os.environ["OPENAI_API_KEY"] = "local"
os.environ["OPENAI_API_BASE"] = f"{open_ai_base_url}/v1"
os.environ["OPENAI_API_TYPE"] = "open_ai"

template = "Context: {context} Question: {question} Answer:"

prompt = PromptTemplate(template=template, input_variables=["context", "question"])

completion_llm = OpenAI(
    model=api_model_name,
    streaming=False,
    verbose=False,
    max_tokens=max_tokens,
    temperature=temperature,
    model_kwargs={"stop": stop},
)

llm_chain = LLMChain(prompt=prompt, llm=completion_llm)
completion_response = llm_chain.invoke({"question": question, "context": context})
print(completion_response["text"])

BadRequestError: Error code: 400 - {'object': 'error', 'message': "[{'type': 'list_type', 'loc': ('body', 'logit_bias'), 'msg': 'Input should be a valid list', 'input': {}, 'url': 'https://errors.pydantic.dev/2.5/v/list_type'}]", 'type': 'invalid_request_error', 'param': None, 'code': None}

### OpenAI Chat API
Supported Parameters
- messages
- model
- frequency_penalty
- max_tokens
- n = 1
- stop
- stream
- temperature
- top_p
- logprobs

In [None]:
from langchain_openai import ChatOpenAI
from langchain.schema import HumanMessage, SystemMessage

In [None]:
callbacks = [streaming_stdout.StreamingStdOutCallbackHandler()]

chat_llm = ChatOpenAI(
    model=api_model_name,
    callbacks=callbacks,
    streaming=True,
    temperature=temperature,
    max_tokens=max_tokens,
    model_kwargs={"stop": stop},
    )

In [None]:
messages = [
    SystemMessage(
        content="Use the context provided to answer the researcher's question."
    ),
    HumanMessage(
        content=f"{context} {question}"
    ),
]

_ = chat_llm.invoke(messages)

___