In [1]:
# Install Xinference, the OpenAI client and supporting packages with pip.
# NOTE(review): versions are unpinned, and `tenacity` (imported in a later cell) is not
# installed here — presumably it arrives as a transitive dependency; confirm.
! pip install xinference
!pip install typing-extensions --upgrade
!pip install tiktoken
! pip install transformers_stream_generator
! pip install accelerate
! pip install transformers
! pip install openai

Collecting xinference
  Downloading xinference-0.9.1-py3-none-any.whl (21.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m21.7/21.7 MB[0m [31m9.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting xoscar>=0.2.1 (from xinference)
  Downloading xoscar-0.2.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.1/4.1 MB[0m [31m13.7 MB/s[0m eta [36m0:00:00[0m
Collecting gradio>=3.39.0 (from xinference)
  Downloading gradio-4.20.0-py3-none-any.whl (17.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.0/17.0 MB[0m [31m33.7 MB/s[0m eta [36m0:00:00[0m
Collecting fastapi (from xinference)
  Downloading fastapi-0.110.0-py3-none-any.whl (92 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.1/92.1 kB[0m [31m13.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting uvicorn (from xinference)
  Downloading uvicorn-0.27.1-py3-none-any.whl (60 kB)
[2K   

In [5]:
## Start Local Server
# Runs `xinference-local` in the background under nohup; stdout and stderr are both
# redirected to xinference.log.
# NOTE(review): nothing in this cell waits for the server to finish starting —
# presumably the following cells are run manually after a short delay; confirm.

!nohup xinference-local  > xinference.log 2>&1 &

In [6]:
# Print the xinference CLI usage text (options and available sub-commands).
!xinference --help

Usage: xinference [OPTIONS] COMMAND [ARGS]...

  Xinference command-line interface for serving and deploying models.

Options:
  -v, --version       Show the current version of the Xinference tool.
  --log-level TEXT    Set the logger level. Options listed from most log to
                      (Default level is INFO)
  -H, --host TEXT     Specify the host address for the Xinference server.
  -p, --port INTEGER  Specify the port number for the Xinference server.
  --help              Show this message and exit.

Commands:
  chat           Chat with a running LLM.
  generate       Generate text using a running LLM.
  launch         Launch a model with the Xinference framework with the...
  list           List all running models in Xinference.
  login          Login when the cluster is authenticated.
  register       Registers a new model with Xinference for deployment.
  registrations  Lists all registered models in Xinference.
  terminate      Terminate a deployed model through unique 

In [7]:
# Launch a model through the xinference CLI under the model UID "my-llm"; later cells
# address the model by that UID.
# The commented-out commands are alternative launches of qwen-chat at other sizes.
# ! xinference launch -u my-llm --model-name qwen-chat --size-in-billions 1_8 --model-format pytorch
# ! xinference launch -u my-llm --model-name qwen-chat --size-in-billions 7 --model-format pytorch
# ! xinference launch -u my-llm --model-name qwen-chat --size-in-billions 14 --model-format pytorch

! xinference launch -u my-llm --model-name qwen1.5-chat --size-in-billions 7 --model-format pytorch



Launch model name: qwen1.5-chat with kwargs: {}
Model uid: my-llm


In [8]:
import json
import difflib
from IPython.display import display, Markdown, Latex
import openai



In [9]:
# Load the glossary JSON into `data`; later cells treat it as a mapping of
# term -> definition.
file_path = "/content/Applied_Materials_technical_glossary.json"
# Pin the encoding so decoding does not depend on the platform's locale default.
with open(file_path, encoding="utf-8") as f:
    data = json.load(f)

In [10]:
## test local LLM
# Smoke test: send a single user message to the launched model through the
# OpenAI-compatible endpoint and display the raw completion object.
deployment_name = "my-llm"
messages = [{"role": "user", "content": "Who are you?"}]

client = openai.Client(
    api_key="empty",
    base_url="http://0.0.0.0:9997/v1",
)
client.chat.completions.create(model=deployment_name, messages=messages)

ChatCompletion(id='chatd88baedc-db8f-11ee-aeec-0242ac1c000c', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content="I am a large language model created by Alibaba Cloud, known as Qwen. I'm designed to assist and communicate with users like you in various tasks, such as answering questions, generating text, or helping with problem-solving. How can I assist you today?", role='assistant', function_call=None, tool_calls=None))], created=1709712135, model='my-llm', object='chat.completion', system_fingerprint=None, usage=CompletionUsage(completion_tokens=52, prompt_tokens=23, total_tokens=75))

## Tool usage


In [11]:
## define a searching tool
# Tool schema advertised to the model: one function taking a required string `query`.
tools = [
    dict(
        type="function",
        function=dict(
            name="find_semiconductor_term_definition",
            description="Finds definitions for semiconductor manufacturing-related terms using fuzzy matching.",
            parameters=dict(
                type="object",
                properties=dict(
                    query=dict(
                        type="string",
                        description="The term to query for a definition.",
                    ),
                ),
                required=["query"],
            ),
        ),
    )
]


def find_term_definition(query, cutoff=0.6, terms=None):
    """Return the glossary entry whose term most closely matches ``query``.

    Matching is fuzzy (``difflib.get_close_matches``) and case-insensitive on
    both the query and the glossary keys.

    Args:
        query: Term to look up. Non-string or blank values yield "no match".
        cutoff: Minimum similarity ratio (0-1) a glossary term must reach.
        terms: Optional mapping of term -> definition to search. When omitted,
            the notebook-level ``data`` glossary is used.

    Returns:
        A JSON string: ``{"<matched term>": "<definition>"}`` on success, or
        ``{"null": "No close match found."}`` when nothing is close enough.

    Note:
        The best fuzzy match is not necessarily the queried term (the recorded
        run in this notebook matched "CD-SEM" to "CD-TE"); callers can compare
        the returned key against the query.
    """
    terms_dict = data if terms is None else terms
    # json.dumps renders the None key as "null"; kept so the no-match payload
    # stays identical to what earlier runs produced.
    no_match = json.dumps({None: "No close match found."})

    # The model may send a missing/blank/non-string argument; treat it as a miss
    # instead of crashing on .upper().
    if not isinstance(query, str) or not query.strip():
        return no_match

    # Compare in upper case on both sides so keys that are not stored in upper
    # case can still be matched, then map back to the original key.
    by_upper = {str(term).upper(): term for term in terms_dict}
    matches = difflib.get_close_matches(
        query.strip().upper(), list(by_upper), n=1, cutoff=cutoff
    )
    if not matches:
        return no_match

    term = by_upper[matches[0]]
    return json.dumps({term: terms_dict[term]})



In [12]:
from tenacity import retry, wait_random_exponential, stop_after_attempt

@retry(wait=wait_random_exponential(multiplier=1, max=40), stop=stop_after_attempt(3), reraise=True)
def chat_completion_request(messages, tools=None, tool_choice=None, model=deployment_name):
    """Send a chat-completion request to the local server, retrying on failure.

    Args:
        messages: Chat messages to send.
        tools: Optional tool (function) schemas offered to the model.
        tool_choice: Optional tool-selection mode forwarded to the API.
        model: Model UID to query; defaults to ``deployment_name``.

    Returns:
        The response object returned by ``client.chat.completions.create``.

    Raises:
        Exception: the error from the last attempt, once all three attempts
            have failed.
    """
    try:
        return client.chat.completions.create(
            model=model,
            messages=messages,
            tools=tools,
            tool_choice=tool_choice,
        )
    except Exception as e:
        print("Unable to generate ChatCompletion response")
        print(f"Exception: {e}")
        # Propagate instead of returning the exception object: returning it made
        # every call look successful, so @retry never retried and callers later
        # failed on `response.choices`.
        raise


def chat_completion_with_function_execution(messages, tools=None):
    """This function makes a ChatCompletion API call with the option of adding tools

    Args:
        messages: Chat messages to send; passed on to the tool-execution step,
            which appends the tool result to this list.
        tools: Optional list of tool schemas. ``None`` or an empty list means
            the request is made without tools.

    Returns:
        The chat-completion response: either the direct answer, or the follow-up
        answer produced after the requested tool call has been executed.
    """
    # Only request automatic tool selection when there is something to select.
    tool_choice = "auto" if tools else None
    response = chat_completion_request(messages, tools=tools or None, tool_choice=tool_choice)
    # Guard against a request helper that hands back the exception object
    # rather than raising it.
    if isinstance(response, Exception):
        raise response

    full_message = response.choices[0]
    if full_message.finish_reason == "tool_calls":
        print("Function generation requested, calling function")
        return call_term_definition_function(messages, full_message)

    print("Function not required, responding to user")
    return response

def call_term_definition_function(messages, full_message):
    """Function calling function which executes function calls when the model believes it is necessary.

    Executes the last tool call in ``full_message``, appends its result to
    ``messages`` as a ``tool`` message, and asks the model to complete the chat.

    Args:
        messages: Conversation so far; mutated in place with the tool result.
        full_message: The response choice whose ``message.tool_calls`` holds the
            requested call.

    Returns:
        The follow-up chat-completion response.

    Raises:
        Exception: if the requested function is unknown, or if the follow-up
            chat request fails (the original error is chained as the cause).

    Note:
        The follow-up request uses the notebook-level ``tools`` list.
    """
    tool_call = full_message.message.tool_calls[-1]
    if tool_call.function.name != "find_semiconductor_term_definition":
        raise Exception("Function does not exist and cannot be called")

    try:
        parsed_output = json.loads(tool_call.function.arguments)
        print("Finding term definition")
        results = find_term_definition(parsed_output["query"])
        print(results)
    except Exception as e:
        # Show the raw arguments: the parsed value may not exist if JSON
        # decoding is what failed.
        print(tool_call.function.arguments)
        print("Function execution failed")
        print(f"Error message: {e}")
        # Always give the model a tool result, so the failure is reported to it
        # instead of crashing on an undefined `results` below.
        results = json.dumps({"error": f"Function execution failed: {e}"})

    # NOTE(review): the assistant message that carried the tool call is not added to
    # `messages` before the tool result — the recorded runs worked this way, but
    # confirm whether the serving API expects it.
    messages.append({
        "role": "tool",
        "tool_call_id": tool_call.id,
        "name": tool_call.function.name,
        "content": str(results),
    })

    try:
        print("Got term definition, completing chat")
        response = chat_completion_request(messages, tools=tools, tool_choice="auto")
        # Treat an exception object handed back by the request helper as a failure.
        if isinstance(response, Exception):
            raise response
        return response
    except Exception as e:
        print(type(e))
        raise Exception("Function chat request failed") from e



In [13]:
# System prompt sent as the first message of every `search` conversation.
# The two paragraphs were previously pasted twice; each now appears once.
system_prompt = """
You are an AI assistant specializing in the semiconductor manufacturing industry. Your task is to respond to user inquiries related to this field.

For any industry-specific technical terms or jargon you are unsure about, you have access to reference materials and tools to look them up and ensure accurate responses.
"""

def search(query):
    """Answer ``query`` with the semiconductor assistant, with the glossary tool available.

    Builds a fresh two-message conversation (system prompt, then the user's
    query) and returns the resulting chat-completion response.
    """
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": query},
    ]
    return chat_completion_with_function_execution(conversation, tools=tools)

In [14]:
# A greeting: in the recorded run no tool call was requested and the model answered directly.
response = search("hi")
display(Markdown(response.choices[0].message.content))

Function not required, responding to user


Hello! How can I assist you with information or inquiries related to the semiconductor manufacturing industry? If you have any specific questions, feel free to ask.

In [15]:
# A glossary term: in the recorded run the model requested the lookup tool, which matched "WAFER".
response = search("what is wafer?")
display(Markdown(response.choices[0].message.content))

Function generation requested, calling function
Finding term definition
{"WAFER": "The thin, circular or nearly square slices of mono- or multicrystalline silicon on which semiconductors and PV cells are built."}
Got term definition, completing chat


A wafer is a thin, flat piece of semiconductor material, typically made from mono- or multicrystalline silicon, used as the basis for the production of integrated circuits (ICs) and photovoltaic cells in the semiconductor industry. These wafers are usually circular but sometimes rectangular, and they go through various manufacturing processes before being cut into individual chips.

In [16]:
# In the recorded run the fuzzy lookup returned the entry for "CD-TE" for this query,
# so the displayed answer is not grounded in a glossary entry for CD-SEM.
response = search("what is CD-SEM?")
display(Markdown(response.choices[0].message.content))

Function generation requested, calling function
Finding term definition
{"CD-TE": "A category of thin-film solar cells that uses a cadmium-tellurium compound as the light-converting active layer."}
Got term definition, completing chat


CD-SEM, if it refers to "Cross-Disciplinary Semiconductor Etching," would likely refer to a process used in semiconductor manufacturing where different techniques from both electronics and material science are combined to etch or remove layers during the fabrication of integrated circuits. However, without additional context, it's important to clarify to get a precise definition. In the case of "CD-TE," it refers to Cadmium-Telluride Thin-Film Solar Cells, a type of photovoltaic technology.