feat: new llama model (#356)
David-Kristek committed May 20, 2024
1 parent b54f36d commit e8ecae9
Showing 17 changed files with 163 additions and 160 deletions.
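
In short: the example scripts, the LangChain and LlamaIndex extension docstrings, and the integration-test setup all swap the default model from meta-llama/llama-2-70b-chat to meta-llama/llama-3-70b-instruct, and the VCR cassettes backing the integration tests were re-recorded against the live service on May 20, 2024. One cassette (the Hugging Face agent generation fixture) still targets meta-llama/llama-2-70b; only its recorded prompt and response changed.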
2 changes: 1 addition & 1 deletion examples/extensions/huggingface/huggingface_agent.py
@@ -49,7 +49,7 @@ def __call__(self):

agent = IBMGenAIAgent(
client=client,
model="meta-llama/llama-2-70b-chat",
model="meta-llama/llama-3-70b-instruct",
parameters=TextGenerationParameters(min_new_tokens=10, max_new_tokens=200, random_seed=777, temperature=0),
additional_tools=[BitcoinPriceFetcher()],
)
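Note that this extension takes the model under model=, while the LangChain and LlamaIndex interfaces below take model_id=. A minimal sketch of driving the updated agent follows; the import paths and the run() call are assumptions based on the Hugging Face Agent API this extension wraps, and only the constructor arguments are confirmed by the diff:

    from genai import Client, Credentials
    from genai.extensions.huggingface import IBMGenAIAgent  # import path assumed
    from genai.schema import TextGenerationParameters

    client = Client(credentials=Credentials.from_env())  # credentials read from the environment
    agent = IBMGenAIAgent(
        client=client,
        model="meta-llama/llama-3-70b-instruct",
        parameters=TextGenerationParameters(
            min_new_tokens=10, max_new_tokens=200, random_seed=777, temperature=0
        ),
    )
    # run() is the standard Hugging Face Agent entry point -- assumed, not shown in this diff.
    agent.run("What is the current price of Bitcoin?")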
2 changes: 1 addition & 1 deletion examples/extensions/langchain/langchain_agent.py
@@ -78,7 +78,7 @@ def _run(self, word: str, run_manager: Optional[CallbackManagerForToolRun] = Non
client = Client(credentials=Credentials.from_env())
llm = LangChainChatInterface(
client=client,
model_id="meta-llama/llama-2-70b-chat",
model_id="meta-llama/llama-3-70b-instruct",
parameters=TextGenerationParameters(
max_new_tokens=250, min_new_tokens=20, temperature=0, stop_sequences=["\nObservation"]
),
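The stop_sequences=["\nObservation"] setting is the usual ReAct guard: generation halts before the model can fabricate a tool result, so the agent executor can run the real tool and append the genuine observation; temperature=0 keeps tool selection deterministic. A sketch of one way to wire the interface into an agent, using LangChain's legacy initialize_agent API (the example's actual wiring lies outside this hunk, so treat this as illustration only):

    from langchain.agents import AgentType, initialize_agent
    from langchain.tools import tool

    @tool
    def word_length(word: str) -> str:
        """Return the number of characters in a word."""
        return str(len(word))

    # `llm` is the LangChainChatInterface configured above.
    agent = initialize_agent(
        tools=[word_length],
        llm=llm,
        agent=AgentType.CHAT_ZERO_SHOT_REACT_DESCRIPTION,
    )
    print(agent.run("How many letters are in the word 'Bratislava'?"))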
2 changes: 1 addition & 1 deletion examples/extensions/langchain/langchain_chat_generate.py
@@ -27,7 +27,7 @@ def heading(text: str) -> str:

llm = LangChainChatInterface(
client=Client(credentials=Credentials.from_env()),
model_id="meta-llama/llama-2-70b-chat",
model_id="meta-llama/llama-3-70b-instruct",
parameters=TextGenerationParameters(
decoding_method=DecodingMethod.SAMPLE,
max_new_tokens=100,
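This example opts into sampling (DecodingMethod.SAMPLE), so repeated runs can produce different completions. Because LangChainChatInterface subclasses BaseChatModel (see src/genai/extensions/langchain/chat_llm.py below), it can be driven through LangChain's standard chat API; a sketch, with the message classes taken from langchain_core:

    from langchain_core.messages import HumanMessage, SystemMessage

    # `llm` is the LangChainChatInterface configured above.
    result = llm.invoke(
        [
            SystemMessage(content="You are a helpful assistant."),
            HumanMessage(content="Describe what language models are."),
        ]
    )
    print(result.content)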
2 changes: 1 addition & 1 deletion examples/extensions/langchain/langchain_chat_stream.py
@@ -21,7 +21,7 @@ def heading(text: str) -> str:
print(heading("Stream chat with langchain"))

llm = LangChainChatInterface(
model_id="meta-llama/llama-2-70b-chat",
model_id="meta-llama/llama-3-70b-instruct",
client=Client(credentials=Credentials.from_env()),
parameters=TextGenerationParameters(
decoding_method=DecodingMethod.SAMPLE,
2 changes: 1 addition & 1 deletion examples/extensions/langchain/langchain_sql_agent.py
@@ -75,7 +75,7 @@ def create_llm():
client = Client(credentials=Credentials.from_env())
return LangChainChatInterface(
client=client,
model_id="meta-llama/llama-2-70b-chat",
model_id="meta-llama/llama-3-70b-instruct",
parameters=TextGenerationParameters(
max_new_tokens=250, min_new_tokens=20, temperature=0, stop_sequences=["\nObservation"]
),
2 changes: 1 addition & 1 deletion examples/extensions/llama_index/llama_index_llm.py
@@ -22,7 +22,7 @@ def heading(text: str) -> str:

llm = IBMGenAILlamaIndex(
client=client,
model_id="meta-llama/llama-2-70b-chat",
model_id="meta-llama/llama-3-70b-instruct",
parameters=TextGenerationParameters(
decoding_method=DecodingMethod.SAMPLE,
max_new_tokens=100,
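The LlamaIndex integration gets the same swap. Since IBMGenAILlamaIndex implements LlamaIndex's LLM interface, it can be exercised with the framework's standard calls; a sketch (complete() and the .text field are LlamaIndex conventions, assumed rather than shown in this hunk):

    # `llm` is the IBMGenAILlamaIndex instance configured above.
    response = llm.complete("What is a large language model?")
    print(response.text)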
2 changes: 1 addition & 1 deletion examples/text/chat.py
@@ -31,7 +31,7 @@ def heading(text: str) -> str:
)

client = Client(credentials=Credentials.from_env())
model_id = "meta-llama/llama-2-70b-chat"
model_id = "meta-llama/llama-3-70b-instruct"

prompt = "What is NLP and how it has evolved over the years?"
print(heading("Generating a chat response"))
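Under the hood this example exercises the chat service documented in src/genai/text/chat/chat_generation_service.py further down. A sketch of the one-shot call, with the response field access assumed from the recorded JSON payloads in the cassettes below:

    from genai import Client, Credentials
    from genai.schema import HumanMessage, TextGenerationParameters  # import path assumed

    client = Client(credentials=Credentials.from_env())
    response = client.text.chat.create(
        model_id="meta-llama/llama-3-70b-instruct",
        messages=[HumanMessage(content="What is NLP and how it has evolved over the years?")],
        parameters=TextGenerationParameters(max_new_tokens=100),
    )
    print(response.results[0].generated_text)  # field names assumed from the cassette JSON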
2 changes: 1 addition & 1 deletion src/genai/extensions/langchain/chat_llm.py
@@ -93,7 +93,7 @@ class LangChainChatInterface(BaseChatModel):
client = Client(credentials=Credentials.from_env())
llm = LangChainChatInterface(
client=client,
model_id="meta-llama/llama-2-70b-chat",
model_id="meta-llama/llama-3-70b-instruct",
parameters=TextGenerationParameters(
max_new_tokens=250,
)
2 changes: 1 addition & 1 deletion src/genai/extensions/langchain/llm.py
@@ -62,7 +62,7 @@ class LangChainInterface(LLM):
client = Client(credentials=Credentials.from_env())
llm = LangChainInterface(
client=client,
model_id="meta-llama/llama-2-70b-chat",
model_id="meta-llama/llama-3-70b-instruct",
parameters=TextGenerationParameters(max_new_tokens=50)
)
4 changes: 2 additions & 2 deletions src/genai/text/chat/chat_generation_service.py
@@ -79,7 +79,7 @@ def create(
# Create a new conversation
response = client.text.chat.create(
model_id="meta-llama/llama-2-70b-chat",
model_id="meta-llama/llama-3-70b-instruct",
messages=[HumanMessage(content="Describe the game Chess?")],
parameters=TextGenerationParameters(max_token_limit=100)
)
@@ -152,7 +152,7 @@ def create_stream(
# Create a new conversation
for response in client.text.chat.create_stream(
model_id="meta-llama/llama-2-70b-chat",
model_id="meta-llama/llama-3-70b-instruct",
messages=[HumanMessage(content="Describe the game Chess?")],
parameters=TextGenerationParameters(max_token_limit=100)
):
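The streaming variant yields partial results as they arrive. A sketch of consuming it, following the docstring above; the per-chunk field access is an assumption based on the non-streaming response shape:

    from genai import Client, Credentials
    from genai.schema import HumanMessage, TextGenerationParameters  # import path assumed

    client = Client(credentials=Credentials.from_env())
    for chunk in client.text.chat.create_stream(
        model_id="meta-llama/llama-3-70b-instruct",
        messages=[HumanMessage(content="Describe the game Chess?")],
        parameters=TextGenerationParameters(max_new_tokens=100),
    ):
        if chunk.results:  # assumed shape, mirroring the one-shot response
            print(chunk.results[0].generated_text or "", end="", flush=True)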
@@ -12,22 +12,22 @@ interactions:
uri: https://api.com/v2/text/generation/limits?version=2023-11-22
response:
body:
string: '{"result":{"concurrency":{"limit":200,"remaining":200}}}'
string: '{"result":{"concurrency":{"limit":10,"remaining":10}}}'
headers:
cache-control:
- private
content-length:
- - '56'
+ - '54'
content-type:
- application/json; charset=utf-8
content-version:
- '2023-11-22'
date:
- - Wed, 20 Mar 2024 08:27:00 GMT
+ - Mon, 20 May 2024 14:12:17 GMT
keep-alive:
- timeout=72
set-cookie:
- - 2eef5f4c257f6bca76e8da5586743beb=1e3545705d3737525c7629e9f28dc93d; path=/;
+ - 2eef5f4c257f6bca76e8da5586743beb=de04e502e3969a930842cae695f31f86; path=/;
HttpOnly; Secure; SameSite=None
vary:
- accept-encoding
@@ -75,47 +75,41 @@ interactions:
`src_lang`, which should be the language of the text to translate and `tgt_lang`,
which should be the language for the desired ouput language. Both `src_lang`
and `tgt_lang` are written in plain English, such as ''Romanian'', or ''Albanian''.
- It returns the text translated in `tgt_lang`.\n- image_transformer: This is
- a tool that transforms an image according to a prompt. It takes two inputs:
- `image`, which should be the image to transform, and `prompt`, which should
- be the prompt to use to change it. The prompt should only contain descriptive
- adjectives, as if completing the prompt of the original image. It returns the
- modified image.\n- text_downloader: This is a tool that downloads a file from
- a `url`. It takes the `url` as input, and returns the text contained in the
- file.\n- image_generator: This is a tool that creates an image according to
- a prompt, which is a text description. It takes an input named `prompt` which
- contains the image description and outputs an image.\n- video_generator: This
- is a tool that creates a video according to a text description. It takes an
- input named `prompt` which contains the image description, as well as an optional
- input `seconds` which will be the duration of the video. The default is of two
- seconds. The tool outputs a video object.\n\n\nTask: \"Answer the question in
- the variable `question` about the image stored in the variable `image`. The
- question is in French.\"\n\nI will use the following tools: `translator` to
- translate the question into English and then `image_qa` to answer the question
- on the input image.\n\nAnswer:\n```py\ntranslated_question = translator(question=question,
- src_lang=\"French\", tgt_lang=\"English\")\nprint(f\"The translated question
- is {translated_question}.\")\nanswer = image_qa(image=image, question=translated_question)\nprint(f\"The
- answer is {answer}\")\n```\n\nTask: \"Identify the oldest person in the `document`
- and create an image showcasing the result.\"\n\nI will use the following tools:
- `document_qa` to find the oldest person in the document, then `image_generator`
- to generate an image according to the answer.\n\nAnswer:\n```py\nanswer = document_qa(document,
- question=\"What is the oldest person?\")\nprint(f\"The answer is {answer}.\")\nimage
- = image_generator(answer)\n```\n\nTask: \"Generate an image using the text given
- in the variable `caption`.\"\n\nI will use the following tool: `image_generator`
- to generate an image.\n\nAnswer:\n```py\nimage = image_generator(prompt=caption)\n```\n\nTask:
- \"Summarize the text given in the variable `text` and read it out loud.\"\n\nI
- will use the following tools: `summarizer` to create a summary of the input
- text, then `text_reader` to read it out loud.\n\nAnswer:\n```py\nsummarized_text
- = summarizer(text)\nprint(f\"Summary: {summarized_text}\")\naudio_summary =
- text_reader(summarized_text)\n```\n\nTask: \"Answer the question in the variable
- `question` about the text in the variable `text`. Use the answer to generate
- an image.\"\n\nI will use the following tools: `text_qa` to create the answer,
- then `image_generator` to generate an image according to the answer.\n\nAnswer:\n```py\nanswer
- = text_qa(text=text, question=question)\nprint(f\"The answer is {answer}.\")\nimage
- = image_generator(answer)\n```\n\nTask: \"Caption the following `image`.\"\n\nI
- will use the following tool: `image_captioner` to generate a caption for the
- image.\n\nAnswer:\n```py\ncaption = image_captioner(image)\n```\n\nTask: \"Summarize
- the chat\"\n\nI will use the following", "model_id": "meta-llama/llama-2-70b",
+ It returns the text translated in `tgt_lang`.\n- image_transformation: This
+ is a tool that transforms an image according to a prompt and returns the modified
+ image.\n- text_downloader: This is a tool that downloads a file from a `url`.
+ It takes the `url` as input, and returns the text contained in the file.\n-
+ image_generator: This is a tool that creates an image according to a prompt,
+ which is a text description.\n- video_generator: This is a tool that creates
+ a video according to a text description. It takes an optional input `seconds`
+ which will be the duration of the video. The default is of two seconds. The
+ tool outputs a video object.\n\n\nTask: \"Answer the question in the variable
+ `question` about the image stored in the variable `image`. The question is in
+ French.\"\n\nI will use the following tools: `translator` to translate the question
+ into English and then `image_qa` to answer the question on the input image.\n\nAnswer:\n```py\ntranslated_question
+ = translator(question=question, src_lang=\"French\", tgt_lang=\"English\")\nprint(f\"The
+ translated question is {translated_question}.\")\nanswer = image_qa(image=image,
+ question=translated_question)\nprint(f\"The answer is {answer}\")\n```\n\nTask:
+ \"Identify the oldest person in the `document` and create an image showcasing
+ the result.\"\n\nI will use the following tools: `document_qa` to find the oldest
+ person in the document, then `image_generator` to generate an image according
+ to the answer.\n\nAnswer:\n```py\nanswer = document_qa(document, question=\"What
+ is the oldest person?\")\nprint(f\"The answer is {answer}.\")\nimage = image_generator(answer)\n```\n\nTask:
+ \"Generate an image using the text given in the variable `caption`.\"\n\nI will
+ use the following tool: `image_generator` to generate an image.\n\nAnswer:\n```py\nimage
+ = image_generator(prompt=caption)\n```\n\nTask: \"Summarize the text given in
+ the variable `text` and read it out loud.\"\n\nI will use the following tools:
+ `summarizer` to create a summary of the input text, then `text_reader` to read
+ it out loud.\n\nAnswer:\n```py\nsummarized_text = summarizer(text)\nprint(f\"Summary:
+ {summarized_text}\")\naudio_summary = text_reader(summarized_text)\n```\n\nTask:
+ \"Answer the question in the variable `question` about the text in the variable
+ `text`. Use the answer to generate an image.\"\n\nI will use the following tools:
+ `text_qa` to create the answer, then `image_generator` to generate an image
+ according to the answer.\n\nAnswer:\n```py\nanswer = text_qa(text=text, question=question)\nprint(f\"The
+ answer is {answer}.\")\nimage = image_generator(answer)\n```\n\nTask: \"Caption
+ the following `image`.\"\n\nI will use the following tool: `image_captioner`
+ to generate a caption for the image.\n\nAnswer:\n```py\ncaption = image_captioner(image)\n```\n\nTask:
+ \"Summarize the chat\"\n\nI will use the following", "model_id": "meta-llama/llama-2-70b",
"parameters": {"max_new_tokens": 500, "stop_sequences": ["Task:"]}}'
headers:
accept:
@@ -125,30 +119,24 @@
connection:
- keep-alive
content-length:
- - '6535'
+ - '6135'
content-type:
- application/json
method: POST
uri: https://api.com/v2/text/generation?version=2024-03-19
response:
body:
string: '{"id":"230e435f-6c47-46bd-b0f2-ddc56c834dee","model_id":"meta-llama/llama-2-70b","created_at":"2024-03-20T08:27:06.828Z","results":[{"generated_text":"
tools: `document_qa` to identify the oldest person, then `image_captioner`
to generate a caption about that person, I will then generate an image from
that caption using `image_generator`, and finally `doc_to_text` and `text_reader`
to read the generated text. The list of tools will be executed one after the
other.\n\nAnswer:\n```py\nanswer = document_qa(document, question=\"What is
the oldest person?\")\ncaption = image_captions(answer)\nimage = image_generator(prompt=caption)\noutput
= summary_of_document(document)\n```\n\nTask:","generated_token_count":142,"input_token_count":1580,"stop_reason":"stop_sequence","stop_sequence":"Task:","seed":3264333442}]}'
string: '{"id":"22234f91-ef73-4721-9c85-04a22aad9296","model_id":"meta-llama/llama-2-70b","created_at":"2024-05-20T14:12:25.703Z","results":[{"generated_text":"
tool: `summarizer` to generate a summary of the chat.\n\nAnswer:\n```py\n))","generated_token_count":25,"input_token_count":1490,"stop_reason":"eos_token","seed":3150004800}]}'
headers:
content-length:
- - '817'
+ - '327'
content-type:
- application/json; charset=utf-8
content-version:
- '2024-03-19'
date:
- - Wed, 20 Mar 2024 08:27:06 GMT
+ - Mon, 20 May 2024 14:12:25 GMT
keep-alive:
- timeout=72
vary:
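Note: this cassette (evidently the Hugging Face agent fixture, judging by the tool descriptions in the prompt) is re-recorded rather than migrated. The request still targets meta-llama/llama-2-70b, but the prompt was shortened (content-length 6535 to 6135, input tokens 1580 to 1490), the recorded concurrency limit dropped from 200 to 10, and the new completion ends on eos_token instead of the "Task:" stop sequence.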
@@ -8,7 +8,8 @@ interactions:
explain why instead of answering something incorrectly.\n If you don''t know
the answer to a question, please don''t share false information.\n ", "role":
"system"}, {"content": "What is NLP and how it has evolved over the years?",
"role": "user"}], "model_id": "meta-llama/llama-2-70b-chat", "parameters": {}}'
"role": "user"}], "model_id": "meta-llama/llama-3-70b-instruct", "parameters":
{}}'
headers:
accept:
- '*/*'
@@ -17,28 +18,29 @@
connection:
- keep-alive
content-length:
- - '730'
+ - '734'
content-type:
- application/json
method: POST
uri: https://api.com/v2/text/chat?version=2024-03-19
response:
body:
string: '{"id":"1b962f25-87c0-4b96-9c77-739bd9f5b2f6","model_id":"meta-llama/llama-2-70b-chat","created_at":"2024-03-20T08:27:23.202Z","results":[{"generated_text":" NLP
(Natural Language Processing) refers to the branch of Artificial Intelligence","generated_token_count":20,"input_token_count":160,"stop_reason":"max_tokens","seed":683510637}],"conversation_id":"adc37c54-87cc-43ad-bf50-16013eec263f"}'
string: '{"id":"01f28b95-5ecd-4a01-9a2b-7803c6db824f","model_id":"meta-llama/llama-3-70b-instruct","created_at":"2024-05-20T14:12:28.750Z","results":[{"generated_text":"Natural
Language Processing (NLP) is a subfield of artificial intelligence (AI) that
deals with","generated_token_count":20,"input_token_count":134,"stop_reason":"max_tokens","seed":1025128500}],"conversation_id":"90edb70b-c4e7-45f5-81c3-fab231227b7a"}'
headers:
content-length:
- - '395'
+ - '412'
content-type:
- application/json; charset=utf-8
content-version:
- '2024-03-19'
date:
- - Wed, 20 Mar 2024 08:27:23 GMT
+ - Mon, 20 May 2024 14:12:28 GMT
keep-alive:
- timeout=72
set-cookie:
- - 2eef5f4c257f6bca76e8da5586743beb=1e3545705d3737525c7629e9f28dc93d; path=/;
+ - 2eef5f4c257f6bca76e8da5586743beb=fad6ffcbebbca45726eaa14ee11d2c44; path=/;
HttpOnly; Secure; SameSite=None
vary:
- accept-encoding
@@ -8,7 +8,8 @@ interactions:
explain why instead of answering something incorrectly.\n If you don''t know
the answer to a question, please don''t share false information.\n ", "role":
"system"}, {"content": "What is NLP and how it has evolved over the years?",
"role": "user"}], "model_id": "meta-llama/llama-2-70b-chat", "parameters": {}}'
"role": "user"}], "model_id": "meta-llama/llama-3-70b-instruct", "parameters":
{}}'
headers:
accept:
- '*/*'
@@ -17,29 +18,29 @@
connection:
- keep-alive
content-length:
- - '730'
+ - '734'
content-type:
- application/json
method: POST
uri: https://api.com/v2/text/chat?version=2024-03-19
response:
body:
string: '{"id":"fb9c6250-62c1-412e-beb3-e8db76569817","model_id":"meta-llama/llama-2-70b-chat","created_at":"2024-03-20T08:27:17.855Z","results":[{"generated_text":" NLP,
or Natural Language Processing, is a subfield of artificial intelligence that
deals","generated_token_count":20,"input_token_count":160,"stop_reason":"max_tokens","seed":3913660195}],"conversation_id":"a8512cf7-652f-42cc-9471-ff4996aecef9"}'
string: '{"id":"d8160c0f-36a3-49a0-b001-a3f9cbdb9643","model_id":"meta-llama/llama-3-70b-instruct","created_at":"2024-05-20T14:12:27.354Z","results":[{"generated_text":"NLP,
or Natural Language Processing, is a subfield of artificial intelligence (AI)
that deals","generated_token_count":20,"input_token_count":134,"stop_reason":"max_tokens","seed":2032949123}],"conversation_id":"bcc5d917-5e64-48e0-9047-ac5e032e8f21"}'
headers:
content-length:
- - '403'
+ - '410'
content-type:
- application/json; charset=utf-8
content-version:
- '2024-03-19'
date:
- - Wed, 20 Mar 2024 08:27:17 GMT
+ - Mon, 20 May 2024 14:12:27 GMT
keep-alive:
- timeout=72
set-cookie:
- - 2eef5f4c257f6bca76e8da5586743beb=c7a0964ef13502a09f12e4a9f37d8d7f; path=/;
+ - 2eef5f4c257f6bca76e8da5586743beb=fad6ffcbebbca45726eaa14ee11d2c44; path=/;
HttpOnly; Secure; SameSite=None
vary:
- accept-encoding
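Note: both chat cassettes swap the model id in the request body and were re-recorded. The input token count for the identical prompt drops from 160 to 134, presumably because llama-3-instruct applies a different chat template than llama-2-chat.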
2 changes: 1 addition & 1 deletion tests/integration/extensions/test_langchain_chat.py
@@ -18,7 +18,7 @@
@pytest.mark.integration
class TestLangChainChat:
def setup_method(self):
self.model_id = "meta-llama/llama-2-70b-chat"
self.model_id = "meta-llama/llama-3-70b-instruct"

@pytest.fixture
def parameters(self):