https://medium.com/@lokaregns/using-large-language-models-apis-with-python-a-comprehensive-guide-0020a51bf5b6

https://github.com/cheahjs/free-llm-api-resources

##### Initialise

In [2]:
from initialise import *

##### Simple LLM function for "one-shot" prompts
- [Google Gemini](https://aistudio.google.com/app)
- [Mistral AI](https://docs.mistral.ai/getting-started/quickstart/)
- [GitHub Marketplace Models](https://github.com/marketplace/models)

In [None]:
def oneshot_llm(model_name: str, prompt: str, system_message: str = "Answer briefly") -> None:
    """Send a single prompt to one of the configured LLM back ends and print the reply.

    The names `clients`, `params`, `api_keys`, `genai`, `Mistral`,
    `SystemMessage` and `UserMessage` are not defined in this cell; they are
    presumably provided by `from initialise import *`.

    Args:
        model_name: Which back end to use. "google" and "mistral" use the
            matching entry in `clients`; "mistral-large" uses a Mistral client
            pointed at the GitHub Models endpoint; any other key of `params`
            is sent through `clients["github"]`.
        prompt: The user message.
        system_message: The system instruction sent alongside the prompt.

    Returns:
        None. The reply text is printed, not returned.

    Raises:
        ValueError: If `model_name` is not one of the supported names.
    """
    if model_name == "google":
        response = clients[model_name].models.generate_content(
            model="gemini-2.0-flash",
            config=genai.types.GenerateContentConfig(system_instruction=system_message),
            contents=prompt,
        )
        # Gemini exposes the reply directly as `.text`, unlike the
        # chat-completion style responses handled below.
        print(response.text)
        return

    if model_name == "mistral":
        response = clients[model_name].chat.complete(
            model="mistral-small-latest",
            messages=[
                {"role": "system", "content": system_message},
                {"role": "user", "content": prompt},
            ],
        )
    elif model_name == "mistral-large" or model_name in params:
        # Both GitHub-hosted paths take the same messages and read the same
        # per-model settings from `params`.
        settings = params[model_name]
        request = {
            "messages": [SystemMessage(system_message), UserMessage(prompt)],
            "model": settings["model"],
            "temperature": settings["temperature"],
            "top_p": settings["top_p"],
            "max_tokens": settings["max_tokens"],
        }
        if model_name == "mistral-large":
            # "mistral-large" is called through the Mistral SDK rather than
            # the shared GitHub client, authenticated with the GitHub key.
            client = Mistral(
                api_key=api_keys["github"],
                server_url="https://models.inference.ai.azure.com",
            )
            response = client.chat.complete(**request)
        else:
            response = clients["github"].complete(**request)
    else:
        # Without this branch an unknown name fell through every condition
        # and the call did nothing, which hides typos in `model_name`.
        supported = list(dict.fromkeys(["google", "mistral", "mistral-large", *params]))
        raise ValueError(
            f"Unknown model_name {model_name!r}; expected one of {supported}"
        )

    print(response.choices[0].message.content)


In [46]:
oneshot_llm("mistral-large", "what model are you?")

I am a Large Language Model trained by Mistral AI.


##### Langchain for conversational memory
- https://python.langchain.com/docs/tutorials/chatbot/

In [38]:
# `select_model` is not defined in this notebook; it presumably comes from
# `from initialise import *` and returns the chat model for the given provider.
model = select_model("google")

# Prompt layout: a fixed system instruction, then whatever messages are
# supplied under the "messages" key.
prompt_template = ChatPromptTemplate.from_messages(
    [
        ("system", "Answer in 30 words"),
        MessagesPlaceholder(variable_name="messages"),
    ]
)

def call_model(state: MessagesState):
    """Graph node: render the prompt from `state` and query the model.

    Returns a dict whose "messages" entry is a one-element list holding the
    model's response.
    """
    rendered_prompt = prompt_template.invoke(state)
    return {"messages": [model.invoke(rendered_prompt)]}


# Single-node graph: START -> "model".
workflow = StateGraph(state_schema=MessagesState)
workflow.add_node("model", call_model)
workflow.add_edge(START, "model")

# Compile the graph with a MemorySaver instance as its checkpointer.
memory = MemorySaver()
app = workflow.compile(checkpointer=memory)

# Passed with every call below; identifies the conversation thread.
config = {"configurable": {"thread_id": "conversation_1"}}

def stream_output(stream):
    """Print the `.content` of every streamed chunk as it arrives.

    `stream` must yield 2-item pairs. The first item's `.content` is printed
    without a trailing newline and flushed immediately; the second item is
    ignored.
    """
    for message_chunk, _metadata in stream:
        print(message_chunk.content, end="", flush=True)

In [None]:
# app.invoke({"messages": [HumanMessage("What is a cat?")]}, config,)["messages"]

[HumanMessage(content='What is a cat?', additional_kwargs={}, response_metadata={}, id='08204268-02f0-41be-81b4-57e8bbd2e71c'),
 AIMessage(content='A small, domesticated carnivorous mammal with soft fur.', additional_kwargs={}, response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'model_name': 'gemini-2.0-flash', 'safety_ratings': []}, id='run-68ad8514-47d7-4d74-ba14-5b6e5bdf0d51-0', usage_metadata={'input_tokens': 13, 'output_tokens': 12, 'total_tokens': 25, 'input_token_details': {'cache_read': 0}})]

In [None]:
# Consuming the stream through `stream_output(...)` is what makes the
# difference: only then did the reply show up in the thread's state.
# NOTE(review): presumably because `app.stream(...)` is lazy and does no work
# until it is iterated; confirm.
stream_output(
    app.stream(
        {"messages": [HumanMessage("Do cats have friends?")]},
        config,
        stream_mode="messages",
    )
)

While cats don't form friendships like humans, they can develop strong bonds with other cats, showing affection through grooming, playing, and sharing resources.

In [None]:
# Follow-up question on the same thread; display the message list returned
# by the graph.
app.invoke(
    {"messages": [HumanMessage("Do _you_ have friends?")]},
    config,
)["messages"]

[HumanMessage(content='Do cats have friends?', additional_kwargs={}, response_metadata={}, id='57c79e80-58ab-44f5-8bcc-31fb551199a5'),
 AIMessage(content='Yes, cats can form friendships with other cats.', additional_kwargs={}, response_metadata={'safety_ratings': [], 'finish_reason': 'STOP', 'model_name': 'gemini-2.0-flash'}, id='run-fc192720-85c9-4809-b5fd-1befaff0c417', usage_metadata={'input_tokens': 13, 'output_tokens': 11, 'total_tokens': 24, 'input_token_details': {'cache_read': 0}}),
 HumanMessage(content='Do _you_ have friends?', additional_kwargs={}, response_metadata={}, id='37aa99f9-dc0c-4e20-90f4-fb79c8dc504f'),
 AIMessage(content="As an AI, I don't have friends.", additional_kwargs={}, response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'model_name': 'gemini-2.0-flash', 'safety_ratings': []}, id='run-f646ac99-9fc6-4c9a-9938-164ffb1fbbba-0', usage_metadata={'input_tokens': 30, 'output_tokens': 12, 'total_tokens': 42, 'in

In [144]:
# Inspect the messages currently stored for this thread.
app.get_state(config).values["messages"]

[HumanMessage(content='Do cats have friends?', additional_kwargs={}, response_metadata={}, id='57c79e80-58ab-44f5-8bcc-31fb551199a5'),
 AIMessage(content='Yes, cats can form friendships with other cats.', additional_kwargs={}, response_metadata={'safety_ratings': [], 'finish_reason': 'STOP', 'model_name': 'gemini-2.0-flash'}, id='run-fc192720-85c9-4809-b5fd-1befaff0c417', usage_metadata={'input_tokens': 13, 'output_tokens': 11, 'total_tokens': 24, 'input_token_details': {'cache_read': 0}}),
 HumanMessage(content='Do _you_ have friends?', additional_kwargs={}, response_metadata={}, id='37aa99f9-dc0c-4e20-90f4-fb79c8dc504f'),
 AIMessage(content="As an AI, I don't have friends.", additional_kwargs={}, response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'model_name': 'gemini-2.0-flash', 'safety_ratings': []}, id='run-f646ac99-9fc6-4c9a-9938-164ffb1fbbba-0', usage_metadata={'input_tokens': 30, 'output_tokens': 12, 'total_tokens': 42, 'in

In [None]:
# Observation: running invoke -> stream -> invoke, or stream -> stream -> ..., doesn't commit the streamed response to the app's state unless the stream is actually consumed.
# The stream has to be passed through the `stream_output` function (or iterated in some other way).
# Likely explanation: `app.stream(...)` is lazy, so the graph only runs, and only commits messages to the app's state, while the returned stream is being iterated.

##### Langchain with multimodal input
- https://python.langchain.com/docs/how_to/multimodal_inputs/

- Note: ChatMistralAI does not support multimodal input (e.g. images).
    - See more [here.](https://python.langchain.com/docs/integrations/chat/)

- Gemini accepts images passed by URL
- The other providers require the image data itself (e.g. base64-encoded)


In [186]:
# Model and prompt for the multimodal example (the image itself is fetched
# at the end of this cell).
model = select_model("google")

# Fixed system instruction, then whatever messages are supplied under the
# "messages" key.
prompt_template = ChatPromptTemplate.from_messages(
    [
        ("system", "Answer in 250 words or less. Explain to user in simple terms."),
        MessagesPlaceholder(variable_name="messages"),
    ]
)

def call_model(state: MessagesState):
    """Graph node: fill the prompt template from `state` and call the model.

    Returns a dict with the model's response as the only item in "messages".
    """
    filled_prompt = prompt_template.invoke(state)
    reply = model.invoke(filled_prompt)
    return {"messages": [reply]}


# Single-node graph: START -> "model".
workflow = StateGraph(state_schema=MessagesState)
workflow.add_node("model", call_model)
workflow.add_edge(START, "model")

# Fresh MemorySaver checkpointer and compiled app for this example.
memory = MemorySaver()
app = workflow.compile(checkpointer=memory)

# Passed with every call below; identifies the conversation thread.
config = {"configurable": {"thread_id": "conversation_1"}}

image_url = "https://scikit-learn.org/1.1/_images/sphx_glr_plot_roc_002.png"

# Download the image and base64-encode it for providers that need inline
# image data. Fail on HTTP errors here so that an error page is never
# encoded and sent to the model as if it were the image.
image_response = httpx.get(image_url)
image_response.raise_for_status()
image_data = base64.b64encode(image_response.content).decode("utf-8")

In [None]:
# Multimodal user turn: a text question plus the image as inline base64 data.
prompt = HumanMessage([
        {
            "type": "text",
            "text": "Explain to me what this plot shows",
        },
        {
            "type": "image",
            "source_type": "base64",
            "data": image_data,
            # The file behind `image_url` is a .png; "image/jpg" is not a
            # registered media type and did not match the data being sent.
            "mime_type": "image/png",
        },
        # Alternative that passes the image by URL instead of inline data:
        # {
        #     "type": "image",
        #     "source_type": "url",
        #     "url": image_url,
        # }
        ])

response = app.invoke({"messages": [prompt]}, config)
# Display the content of the latest message stored for this thread.
app.get_state(config)[0]["messages"][-1].content

'This plot is called a Receiver Operating Characteristic (ROC) curve, used to evaluate the performance of a classification model, especially when dealing with multiple classes.\n\n*   **Axes:** The x-axis represents the False Positive Rate (incorrectly predicted positives), and the y-axis represents the True Positive Rate (correctly predicted positives).\n\n*   **Curves:** Each colored line represents the ROC curve for a specific class (0, 1, and 2). The closer the curve is to the top-left corner, the better the model is at distinguishing that class.\n\n*   **Averages:** The plot also shows two average ROC curves:\n    *   **Micro-average:** Calculates the average performance across all classes.\n    *   **Macro-average:** Calculates the average performance for each class individually and then averages those scores.\n\n*   **Area Under the Curve (AUC):** The number in the legend indicates the AUC for each curve. A higher AUC (closer to 1) means better performance.\n\n*   **Dashed Line:

##### Langchain trimmed messages (to be implemented...)

In [60]:
from langchain_core.messages import SystemMessage, trim_messages

# Trimmer configuration: 65-token budget, "last" strategy, tokens counted by
# `model`, system message included, no partial messages, start on a human turn.
trimmer = trim_messages(
    max_tokens=65,
    strategy="last",
    token_counter=model,
    include_system=True,
    allow_partial=False,
    start_on="human",
)

# Sample conversation used to try the trimmer out.
messages = [
    SystemMessage(content="You're a good assistant"),
    HumanMessage(content="Hi! I'm Henry"),
    AIMessage(content="Hi!"),
    HumanMessage(content="I like vanilla ice cream"),
    AIMessage(content="Nice"),
    HumanMessage(content="What's 2 + 2"),
    AIMessage(content="4"),
    HumanMessage(content="Thanks"),
    AIMessage(content="No problem!"),
    HumanMessage(content="Having fun?"),
    AIMessage(content="Yes!"),
]

trimmer.invoke(messages)

[SystemMessage(content="You're a good assistant", additional_kwargs={}, response_metadata={}),
 HumanMessage(content="Hi! I'm bob", additional_kwargs={}, response_metadata={}),
 AIMessage(content='Hi!', additional_kwargs={}, response_metadata={}),
 HumanMessage(content='I like vanilla ice cream', additional_kwargs={}, response_metadata={}),
 AIMessage(content='Nice', additional_kwargs={}, response_metadata={}),
 HumanMessage(content="What's 2 + 2", additional_kwargs={}, response_metadata={}),
 AIMessage(content='4', additional_kwargs={}, response_metadata={}),
 HumanMessage(content='Thanks', additional_kwargs={}, response_metadata={}),
 AIMessage(content='No problem!', additional_kwargs={}, response_metadata={}),
 HumanMessage(content='Having fun?', additional_kwargs={}, response_metadata={}),
 AIMessage(content='Yes!', additional_kwargs={}, response_metadata={})]

In [None]:
# Start a new graph on the `State` schema.
# NOTE(review): `State` is not defined in any cell visible in this notebook;
# confirm where it comes from before running this section.
workflow = StateGraph(state_schema=State)

def call_model(state: State):
    """Graph node: trim the history, render the prompt and query the model.

    Reads `state["messages"]` and `state["language"]`; returns a dict whose
    "messages" entry is a one-element list holding the model's response.

    NOTE(review): none of the `prompt_template` definitions visible in this
    notebook declares a `language` variable; confirm which template is
    intended here.
    """
    recent_messages = trimmer.invoke(state["messages"])
    prompt_inputs = {"messages": recent_messages, "language": state["language"]}
    reply = model.invoke(prompt_template.invoke(prompt_inputs))
    return {"messages": [reply]}


# Wire the single "model" node to START.
workflow.add_node("model", call_model)
workflow.add_edge(START, "model")

# Compile the graph with a fresh MemorySaver checkpointer.
memory = MemorySaver()
app = workflow.compile(checkpointer=memory)