In [1]:
%pip install streamlit llama-index

Collecting streamlit
  Downloading streamlit-1.43.1-py2.py3-none-any.whl.metadata (8.9 kB)
Collecting llama-index
  Downloading llama_index-0.12.23-py3-none-any.whl.metadata (12 kB)
Collecting altair<6,>=4.0 (from streamlit)
  Using cached altair-5.5.0-py3-none-any.whl.metadata (11 kB)
Collecting blinker<2,>=1.0.0 (from streamlit)
  Using cached blinker-1.9.0-py3-none-any.whl.metadata (1.6 kB)
Collecting cachetools<6,>=4.0 (from streamlit)
  Using cached cachetools-5.5.2-py3-none-any.whl.metadata (5.4 kB)
Collecting click<9,>=7.0 (from streamlit)
  Using cached click-8.1.8-py3-none-any.whl.metadata (2.3 kB)
Collecting numpy<3,>=1.23 (from streamlit)
  Using cached numpy-2.2.3-cp312-cp312-win_amd64.whl.metadata (60 kB)
Collecting pandas<3,>=1.4.0 (from streamlit)
  Using cached pandas-2.2.3-cp312-cp312-win_amd64.whl.metadata (19 kB)
Collecting pillow<12,>=7.1.0 (from streamlit)
  Using cached pillow-11.1.0-cp312-cp312-win_amd64.whl.metadata (9.3 kB)
Collecting protobuf<6,>=3.20 (from st

In [3]:
%pip install llama-index-llms-ollama

Collecting llama-index-llms-ollama
  Downloading llama_index_llms_ollama-0.5.3-py3-none-any.whl.metadata (3.8 kB)
Collecting ollama>=0.4.3 (from llama-index-llms-ollama)
  Downloading ollama-0.4.7-py3-none-any.whl.metadata (4.7 kB)
Downloading llama_index_llms_ollama-0.5.3-py3-none-any.whl (7.8 kB)
Downloading ollama-0.4.7-py3-none-any.whl (13 kB)
Installing collected packages: ollama, llama-index-llms-ollama
Successfully installed llama-index-llms-ollama-0.5.3 ollama-0.4.7
Note: you may need to restart the kernel to use updated packages.


In [None]:

from llama_index.core.llms import ChatMessage
from llama_index.llms.ollama import Ollama
import streamlit as st
import logging
import time

# Configure the root logger at INFO so the logging.info(...) calls in the cells below are emitted.
logging.basicConfig(level=logging.INFO)

In [None]:
# Create the chat history list only when the session state does not already
# hold one, so an existing history is never overwritten.
if 'messages' not in st.session_state:
    st.session_state['messages'] = []

In [8]:
# Module-level Ollama client for the "llama3.3" model.
# NOTE(review): nothing else in the visible cells reads this `llm`; stream_chat
# builds its own client from the model name it is given. Confirm whether this
# cell is still needed.
llm = Ollama(model="llama3.3", request_timeout=120.0)

In [None]:
def stream_chat(model, messages, request_timeout=120.0):
    """Stream a chat completion from an Ollama model into the Streamlit page.

    A new Ollama client is created for ``model`` on every call. The growing
    response text is written into a single ``st.empty()`` placeholder as each
    chunk arrives.

    Args:
        model: Name of the Ollama model to query.
        messages: Chat messages, passed unchanged to the client's
            ``stream_chat``.
        request_timeout: Timeout forwarded to the Ollama client (presumably
            seconds). Defaults to 120.0, the value that was previously
            hard-coded.

    Returns:
        The complete response text: the concatenation of every streamed delta.

    Raises:
        Exception: Any error raised while creating the client or streaming is
            logged and re-raised unchanged.
    """
    try:
        client = Ollama(model=model, request_timeout=request_timeout)
        stream = client.stream_chat(messages)
        response = ""
        response_placeholder = st.empty()
        for chunk in stream:
            # `delta` is optional on streamed chunks; treat a missing delta as
            # empty text instead of failing on `str + None`.
            response += chunk.delta or ""
            response_placeholder.write(response)
        logging.info("Model: %s, Messages: %s, Response: %s", model, messages, response)
        return response
    except Exception as e:
        logging.error("Error during streaming: %s", e)
        # Bare raise re-raises the active exception with its original traceback.
        raise
        

Streamlit App Setup

In [None]:
def main():
    """First-step version of the app: title, model picker and prompt capture.

    Sets the page title, lets the user pick a model in the sidebar, and appends
    any submitted prompt to ``st.session_state.messages`` as a user message.
    No response is generated here.

    NOTE: a later cell in this notebook defines ``main`` again with the full
    chat loop; when the notebook is run top to bottom, that later definition
    is the one that is called.
    """
    st.title("Chat with LLMs Models")
    logging.info("App started")

    # Make sure the history list exists before it is appended to, so this
    # function does not depend on another cell having created it.
    if "messages" not in st.session_state:
        st.session_state.messages = []

    model = st.sidebar.selectbox("Choose a model", ["llama3", "phi3", "mistral"])
    logging.info("Model selected: %s", model)

    if prompt := st.chat_input("Your question"):
        st.session_state.messages.append({"role": "user", "content": prompt})
        logging.info("User input: %s", prompt)

In [None]:
def main():
    """Run the Streamlit chat page.

    On every run this sets the title, reads the selected model from the
    sidebar, records a newly submitted prompt (if any) in
    ``st.session_state.messages``, and re-renders the whole conversation.
    When a prompt was submitted it then streams a reply via ``stream_chat``
    and stores it in the history.

    History entries are dicts with ``"role"`` and ``"content"``. Assistant
    replies additionally carry ``"duration"`` (elapsed seconds), and failed
    attempts carry ``"error": True``. Keeping those out of ``"content"`` means
    only genuine conversation text is sent back to the model on later turns.
    """
    st.title("Chat with LLMs Models")  # Set the title of the Streamlit app
    logging.info("App started")

    # Make sure the history list exists before it is read or appended to.
    if "messages" not in st.session_state:
        st.session_state.messages = []

    # Sidebar for model selection
    model = st.sidebar.selectbox("Choose a model", ["llama3", "phi3", "mistral"])
    logging.info("Model selected: %s", model)

    # Save a newly submitted prompt to the chat history
    prompt = st.chat_input("Your question")
    if prompt:
        st.session_state.messages.append({"role": "user", "content": prompt})
        logging.info("User input: %s", prompt)

    # Render the conversation on every run, not only when a prompt was just
    # submitted; otherwise the history disappears whenever the page reruns
    # for another reason (for example, changing the selected model).
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.write(message["content"])
            if "duration" in message:
                st.write(f"Duration: {message['duration']:.2f} seconds")

    if not prompt:
        return

    # A user message was just appended, so a reply is always due here.
    with st.chat_message("assistant"):
        start_time = time.perf_counter()  # Monotonic clock for elapsed time
        logging.info("Generating response")

        with st.spinner("Writing..."):
            try:
                # Send only real conversation turns to the model; entries that
                # record an earlier failure are skipped.
                messages = [
                    ChatMessage(role=msg["role"], content=msg["content"])
                    for msg in st.session_state.messages
                    if not msg.get("error")
                ]
                response_message = stream_chat(model, messages)
                duration = time.perf_counter() - start_time
                st.session_state.messages.append(
                    {"role": "assistant", "content": response_message, "duration": duration}
                )
                st.write(f"Duration: {duration:.2f} seconds")
                logging.info("Response: %s, Duration: %.2f s", response_message, duration)

            except Exception as e:
                # Keep the failure visible in the history, flagged so it is
                # not replayed to the model as assistant text.
                st.session_state.messages.append(
                    {"role": "assistant", "content": str(e), "error": True}
                )
                st.error("An error occurred while generating the response.")
                logging.error("Error: %s", e)

# Entry point: build the chat page only when this file is executed as the
# top-level script, not when it is imported as a module.
if __name__ == "__main__":
    main()