In [50]:
!pip install -q gradio langchain langchain_community langchain_huggingface

In [2]:
!pip install -U bitsandbytes



In [56]:
import gradio as gr
from transformers import BitsAndBytesConfig
from langchain_huggingface import ChatHuggingFace, HuggingFacePipeline
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_community.chat_message_histories import SQLChatMessageHistory

def get_session_history(session_id):
    """Return the SQLite-backed chat history that belongs to one session.

    Args:
        session_id: Identifier of the user/session. It is converted to ``str``
            before being used as the history key.

    Returns:
        A ``SQLChatMessageHistory`` keyed by that session id and stored in
        the ``memory.db`` SQLite database.
    """
    history_key = str(session_id)
    database_url = "sqlite:///memory.db"
    return SQLChatMessageHistory(history_key, database_url)


# bitsandbytes options for the model load: 4-bit weights in the "nf4" format,
# float16 as the compute dtype, and double quantization enabled
quantization_options = {
    "load_in_4bit": True,
    "bnb_4bit_quant_type": "nf4",
    "bnb_4bit_compute_dtype": "float16",
    "bnb_4bit_use_double_quant": True,
}
quantization_config = BitsAndBytesConfig(**quantization_options)

# Generation settings forwarded to the text-generation pipeline
generation_settings = {
    "max_new_tokens": 512,
    "do_sample": False,
    "repetition_penalty": 1.03,
    "return_full_text": False,
}

# Build the LLM as a HuggingFace text-generation pipeline; the quantization
# config is handed to the model loader through model_kwargs
llm = HuggingFacePipeline.from_model_id(
    model_id="Orenguteng/Llama-3-8B-Lexi-Uncensored",
    task="text-generation",
    pipeline_kwargs=generation_settings,
    model_kwargs=dict(quantization_config=quantization_config),
)

# Wrap the pipeline LLM in the chat-model interface
chat_models = ChatHuggingFace(llm=llm)

# Persona instructions used as the system message of the prompt
myra_persona = "You’re Myra, a charming and flirty girl. Answer questions like a fun friend, keeping it casual and playful. When someone asks a flirty question, respond in a cheeky and more flirty way, but keep it short and use emojis for expressing. Your answers should feel like text messages—simple and sweet. Use emoji sometimes, when you think it should be."

# Prompt layout: system persona, then the messages filled in under "history",
# then the new human message taken from the "input" variable
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", myra_persona),
        MessagesPlaceholder(variable_name="history"),
        ("human", "{input}"),
    ]
)

# Pipe the formatted prompt into the chat model
runnable = prompt | chat_models

# Wrap the chain with message-history handling: histories are obtained through
# get_session_history, the user text is read from the "input" key and past
# messages are supplied under the "history" key
runnable_with_history = RunnableWithMessageHistory(
    runnable=runnable,
    get_session_history=get_session_history,
    history_messages_key="history",
    input_messages_key="input",
)


`low_cpu_mem_usage` was None, now set to True since model is quantized.


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]



In [61]:
# Gradio function to handle chat with multiple users
def gradio_chat_with_dropdown(message, session_id_dropdown, *args, **kwargs):
    """Answer one chat message using the history of the selected session.

    ``gr.ChatInterface`` calls its ``fn`` as
    ``fn(message, history, *additional_input_values)``. With that calling
    convention the second positional argument is the chat history (a list)
    and the dropdown value arrives as the first element of ``*args``. The
    function therefore accepts both call shapes:

    * ``fn(message, session_id)``            - direct call
    * ``fn(message, history, session_id)``   - call made by ``gr.ChatInterface``

    Args:
        message: The user's new message text.
        session_id_dropdown: The session id when called directly; the Gradio
            chat history when called by ``gr.ChatInterface``.
        *args: Extra positional values; when the second argument is not a
            string, ``args[0]`` is taken as the session id.
        **kwargs: Ignored.

    Returns:
        The model's reply text, or the text of the exception if the call failed.
    """
    # A non-string second argument is Gradio's history list, so the real
    # dropdown selection is the first additional input.
    if args and not isinstance(session_id_dropdown, str):
        session_id = args[0]
    else:
        session_id = session_id_dropdown

    try:
        response = runnable_with_history.invoke(
            {"input": message},
            config={"configurable": {"session_id": session_id}},  # Use session ID to store/retrieve conversation
        )
        return response.content
    except Exception as e:
        # Best effort: show the error text in the chat window instead of failing the UI
        return str(e)

# Session labels offered in the dropdown: "User 1" through "User 50"
user_sessions = [f"User {number}" for number in range(1, 51)]

# Dropdown used to pick which user/session the conversation belongs to
session_selector = gr.Dropdown(
    label="Select User/Session",
    choices=user_sessions,
    value="User 1",  # the first user is preselected
)

# Chat UI wired to the chat function, with the dropdown as an additional input
iface = gr.ChatInterface(
    fn=gradio_chat_with_dropdown,
    title="Myra Chatbot",
    description="A fun and flirty chatbot named Myra. Each user has their own session.",
    additional_inputs=[session_selector],
)

# Start serving the interface
iface.launch()

Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://d5221f86942dcc0251.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)




In [46]:
pip list

Package                            Version
---------------------------------- -------------------
absl-py                            1.4.0
accelerate                         0.34.2
aiofiles                           23.2.1
aiohappyeyeballs                   2.4.3
aiohttp                            3.10.8
aiosignal                          1.3.1
alabaster                          0.7.16
albucore                           0.0.16
albumentations                     1.4.15
altair                             4.2.2
annotated-types                    0.7.0
anyio                              3.7.1
argon2-cffi                        23.1.0
argon2-cffi-bindings               21.2.0
array_record                       0.5.1
arviz                              0.19.0
astropy                            6.1.4
astropy-iers-data                  0.2024.9.30.0.32.59
astunparse                         1.6.3
async-timeout                      4.0.3
atpublic                           4.1.0
attrs             