In [3]:
import json
import os

from llama_cpp import Llama

# Local GGUF model files loaded by the two Llama(...) cells below.
openhermes_path = r"C:\Users\DaysPC\Documents\Datasets\Transformers\openhermes-2.5-mistral-7b.Q4_K_M.gguf"
deepseek_path = r"C:\Users\DaysPC\Documents\Datasets\Transformers\deepseek-coder-6.7b-instruct-q4_k_m.gguf"

# Directory that save_chat() writes chat_<N>.json transcripts into.
# This is a raw string, so backslashes must NOT be doubled: the previous
# r"D:\\Jupytor\\..." spelling embedded literal double separators in every
# path built from it. Created up front so saving never fails on a missing
# folder.
HISTORY_DIR = r"D:\Jupytor\Wraps\Tessa\History"
os.makedirs(HISTORY_DIR, exist_ok=True)


In [None]:
# General-purpose chat model; chat() selects it whenever coding mode is off.
OpenHermes = Llama(
    # Which GGUF file to load.
    model_path=openhermes_path,
    # Context/batch sizing. n_ctx matches the 2048 total-token budget that
    # chat() hard-codes when sizing a reply.
    n_ctx=2048,
    n_batch=256,
    # Hardware-related settings, identical to the DeepSeekCode cell.
    n_gpu_layers=20,
    n_threads=6,
    use_mlock=True,
    # Keep llama.cpp's load/inference logging in the cell output.
    verbose=True,
)

In [None]:
# Code-oriented model; chat() selects it whenever coding mode is on.
DeepSeekCode = Llama(
    # Which GGUF file to load.
    model_path=deepseek_path,
    # Context/batch sizing. n_ctx matches the 2048 total-token budget that
    # chat() hard-codes when sizing a reply.
    n_ctx=2048,
    n_batch=256,
    # Hardware-related settings, identical to the OpenHermes cell.
    n_gpu_layers=20,
    n_threads=6,
    use_mlock=True,
    # Keep llama.cpp's load/inference logging in the cell output.
    verbose=True,
)

In [6]:
def format_prompt(session_history, current_input, for_model="OpenHermes"):
    """Render the conversation so far plus the new message as one prompt string.

    Args:
        session_history: iterable of ``(user, assistant, model_name)`` entries.
        current_input: the new user message to append as the final turn.
        for_model: ``"OpenHermes"`` or ``"DeepSeekCode"`` selects that model's
            template and keeps only the turns that model answered. Any other
            value produces a plain newline-separated transcript of every turn.

    Returns:
        The prompt text. For the two named models it ends with an open
        assistant marker for the model to continue from.

    NOTE(review): the ``<|user|>``/``<|assistant|>`` and ``User:``/``Assistant:``
    tags are reproduced as-is; confirm they match the templates documented on
    each model's card.
    """
    if for_model == "OpenHermes":
        earlier_turns = [
            f"<|user|>\n{user}\n<|assistant|>\n{assistant}\n"
            for user, assistant, model_name in session_history
            if model_name == "OpenHermes"
        ]
        final_turn = f"<|user|>\n{current_input}\n<|assistant|>\n"
    elif for_model == "DeepSeekCode":
        earlier_turns = [
            f"User: {user}\nAssistant: {assistant}\n"
            for user, assistant, model_name in session_history
            if model_name == "DeepSeekCode"
        ]
        final_turn = f"User: {current_input}\nAssistant:"
    else:
        # Unknown target: no filtering by model, no role markers.
        earlier_turns = [
            f"{user}\n{assistant}\n"
            for user, assistant, model_name in session_history
        ]
        final_turn = f"{current_input}\n"

    return "".join(earlier_turns) + final_turn


In [7]:
def chat(user_input, session_history, coding_mode):
    """Run one chat turn and record it in ``session_history``.

    NOTE: ``chat`` is defined again in the UI cell further down; once every
    cell has run, that later definition is the one bound to the name.

    Args:
        user_input: text typed by the user.
        session_history: list of ``(user, assistant, model_name)`` entries;
            the new turn is appended to it in place.
        coding_mode: truthy selects DeepSeekCode, falsy selects OpenHermes.

    Returns:
        ``(session_history, gr.update(value=""))`` - the history and an update
        that clears the input textbox.
    """
    # A blank submission would only add an empty turn to the history.
    if not user_input or not user_input.strip():
        return session_history, gr.update(value="")

    model_name = "DeepSeekCode" if coding_mode else "OpenHermes"
    model = DeepSeekCode if coding_mode else OpenHermes

    context_size = model.n_ctx()
    min_reply_tokens = 256  # room always kept free for the answer
    reply_cap = 1536 if coding_mode else 512

    # Count real tokens instead of whitespace-separated words, and drop the
    # oldest turns from the *prompt only* until prompt + reply fit. Without
    # this the prompt grows with every turn until it no longer fits the
    # context window and every later call fails. The stored history is left
    # untouched.
    prompt_history = list(session_history)
    while True:
        prompt = format_prompt(prompt_history, user_input, for_model=model_name)
        prompt_tokens = len(model.tokenize(prompt.encode("utf-8")))
        if prompt_tokens + min_reply_tokens <= context_size or not prompt_history:
            break
        prompt_history.pop(0)

    # Never pass a non-positive budget: llama-cpp-python interprets that as
    # "no limit", which would silently bypass the cap.
    generation_tokens = max(1, min(context_size - prompt_tokens, reply_cap))

    output = model(
        prompt,
        max_tokens=generation_tokens,
        stop=None if coding_mode else ["<|user|>", "<|assistant|>"],
    )
    response = output["choices"][0]["text"].strip()

    session_history.append((user_input, response, model_name))
    return session_history, gr.update(value="")


In [8]:
def save_chat(history):
    """Write ``history`` to a new ``chat_<N>.json`` file inside HISTORY_DIR.

    NOTE: ``save_chat`` is defined again in the UI cell further down; once
    every cell has run, that later definition is the one bound to the name.

    Args:
        history: the session history to serialise with ``json.dump``. Nothing
            is written when it is empty or ``None``.

    Returns:
        None.
    """
    if not history:
        return
    # Start from the existing numbering scheme (file count + 1) but skip any
    # name that is already taken. Counting files alone reuses an existing
    # number as soon as an older chat has been deleted, which would silently
    # overwrite a saved conversation.
    index = len(os.listdir(HISTORY_DIR)) + 1
    filename = os.path.join(HISTORY_DIR, f"chat_{index}.json")
    while os.path.exists(filename):
        index += 1
        filename = os.path.join(HISTORY_DIR, f"chat_{index}.json")
    with open(filename, "w", encoding="utf-8") as f:
        json.dump(history, f, indent=2)

def load_chat(file):
    """Read a saved chat history back from a JSON file.

    NOTE: ``load_chat`` is defined again in the UI cell further down (with a
    different return shape); once every cell has run, that later definition
    is the one bound to the name.

    Args:
        file: ``None``, a filesystem path string, or an object exposing the
            path as ``.name`` (what the upload component hands over here).

    Returns:
        The decoded JSON content, or ``[]`` when ``file`` is ``None``.
    """
    if file is None:
        return []
    # Accept both upload objects (path in .name) and plain path strings.
    path = getattr(file, "name", file)
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)

In [9]:
import gradio as gr

# Custom stylesheet handed to gr.Blocks(css=css) below. The #id selectors
# correspond to the elem_id values given to components in the UI definition
# (chatbot, input-row, textbox, save-btn, open-btn, send-btn).
css = """
/* Clear background for chat */
#chatbot .message.user {
    background-color: transparent !important;
    color: #d1d5db;
    font-weight: 500;
}
#chatbot .message.bot {
    background-color: transparent !important;
    color: #ffffff;
}

/* Input styling */
#input-row {
    display: flex;
    margin-top: 10px;
}
#textbox {
    flex-grow: 1;
    background-color: #1f1f1f;
    color: #eee;
}

/* Button styling */
#save-btn, #open-btn {
    height: 60px !important;
    font-size: 16px;
    padding: 10px 16px;
}

/* Send Button Blue */
#send-btn {
    background-color: #007BFF !important;
    color: white !important;
    border: none !important;
}
#send-btn:hover {
    background-color: #0056b3 !important;
}

/* Remove chatbot box style */
#chatbot {
    background: transparent !important;
    box-shadow: none !important;
}
"""


def chat(user_input, session_history, coding_mode):
    """Run one chat turn, record it, and return the chatbot display.

    Args:
        user_input: text typed by the user.
        session_history: list of ``(user, assistant, model_name)`` entries;
            the new turn is appended to it in place.
        coding_mode: truthy selects DeepSeekCode, falsy selects OpenHermes.

    Returns:
        ``(chatbot_display, gr.update(value=""))`` where ``chatbot_display``
        is the history as ``[user, assistant]`` pairs (model name stripped)
        and the update clears the input textbox.
    """
    # A blank submission would only add an empty turn to the history; just
    # re-render what is already there.
    if not user_input or not user_input.strip():
        return [[u, a] for u, a, _ in session_history], gr.update(value="")

    model_name = "DeepSeekCode" if coding_mode else "OpenHermes"
    model = DeepSeekCode if coding_mode else OpenHermes

    context_size = model.n_ctx()
    min_reply_tokens = 256  # room always kept free for the answer
    reply_cap = 1536 if coding_mode else 512

    # Count real tokens instead of whitespace-separated words, and drop the
    # oldest turns from the *prompt only* until prompt + reply fit. Without
    # this the prompt grows with every turn until it no longer fits the
    # context window and every later call fails. The stored history (and so
    # the visible chat) is left untouched.
    prompt_history = list(session_history)
    while True:
        prompt = format_prompt(prompt_history, user_input, for_model=model_name)
        prompt_tokens = len(model.tokenize(prompt.encode("utf-8")))
        if prompt_tokens + min_reply_tokens <= context_size or not prompt_history:
            break
        prompt_history.pop(0)

    # Never pass a non-positive budget: llama-cpp-python interprets that as
    # "no limit", which would silently bypass the cap.
    generation_tokens = max(1, min(context_size - prompt_tokens, reply_cap))

    output = model(
        prompt,
        max_tokens=generation_tokens,
        stop=None if coding_mode else ["<|user|>", "<|assistant|>"],
    )
    response = output["choices"][0]["text"].strip()

    # Keep the model name with each turn so format_prompt can filter by it.
    session_history.append((user_input, response, model_name))

    # The Chatbot component only takes [user, assistant] pairs.
    chatbot_display = [[u, a] for u, a, _ in session_history]

    return chatbot_display, gr.update(value="")


def load_chat(file):
    """Read a saved chat history and derive the chatbot display from it.

    Args:
        file: ``None``, a filesystem path string, or an object exposing the
            path as ``.name`` (what the upload component hands over here).

    Returns:
        ``(history, chatbot_display)``: the decoded JSON entries
        (``[user, assistant, model_name]`` each) and the same turns reduced
        to ``[user, assistant]`` pairs. Both are empty lists when ``file`` is
        ``None``.
    """
    if file is None:
        return [], []
    # Accept both upload objects (path in .name) and plain path strings.
    path = getattr(file, "name", file)
    with open(path, "r", encoding="utf-8") as f:
        data = json.load(f)
    chatbot_display = [[u, a] for u, a, _ in data]  # strip model_name
    return data, chatbot_display


def save_chat(history):
    """Write ``history`` to a new ``chat_<N>.json`` file inside HISTORY_DIR.

    Args:
        history: the session history to serialise with ``json.dump``. Nothing
            is written when it is empty or ``None``.

    Returns:
        None.
    """
    if not history:
        return
    # Start from the existing numbering scheme (file count + 1) but skip any
    # name that is already taken. Counting files alone reuses an existing
    # number as soon as an older chat has been deleted, which would silently
    # overwrite a saved conversation.
    index = len(os.listdir(HISTORY_DIR)) + 1
    filename = os.path.join(HISTORY_DIR, f"chat_{index}.json")
    while os.path.exists(filename):
        index += 1
        filename = os.path.join(HISTORY_DIR, f"chat_{index}.json")
    with open(filename, "w", encoding="utf-8") as f:
        json.dump(history, f, indent=2)


# Build the chat UI. Components are laid out in the order they are created,
# so statement order in this block is significant.
with gr.Blocks(css=css) as demo:
    # Header row: title on the left, open/save buttons on the right.
    with gr.Row():
        gr.Markdown("<h2 style='color:#007BFF;'>💬 OpenHermes Chat (GTX 1650m)</h2>")
        with gr.Row():
            open_btn = gr.Button("📂 Open Chat", elem_id="open-btn", scale=1)
            save_btn = gr.Button("💾 Save Chat", elem_id="save-btn", scale=1)

    chatbot = gr.Chatbot(elem_id="chatbot", height=400)
    # `state` carries the full history as (user, assistant, model_name)
    # entries; `chatbot` only ever receives the [user, assistant] pairs.
    state = gr.State([])
    # Whether the next message goes to DeepSeekCode (True) or OpenHermes (False).
    coding_mode = gr.State(False)

    with gr.Row(elem_id="input-row"):
        txt = gr.Textbox(placeholder="Type here...", show_label=False, container=False, elem_id="textbox")
        send = gr.Button("Send", elem_id="send-btn")

    # Toggle button for coding mode, placed below the input row.
    coding_btn = gr.Button("💻 Coding Mode: OFF", elem_id="send-btn")  # NOTE(review): same elem_id as `send` (picks up the #send-btn CSS) - duplicate DOM ids; confirm intended

    def toggle_coding(current):
        """Flip the coding-mode flag and return it with the matching button label."""
        new_mode = not current
        label = "💻 Coding Mode: ON" if new_mode else "💻 Coding Mode: OFF"
        return new_mode, gr.update(value=label)

    coding_btn.click(fn=toggle_coding, inputs=coding_mode, outputs=[coding_mode, coding_btn])

    # Invisible file component; open_btn's JS below is meant to trigger its
    # <input type=file>.
    # NOTE(review): this only works if Gradio still renders the <input> for a
    # visible=False component - confirm on the installed Gradio version.
    hidden_file = gr.File(visible=False, file_types=[".json"])

    # Load replaces both the stored history and the visible chat; save
    # writes the stored history to disk and updates nothing in the UI.
    hidden_file.change(fn=load_chat, inputs=hidden_file, outputs=[state, chatbot])
    save_btn.click(fn=save_chat, inputs=state, outputs=[])

    # Enter key and Send button run the same handler. `state` is not listed
    # in outputs: chat() appends to the list it is given.
    # NOTE(review): this relies on Gradio passing the same list object on
    # every call - confirm in-place mutation of gr.State persists.
    txt.submit(fn=chat, inputs=[txt, state, coding_mode], outputs=[chatbot, txt])
    send.click(fn=chat, inputs=[txt, state, coding_mode], outputs=[chatbot, txt])
    # Re-derive the chatbot pairs whenever `state` reports a change.
    state.change(fn=lambda h: [[u, a] for u, a, _ in h], inputs=state, outputs=chatbot)


    # Pure client-side handler: clicks the first file input found on the page.
    open_btn.click(None, None, None, js="() => document.querySelector('input[type=file]').click()")

demo.launch()


  chatbot = gr.Chatbot(elem_id="chatbot", height=400)


* Running on local URL:  http://127.0.0.1:7860
* To create a public link, set `share=True` in `launch()`.


