# LLM Chatbot

In [12]:
# Libraries
## pytorch framework
import torch # installed by --index-url https://download.pytorch.org/whl/cu118 to GPU availability
## AutoModelForCausalLM -> pretrained model
## AutoTokenizer -> tokenizer
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
# Visual
import gradio as gr


In [16]:
# Report which device PyTorch can use. The label is computed once as a value
# instead of calling print() inside a conditional expression for its side effect.
device_label = "GPU." if torch.cuda.is_available() else "CPU."
print(f"I will use: {device_label}")

I will use: GPU.


In [None]:
# Load the tokenizer and the model (GPT-2 medium; change model_name to try another causal LM)
model_name = "gpt2-medium"
# Decide the target device once so this cell also runs on CPU-only machines.
device = "cuda" if torch.cuda.is_available() else "cpu"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    # Half precision is meant for the GPU; stay in float32 on CPU, where
    # float16 kernels are typically slow or missing.
    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
    # trust_remote_code=True, # only for less common models (always review the remote code before enabling)
).to(device)  # one explicit placement; not combined with device_map="auto", which already dispatches the weights

## Chat loop

In [9]:
# Chat loop
# The conversation is stored as one growing tensor of token ids: every user turn
# (terminated by EOS) is appended to it, and generate() returns prompt + reply.
chat_history_ids = None
n_turns = 10
max_new_tokens = 1000
# Prompt and reply share a single context window (1024 positions for GPT-2).
context_window = getattr(model.config, "max_position_embeddings", 1024)
# Cap the reply at half of the window so that recent history always fits too.
reply_budget = min(max_new_tokens, context_window // 2)
prompt_budget = context_window - reply_budget
print("Start chatting with the bot (type 'quit' to stop)")

for _ in range(n_turns):
    # User input (echoed, because the notebook output does not show what was typed)
    user_input = input(">> User: ")
    print(user_input)
    if user_input.strip().lower() == "quit":
        break

    # Tokenize the input on the same device as the model (no hard-coded "cuda")
    new_input_ids = tokenizer.encode(user_input + tokenizer.eos_token, return_tensors='pt').to(model.device)

    # Build the prompt (history + new turn, if there is a history)
    if chat_history_ids is None:
        bot_input_ids = new_input_ids
    else:
        bot_input_ids = torch.cat([chat_history_ids, new_input_ids], dim=-1)
    # Keep only the most recent tokens so prompt + reply never exceeds the window.
    bot_input_ids = bot_input_ids[:, -prompt_budget:]

    chat_history_ids = model.generate(
        bot_input_ids,
        # No padding is present, so every position is attended to. Passing the mask
        # explicitly avoids ambiguity because pad_token_id equals eos_token_id.
        attention_mask=torch.ones_like(bot_input_ids),
        # max_new_tokens limits the reply only; max_length would also count the
        # prompt and leave no room to answer once the history has grown.
        max_new_tokens=reply_budget,
        pad_token_id=tokenizer.eos_token_id,
        do_sample=True,
        top_k=50,
        top_p=0.95,
        temperature=0.8
    )

    # Decode response (only the tokens generated after the prompt)
    response = tokenizer.decode(chat_history_ids[0, bot_input_ids.shape[-1]:], skip_special_tokens=True)
    print(f">> Bot: \n {response}")


Start chatting with the bot (type 'quit' to stop)
Tell me something
>> Bot: 
 Pentagon officials said they did not know whether the drone would be used to attack Islamic State in Syria, but they hoped it would be able to deliver accurate strikes against other militants.

U.S. officials told Fox News that they hoped the drone, which has a radar-evading design, would help the U.S. hunt down other terrorists trying to infiltrate into the country through Syria and Iraq.

"We're not in the business of killing people on the ground," said one military official, who asked not to be identified.

The White House also said that it would allow the drone to take off and fly for at least 20 minutes before being put down. The military official added that the U.S. would not send the drone to a combat zone for any mission.

"We're not going to be in Syria or Iraq with a drone," said the military official.

White House spokesman Josh Earnest said the White House was still in the process of discussing th

## Visual interface (Gradio chat UI)

In [21]:
# Wrap the already-loaded model and tokenizer in a text-generation pipeline.
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

def respond(message, history):
    """Generate the bot reply shown in the Gradio chat window.

    Args:
        message: Text the user just submitted.
        history: Earlier turns passed in by gr.ChatInterface. It is not used:
            every message is answered on its own, without conversation context.

    Returns:
        The generated continuation (prompt excluded) with surrounding
        whitespace stripped, or a short notice when the message is empty or
        too long to leave room for a reply.
    """
    prompt = message
    if not prompt.strip():
        return "Please type a message."

    # Prompt and reply share one context window, so shrink the reply budget
    # by the number of prompt tokens instead of always asking for the maximum.
    context_window = getattr(model.config, "max_position_embeddings", 1024)
    reply_budget = min(max_new_tokens, context_window - len(tokenizer.encode(prompt)))
    if reply_budget <= 0:
        return "That message is too long for this model. Please shorten it."

    output = pipe(
        prompt,
        max_new_tokens=reply_budget,
        pad_token_id=tokenizer.eos_token_id,
        do_sample=True,
        temperature=0.7,
        # Let the pipeline strip the prompt; slicing by len(prompt) can be off
        # when the decoded text does not reproduce the prompt character for character.
        return_full_text=False,
    )
    return output[0]["generated_text"].strip()

chat_ui = gr.ChatInterface(
    fn=respond,
    type="messages",
    textbox=gr.Textbox(placeholder="Ask something...", scale=7),
    # Name the model that is actually loaded instead of a hard-coded label.
    title=f"{model_name} Chatbot",
    theme="soft"
)

chat_ui.launch()

Device set to use cuda:0


* Running on local URL:  http://127.0.0.1:7867
* To create a public link, set `share=True` in `launch()`.




In [22]:
# Shut down the Gradio apps started from this kernel (e.g. the chat UI launched
# above) so their local ports are released before the cell is run again.
gr.close_all()