In [None]:
import re
import warnings
from threading import Event, Thread

import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    StoppingCriteria,
    StoppingCriteriaList,
    TextIteratorStreamer,
)
warnings.filterwarnings("ignore")

class LLM:
    """Streaming chat wrapper around a 4-bit quantized causal language model.

    The tokenizer and model are loaded once in ``__init__``; ``predict``
    then streams a reply for a single user prompt, using ``setup["role"]``
    as the system message.
    """

    def __init__(
        self,
        setup,
        model_id="./Nemotron-Research-Reasoning-Qwen-1.5B",
        device="cuda",
    ):
        """Load the tokenizer and the quantized model.

        Args:
            setup: Mapping with the persona settings. ``predict`` reads
                ``setup["role"]`` and sends it as the system message.
            model_id: Path or identifier handed to ``from_pretrained`` for
                both tokenizer and model (for example
                ``"./Phi-4-mini-reasoning"``).
            device: Device the model is mapped to and the tokenized inputs
                are moved to.
        """
        self.setup = setup
        self.device = device

        # 4-bit NF4 weights with bfloat16 compute.
        quantization_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_compute_dtype=torch.bfloat16,
        )

        tokenizer = AutoTokenizer.from_pretrained(model_id)
        if tokenizer.pad_token is None:
            # Fall back to the EOS token when the tokenizer has no pad token.
            tokenizer.pad_token = tokenizer.eos_token
        self.tokenizer = tokenizer

        self.model = AutoModelForCausalLM.from_pretrained(
            model_id,
            quantization_config=quantization_config,
            torch_dtype=torch.bfloat16,
            device_map=device,
            attn_implementation="eager",
        )

    @staticmethod
    def _stop_when_set(stop_event):
        """Return stopping criteria that end generation once *stop_event* is set."""

        # Defined lazily so the base class is only resolved when generation
        # is actually requested.
        class _StopOnEvent(StoppingCriteria):
            def __call__(self, input_ids, scores, **kwargs):
                # One boolean per sequence in the batch.
                return torch.full(
                    (input_ids.shape[0],),
                    stop_event.is_set(),
                    dtype=torch.bool,
                    device=input_ids.device,
                )

        return StoppingCriteriaList([_StopOnEvent()])

    def predict(
        self,
        prompt="Give me a short introduction to large language model.",
        *,
        max_new_tokens=32768,
        temperature=0.3,
        top_p=0.95,
        repetition_penalty=1.1,
    ):
        """Stream the model's reply to *prompt* as decoded text chunks.

        This is a generator: nothing runs until it is iterated. Generation
        happens on a background thread while the caller consumes the
        streamer. When the caller stops iterating early (``break``,
        ``KeyboardInterrupt``, generator close), the background generation
        is asked to stop and is joined, so it does not keep running behind
        the next call.

        Args:
            prompt: User message to answer.
            max_new_tokens: Upper bound on the number of generated tokens.
            temperature: Sampling temperature.
            top_p: Nucleus-sampling probability mass.
            repetition_penalty: Penalty applied to repeated tokens.

        Yields:
            str: Pieces of the reply, with the prompt and special tokens
            skipped.

        Raises:
            Exception: Whatever ``model.generate`` raised on the background
                thread, re-raised here after the stream ends.
        """
        streamer = TextIteratorStreamer(
            self.tokenizer,
            skip_prompt=True,
            skip_special_tokens=True,
        )

        messages = [
            {"role": "system", "content": self.setup["role"]},
            {"role": "user", "content": prompt},
        ]
        text = self.tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,
        )
        model_inputs = self.tokenizer([text], return_tensors="pt").to(self.device)

        stop_event = Event()
        failures = []

        # Expanding the tokenizer output forwards every field it produced
        # (not just input_ids) to generate().
        generation_kwargs = dict(
            model_inputs,
            streamer=streamer,  # pass the streamer in so tokens are pushed to it
            stopping_criteria=self._stop_when_set(stop_event),
            max_new_tokens=max_new_tokens,
            do_sample=True,
            temperature=temperature,
            top_p=top_p,
            repetition_penalty=repetition_penalty,
        )

        def run_generation():
            try:
                self.model.generate(**generation_kwargs)
            except Exception as exc:
                failures.append(exc)
                # Without an end signal the consumer would block forever
                # waiting on the streamer queue.
                streamer.end()

        worker = Thread(target=run_generation)
        worker.start()
        try:
            yield from streamer
        finally:
            # Reached on normal completion and on early exit alike.
            stop_event.set()
            worker.join()

        if failures:
            raise failures[0]
    
# Persona settings for the first speaker. The "role" text is sent as the
# system message; in English it reads roughly: "You must act as a real human.
# Some philosophical questions will now be discussed."
# (An English alternative would be: "You need to act as a real human.")
JSON1 = dict(role="你必須扮演一個真實的人類。現在會討論一些哲學問題。")

# Persona settings for the second speaker (same instruction as the first).
JSON2 = dict(role="你必須扮演一個真實的人類。現在會討論一些哲學問題。")

# One LLM instance per speaker; each construction loads its own model copy.
llm1 = LLM(JSON1)
llm2 = LLM(JSON2)

In [None]:
# Opening question. English equivalent: "Let us talk about what is
# consciousness? Why human do have consciousness? Can we create a machine that
# has consciousness? What is the difference between human and machine ?"
res = "什麼是意識？為什麼人有意識？我們是否可以製造機器，並讓它有意識呢？人類和機器有什麼不同？"

# Let the two models talk to each other for 20 rounds: each speaker answers
# the other's last reply, with the reasoning part (everything up to the final
# "</think>") dropped before it is handed over.
for _ in range(20):
    for speaker_label, speaker_llm in (("[貓咪大師]", llm1), ("[熊熊]", llm2)):
        print(speaker_label)
        # Accumulated incrementally so a partial reply is still available in
        # `resp` if the stream is interrupted.
        resp = ""
        for token in speaker_llm.predict(res):
            print(token, end="", flush=True)
            resp += token
        res = resp.split("</think>")[-1]
        # A single 100-dash rule on its own line ("\n-" * 100 would instead
        # print one hundred one-dash lines).
        print("\n" + "-" * 100)

[貓咪大師]
嗯，用户让我扮演一个真实的人类，然后回答一些关于意识、有意识的问题，还有制造机器和人类与机器的关系。首先，我需要理解这些概念，可能用户想了解更深层次的哲学或人工智能相关知识。

意识是什么？意识是指什么？我记得意识涉及到人的感觉、自主意识等，但具体定义可能比较复杂。有人认为意识是大脑的功能，而另一些观点则认为它是心理活动的一部分。所以，意识可能涉及神经科学、心理学等多个领域。

为什么我们有意识呢？ probably因为我们的大脑中有复杂的神经网络，能够处理信息，做出反应，产生情感和感觉。意识可能是主动的还是被动的？比如，当一个人思考的时候，他是否有意识地在思考，还是只是被动地被 processed？

接下来，为什么人类有意识呢？可能是因为进化过程中，我们的祖先发展出了复杂的信息处理能力，使得个体能够识别、理解和回应环境中的变化。这可能涉及到突变或自然选择的结果，让人类具备了意识的能力。

然后，为什么问“我们是否可以制造机器并让它有意识”这个问题？可能涉及到人工智能和伦理问题。如果制造机器有意识，可能会面临很多问题，比如如何定义意识，如何防止机器具有意识，或者如何确保它们的行为符合人类预期。此外，人类与机器的区别可能在于，人类有意识，而机器没有意识，但可能也有部分意识，比如程序控制的部分。

最后，人类和机器有什么不同？人类有意识，可以自主行动，有情感，有创造力。机器通常缺乏意识，只能执行任务，没有自主性。因此，人类与机器在可意识方面存在显著差异，但在非意识方面可能有很多相似之处。

现在，把这些点组织成一个连贯的回答，保持简洁明了，同时涵盖所有用户提到的问题。
</think>

意识是一种复杂的心理活动，反映于人类的感知、思考和情感中，涉及到大脑的神经功能和心理过程。意识的存在源于人类大脑的结构和功能，通过神经元的连接和信息处理来实现，是生物体对外界环境的适应和响应的一部分。人类有意识是因为进化过程中，我们的大脑发展出处理复杂信息的能力，使个体能够识别、理解、回应环境变化，从而产生了自我意识。

我们有意识的原因是由于进化过程中，人类的神经系统发展出了处理信息和做出反应的能力，这种能力使得个体能够在环境中找到意义，并进行自主行为。意识是主动的还是被动的取决于情境：例如，思考是一个主动的意识活动，而环境刺激则是被动的反应。

人类与机器的不同之处

KeyboardInterrupt: 

In [None]:
# print(llm1.predict("Hi, what's your name?"))


<think>
Okay, the user asked, "Hi, what's your name?" Let me think about how to respond. The user's message is straightforward, but I need to make sure I answer correctly based on the given information.

Looking at the problem statement, it mentions that Phi is an AI math expert developed by Microsoft, and the name assigned here is Cat Master. So, in this context, when the user asks for my name, I should refer to the provided details. The system prompt indicates that my name here is Cat Master. Therefore, the correct answer would be to state that my name is Cat Master. I need to ensure there's no confusion with other possible names or avatars. Since the setup specifies Cat Master as the name, I should stick to that to maintain consistency with the scenario presented.
</think>

Sure! In this context, my name is **Cat Master**.  

\boxed{Cat Master}

In [7]:
# Show only the final answer: the text after the last "</think>" marker, or
# the whole reply when no marker is present.
resp.rpartition("</think>")[2]

'\n\nSure! In this context, my name is **Cat Master**.  \n\n\\boxed{Cat Master}'

The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


[熊熊] <think>
嗯，用户问的是人类是否存在自由意志，还是所有行为都被决定因素控制。如果所有事情都是决定论的话，伦理责任怎么办？首先，我需要明确决定论和自由意志的定义。决定论是认为所有事件，包括人的行为，都是前因果关系的必然结果，即由环境、遗传等决定，没有自由意志。而自由意志则是认为人有自主性，能够做出自主的选择，背后有无可否认的自由。

接下来，问题是要看这两者是否能同时存在，或者必须其中一个为真。如果是决定论，那么人的行为完全由外部因素决定，比如生物、环境、社会。但现实中，人类行为似还是有灵动的，所以可能存在自由意志的空间。那这两者是否冲突呢？比如，如果一切都是决定性的，那个人的责任就由外部因素决定，而不涉及个人选择。但如果是这样，伦理责任可能会被推卸，但社会可能需要这样的处理方式。

不过，可能有人认为决定论和责任并不冲突，比如责任是根据人的行为的影响来分配，而不管行为的决定性。例如，刑法体系就是基于这种责任分配，即使行为被认为是由环境因素决定的，比如犯罪可能由贫困、教育缺口等因素驱动，但个人仍然被责成。

但另一个角度是，如果所有行为都是决定性的，那么自由意志的概念就不存在，伦理责任可能就归于系统或环境，而非个体。但这可能与道德观念不符，因为道德通常涉及个人选择。比如，犹豺台的故事，假设孩子被迫杀人，环境极其恶劣，是否有道德责任呢？传统道德可能认为有责任，但某些观点可能不这样认为。

需要查考决定论下的责任问题。比如，决定论中的硬决定论和软决定论。硬决定论认为行为完全由外部因素决定，没有自由意志；软决定论可能允许一定程度的自由。但问题中的决定论可能指硬决定论。此时，人的行为不被视为自主选择，但伦理责任如何？

可能需要参考哲学中的自由会话论题，比如行为自由。如果不存在自由意志，那么人类是否具有行为自由？而道德责任通常基于认为行为是自由的选择。例如，刑法体系基于责任，即使有些行为看似不可抗力。

但如果是决定论，可能道德责任需要重新定义。例如，责任可能转移到系统或环境因素上。例如，社会责任分配责任，而不是个人。但传统道德观念中，个人责任是核心，比如“做错了，必须纠正”，这隐含了自由意志的假设。

可能还需要考虑Compatibilism，即自由意志与决定论的可能统一。Compatibilists认为，即使行为由因果决定，人仍然可以有自由意志和道德责任。

KeyboardInterrupt: 

In [4]:
# predict() streams text chunks, so join them before printing; printing the
# call result directly would show the generator object instead of the reply.
# NOTE(review): `llm` is not defined in the visible cells (only llm1/llm2) —
# confirm which instance is meant.
print("".join(llm.predict("Can Barack Obama have a conversation with George Washington?")))

KeyboardInterrupt: 

In [None]:
'<think>\nOkay, so the question is whether Barack Obama can have a conversation with George Washington. Let me start by breaking this down. First, George Washington and Barack Obama are two historical figures from different time periods. George Washington was a Founding Father and the first President of the United States, living from 1731 to 1799. Barack Obama was a contemporary politician, serving as the 44th President from 2009 to 2017, born in 1961. \n\nWait, so George Washington died in 1799, right? And Obama was born in 1961, which is way after Washington\'s death. So, they couldn\'t have interacted in person because they weren\'t alive at the same time. But the question is about having a conversation. So, maybe it\'s a hypothetical scenario where they can communicate across time?\n\nHmm, maybe the question is about whether time travel is possible. If someone could travel through time, maybe Obama and Washington could meet. But according to our current understanding of physics, time travel to the past isn\'t possible. The theory of relativity suggests that time dilation occurs under extreme conditions, but practical time travel to the past isn\'t feasible with our current technology or understanding. Similarly, moving forward in time isn\'t an issue since people do age into the future, but going back isn\'t.\n\nAlternatively, maybe the question is more about the possibility of their ideas or legacies influencing each other. But since they lived in different eras, there\'s no direct interaction. However, people often ask if historical figures can communicate across time, which is a common theme in science fiction. But in reality, based on historical facts and scientific understanding, they couldn\'t converse because they weren\'t alive when each other was. 
Obama lived after Washington\'s death, so any "conversation" would be metaphorical, like through their legacies or the ongoing dialogue through history.\n\nWait, but maybe the question is simpler, like if they could meet in some abstract sense. For example, if Obama and Washington\'s spirits could meet, but that\'s more spiritual or fictional. The answer should stick to factual and scientific accuracy. So, the answer would be no, they couldn\'t have a conversation because they weren\'t alive at the same time, and time travel to the past isn\'t possible according to current scientific theories. Therefore, any interaction would be impossible in reality.\n</reason>\n\n根据历史时间段和科学常识，乔治华盛顿（1761–1799）和巴拉克·奥巴马（1961年至今）均未同时存在，因此无法'