### current directory: /home/lzc/mindspore/ChatBabel.ipynb

In [1]:
model_path = '/data1/model/bge1_5-large-zh'
llm_path = '/data1/model/qwen1_5-7b-chat'

In [2]:
!nvidia-smi

Tue Jun  4 06:59:07 2024       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA RTX 6000 Ada Gene...    Off |   00000000:3B:00.0 Off |                  Off |
| 30%   30C    P8             26W /  300W |   38975MiB /  49140MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
|   1  NVIDIA RTX 6000 Ada Gene...    Off |   00

## Preparing papers
1. Locate the zip file that contains the papers and unzip them into the `./data` repository.
2. Manually create a .bib file that contains all the metadata for the papers and store them in `./bib_data`

In [3]:
# %%capture captured_output
# !unzip papers_condensed.zip -d ./data

In [4]:
class DotDict(dict):
    def __getattr__(self, key):
        try:
            return self[key]
        except KeyError:
            raise AttributeError(f"'DotDict' object has no attribute '{key}'")

    def __setattr__(self, key, value):
        self[key] = value

# 1. Preparation for PDF loader

In [5]:
from multitask_classifier import *
from utils import *
from utils_rag import *
from tokenizer import BertTokenizer

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

config = {'hidden_dropout_prob': 0.3,
			'num_labels': {3: 0, 4: 1, 2: 2, 1: 3, 0: 4},
			'hidden_size': 768,
			'data_dir': '.',
			'option': 'finetune'}

config = SimpleNamespace(**config)

argpath = './finetune-5-1e-05-multitask-final-2.pt'

if torch.cuda.is_available():
    device = torch.device('cuda:2')
else:
    device = torch.device('cpu')

saved = torch.load(argpath)

bert_model = MultitaskBERT(config)
add_lora_layers(bert_model)
for name, param in bert_model.named_parameters():
	if "lora" not in name and "classifier" not in name and "bias" not in name:
		param.requires_grad = False
	else:
		param.requires_grad = True
bert_model.load_state_dict(saved['model'])
bert_model = bert_model.to(device)

https://hf-mirror.com/bert-base-uncased/resolve/main/vocab.txt


In [6]:
sentence1 = "gradient-based/evolutionary relay hybrid for computing pareto front approximations maximizing the s-metric"
sentence2 = """
muiltiobj ective optimization using nondominated sorting in genetic algorithms
"""

print(detect_paraphrase(bert_model, device, tokenizer, sentence1, sentence2))
print(detect_similarity(bert_model, device, tokenizer, sentence1, sentence2))

0.0
0.9694808125495911


In [7]:
### Preprocess pdf documents
import pdfplumber
import pdftotext
from langchain_community.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.schema.document import Document
import importlib
from utils_rag import *
import os
import glob

# modified from https://stackoverflow.com/questions/77045559/langchain-load-with-string
def get_text_chunks_langchain(text, title, author):
    """ Turns raw string into docs that conform with docs = loader.load()"""
    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,
        chunk_overlap=50,
        length_function=len
    )
    docs = [Document(page_content=x, metadata={"title":title, "author":author}) for x in text_splitter.split_text(text)]
    return docs

def load_pdf(filepath, bib_file, model, device, tokenizer):
    """ From a pdf, return a docs"""
    text = ""
    match = get_title_author_from_pdf(filepath, bib_file, model, device, tokenizer)[0]
    print(match)
    title, author = match
    with open(filepath, 'rb') as f:
        pdf = pdftotext.PDF(f)
        for page in pdf:
            text += page
    return get_text_chunks_langchain(text, title, author)

# idea: train on malformed titles from bibliography to enhance similarity detection

""" Unit test """
# pdf_files = ["./GA_papers/CMA_ES.pdf", "./GA_papers/SBX.pdf", './GA_papers/HypE.pdf', './GA_papers/SPEA2.pdf', './GA_papers/NSGA.pdf']
directory = './GA_papers/'
pattern = '*.pdf'
pdf_files = glob.glob(os.path.join(directory, pattern))

documents = []
bib_file = "./bib_data/paper_metadata_full.bib"

for pdf_file in pdf_files:
    docs = load_pdf(pdf_file, bib_file, bert_model, device, tokenizer)
    documents += docs

print('\n')
for pdf_file in pdf_files:
    if pdf_file == "./GA_papers/SPEA2.pdf":
        print(pdf_file)
        docs = load_pdf(pdf_file, bib_file, bert_model, device, tokenizer)

('A Survey on Evolutionary Computation for Computer Vision and Image Analysis: Past, Present, and Future Trends', 'Bi, Ying')
('Modified Distance Calculation in Generational Distance and Inverted Generational Distance', 'Gaspar-Cunha, António')
('Gradient-Based/Evolutionary Relay Hybrid for Computing Pareto Front Approximations Maximizing the S-Metric', 'Bartz-Beielstein, Thomas')
('The Pareto archived evolution strategy: a new baseline algorithm for Pareto multiobjective optimisation', 'Knowles, J.')
('Enhanced decomposition-based hybrid evolutionary and gradient-based algorithm for many-objective optimization', 'Mohammad Zadeh, Parviz')
('Computing Hypervolume Contributions in Low Dimensions: Asymptotically Optimal Algorithm and Complexity Results', 'Takahashi, Ricardo H. C.')
('A Scalable Multi-objective Test Problem Toolkit', 'Coello Coello, Carlos A.')
('Enhanced decomposition-based hybrid evolutionary and gradient-based algorithm for many-objective optimization', 'Mohammad Zadeh,

# 2. Preparation for Retriever

In [8]:
# !pip install transformers torch ipywidgets

In [9]:
### Load model from local files
from transformers import AutoModel, AutoTokenizer

model_path = '/data1/model/bge1_5-large-zh'
llm_path = '/data1/model/qwen1_5-7b-chat'

model = AutoModel.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path)

In [10]:
# !pip install sentence_transformers chromadb

In [11]:
# !pip install -U langchain

In [12]:
### Embed documents into vectordb
from langchain.vectorstores import Chroma, FAISS
from langchain.embeddings import HuggingFaceEmbeddings, HuggingFaceBgeEmbeddings
from sentence_transformers import SentenceTransformer

model_path = '/data1/model/bge1_5-large-zh'
# embeddings = SentenceTransformer(model_name_or_path=model_path, local_files_only=True)

# current directory: /home/lzc/mindspore/ChatBabel.ipynb
# embeddings = HuggingFaceBgeEmbeddings(model_name='BAAI/bge-large-zh-v1.5', cache_folder=model_path)

embeddings = HuggingFaceBgeEmbeddings(model_name=model_path)#, cache_folder=model_path)

vector_store = Chroma(embedding_function=embeddings)
vector_store.add_documents(documents)
retriever = vector_store.as_retriever()

""" Unit test """
query = "A crossover operator in the continuous space."
retrieved_docs = retriever.invoke(query)
for doc in retrieved_docs:
    print(doc.metadata)
    print(doc.page_content[:250])

{'author': 'Bartz-Beielstein, Thomas', 'title': 'Gradient-Based/Evolutionary Relay Hybrid for Computing Pareto Front Approximations Maximizing the S-Metric'}
1995, pp. 1556–1561.
[22] D. Van Veldhuizen, “Multiobjective evolutionary algorithms: Classifications, analyzes, and new innovations,” Air Force Inst. Technol., Dayton,
OH, Tech. Rep. AFIT/DS/ENG/99-01, 1999.
[23] D. Van Veldhuizen and G. Lamont, “Mu
{'author': 'Mohammad Zadeh, Parviz', 'title': 'Enhanced decomposition-based hybrid evolutionary and gradient-based algorithm for many-objective optimization'}
[235] H. Esbensen, E.S. Kuh, Design space exploration using the genetic algorithm,
in: IEEE Symposium on Circuits and Systems, ISCAS 1996, vol. 4, 1996, pp.
500–503.
[236] P. Czyzak, A. Jaszkiewicz, Pareto simulated annealing—a metaheuristic for
mult
{'author': 'Zitzler, Eckart', 'title': 'Comparison of Multiobjective Evolutionary Algorithms: Empirical Results'}
Springer, Berlin, Germany.
Zitzler, E. and Thiele, L. (1999). Multi

In [13]:
""" Unit test """
query = "A crossover operator in the continuous space."
retrieved_docs = retriever.invoke(query)
for doc in retrieved_docs:
    print(doc.metadata)
    print(doc.page_content[:250])

{'author': 'Bartz-Beielstein, Thomas', 'title': 'Gradient-Based/Evolutionary Relay Hybrid for Computing Pareto Front Approximations Maximizing the S-Metric'}
1995, pp. 1556–1561.
[22] D. Van Veldhuizen, “Multiobjective evolutionary algorithms: Classifications, analyzes, and new innovations,” Air Force Inst. Technol., Dayton,
OH, Tech. Rep. AFIT/DS/ENG/99-01, 1999.
[23] D. Van Veldhuizen and G. Lamont, “Mu
{'author': 'Mohammad Zadeh, Parviz', 'title': 'Enhanced decomposition-based hybrid evolutionary and gradient-based algorithm for many-objective optimization'}
[235] H. Esbensen, E.S. Kuh, Design space exploration using the genetic algorithm,
in: IEEE Symposium on Circuits and Systems, ISCAS 1996, vol. 4, 1996, pp.
500–503.
[236] P. Czyzak, A. Jaszkiewicz, Pareto simulated annealing—a metaheuristic for
mult
{'author': 'Zitzler, Eckart', 'title': 'Comparison of Multiobjective Evolutionary Algorithms: Empirical Results'}
Springer, Berlin, Germany.
Zitzler, E. and Thiele, L. (1999). Multi

# 3. Preparation for LLM module

In [14]:
import mindspore as ms
from mindnlp.transformers import AutoModelForCausalLM, AutoTokenizer

ms.context.set_context(device_target='GPU', device_id=2)
llm_path = '/data1/model/qwen1_5-7b-chat'
model = AutoModelForCausalLM.from_pretrained(llm_path)
model.set_train(False)

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

MindSpore do not support bfloat16 dtype, we will automaticlly convert to float16


Qwen2ForCausalLM<
  (model): Qwen2Model<
    (embed_tokens): Embedding<vocab_size=151936, embedding_size=4096, use_one_hot=False, weight=Parameter (Tensor(shape=[151936, 4096], dtype=Float16, value=[...], name=model.embed_tokens.weight), requires_grad=True), dtype=Float32, padding_idx=None>
    (layers): CellList<
      (0): Qwen2DecoderLayer<
        (self_attn): Qwen2Attention<
          (q_proj): Dense<input_channels=4096, output_channels=4096, has_bias=True>
          (k_proj): Dense<input_channels=4096, output_channels=4096, has_bias=True>
          (v_proj): Dense<input_channels=4096, output_channels=4096, has_bias=True>
          (o_proj): Dense<input_channels=4096, output_channels=4096>
          (rotary_emb): Qwen2RotaryEmbedding<>
          >
        (mlp): Qwen2MLP<
          (gate_proj): Dense<input_channels=4096, output_channels=11008>
          (up_proj): Dense<input_channels=4096, output_channels=11008>
          (down_proj): Dense<input_channels=11008, output_channels=4

In [15]:
tokenizer = AutoTokenizer.from_pretrained(llm_path)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


# 4. Asking ChatBabel questions about research.

In [16]:
from mindspore import Tensor
from mindspore import context
from mindnlp.transformers import TextIteratorStreamer
from threading import Thread
import json

def stream_generate_answer(
    input_ids,
    tokenizer,
    model,
    max_new_tokens=300,
    temperature=0.7,
    repetition_penalty=1.0,
    context_len=2048
):
    streamer = TextIteratorStreamer(tokenizer, timeout=60.0, skip_prompt=True, skip_special_tokens=True)
    max_src_len = context_len - max_new_tokens - 8
    input_ids = input_ids[-max_src_len:]
    
    input_ids = Tensor(input_ids)
    
    generation_kwargs = dict(
        input_ids=input_ids,
        max_new_tokens=max_new_tokens,
        temperature=temperature,
        do_sample=True,
        repetition_penalty=repetition_penalty,
        streamer=streamer,
    )
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()

    yield from streamer

def answer(prompt):
	context_str = ""
	retrieved_docs = retriever.get_relevant_documents(prompt)
	for doc in retrieved_docs:
		context_str += json.dumps(doc.metadata)
		context_str += '\n'
		context_str += doc.page_content[:100]
		context_str += '\n'

	PROMPT_TEMPLATE = """基于以下已知信息，简洁和专业的告知用户他们的研究想法是否出现在已知信息的文献的实际内容中。
	请提供相关条目的标题以及作者，不允许在答案中添加编造成分，答案请使用中文。

	已知信息：
	{context}

	请仔细思考并回答。
	""".format(context=context_str)

	messages = [
		{"role": "system", "content": PROMPT_TEMPLATE},
		{"role": "user", "content": prompt}
	]

	input_ids = tokenizer.apply_chat_template(
		conversation=messages,
		tokenize=True,
		add_generation_prompt=True,
		return_tensors='ms'
	)

	response = ""
	for new_text in stream_generate_answer(input_ids, tokenizer, model):
		response += new_text
	response = response.strip()


	unique_metadata = []
	for retrieved_doc in retrieved_docs:
		metadata = retrieved_doc.metadata
		if metadata['author'] in ('None', 'Unknown author') or metadata['title'] in ('None', 'Unknown title'):
			pass
		elif metadata not in unique_metadata:
			unique_metadata.append(metadata)

	references = "\n参考资料：\n"
	for item in unique_metadata:
		references += json.dumps(item)
		references += '\n'

	return response, references

# prompt = "I have a new idea! For a LLM, it's almost impossible to pre-train from scratch: too costly. A solution is to freeze all the parameters, and create a new low-rank estimation of the original weights and train those weights with reduced parameters. What do you think?"
prompt = """
I have a new idea! for evolutionary algorithms, we usually perform the crossover operation on discrete strings. 
but we could study the probability distribution of the variation operator and mathematically model them in a continuous space to
perform a real-valued crossover operation. what do you think of this idea?
"""

# awesome answer.

response, references = answer(prompt)
print(response)
print(references)


  warn_deprecated(


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

您的研究想法与Deb在"Simulated Binary Crossover for Continuous Search Space"（[12]）中探讨的内容相似。他们确实提出了使用模拟二进制交叉（Simulated Binary Crossover, SBC）来处理连续搜索GCC（即连续变量），这是一种通过模拟二进制编码的概念，实现在连续空间中进行交叉操作的方法。这种方法通过映射问题变量并避开二进制编码中的哈密顿悬崖问题来提高交叉的灵活性。

您提出的在实际操作中根据概率分布建模连续空间的变异操作，类似于SBC中的连续版本，是一种合理的演化算法改进。理论上，这样的设计可以减少二进制编码带来的限制，并且理论上分析了稀疏性的影响。因此，您的想法是可行且在已知文献中有所探讨的，但需要具体研究来验证其在实际优化问题中的效果。

参考资料：
{"author": "Deb, Kalyanmoy", "title": "Simulated Binary Crossover for Continuous Search Space"}
{"author": "Mohammad Zadeh, Parviz", "title": "Enhanced decomposition-based hybrid evolutionary and gradient-based algorithm for many-objective optimization"}



In [17]:
prompt = """I have a new idea! For a LLM, it's almost impossible to pre-train from scratch: too costly. 
A solution is to freeze all the parameters, and create a new low-rank estimation of the original weights and train those weights with reduced parameters. 
What do you think?"""

# No entry in database related to LoRA.

response, references = answer(prompt)
print(response)
print(references)

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

您的研究想法在已知文献中没有直接提及。"Pre-training from scratch"在大型语言模型（LLM）的训练中是一个常见的策略，尤其是在Transformer架构中，通过大量数据进行初始.","然而，您提到的"freeze all parameters"然后"create a new low-rank estimation and train with reduced parameters"这个方法更接近于模型微调或者模型压缩的领域，而非从头开始的预训练。这与Ishibuchi在"Performance of Decomposition-Based Many-Objective Algorithms"中讨论的可能面对的"fitness evaluation mechanisms not always suitable for many-objective optimization"（在多目标优化中的适应性问题）或者Bartz-Beielstein在"Gradient-Based/Evolutionary Relay Hybrid"中提到的模型优化技术有相似之处，但具体是否可行，需要结合实际的数学模型和语言模型的学习原理来分析。

如果您的解决方案是针对如何在成本有限的情况下优化模型训练，可能需要结合更具体的上下文或算法理论来讨论，这可能更符合Deb的"Niched-Penalty Approach for Constraint Handling"中处理约束和资源有限问题的方法论。但 advocates for this exact technique in the context of LLM pre-training aren't covered in these references.

综上，您的想法可能在某些特定的模型优化或资源管理的场景下有其合理性，但要确定是否与现有文献中的研究一致，需要进行详细的文献调研或实验

参考资料：
{"author": "Ishibuchi, Hisao", "title": "Performance of Decomposition-Based Many-Objective Algorithms Strongly Depends on Pareto Front Shapes"}
{"author": "Bart

In [18]:
prompt = "I would like to create a new genetic algorithm."

# Wrong usage, but acceptable answer.

response, references = answer(prompt)
print(response)
print(references)

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

您的研究想法是否出现在已知信息的文献中？

1. Zitzler, Eckart的"Comparison of Multiobjective Evolutionary Algorithms: Empirical Results"（在"Cybernetics"杂志，1999年，第28卷，第1期，38-֪ͨ47页）中提到了对比多目标进化算法的实验结果，但并没有直接提及创建新遗传算法的内容。如果您想比较或改进现有遗传算法，这可能是一个参考点。

2. Fourman, M. P.（1985年）的论文讨论了遗传算法用于布局压缩，虽然没有直接关于新遗传算法的创造，但可以启发对遗传算法应用的创新思考。

3. Bartz-Beielstein, Thomas的两篇论文，"Gradient-Based/Evolutionary Relay Hybrid for Computing Pareto Front Approximations Maximizing the S-Metric"，分别在2000年和2001年发表，涉及使用某种混合算法来优化Pareto前沿，这可能为新算法设计提供了理论基础。如果您的新算法是这种混合进化策略的变体，那么它可能与这些研究有 Katie（K.）的工作相关。

4. 如果您在设计新遗传算法的过程中，使用了类似进化或梯度的方法，或者目标是最大化某种评价指标（如S-Metric），那么这些论文可能会提供灵感。但要确认

参考资料：
{"author": "Zitzler, Eckart", "title": "Comparison of Multiobjective Evolutionary Algorithms: Empirical Results"}
{"author": "Bartz-Beielstein, Thomas", "title": "Gradient-Based/Evolutionary Relay Hybrid for Computing Pareto Front Approximations Maximizing the S-Metric"}



In [19]:
prompt = """I have come up with a new genetic algorithm that utilizes the concept of Pareto dominance. 
What if we store all the best solutions so far in an archive, and replace the worst solutions in the current population with the best solutions in that archive?
Then, each generation will only preserve the genes from the best individuals, and the solution is surely guaranteed to improve!
"""

# PAES is actually a very good catch: I didn't remember what this algorithm was when I asked this question.

response, references = answer(prompt)
print(response)
print(references)

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

您的研究想法在已知信息中有所体现。作者Knowles在论文"The Pareto archived evolution strategy: a new baseline algorithm for Pareto multiobjective optimisation"中提到了一种名为"Pareto Archived Evolution Strategy (PAES)"的算法，它利用了类似的思想，即保留并更新最优解以改进多目标优化。您的描述类似于PAES中的"replacement mechanism"，即用archive中的最佳解决方案替换当前种群中的较差个体。这种策略有助于确保解决方案的改进，因为它保留了种群中的最优基因。因此，您的研究想法是PAES算法的一种变体或相关概念，已经在文献中被讨论过。

参考资料：
{"author": "Knowles, J.", "title": "The Pareto archived evolution strategy: a new baseline algorithm for Pareto multiobjective optimisation"}
{"author": "Bartz-Beielstein, Thomas", "title": "Gradient-Based/Evolutionary Relay Hybrid for Computing Pareto Front Approximations Maximizing the S-Metric"}



In [20]:
prompt = """Are you familiar with the concept of hypervolume? Since it is a metric (therefore scalar), 
can we take its derivative with respect to the decision variables? Then, we can nudge the solutions toward better hypervolume, therefore guaranteeing convergence.
"""

# HypE is fine, but I was thinking of HIGA-MO.

response, references = answer(prompt)
print(response)
print(references)

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

是的，我熟悉hypervolume这个概念。Hypervolume是一种多目标优化中的评价指标，它衡量一组解相对于参考点的性能，尤其是在多目标优化算法中用于评估和plotlib决策变量。由于它是一个标量（即单个值），理论上可以对其偏导数进行计算，以指导优化过程。

理论上，通过计算hypervolume对决策变量的偏导held，我们可以找到那些能显著提高hypervolume的最优方向，从而在某些优化算法中，如HypE（Bader, 2020）等，可以利用梯度信息来迭代更新解，以促进向更好的hypervolume区域收敛。然而，实际操作中，由于hypervolume的计算涉及到多个目标和参考点，直接求偏导并不直观，通常需要使用数值优化方法来近似求解。

参考资料：
{"author": "Auger, Anne", "title": "Theory of the hypervolume indicator: optimal \u03bc-distributions and the choice of the reference point"}
{"author": "Ishibuchi, H.", "title": "A multi-objective genetic local search algorithm and its application to flowshop scheduling"}
{"author": "Bader, Johannes", "title": "HypE: An Algorithm for Fast Hypervolume-Based Many-Objective Optimization"}



In [22]:
prompt = """What if we try to decompose multi-objective problems into single objective problems and solve them instead? 
Specifically, I'm thinking of using a population of decomposed solutions and performing crossover on them. 
Have there been precedents in multiobjective genetic algorithms that do this?
"""

# this one is kinda wrong, supposed to be MOEA/D

response, references = answer(prompt)
print(response)
print(references)

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

您的研究想法在已知文献中有所体现。作者Bartz-Beielstein在"Gradient-Based/Evolutionary Relay Hybrid for Computing Pareto Front Approximations Maximizing the S-Metric"中提到了使用遗传算法处理多目标问题的一种方法，其中提到将多目标问题分解为单目标子问题并处理。这种方法类似于您描述的通过分解的解决方案种群进行交叉操作的思路。尽管没有直接提到使用"多个精英解决方案"（multiple elite solutions）替代单一精英解决方案，但这种处理策略是遗传算法中常见的多目标优化策略，通过保留和操作多个好的解来寻找非劣解。因此，您的想法在某种程度上与某些多目标遗传算法的实践相吻合。

参考资料：
{"author": "Zitzler, Eckart", "title": "SPEA2: Improving the strength pareto evolutionary algorithm"}
{"author": "Bartz-Beielstein, Thomas", "title": "Gradient-Based/Evolutionary Relay Hybrid for Computing Pareto Front Approximations Maximizing the S-Metric"}
{"author": "Sinha, Ankur", "title": "A Review on Bilevel Optimization: From Classical to Evolutionary Approaches and Applications"}



In [23]:
prompt = """我问你，你的数据库里关于演化算法能查到最早的文献是哪篇？
"""

# wrong usage as well, should pose a research idea instead of asking questions like LLM
# this is wrong, supposed to be VEGA 1985 or No free lunch theorem (sometime around 1970?)

response, references = answer(prompt)
print(response)
print(references)

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

关于演化算法的最早的文献是Yen, J.Y. (1971) Finding the K Shortest Loopless Paths，作者是J.Y. Yen。

参考资料：
{"author": "Knowles, J.", "title": "The Pareto archived evolution strategy: a new baseline algorithm for Pareto multiobjective optimisation"}
{"author": "Zitzler, Eckart", "title": "Comparison of Multiobjective Evolutionary Algorithms: Empirical Results"}
{"author": "Bartz-Beielstein, Thomas", "title": "Gradient-Based/Evolutionary Relay Hybrid for Computing Pareto Front Approximations Maximizing the S-Metric"}



In [24]:
prompt = """你对hypervolume metric了解吗？有没有演化算法现在用hypervolume metric作为指标，代替我们之前所用的fitness function来评判一个个体适不适合生存？我觉得这是个很好的主意。
"""

# looking for SMS-EMOA, but HypE is acceptable

response, references = answer(prompt)
print(response)
print(references)

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

是的，hypervolume metric在多目标优化（Many-Objective Optimization, MMO）中被广泛使用，特别是在评估和选择解决方案时。Hypervolume是一种衡量集合性能的全局指标，它考虑了所有目标函数的值，而不仅仅是单个个体的fitness。HypE（Hypervolume-based Evolutionary Algorithm, Bader等人在2015年的论文中提出）就是一个使用这种指标的算法实例。

HypE算法不仅适用于固定数量的客观函数（如4或5），它能够处理任意数量的多目标问题。在算法中，Hypervolume-based Fitness Assignment被用来评估和选择个体，而不是传统的 метро（边际改进）或占用度（dominance）这样的局部指标。这样做的好处是，即使在多目标优化中，Hypervolume可以提供一个全面的性能视角，有助于找到在所有目标上都表现良好的解决方案。

因此，你的想法是合理的，许多研究确实采用Hypervolume来替代传统的fitness function，特别是在需要全局优化的场景中。

参考资料：
{"author": "Bader, Johannes", "title": "HypE: An Algorithm for Fast Hypervolume-Based Many-Objective Optimization"}



In [27]:
prompt = """
我想要设计一些新的适用于多目标优化问题的benchmark problem set，可能适用于高维空间的问题。
"""

# DTLZ/WFG are both fine

response, references = answer(prompt)
print(response)
print(references)

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

您的研究想法与Deb, K.的"Scalable multi-objective optimization test problems"相符合。在该文献中，作者讨论了包括DTLZ5和DTLZ7在内的多目标优化问题，这些问题通常在高维空间中设计，旨在提供测试多目标优化算法的基准问题。您可以借鉴这些已有的问题来构建您的新基准集。

参考资料：
{"author": "Deb, K.", "title": "Scalable multi-objective optimization test problems"}
{"author": "Mohammad Zadeh, Parviz", "title": "Enhanced decomposition-based hybrid evolutionary and gradient-based algorithm for many-objective optimization"}
{"author": "Bader, Johannes", "title": "HypE: An Algorithm for Fast Hypervolume-Based Many-Objective Optimization"}



In [29]:
prompt = """
Ok, so I have this idea of "evolving" Gaussian distributions to fit a probabilistic model of some intrinsic dataset.
"""

# It tried, it should've replied with CMA-ES or MO-CMA-ES, but I guess the context length was too short.

response, references = answer(prompt)
print(response)
print(references)

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

您的研究想法可能与"HypE: An Algorithm for Fast Hypervolume-Based Many-Objective Optimization"（Bader, Johannes）中的某些内容相关，特别是在处理多目标优化和适应性分布时，算法可能涉及到进化策略。然而，该文献主要关注的是基于hypervolume的多目标优化算法，而不是直接处理概率模型或Gaussian分布的演化。如果您想探讨Gaussian分布的适应性建模，可能需要查阅进化计算、机器学习或统计建模的文献，比如使用遗传算法或粒子群优化等方法优化概率密度函数。

"SMS-EMOA: Multiobjective selection based on dominated hypervolume"（Beume, Nicola）中提到的多目标选择策略，虽然不是直接处理Gaussians，但多目标优化方法可能会用到适应性分布，这与您的想法有关。

"Laumanns, M."的"统一的多目标进化算法模型"（A unified model for multi-objective evolutionary algorithms with elitism）可能涉及适应性策略的初始化，这在优化模型参数时也可能适用，但具体到Gaussians的演化，需要进一步分析。

"Auger, Anne的'Hypervolume Indicator'理论"讨论了选择参考点和优化指标，这在构建和评估概率模型时可能间接相关，但不是直接的Gaussian分布演化。

综上，您的想法可能与部分文献的多目标优化或适应性建

参考资料：
{"author": "Bader, Johannes", "title": "HypE: An Algorithm for Fast Hypervolume-Based Many-Objective Optimization"}
{"author": "Beume, Nicola", "title": "SMS-EMOA: Multiobjective selection based on dominated hypervolume"}
{"author": "Laumanns, M.", "title": "A unified model for multi-objective ev