In [1]:
import os
import torch
from langchain.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import TextLoader
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, BitsAndBytesConfig
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.llms import HuggingFacePipeline
from langchain_core.messages import AIMessage, HumanMessage

# --- Model loading ---
# Directory of the local DeepSeek checkpoint. The default is the location this
# notebook was developed with; set DEEPSEEK_MODEL_PATH to run it elsewhere
# without editing the cell.
model_path = (
    os.environ.get("DEEPSEEK_MODEL_PATH")
    or "/home/student/zzc/deepseek/DeepSeek-R1-Distill-Qwen-32B"
)
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)

# 4-bit NF4 quantization with bfloat16 compute, handed to from_pretrained below.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    quantization_config=bnb_config,
    trust_remote_code=True
)

# --- Pipeline creation ---
# `pipe` is called directly by summarize_history / ask_deepseek; `llm` is the
# LangChain wrapper around the same pipeline.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=1024,
    temperature=0.1,
    top_p=0.9,
    repetition_penalty=1.1,
    do_sample=True
)

llm = HuggingFacePipeline(pipeline=pipe)

# --- Embedding model ---
# Same override pattern as the LLM path: EMBEDDING_MODEL_PATH wins when set.
embedding_model_path = (
    os.environ.get("EMBEDDING_MODEL_PATH")
    or "/home/student/zzc/代码/GAI-agent-satellite-main/embedding"
)
embeddings = HuggingFaceEmbeddings(
    model_name=embedding_model_path,
    model_kwargs={'device': 'cuda:0'}
)
# Splitter shared by every knowledge-base file: 1000-character chunks with a
# 200-character overlap.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)

  from .autonotebook import tqdm as notebook_tqdm
Sliding Window Attention is enabled but not implemented for `sdpa`; unexpected results may be encountered.
Loading checkpoint shards: 100%|██████████| 8/8 [00:17<00:00,  2.21s/it]
Device set to use cuda:0
  llm = HuggingFacePipeline(pipeline=pipe)
  embeddings = HuggingFaceEmbeddings(


In [2]:
# --- Load and index the knowledge base ---
def build_vectordb(name):
    """Build the Chroma vector store for one knowledge-base reference file.

    Reads ``./database/ref_<name>.txt``, splits it with the notebook-level
    ``text_splitter``, embeds the chunks with ``embeddings`` and stores them
    under ``database/<name>_db``.

    Args:
        name (str): Knowledge-base identifier, e.g. ``"SDMA"``.

    Returns:
        Chroma: The vector store populated from the reference file.
    """
    persist_directory = f"database/{name}_db"

    # Load and split first, so a missing source file fails before the existing
    # index is touched.
    documents = TextLoader(f'./database/ref_{name}.txt').load()
    chunks = text_splitter.split_documents(documents)

    # from_documents adds to the collection already stored in
    # persist_directory, so every re-run of this cell used to index the same
    # chunks again and the k=3 retrievers would return duplicates. Drop the
    # old collection before rebuilding.
    Chroma(
        persist_directory=persist_directory,
        embedding_function=embeddings,
    ).delete_collection()

    vectordb = Chroma.from_documents(chunks, embeddings, persist_directory=persist_directory)
    # Kept for Chroma versions that do not persist automatically; newer
    # versions only emit a deprecation warning here.
    vectordb.persist()
    return vectordb


# One store per routing sub-block; the variable names are what the retriever
# cell expects.
vectordb_Scenarios_Homogeneous = build_vectordb("Scenarios_Homogeneous")
vectordb_Scenarios_Heterogeneous = build_vectordb("Scenarios_Heterogeneous")
vectordb_SDMA = build_vectordb("SDMA")
vectordb_RSMA = build_vectordb("RSMA")
vectordb_Channels_Fixed = build_vectordb("Channels_Fixed")
vectordb_Channels_TimeVarying = build_vectordb("Channels_TimeVarying")
vectordb_Optimization_SE = build_vectordb("Optimization_SE")
vectordb_Optimization_EE = build_vectordb("Optimization_EE")

  vectordb_Scenarios_Homogeneous.persist()


In [3]:
# --- Create retrievers ---
def _top3_retriever(vectordb):
    """Return a retriever over ``vectordb`` configured with ``search_kwargs={"k": 3}``."""
    return vectordb.as_retriever(search_kwargs={"k": 3})


# One retriever per knowledge-base store; ask_deepseek picks among these names.
retriever_Scenarios_Homogeneous = _top3_retriever(vectordb_Scenarios_Homogeneous)
retriever_Scenarios_Heterogeneous = _top3_retriever(vectordb_Scenarios_Heterogeneous)
retriever_SDMA = _top3_retriever(vectordb_SDMA)
retriever_RSMA = _top3_retriever(vectordb_RSMA)
retriever_Channels_Fixed = _top3_retriever(vectordb_Channels_Fixed)
retriever_Channels_TimeVarying = _top3_retriever(vectordb_Channels_TimeVarying)
retriever_Optimization_SE = _top3_retriever(vectordb_Optimization_SE)
retriever_Optimization_EE = _top3_retriever(vectordb_Optimization_EE)

In [4]:
# Layer-1 routing descriptions: one natural-language description per knowledge
# block. Each description is embedded once and compared against the query
# embedding in route_to_block. The keys are the block names that ask_deepseek
# matches on, so they must stay in sync with its retriever selection.
block_representations = {
    "Scenarios": "Knowledge encompassing diverse satellite network deployment scenarios, including variations in constellation types (e.g., GEO, LEO), application-specific mission objectives (e.g., communication, Earth observation, navigation, scientific), and architectural configurations (e.g., bent-pipe, regenerative payload, inter-satellite links, ground segment topologies).",
    "Access Protocols": "Knowledge concerning various multiple access protocols employed in satellite communication systems, such as Space Division Multiple Access (SDMA) utilizing beamforming, Rate-Splitting Multiple Access (RSMA) for non-orthogonal transmission. Understanding the operational principles, performance trade-offs, and application suitability of each protocol is crucial.",
    "Channel Models": "Knowledge of different channel models relevant to satellite communication links, covering both static and dynamic channel conditions. This includes fixed channel models like Additive White Gaussian Noise (AWGN) for idealized scenarios and time-varying channel models to represent realistic impairments such as fading (e.g., Rician, Rayleigh, Nakagami-m), shadowing due to obstacles, atmospheric absorption, scintillation effects, rain attenuation, and Doppler frequency shifts caused by satellite motion. Understanding the statistical properties and parameters of these models is essential for link budget analysis and system design.",
    "Optimization Goals": "Knowledge regarding various performance optimization objectives in satellite communication system design and operation. This includes maximizing Spectral Efficiency (SE) to improve data rates, enhancing Energy Efficiency (EE) to reduce power consumption, increasing system throughput for higher capacity, minimizing communication latency for real-time applications, ensuring fairness in resource allocation among multiple users, and guaranteeing Quality of Service (QoS) requirements for different service types.  Understanding the mathematical formulations of these objectives and the trade-offs between them is important for algorithm development and resource management."
}

# Layer-2 routing descriptions: for every block above, one description per
# sub-block. The outer keys repeat the block names; the inner keys are the
# sub-block names that ask_deepseek maps to a concrete retriever.
sub_block_representations = {
    "Scenarios": {
        "Homogeneous": "Homogeneous satellite network scenarios define constellations where all satellites possess uniform characteristics, including orbital altitude, coverage area, capabilities, and service provision. These scenarios are used for deploying and managing uniform constellations for specific applications, such as global broadband internet access via Low Earth Orbit (LEO) constellations, where consistency in service and design is crucial.",
        "Heterogeneous": "Heterogeneous satellite network scenarios define constellations composed of satellites with diverse characteristics, including mixed orbital regimes (LEO, MEO, GEO), varied payloads (communication, Earth observation), and multi-layer network architectures. These scenarios are used to achieve enhanced coverage, capacity, and service diversity by integrating different satellite types, addressing complex application requirements like multi-service platforms and integrated Earth observation and communication systems."
    },
    "Access Protocols": {
        "SDMA": "Space Division Multiple Access (SDMA) protocol spatially separates users via beamforming techniques, including fixed, steerable, and adaptive beams, to enhance frequency reuse and system capacity in satellite networks. It's used to manage interference and improve efficiency by directing signals to specific user locations.",
        "RSMA": "Rate-Splitting Multiple Access (RSMA) protocol improves spectral efficiency through non-orthogonal transmission, splitting user messages into common and private streams. Utilizing schemes like basic and enhanced RSMA, and employing successive interference cancellation (SIC) at receivers, it handles heterogeneous user demands and improves system throughput compared to orthogonal access methods in satellite communications."
    },
    "Channel Models": {
        "Fixed": "Fixed channel models, like the AWGN model, assume constant or slowly varying channel parameters (signal attenuation, noise power spectral density). These idealizations are used for initial link budget calculations, assessing performance under ideal conditions, and in static scenarios (fixed ground station to geostationary satellite), calculating path loss and antenna gain. They provide baselines for system design and are widely used in education.",
        "Time-Varying": "Time-varying channel models account for channel parameter variations over time, accurately representing actual satellite communication. These models, like Rayleigh (NLOS), Rician (LOS with multipath), shadowing (obstruction-induced signal loss), and those describing Doppler shift, are used for simulating dynamic scenarios (mobile and LEO satellite communication), optimizing link performance in complex environments, designing anti-fading techniques, and developing Doppler compensation technologies."
    },
   "Optimization Goals": {
        "SE": "Spectral Efficiency (SE) optimization in satellite communications focuses on maximizing data rate per unit bandwidth through techniques like advanced modulation and coding, signal processing, interference management, and resource allocation. It's used to enhance data throughput in limited bandwidth scenarios, improving overall link and network capacity.",
        "EE": "Energy Efficiency (EE) optimization in satellite communications aims to minimize power consumption while maintaining performance, employing methods such as power amplifier optimization, power control, energy-aware resource management, and efficient hardware design. It's crucial for extending the operational life of battery-powered terminals and payloads, especially in long-duration satellite missions."
    }
}

In [5]:
# Embed every layer-1 block description once, keyed by block name.
block_representation_embeddings = {
    name: embeddings.embed_query(description)
    for name, description in block_representations.items()
}

# Embed every layer-2 sub-block description, keeping the
# block -> sub-block nesting of sub_block_representations.
sub_block_representation_embeddings = {
    parent: {
        child: embeddings.embed_query(description)
        for child, description in children.items()
    }
    for parent, children in sub_block_representations.items()
}

In [6]:
def route_to_block(query_embedding, block_representation_embeddings):
    """Layer-1 routing: pick the block whose description best matches the query.

    Args:
        query_embedding: Embedding vector of the user query.
        block_representation_embeddings (dict): Block name -> embedding vector
            of that block's description.

    Returns:
        The block name with the highest cosine similarity to the query; on a
        tie the block that comes first in the dict wins.

    Raises:
        ValueError: If ``block_representation_embeddings`` is empty.
    """
    cosine = torch.nn.functional.cosine_similarity

    scored = []
    for name, vector in block_representation_embeddings.items():
        score = cosine(torch.tensor(query_embedding), torch.tensor(vector), dim=0).item()
        scored.append((score, name))

    # max() keeps the first of several equal scores, i.e. dict order.
    selected_block = max(scored, key=lambda pair: pair[0])[1]
    print(f"Layer-1 Routing: Selected Block - {selected_block}")
    return selected_block

def route_to_sub_block(query_embedding, sub_block_representation_embeddings, selected_block):
    """Layer-2 routing: pick the best-matching sub-block inside ``selected_block``.

    Args:
        query_embedding: Embedding vector of the user query.
        sub_block_representation_embeddings (dict): Block name -> dict of
            sub-block name -> embedding vector of the sub-block description.
        selected_block: Block name chosen by layer-1 routing.

    Returns:
        The sub-block name with the highest cosine similarity to the query; on
        a tie the sub-block that comes first in the dict wins.

    Raises:
        KeyError: If ``selected_block`` has no entry in the embeddings dict.
        ValueError: If the selected block has no sub-blocks.
    """
    cosine = torch.nn.functional.cosine_similarity
    candidates = sub_block_representation_embeddings[selected_block]

    scored = []
    for name, vector in candidates.items():
        score = cosine(torch.tensor(query_embedding), torch.tensor(vector), dim=0).item()
        scored.append((score, name))

    # max() keeps the first of several equal scores, i.e. dict order.
    selected_sub_block = max(scored, key=lambda pair: pair[0])[1]
    print(f"Layer-2 Routing: Selected Sub-block - {selected_sub_block} within {selected_block}")
    return selected_sub_block

In [12]:
def summarize_history(history, pipe):
    """
    Summarize the conversation history with the text-generation pipeline.

    Args:
        history (list): Full conversation history. HumanMessage and AIMessage
            entries are rendered into the prompt; other entries are ignored.
        pipe: Callable text-generation pipeline. ``pipe(prompt)`` must return a
            list whose first item is a dict with a ``'generated_text'`` string.

    Returns:
        str: The summary with the echoed prompt, any reasoning up to the last
        ``</think>`` tag and the leading ``Summary:`` marker removed, stripped
        of surrounding whitespace. Empty string when ``history`` is empty.
    """
    if not history:
        return ""

    history_text = ""
    for message in history:
        if isinstance(message, HumanMessage):
            history_text += f"User: {message.content}\n"
        elif isinstance(message, AIMessage):
            history_text += f"Assistant: {message.content}\n"

    summarization_prompt = f"""Please summarize the following conversation history, extract key information, and use concise language to summarize the main content of the conversation, so that subsequent conversations can quickly review the context. The final answer should start with "Summary:"

    Dialogue history:
    {history_text}

    Summary:
    """

    summary = pipe(summarization_prompt)[0]['generated_text']

    # The pipeline may return the prompt followed by the continuation. Drop the
    # echoed prompt so it can never end up inside the summary (previously it
    # did whenever the model produced no </think> tag).
    if summary.startswith(summarization_prompt):
        summary = summary[len(summarization_prompt):]

    # Keep only what follows the last reasoning block. Search for the same tag
    # whose length is skipped (the old code searched for "</think" without ">").
    think_close = "</think>"
    think_end = summary.rfind(think_close)
    if think_end != -1:
        summary = summary[think_end + len(think_close):]

    # The prompt asks the model to start its answer with "Summary:". The old
    # code looked for "<Summary:>", which never occurs, so the marker was never
    # removed. Cut at the first marker so later text is never discarded.
    _, marker, after_marker = summary.partition("Summary:")
    if marker:
        summary = after_marker

    return summary.strip()

In [13]:
def ask_deepseek(user_input, history, summarized_history_memory):
    """Answer one user turn with two-layer routed retrieval and the LLM pipeline.

    The query is embedded, routed to a block and then to a sub-block, and the
    matching retriever supplies background chunks for the prompt. The model
    output is reduced to the final answer, the turn is appended to ``history``
    and a new summary of the whole history is generated.

    Args:
        user_input (str): The user's question for this turn.
        history (list): Conversation so far (HumanMessage / AIMessage objects).
            Mutated in place: this turn's question and answer are appended.
        summarized_history_memory (str): Summary returned by the previous call
            ("" on the first turn); inserted into the prompt.

    Returns:
        tuple: ``(answer, selected_block, selected_sub_block, new_summary)``.
    """
    # 1. Embed the user query.
    query_embedding = embeddings.embed_query(user_input)

    # 2. Layer-1 routing (choose the block).
    selected_block = route_to_block(query_embedding, block_representation_embeddings)

    # 3. Layer-2 routing (choose the sub-block).
    selected_sub_block = route_to_sub_block(query_embedding, sub_block_representation_embeddings, selected_block)

    # 4. Pick the retriever for the routed (block, sub-block) pair. Any pair
    #    that is not listed falls back to the SDMA retriever, as before.
    retrievers = {
        ("Scenarios", "Homogeneous"): retriever_Scenarios_Homogeneous,
        ("Scenarios", "Heterogeneous"): retriever_Scenarios_Heterogeneous,
        ("Access Protocols", "SDMA"): retriever_SDMA,
        ("Access Protocols", "RSMA"): retriever_RSMA,
        ("Channel Models", "Fixed"): retriever_Channels_Fixed,
        ("Channel Models", "Time-Varying"): retriever_Channels_TimeVarying,
        ("Optimization Goals", "SE"): retriever_Optimization_SE,
        ("Optimization Goals", "EE"): retriever_Optimization_EE,
    }
    retriever = retrievers.get((selected_block, selected_sub_block), retriever_SDMA)

    retrieved_chunks = retriever.get_relevant_documents(user_input)
    context = "\n\n".join(chunk.page_content for chunk in retrieved_chunks)

    # 5. Build the prompt: instructions, summary, retrieved background, the
    #    verbatim history, then the new question.
    prompt_content = f"""You are an expert in satellite communications. Please help me formulate a satellite communication model in English based on the following background knowledge and your local knowledge, maybe the background knowledge is wrong, so it just a reference. You just need to give a brief answer. If there are formulas, please also provide the corresponding modeled formulas. The final answer should start with "Answer:" 
    Summarized Conversation History:
    {summarized_history_memory}
    Background Knowledge:
    {context}
    Conversation History:
    """
    for message in history:
        if isinstance(message, HumanMessage):
            prompt_content += f"User: {message.content}\n"
        elif isinstance(message, AIMessage):
            prompt_content += f"Assistant: {message.content}\n"
    prompt_content += f"""
    User Problem:
    {user_input}
    """

    # 6. Generate and reduce the output to the final answer.
    response_content = pipe(prompt_content)[0]['generated_text']

    # The pipeline may return the prompt followed by the continuation. Drop the
    # echoed prompt; otherwise a reply without a </think> tag would store the
    # whole prompt in `history` and re-inject it on every later turn.
    if response_content.startswith(prompt_content):
        response_content = response_content[len(prompt_content):]

    # Keep only what follows the last reasoning block.
    think_close = "</think>"
    think_end = response_content.rfind(think_close)
    if think_end != -1:
        response_content = response_content[think_end + len(think_close):]

    # The prompt asks for an answer starting with "Answer:". The old code
    # looked for "<Answer:>", which never occurs, so the marker was never
    # removed. Cut at the first marker so later text is never discarded.
    _, marker, after_marker = response_content.partition("Answer:")
    if marker:
        response_content = after_marker
    response_content_processed = response_content.strip()

    # 7. Record the turn, then refresh the running summary.
    history.append(HumanMessage(content=user_input))
    history.append(AIMessage(content=response_content_processed))

    summarized_history_memory = summarize_history(history, pipe)

    # Return the processed answer together with the routing decisions.
    return response_content_processed, selected_block, selected_sub_block, summarized_history_memory


In [14]:
# Start a fresh conversation: empty message history and no summary yet.
history = []
summarized_history_memory = ""

# First turn. ask_deepseek appends this exchange to `history` in place and
# returns the refreshed summary along with the routing decisions.
user_input = (
    "Since heterogeneous satellite networks are considered, to ensure scalability, "
    "please use the SDMA protocol at the LEO satellite. "
    "Show the transmit signals generated by the GEO satellite. "
    "Answer based on your local knowledge."
)
response, block1, sub_block1, summarized_history_memory = ask_deepseek(
    user_input=user_input,
    history=history,
    summarized_history_memory=summarized_history_memory,
)
print(f"AI Assistant: {response}")

Layer-1 Routing: Selected Block - Access Protocols
Layer-2 Routing: Selected Sub-block - SDMA within Access Protocols
AI Assistant: 

Answer: 

In a satellite communication model utilizing SDMA at a LEO satellite, the transmit signals from a GEO satellite can be modeled as follows:

**Transmit Signal Model:**

For each user \( k \), the signal transmitted by the GEO satellite is denoted as \( s_k \).

**Received Signal Model:**

At the LEO satellite, the received signal \( \mathbf{y} \) is given by:

\[
\mathbf{y} = \sum_{k=1}^{N} \mathbf{h}_k s_k + \mathbf{n}
\]

Where:
- \( N \) is the total number of users/signals.
- \( \mathbf{h}_k \) is the channel vector representing the state of the channel for user \( k \).
- \( \mathbf{n} \) is the additive Gaussian noise accounting for thermal noise.

**SINR Calculation:**

The SINR for each user \( k \) is calculated as:

\[
\text{SINR}_k = \frac{|\mathbf{h}_k|^2 P}{\sigma^2 + \sum_{j \neq k} |\mathbf{h}_j|^2 P}
\]

Where:
- \( P \) is the t

In [15]:
# Show the running summary returned by the previous ask_deepseek call.
print(summarized_history_memory)



Summary: The conversation discusses satellite communication models employing SDMA at LEO satellites. It covers the transmit signal model from GEO satellites, the received signal model including channel vectors and noise, and SINR calculations for each user, supporting scalable communication in a heterogeneous network.


In [16]:
# Follow-up turn (example input, modify as needed). It reuses `history` and the
# summary from the previous turn so the model keeps the context.
user_input = (
    "Good, show me the corresponding channel model. "
    "Use your local knowledge."
)
response, block1, sub_block1, summarized_history_memory = ask_deepseek(
    user_input=user_input,
    history=history,
    summarized_history_memory=summarized_history_memory,
)
print(f"AI Assistant: {response}")


Layer-1 Routing: Selected Block - Channel Models
Layer-2 Routing: Selected Sub-block - Fixed within Channel Models
AI Assistant: 

Answer: 

The channel model for the satellite communication system using SDMA at LEO satellites can be described as follows:

**Channel Model:**

Each user's signal undergoes fading and attenuation through the satellite channel. The channel vector \( \mathbf{h}_k \) for user \( k \) is composed of both line-of-sight (LOS) and non-line-of-sight (NLOS) components, represented as:

\[
\mathbf{h}_k = \sqrt{\alpha} \mathbf{g}_{\text{LOS},k} + \sqrt{1-\alpha} \mathbf{g}_{\text{NLOS},k}
\]

Where:
- \( \alpha \) is the LOS probability.
- \( \mathbf{g}_{\text{LOS},k} \) and \( \mathbf{g}_{\text{NLOS},k} \) are the respective channel gains for LOS and NLOS paths.

**Power Delay Profile:**

The channel's temporal characteristics are captured by the power delay profile (PDP), which for user \( k \) is:

\[
f_k(\tau) = \beta_0 \delta(\tau) + \sum_{l=1}^{L} \beta_l e^{-