In [None]:
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.prompts import PromptTemplate
from langchain.chat_models import ChatOpenAI

# Build the knowledge base of historical data
def build_kb(df, feature_cols, target_col, past_len=30):
    """Index sliding windows of ``df`` in a FAISS vector store.

    Every run of ``past_len`` consecutive rows is rendered as text, one
    ``"<column>: <value>"`` line per value, grouped column by column in the
    order given by ``feature_cols``. The texts are embedded with
    ``sentence-transformers/all-MiniLM-L6-v2`` and stored in FAISS.

    Args:
        df: DataFrame holding the history. Rows are presumably in
            chronological order; this is not checked here.
        feature_cols: Column names rendered into each window's text.
        target_col: Currently unused; kept so existing callers keep working.
        past_len: Number of rows per window.

    Returns:
        A FAISS vector store with one entry per window.

    Raises:
        ValueError: If ``past_len`` is smaller than 1 or ``df`` has too few
            rows to produce at least one window.
    """
    if past_len < 1:
        raise ValueError(f"past_len must be >= 1, got {past_len}")

    # Same bound as range(len(df) - past_len): the window made of the final
    # `past_len` rows is not indexed (presumably because it is the query
    # window -- confirm with the caller).
    n_windows = len(df) - past_len
    if n_windows < 1:
        raise ValueError(
            f"need more than {past_len} rows to build a window, got {len(df)}"
        )

    # Format every value once per column instead of re-running iterrows()
    # for each column of each window; this also keeps each column's own
    # dtype (iterrows upcasts mixed int/float rows to float).
    lines_by_col = {
        col: [f"{col}: {value}" for value in df[col]] for col in feature_cols
    }
    windows = [
        "\n".join(
            line
            for col in feature_cols
            for line in lines_by_col[col][start:start + past_len]
        )
        for start in range(n_windows)
    ]

    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    # The windows are plain strings, so from_texts is the right constructor;
    # from_documents expects Document objects and fails on str input.
    vectorstore = FAISS.from_texts(windows, embeddings)
    return vectorstore

# Prediction step
def predict_streamflow(current_window_df, vectorstore, model, prompt_template,
                       feature_cols=None, k=5):
    """Ask the LLM for a forecast, grounded in similar historical windows.

    The latest row of ``current_window_df`` is rendered as
    ``"<column>: <value>"`` lines, used to retrieve the ``k`` most similar
    entries from ``vectorstore``, and both are substituted into
    ``prompt_template`` before calling ``model.predict``.

    Args:
        current_window_df: DataFrame with the most recent observations; only
            its last row is used for the query text.
        vectorstore: Object exposing ``similarity_search(query, k=...)`` that
            returns items with a ``page_content`` attribute.
        model: Object exposing ``predict(prompt)``.
        prompt_template: Template exposing ``format(recent_data=...,
            similar_chunks=...)``, e.g. a plain ``str``.
        feature_cols: Columns to include in the query text. Defaults to all
            columns of ``current_window_df``.
        k: Number of similar historical entries to retrieve.

    Returns:
        Whatever ``model.predict`` returns for the formatted prompt.

    Raises:
        ValueError: If ``current_window_df`` has no rows.
    """
    if len(current_window_df) == 0:
        raise ValueError("current_window_df must contain at least one row")
    if feature_cols is None:
        feature_cols = list(current_window_df.columns)

    # Build the text description from the most recent value of each feature.
    # NOTE(review): the query describes only the last row; confirm this is
    # the intended representation for matching against the stored texts.
    query_text = "\n".join(
        f"{col}: {current_window_df[col].values[-1]}" for col in feature_cols
    )

    # Retrieve similar historical chunks.
    results = vectorstore.similarity_search(query_text, k=k)

    # similarity_search returns document objects, not strings, so the text
    # has to be pulled out of page_content before joining.
    similar_chunks = "\n\n".join(doc.page_content for doc in results)

    # Fill in the prompt and call the LLM for the prediction.
    prompt = prompt_template.format(recent_data=query_text, similar_chunks=similar_chunks)
    response = model.predict(prompt)
    return response

# Example: build the knowledge base and run a prediction.
# NOTE(review): `df` is not defined in this cell; it is presumably loaded in
# an earlier cell -- confirm before running this on its own.

# Defined once at module level so the knowledge base and the query window are
# built from the same columns and the same window length.
feature_cols = ["temperature_2m_mean", "total_precipitation_sum", "streamflow"]
PAST_LEN = 30

vectorstore = build_kb(df, feature_cols=feature_cols, target_col="streamflow", past_len=PAST_LEN)
llm_model = ChatOpenAI(model="gpt-4o", temperature=0)
prompt_template = """
You are a water flow prediction expert. Given the following recent data and similar historical patterns, predict the streamflow for the next 7 days.
Recent data: {recent_data}
Similar historical data:
{similar_chunks}
"""
# Most recent PAST_LEN rows, restricted to the feature columns so that no
# unrelated column ends up in the query text.
current_window_df = df[feature_cols].iloc[-PAST_LEN:]
prediction = predict_streamflow(current_window_df, vectorstore, llm_model, prompt_template)
print("Predicted Streamflow:", prediction)
