In [None]:
import pandas as pd
# Load the paper table from a CSV in the working directory.
# NOTE(review): the later cells operate on `df_rest`, which is not defined in
# any cell visible here -- presumably it is derived from `df_neurips`; confirm
# and add the missing derivation so the notebook survives Restart & Run All.
df_neurips = pd.read_csv("df_neurips.csv")

In [None]:
# Instruction prompt asking the model to propose long-term future-work
# directions grounded in a paper's full text.
# The generation loop appends each paper's text directly after this string
# (plain concatenation); the "[Insert the full text ...]" line is not
# substituted and is sent to the model verbatim.
# NOTE(review): the identifier is misspelled ("futurw"); it is left unchanged
# because later cells reference it by this exact name.
futurw_work_generation_prompt = '''
I want to generate future work directions for my research paper based on its entire content (all sections, including abstract, introduction,
background, methodology, results, discussion, etc.). Please analyze the paper and propose substantial, long-term research goals
that extend the current work in a meaningful way, advancing the field or addressing significant open challenges. Ensure the suggested
future work directions are ambitious, grounded in the paper’s content, and avoid trivial or short-term tasks (e.g., minor experiments,
parameter tuning, or small-scale tests). Each direction should be clearly linked to specific aspects of the paper (e.g., limitations,
findings, or discussed challenges) and propose innovative, impactful research objectives. If no suitable long-term future work can
be derived, clearly state: "No long-term future work directions could be derived from the paper." Provide the generated future work
directions in a concise, bulleted list, with each direction accompanied by a brief explanation of how it connects to the paper’s content.

Input Text (Paper Content): [Insert the full text or relevant sections of the paper here]

Output Format: Future Work Directions (Long-Term Goals)

[Future work direction]: [Brief explanation of how this direction connects to the paper’s content and why it is a substantial,
long-term goal.]

[Additional future work directions and explanations, if applicable.]

OR

No long-term future work directions could be derived from the paper.

'''

In [None]:
# Evaluation rubric prompt: asks a judge model to score LLM-generated future
# work against the author-written future work on five criteria (1-5 each) and
# to return the scores with justifications as JSON.
# The "<Author-Mentioned Future Work>" and "<LLM-Generated Future Work>" lines
# look like placeholders to be replaced per pair -- TODO confirm; no cell
# visible here reads `prompt`, so the substitution presumably happens later.
prompt = '''
Instructions: You are provided with two texts for each pair: one is Author-Mentioned Future Work and another is
LLM-Generated Future Work. Please read both texts carefully. After reviewing each text, assign a score from
1 to 5 for each criterion outlined below. The score should reflect how well the LLM-Generated Future Work compares to the
Author-Mentioned Future Work,
where 1 represents poor quality and 5 represents excellent quality that closely matches or even surpasses the Author-Mentioned Future Work
in some aspects.

Author-Mentioned Future Work:
<Author-Mentioned Future Work>

LLM-Generated Future Work:
<LLM-Generated Future Work>

Scoring Criteria:
Coherence and Logic:

5: The text is exceptionally coherent; the ideas flow logically and are well connected.

3: The text is coherent but may have occasional lapses in logic or flow.

1: The text is disjointed or frequently illogical.

Relevance and Accuracy:

5: The text is completely relevant to the topic and accurate in all presented facts.

3: The text is generally relevant with minor factual errors or slight deviations from the topic.

1: The text often strays off topic or includes multiple factual inaccuracies.

Readability and Style:

5: The text is engaging, well-written, and stylistically consistent with the Author-Mentioned Future Work
3: The text is readable but may lack flair or have minor stylistic inconsistencies.
1: The text is difficult to read or stylistically poor.

Grammatical Correctness:

5: The text is free from grammatical errors.
3: The text has occasional grammatical errors that do not impede understanding.
1: The text has frequent grammatical errors that hinder comprehension.

Overall Impression:
5: The text is of a quality that you would expect from a professional writer.
3: The text is acceptable but would benefit from further editing.
1: The text is of a quality that needs significant revision to be usable.

Task: For each text pair:
Rate the LLM-Generated Future Work on each criterion and provide a final overall score out of 5.
Provide a justification for each criterion score, highlighting strengths and weaknesses observed in the LLM-Generated Future Work
relative to the Author-Mentioned Future Work.
Present the scores and justifications in JSON format, structured as follows:

{ "Coherence and Logic": { "score": , "justification": "" }, "Relevance and Accuracy": { "score": , "justification": "" },
"Readability and Style": { "score": , "justification": "" }, "Grammatical Correctness": { "score": , "justification": "" },
"Overall Impression": { "score": , "justification": "" } }

'''

In [None]:
# Ordered (label, column) pairs. Each pair becomes one "Label: value" line of
# the flattened paper text; the order here is the order of the output lines.
_section_columns = [
    ("Abstract", "df_Abstract"),
    ("Introduction", "df_Introduction"),
    ("Related_Work", "df_Related_Work"),
    ("Methodology", "df_Methodology"),
    ("Dataset", "df_Dataset"),
    ("Conclusion", "df_Conclusion"),
    ("Experiment_and_Results", "df_Experiment_and_Results"),
    ("Extra1", "df_col_2"),
    ("Extra2", "df_col_3"),
    ("Extra3", "df_col_4"),
    ("Extra4", "df_col_5"),
    ("Extra5", "df_col_6"),
    ("Extra6", "df_col_7"),
    ("Extra7", "df_col_8"),
]


def _render_paper_text(row):
    """Flatten one DataFrame row into newline-separated "Label: value" lines.

    Values are interpolated with default f-string formatting, so a missing
    cell (NaN) is rendered as the literal text "nan".
    """
    return "\n".join(
        f"{label}: {row[column]}" for label, column in _section_columns
    )


df_rest['response_string'] = df_rest.apply(_render_paper_text, axis=1)


In [None]:
import os
import tiktoken
from langchain.chat_models import ChatOpenAI
from langchain.vectorstores import FAISS
from langchain.retrievers import BM25Retriever, EnsembleRetriever
from langchain.embeddings import OpenAIEmbeddings
from langchain.schema import Document

# 1) Setup
# The API key must be supplied through the environment. Assigning a literal
# here (even an empty one) would overwrite a correctly exported key and risks
# committing a secret with the notebook, so only verify that it is present.
if not os.environ.get('OPENAI_API_KEY'):
    raise RuntimeError(
        "OPENAI_API_KEY is not set; export it in the environment before "
        "running this notebook."
    )

# Single source of truth for the model, shared by the chat client and tokenizer.
MODEL_NAME = "gpt-4o-mini"
chat_llm = ChatOpenAI(model_name=MODEL_NAME, temperature=0)

# token encoder & limit
# `enc` is used later to count and truncate prompt tokens; MAX_TOKENS is the
# budget that truncation enforces (128k is the context window advertised for
# gpt-4o-mini).
enc = tiktoken.encoding_for_model(MODEL_NAME)
MAX_TOKENS = 128_000


from langchain.text_splitter import RecursiveCharacterTextSplitter


# 2) Build your ensemble retriever (same as before)
# NOTE: despite the `hf_` prefix this is an OpenAI embeddings client; the name
# is kept because make_retriever_for_docs reads this global.
hf_emb = OpenAIEmbeddings()

def make_retriever_for_docs(docs, k=3):
    """Build an equally weighted FAISS + BM25 ensemble retriever over `docs`.

    Args:
        docs: LangChain `Document` objects to index.
        k: Number of hits each underlying retriever is configured to return.

    Returns:
        An `EnsembleRetriever` wrapping the FAISS retriever (embeddings from
        the module-level `hf_emb`) and the BM25 retriever, weighted 0.5 each.
    """
    # Vector-store side: embed the documents and expose the store as a retriever.
    vector_retriever = FAISS.from_documents(docs, hf_emb).as_retriever(
        search_kwargs={"k": k}
    )

    # BM25 side: `k` is set as an attribute after construction.
    keyword_retriever = BM25Retriever.from_documents(docs)
    keyword_retriever.k = k

    # EnsembleRetriever is constructed with keyword arguments only.
    return EnsembleRetriever(
        retrievers=[vector_retriever, keyword_retriever],
        weights=[0.5, 0.5],
    )

# Load the reference corpus with llama_index and convert it for LangChain.
from llama_index.core import SimpleDirectoryReader
documents = SimpleDirectoryReader("RAG_Data").load_data()

# 1. Convert the llama_index documents to LangChain Documents
#    (a missing/empty metadata value is replaced by an empty dict).
lc_docs = [
    Document(page_content=doc.text, metadata=doc.metadata or {})
    for doc in documents
]

# 2. Chunk them with a token-based size & overlap.
#    A plain RecursiveCharacterTextSplitter(...) measures length with len(),
#    i.e. in characters, so chunk_size=512 would mean 512 characters rather
#    than the 512 tokens intended here. from_tiktoken_encoder makes the
#    splitter count tokens with the model's tokenizer instead.
#    NOTE: this yields larger chunks than the character-based splitter did, so
#    retrieved context (and downstream generations) will differ from earlier runs.
splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    model_name="gpt-4o-mini",
    chunk_size=512,          # max tokens per chunk
    chunk_overlap=20,        # tokens shared between consecutive chunks
    disallowed_special=(),   # treat text like "<|endoftext|>" as ordinary text
)
chunked_docs = splitter.split_documents(lc_docs)

# Fail with a clear message instead of an obscure error from the index builders.
if not chunked_docs:
    raise ValueError(
        "No text chunks were produced from 'RAG_Data'; "
        "check that the directory contains readable documents."
    )

# 3. Build the retriever on the chunked docs instead of the full ones.
retriever = make_retriever_for_docs(chunked_docs, k=3)

# 3) Generate future work for every paper, truncating over-long inputs.

# Tokens held back from MAX_TOKENS so the request cannot fill the whole
# budget: room for the model's reply, plus a small allowance for the chat
# message framing that is not part of the message text. Both are heuristics.
RESPONSE_TOKEN_RESERVE = 4_096
MESSAGE_OVERHEAD_TOKENS = 64

generated_future_work = []
# Iterate over the column values rather than range(len(df_rest)) + .at[i, ...]:
# label-based lookup only works when the index is exactly 0..n-1, which does
# not hold for a filtered or sliced frame.
for paper_text in df_rest['response_string']:
    # a) Build the user query: instruction prompt followed by the paper text.
    user_query = futurw_work_generation_prompt + paper_text

    # b) Retrieve supporting chunks using the full (untruncated) query.
    retrieved = retriever.get_relevant_documents(user_query)
    docs_text = "\n\n".join(d.page_content for d in retrieved)

    # c) The system message carries the instruction prompt plus the retrieved
    #    context; its full text has to be counted against the budget.
    system_content = futurw_work_generation_prompt + "\n\nContext:\n" + docs_text

    # disallowed_special=() makes the encoder treat special-token look-alikes
    # in the text (e.g. "<|endoftext|>") as plain text instead of raising.
    tok_system = enc.encode(system_content, disallowed_special=())
    tok_query = enc.encode(user_query, disallowed_special=())

    # d) If over budget, truncate **only** the query portion. The budget is
    #    clamped at zero so a negative value can never be used as a slice
    #    bound (which would cut from the wrong end).
    allowed_for_query = max(
        MAX_TOKENS
        - RESPONSE_TOKEN_RESERVE
        - MESSAGE_OVERHEAD_TOKENS
        - len(tok_system),
        0,
    )
    if len(tok_query) > allowed_for_query:
        user_query = enc.decode(tok_query[:allowed_for_query])

    # e) Call the LLM with a two-message chat: system (prompt + context), user (query).
    messages = [
        {"role": "system", "content": system_content},
        {"role": "user",   "content": user_query}
    ]
    response = chat_llm.invoke(messages)
    generated_future_work.append(response.content)  # the model's reply text

# 4) Attach back to DataFrame (one generated text per row, in row order)
df_rest['Generated_future_work_rag_llm'] = generated_future_work
