In [4]:
# !pip install fitz

# !pip install dotenv chromadb evaluate gradio smolagents

# !pip install pymupdf

Collecting pymupdf
  Using cached pymupdf-1.26.0-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (3.4 kB)
Using cached pymupdf-1.26.0-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (24.1 MB)
Installing collected packages: pymupdf
Successfully installed pymupdf-1.26.0


In [2]:
# -*- coding: utf-8 -*-
import os
import glob
import re
import fitz
import chromadb
import numpy as np
import requests
import matplotlib.pyplot as plt
import gradio as gr
import uuid

# Hugging Face & LangChain for AI models
from dotenv import load_dotenv
from sentence_transformers import SentenceTransformer
from langchain.text_splitter import RecursiveCharacterTextSplitter
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
from smolagents import OpenAIServerModel, HfApiModel
from smolagents.tools import Tool
from typing import Dict, List, Union

# Google Drive connection
from google.colab import drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
# Load environment variables
load_dotenv()
os.environ["CHROMA_HUGGINGFACE_API_KEY"] = "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"

# Initialize embedding model
embedding_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

# Initialize ChromaDB
client = chromadb.PersistentClient(path="/content/drive/MyDrive/VectorDB")
collection = client.get_or_create_collection(name='ties_collection_emb', metadata={"hnsw:space": "cosine"})

print(collection)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Collection(name=ties_collection_emb)


In [4]:
def process_scientific_text(text):
    """Cleans text by removing references, formulas, tables, numerical results, and dataset details."""
    patterns = [r'\[\d+\]',  # References
        r'L[a-zA-Z]+\s*=\s*[^=]+',  # Formulas
        r'TABLE \d+\..*?\n',  # Table headers
        r'\b\d+%|\b\d+\.\d+%|\b\d{3,}\b',  # Numerical results
        r'https?://[^\s]+',  # URLs
        r'\b\d+\s*(domains|records|samples|queries)\b',  # Dataset-specific details
        r'^\w+\s+\d+\s+[^\s]+\.[a-z]+$',  # DGA dataset examples
    ]
    for pattern in patterns:
        text = re.sub(pattern, '', text, flags=re.MULTILINE)
    return '\n'.join([line.strip() for line in text.splitlines() if line.strip()])

def extract_and_chunk_pdf(file_path, chunk_size=800, chunk_overlap=400):
    """Extracts and chunks text from a PDF."""
    doc = fitz.open(file_path)
    text = "\n".join([page.get_text("text") for page in doc])
    abstract = text.split("\n\n")[0] if "abstract" in text.lower() else ""

    text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
        chunk_size=chunk_size, chunk_overlap=chunk_overlap
    )
    chunks = text_splitter.split_text(text)
    chunks = [process_scientific_text(chunk) for chunk in chunks]
    return chunks, abstract


In [5]:
def process_scientific_text(text):
    """Cleans text by removing references, formulas, tables, numerical results, and dataset details."""
    patterns = [r'\[\d+\]',  # References
        r'L[a-zA-Z]+\s*=\s*[^=]+',  # Formulas
        r'TABLE \d+\..*?\n',  # Table headers
        r'\b\d+%|\b\d+\.\d+%|\b\d{3,}\b',  # Numerical results
        r'https?://[^\s]+',  # URLs
        r'\b\d+\s*(domains|records|samples|queries)\b',  # Dataset-specific details
        r'^\w+\s+\d+\s+[^\s]+\.[a-z]+$',  # DGA dataset examples
    ]
    for pattern in patterns:
        text = re.sub(pattern, '', text, flags=re.MULTILINE)
    return '\n'.join([line.strip() for line in text.splitlines() if line.strip()])

def extract_and_chunk_pdf(file_path, chunk_size=800, chunk_overlap=400):
    """Extracts and chunks text from a PDF."""
    doc = fitz.open(file_path)
    text = "\n".join([page.get_text("text") for page in doc])
    abstract = text.split("\n\n")[0] if "abstract" in text.lower() else ""

    text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
        chunk_size=chunk_size, chunk_overlap=chunk_overlap
    )
    chunks = text_splitter.split_text(text)
    chunks = [process_scientific_text(chunk) for chunk in chunks]
    return chunks, abstract


In [6]:
from transformers import pipeline

# Initialize the summarization pipeline
summarizer = pipeline("summarization", model="google/flan-t5-large")

Device set to use cpu


In [7]:
def extract_sections(paper_text):
    """Extracts structured sections from scientific papers."""
    patterns = {
        "Abstract": r"(?i)\bAbstract\b[:\.\s]*([\s\S]*?)(?=\n\s*\b(?:Introduction|1\s*Introduction)\b)",
        "Introduction": r"(?i)\bIntroduction\b[:\.\s]*([\s\S]*?)(?=\n\s*\b(?:Methods|2\s*Methods)\b)",
        "Methods": r"(?i)\b(?:Methods|Methodology)\b[:\.\s]*([\s\S]*?)(?=\n\s*\b(?:Results|3\s*Results)\b)",
        "Results": r"(?i)\bResults\b[:\.\s]*([\s\S]*?)(?=\n\s*\b(?:Discussion|4\s*Discussion)\b)",
        "Discussion": r"(?i)\bDiscussion\b[:\.\s]*([\s\S]*?)(?=\n\s*\b(?:Conclusion|5\s*Conclusion)\b)",
        "Conclusion": r"(?i)\bConclusion\b[:\.\s]*([\s\S]*?)(?=\n\s*\b(?:References|6\s*References|\Z))"
    }
    sections = {key: re.search(pattern, paper_text).group(1).strip() for key, pattern in patterns.items() if re.search(pattern, paper_text)}
    return "\n\n".join([f"{key}:\n{value}" for key, value in sections.items()]) if sections else "Full Text:\n" + paper_text


def baseline_model(pdf_path):
    chunks, abstract = extract_and_chunk_pdf(pdf_path)  # Extract text chunks
    chunks = [process_scientific_text(chunk) for chunk in chunks]

    # **Chunk large texts into manageable sizes**
    def chunk_text(text, max_length=500):
        return [text[i:i+max_length] for i in range(0, len(text), max_length)]

    chunks = chunk_text(" ".join(chunks))

    # **Summarize each chunk separately with dynamic max_length**
    summaries = [
        summarizer(
            f"Summarize this: {chunk}",
            max_length=max(50, int(len(chunk.split()) * 0.75)),  # Adjust based on word count
            min_length=50,
            do_sample=False
        ) for chunk in chunks
    ]

    # **Merge all summaries into a final output**
    summary = " ".join([s[0]['summary_text'] for s in summaries])

    clean_response = re.sub(r"<think>.*?</think>", "", summary, flags=re.DOTALL).strip()
    return clean_response


In [8]:
summary =   baseline_model("my_pdf.pdf")

In [9]:
from IPython.display import display, Markdown
display(Markdown(summary))

The abstract reasoning language model (ALM) has been used to solve many complex logical inference problems. However, it is not always the best choice for all queries. This paper argues that LLM can be used to solve a ny problems admit straightforward solutions. This motivates an open question: Can LLMs learn when to think? We propose Thinkless, a learnable framework that em- powers an LLM to adaptively select between short-form and long-form reasoning Using a decoupled formulation of GRPO, which decomposes the learning objective of hybrid reasoning into two components: (1) a control token loss that governs the selection of the reasoning mode, and (2) a response . . . . . . . . . . . . . . . . . . . . . . . . . .  Extend the model to all questions. Avoid using extended reasoning for all questions. Avoid using extended reasoning for all questions. Avoid using extended reasoning for all questions. Avoid using extended reasoning for all questions. Avoid using extended reasoning for all questions. . . . . . . . . . . . . . . . . . . . . . . . . . . . think> short> Auto Decision Obj 1: Mode Sel think> short> Auto Decision Obj 1: Mode Sel think> short> Auto Decision Obj 1: ection Obj 2: Accuracy Improvement Decoupled GRPO Figure 1: Thinkless learns a hybrid LLM capable of adaptively selecting between thinking and non-thinking inference modes, directed by two special tokens, think> and a hybrid reasoning model that can generate short-form or long-form responses based on the input query. The model is guided by the complexity of the query and its own capability. The model is tasked with autonomously deciding whether to generate a short-form or Identify the model’s capabilities. Identify the user’s tolerance for the trade-off between efficiency and accuracy. Identify the model’s ability to learn from interactions. Identify the model’s ability to learn from interactions. a hybrid reasoning model capable of selecting between short-form and long-form responses. Through iterative exploration and reward-driven updates, the model progressively acquires the ability to make autonomous, context-aware decisions about its reasoning Observe the model’s response style. Train the model. Observe the model’s response style. Observe the model’s response style. Observe the model’s response style. Observe the model’s response style. Decouple the model into two models: a reasoning model and a standard instruction-following model. Train the model on paired long- and short-form responses. Decouple the model into two models: a reasoning model . . . . . . . . . . . . . . . . . . . . . . . . . . .  elongation of long & short responses, the single control token may receive weak and biased gradient signals, ultimately leading to mode collapse at the early stages of training. To this end, we propose Thinkless, a reinforcement learning framework designed to train a hybrid reasoning Train the model to respond to control tokens, think> and short>, which are generated as the first token in the model’s output to signal the intended inference style. Warm-up the model. Observe the model’s response g model, each conditioned on a specific control token (think> or short>). Additionally, the model is trained on paired long- form and short- form responses for each query. Reinforcement Learning with De a framework for hybrid reasoning. Understand the problem. Understand the solution. Identify the problem. Identify the solution. Identify the solution. Identify the solution. Identify the solution. Identify the solution. Identify the solution. Identify the solution DeGRPO is a method for hybrid reasoning that explicitly separates the hybrid reasoning objective into two components: Mode Selection, which governs how quickly the policy adapts based on the model’s current accuracy; and Accuracy Improvem, which governs how Adapt the mode selection policy to the response tokens. Balance the learning signals for the control and response tokens. Adapt the mode selection policy to the response tokens. Adapt the mode selection policy to the response tokens. Adapt the mode DeGRPO uses a multi-layer reinforcement learning model. DeGRPO uses a multi-layer reinforcement learning model. DeGRPO uses a multi-layer reinforcement learning model. DeGRPO uses a multi-layer reinforcement learning model. De Decoupled GRPO method for reasoning inference cost reduction. Adaptive reasoning inference cost reduction. Adaptive reasoning inference cost reduction. Adaptive reasoning inference cost reduction. Adaptive reasoning inference cost reduction. g Models. g Models generate intermediate steps in a chain-of-thought process before producing a final answer. To mitigate this, recent research has explored strategies to enhance the efficiency of reasoning models without sacrificing accuracy eliciting a concise reasoning path. Using a variety of techniques, such as reinforcement learning with length penalties [1, 24], supervised fine-tuning using variable-length chain of thought data , and prompt- Improve the efficiency of the model. Optimize the decoding strategy. Optimize the hybrid reasoning strategy. Optimize the decoding strategy. Optimize the hybrid reasoning strategy. Optimize the hybrid reasoning strategy. Optimize the hybrid reasoning strategy. Optimize Hybrid reasoning is a form of reasoning that combines short-chain reasoning with long-chain reasoning. Hybrid reasoning is a form of reasoning that combines short-chain reasoning with long-chain reasoning. Hybrid reasoning is A learning-based approach to unified models that can support both primary and secondary reasoning.  2016 Elsevier B.V. All rights reserved. This work is part of the Elsevier B.V. Open Access a model of reasoning that is based on inputs. a model of reasoning that is based on inputs. a model of reasoning that is based on inputs. a model of reasoning that is based on input Improve model size and decoding strategies. Improve model complexity. Improve model speed. Improve model quality. Improve model performance. Improve model quality. Improve model speed. Improve model quality. Improve model speed. Improve model quality. Improve model speed. Improve model Embrace hybrid reasoning. Use routing mechanisms to ensure that models are not redirected. Use unified models to ensure that models are not redirected. Use routing mechanisms to ensure that models are not redirected. Use routing mechanisms to ensure that LLMs can be trained to support both reasoning modes and can switch between them via prompt-based control. However, most existing approaches depend on manually crafted heuristics to balance efficiency and performance. In this work, we explore a learning-based Hinkless is a framework for generating short- and long-form responses. It is implemented in two stages: (1) Distillation for Warm-up, where we fine- tune a pre-trained reasoning model to unify two reasoning styles, and (2) think> short> think> short> short> think> short> short> think> short> short> think>  Prepare for reinforcement learning with Decoupled GRPO. Prepare for distillation. Prepare for reinforcement learning with Decoupled GRPO. Prepare for reinforcement learning with a hybrid model. Prepare for reinforcement learning with a hybrid model.                                  Ddistill is a model that distills a control token into a multi-style response distribution conditioned on the control token. 3.2 Learning When to Think via Decoupled GRPO is a model that distills a control token A reinforcement-learning approach to mode selection for a given input x. We propose a policy (c, a | x) = (c | x) (a | x, c), where the first token c C =                                   Ddistill is a model that distills the model’s response into two types of responses. 3.2 Learning When to Think via Decoupled GRPO. The model can produce both long- and short-form answers. e is a feature of the model that can be learned to select the mode that best suits the input x. To achieve this, we propose a policy (c, a | x) = (c | x) (a | x, c) where a ai,0 C is the control token. Reward Design. Let ydenote the ground-truth answer corresponding to the input x. We consider a minimally designed reward function r(a, y, c) =   JGRPO() = Ex,aiG  x  i = 1  Ti+1 TiX  t = 0 Li,t()  DKL ( | x) ref( | x) # , where , 1 +  Ai,t (2) We compute the relative advantage using Ai,t = r mean(r) using . This 1.0 , if c = think> and Extract-Answer(a) = d. The objective is defined as: JGRPO() = Ex,ai " 1 G G X i=1 1 Ti+1 Ti X t=0 Li,t()  DKL ( | x) ref( | x) #, where Li,t( . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . Identify the control token ai,0. Identify the mode-accuracy imbalance. Identify the imbalance between mode-accuracy and mode-inference. Identify the imbalance between mode-inference and mode-respons . . . . . . . . . . . . . . . . . . . . . . . . . . . . .                                  nt  is introduced to balance the optimization between mode selection and response generation. Set  = 1/ for stable training. nt  is introduced to balance the optimization between mode selection and response generation. Set  = 1/ for stable training. GRPO-based model for mode selection and response generation. Using token-level advantages to balance the contributions of mode selection and response generation. Using token-level advantages to balance the contributions of mode selection and response generation. Using token-level advantages 4.1 Experimental Setups LLMs and Datasets. DeGRPO () = Ex,ai " 1 G G X i=1  Li,0() | z  Control Token + 1 Ti Ti X t=1 Li,  = 1 for mode selection and think-short imbalance. Set  = 1 for mode update. Set  = 1 for mode selection and think-short imbalance. Set  = 1 for mode update. Set  = 1 for mode update. GRPO is a general-purpose framework for learning to learn to answer questions. It is based on the GRPO framework and uses the same model as the GRPO framework. The model is based on the GRPO model. ental Setups LLMs and Datasets. We utilize DeepSeek-R1-Distill-Qwen-1.5B as the base model to train a hybrid reasoning policy. To construct long- . . . . . . . . . . . . . . . . . . . . . . . . . . Using the AIME Minerva Algebra and the GSM-8K models, we trained the models on the following datasets: - - - - - - - - - - - - - - - - - - . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . RL experiments were performed using the VeRL framework. The following experiments were performed: CoT-Valve  = 4  = 4  = 4  = 4  = 4  = 4  = 4  = 4  = 4  = 4  = 4  = 4  = 4                                4.2 Empirical Results on Hybrid Reasoning Finding 1. The hybrid reasoning model learned by the method is able to distinguish complex from simple queries, reducing the use of thinking by –. Table 1 presents a comparison between our method and several existing mods. ets or techniques. The first part showcases our baseline model, DeepSeek-R1-Distill-Qwen-1.5B, alongside two instruction-following models designed to generate concise answers. On challenging datasets Consider the performance of the various models. Consider the use of dels. Consider the use of model merging. Consider the use of model merging. Consider the use of model merging. Consider the use of model merging. Consider the use of model . . . . . . . . . . . . . . . . . . . . . . . . . .  . The method is compared to existing models to demonstrate the effectiveness of the method. The results show that the method is able to generate a large number of tokens, compared to existing models. The method is able to generate a large number . . . . . . . . . . . . . . . . . . . . . . . . . . Use supervised fine-tuning techniques. Adjust reasoning length. Adapt reasoning length to the model. Adapt reasoning length to the model. Adapt reasoning length to the model. Adapt reasoning length to the model. Adapt reasoning length to the We show that the proposed heuristics are effective on a range of datasets, but that they may not be optimal on all datasets. We also show that hybrid reasoning strategies can be used to improve the performance of a model. iii. iiii. iv. v. v. v. v. v. v. v. v. v. v. v. v. v Adaptive reasoning is a powerful method for reasoning about the complexity of the input. It is a novel approach to reasoning about the complexity of the input. It is a novel approach to reasoning about the complexity of the input. It is a novel RLs can be used to train a model to learn to recognize short and long responses. The RLs can be used to train a model to learn to recognize short and long responses. The RLs can be used to train a model to learn to recognize short and long responses. b) The proposed Decoupled GRPO, with a U-shape learning curve. AIME Math Minerva Algebra GSM8k 0.0 0.2 0.4 0.6 0.8 1.0 Fraction of queries P@1= P@1= P@1= P@1= d) Learned Policy of DeGRPO. d) Learning Dynamics in RL Finding 2. Policy may collapse due to imbalanced update of control tokens in Vanilla GRPO. Mode Collapse in RL. To further analyze how the model learns a reasonable policy te the Mode Collapse problem in standard GRPO. In conventional GRPO, the gradient on the control token is normalized by the total length of the response, which introduces an imbalance between long and short outputs. . Decouple the GRPO model from the short mode. . Decouple the GRPO model from the short mode. . Decouple the GRPO model from the short mode. . Decouple the GRPO model from the short mode. 4.3 Training Dynamics in RL Finding 2. Policy may collapse due to imbalanced update of control tokens in Vanilla GRPO. Mode Collapse in RL. To further analyze how the model learns a reasonable policy, we visualize the training process of RL. In conventional GRPO b, the model collapses quickly, as the number of generated long-chain responses drops below 10 within just update steps. c, the model collapses quickly, as the number of generated short-chain responses drops below 10 within just update steps. Identify the problem of GRPO collapse. Decouple the GRPO algorithm from the model. Identify the problem of GRPO collapse. Decouple the GRPO algorithm from the model. Identify the problem of GRPO collapse. RL model shows preference for long-chain reasoning in the early stages of training. As training progresses, we observe an improvement in the accuracy of short-chain responses. RL model shows preference for long-chain reasoning in the early stages of training. Increase the number of short-chain responses. Increase the number of short-chain responses. Increase the number of short-chain responses. Increase the number of short-chain responses. Increase the number of short-chain responses. Increase the number of short-chain responses ly, we observe a decline 7 Model Mode & Teacher AIME Minerva Algebra Math- GSM8K Pass@1 #Tokens Pass@1 #Tokens Pass@1 #Tokens Pass@1 #Tokens Pass@1 # Use the following SFT datasets: think> 0. short> 0. short> 0. short> 0. short> 0. short> 0. short> 0. short> 0. short . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . Long (R1-671B) 0 Long (QMath-1.5B) 0 Short (QMath-1.5B) 0 Long (R1-671B) 0 Short ( Train the model on a variety of tasks. Observe a decrease in the accuracy of long-chain responses during the latter half of training. Find the weight of the control token. 0 Training Step 0 5 10 15 20 25 Visualize the effect of decoupling on model behavior.                         Observe the effect of the weight on the control token on the short-response accuracy. Observe the effect of the weight on the control token on the short-response accuracy. Observe the effect of the weight on the control token Using knowledge distillation, we find that LLMs can be a good short response learner. In this work, knowledge distillation is deployed for warm-up, serv- ing, and re- sults. . We present a detailed implementation of the model, including a comparison of three datasets. We present a detailed analysis of the model's architecture, including a comparison of the model's architecture to the DeepSeek- a large collection of datasets; and (3) OpenThoughts-1M, a large-scale and diverse collection that subsumes the former two. We observe that generating short responses trajectories. We compare the results under two settings: a Use a fast policy update to improve the model’s performance. Avoid using too many policy updates. Avoid using too many policy updates. Avoid using too many policy updates. Avoid using too many policy updates. Avoid using too many policy updates. n initial accuracy rather than a collaborative learning of mode selection and accuracy improvement. 4.4 Details of Warm-up Distillation Finding 5. Reasoning LLMs can be a good short response learner. In this work, knowledge distillation is deployed for warm- a multi-domain dataset labeled by DeepSeek-R1-67B. We find that generating short responses using a long-chain approach is more effective than generating short responses using a short-chain approach. Obtain a distillation model. Obtain a distillation model. Obtain a distillation model. Obtain a distillation model. Obtain a distillation model. Obtain a distillation model. Obtain a distillation Find the probability of think> on MA. Find the projection of mathbfa$ onto mathbfb. Find the area of the graph of mathbfb. Select the LLM response. Analyze the results. Understand the limitations of LLM. Analyze the results of the LLM. Understand the limitations of LLM. Analyze the results of the LLM. Understand the limitations of LLM. lts in only a improvement in long-chain accuracy on the Math- benchmark. This work provides a preliminary validation of the effectiveness of simple distillation, and we leave the construction of stronger initial hybrid models as an important direction of future research. ereys of different difficulty. In addition, we highlight representative examples corresponding to high, medium, and low confidence levels to illustrate the model’s decision behavior. We observe that samples assigned to the short reasoning mode are typically simple arithm ibrated policy that adapts reasoning depth based on task complexity. 5 Limitations and Future Works This work presents an effective reinforcement learning framework that enables a hybrid model to adapt its inference mode based on both problem complexity and its own s in a slight performance drop in the initial model for reinforcement learning. Exploring better strategies for constructing the hybrid model, such as merging techniques or lightweight fine- tuning methods like LoRA to mitigate catastrophic forgetting. s in a slight omains to enable more general and practical hybrid reasoning capabilities. 6 Conclusion This paper proposes a reinforcement learning framework for building a hybrid reasoning model. It autonomously decides whether to generate a short response or engage in long-form reasoning based on the . . The model is well-calibrated and can be used to model a variety of questions of different difficulty. The model can be used to model the decision behavior of a given question. The model can be used to model the decision behavior a hybrid model that adapts its inference mode based on problem complexity. 5 Limitations and Future Works This work presents an effective reinforcement learning framework that enables a hybrid model to adapt its inference mode based on both problem complexity and its own ht performance drop in the initial model for reinforcement learning. Exploring better strategies for constructing the hybrid model, such as merging techniques or lightweight fine- tuning methods like LoRA to mitigate catastrophic forgetting. a broader range of datasets . nable more general and practical hybrid reasoning capabilities. 6 Conclusion This paper proposes a reinforcement learning framework for building a hybrid reasoning model. It autonomously decides whether to generate a short response or engage in long-form reasoning based on Decouple the long-form reasoning model from the short-form reasoning model. Decouple the short-form reasoning model from the long-form reasoning model. Decouple the short-form reasoning model from the long-form reasoning model hought: Efficient llm reasoning with adaptive cognitive-inspired sketching. arXiv preprint arXiv:., . Akhiad Bercovich, Itay Levy, The following are the names of the finalists for the 2019 World Championship of Athletics in Athletics: d, Adi Renduchintala, Haifeng Qian, Dima Rekesh, Fei Jia The winners of the ICC World Youth Championships in Dubai will be announced on Thursday. The winners will be announced on Friday. The ICC World Youth Championships in Dubai will be held from 10 to 14 November. The ICC World Youth Championship The winners of the prestigious IFBB World Women's Championships in Dubai have been announced. The winners of the IFBB World Women's Championships in Dubai have been announced. The winners of the IFBB World Women The following players have been selected for the 2018 Indian Super League: - Ying Lin, Sanjeev Satheesh, Jupinder Parmar, Pritam Gundecha, Brandon Norick, Joseph Je The list of the finalists of the FIVB Women's World Championships in Athletics in Brazil includes:              ya, Joey Conway, Trisha Saar, Ann Guan, Krzysztof Pawelec, Shyamala Prayaga, Oleksii Kuchaiev, Boris Ginsburg, Ol Adaptive Thinking for Reasoning in Large Language Models. Xingyu Chen, Jiahao Xu, Tian Liang, Zhiwei He, Jianhui Pang, Dian Yu, Linfeng Song, Qi Research on learning to reason. w. - - - - - - - - - - - - - - - - - - - - -  iii. Search for llms via reinforcement learning. arXiv preprint arXiv:., . Xiaoshu Chen, Sihang Zhou, Ke Liang, and Xinwang Liu. Read the papers. Read the reviews. Read the reviews. Read the reviews. Read the reviews. Read the reviews. Read the reviews. Read the reviews. Read the reviews. Read the reviews. Read the reviews. Read the reviews. Read the reviews. Read Open r1: A fully open reproduction of deepseek-r1, January. Open r1: A fully open reproduction of deepseek-r1, January. Open r1: A fully open reproduction of deepseek-r1, January. Token-budget- Karl Cobbe, Vineet Kosaraju, Mohammad Bavarian, Mark Chen, Heewoo Jun, Lukasz Kaiser, Matthias Plappert, Jerry Tworek, Jacob Research on efficient reasoning models. ArXiv preprint arXiv:., . Deepseek-r1: Incentivizing reasoning capability in llms via reinforcement learning. arXiv preprint arXiv:., . . . . . . . . . . . . . . . . . . . . . . . .  . . . . . . . . . . . . . . . . . . . . . . . . . . .  Research the effects of distillation on reasoning in small languages. Investigate the effects of distillation on reasoning in large models. Investigate the effects of distillation on reasoning in large models. Identify the effects of distillation on reasoning in large models. Research on r1-zero-like training. Research on r1-zero-like training. Research on r1-zero-like training. Research on r1-zero-like training. Research on r1-zer . Xinyin Ma, Guangnian Wan, Runpeng Yu, Gongfan Fang, and Xinchao Wang. Cot-valve: Surpassing o1-preview with a 1.5 b model by gth- compressible chain-of-thought tuning. arXiv preprint arXiv:., . Lucie Charlotte Magister, Jonathan Mallinson, Jakub Adamek, Eric Malmi, and . Xinyin Ma, Guangnian Wan, Runpeng Yu, Gongfan Fang, and Xinchao Wang. Deepscaler: Surpassing o1-preview with a 1.5 b model by ght tuning. arXiv preprint arXiv:., . Lucie Charlotte Magister, Jonathan Mallinson, Jakub Adamek, Eric Malmi, and Aliaksei Severyn. ent thoughts: On the power of looped transformers. . Zhihong Shao, Peiyi Wang, Qihao Zhu, Runxin Xu, Junxiao Song, Xiao Bi, Haowe . . . . . . . . . . . . . . . . . . . . . . . . . . Read the following papers: . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . Research on reasoning in large language models. Research on reasoning in large language models. Research on reasoning in large language models. Research on reasoning in large language models. Research on reasoning in large language models. Research on reasoning in large language models. Research on reasoning in large language models. Research ms with tools for the deep research. arXiv preprint arXiv:., . 11 Silei Xu, Wenhao Xie, Lingxiao Zhao, and Pengcheng He. Chain of draft: Thinking faster by writing Research on simplerl-zoo: Investigating and taming zero reinforcement learning for open base models in the wild. ArXiv preprint arXiv:., Xunyu Zhu

In [37]:
def compute_embeddings(texts):
    """Compute embeddings for a list of texts using the SentenceTransformer model."""
    return embedding_model.encode(texts, convert_to_tensor=True)

def retrieve_similar_chunks(chunks, embeddings, query, top_k=5):
    """Retrieve top_k most relevant chunks to the query using cosine similarity."""
    query_embedding = embedding_model.encode(query, convert_to_tensor=True)
    similarities = np.inner(query_embedding, embeddings)  # Cosine similarity
    top_k_indices = np.argsort(similarities)[::-1][:top_k]
    return [chunks[i] for i in top_k_indices]

def classical_rag(pdf_path, user_query, top_k=5):
    """Performs a simple RAG-like summarization using embeddings + summarizer."""
    # Step 1: Extract and clean PDF content
    pdf_chunks, _ = extract_and_chunk_pdf(pdf_path)
    cleaned_chunks = [process_scientific_text(chunk) for chunk in pdf_chunks]

    # Step 2: Embed chunks
    embeddings = compute_embeddings(cleaned_chunks)

    # Step 3: Retrieve most relevant chunks to the query
    relevant_chunks = retrieve_similar_chunks(cleaned_chunks, embeddings, user_query, top_k=top_k)

    # Step 4: Create prompt and summarize
    context = "\n".join(relevant_chunks)
    prompt = f"""
Summarize the following content in response to the question:
"{user_query}"

Context:
{context}
"""
    summary = summarizer(prompt, max_length=200, min_length=60, do_sample=False)
    return summary[0]['summary_text']


In [38]:
rag_summary = classical_rag("my_pdf.pdf", "What are the main contributions of the paper?")

  similarities = np.inner(query_embedding, embeddings)  # Cosine similarity


In [39]:
display(Markdown(rag_summary))

The paper presents a survey of the state-of-the-art in the field of machine learning and reasoning. It presents a survey of the state-of-the-art in the field of machine learning and reasoning. It presents a survey of the state-of-the-art in the field of machine learning and reasoning.

In [10]:
# def rag_summary(query, k=5):
#     results = collection.query(query_texts=[query], n_results=k)
#     context = " ".join([doc for doc in results['documents'][0]])

#     summary = summarizer(
#         f"Summarize based on context: {context}",
#         max_length=200,
#         min_length=50,
#         do_sample=False
#     )
#     return summary[0]["summary_text"]


In [11]:
# # Initialize agentic model
# agent_model = HfApiModel(name="google/flan-t5-large")

# agentic_tools = [
#     Tool(name="lookup_vector", func=lambda q: " ".join(collection.query(query_texts=[q], n_results=5)['documents'][0]), description="Retrieve context")
# ]

# def agentic_rag_summary(query):
#     response = agent_model.run_task(
#         system_prompt="You are a scientific summarizer using external tools.",
#         task_prompt=f"Summarize this query: {query}",
#         tools=agentic_tools
#     )
#     return response




In [40]:
from transformers import pipeline

def agentic_rag_summary(query):
    # Step 1: Retrieve relevant chunks from ChromaDB
    retrieved = collection.query(query_texts=[query], n_results=5)
    context = " ".join(retrieved['documents'][0])

    # Step 2: Prompt model to summarize based on query + context
    prompt = f"Given the following context, answer the question.\nContext:\n{context}\n\nQuestion: {query}"

    result = summarizer(prompt, max_length=300, min_length=50, do_sample=False)[0]["summary_text"]
    return result


In [41]:
agentic = agentic_rag_summary("Summarize the scientific paper in the PDF.")

In [42]:
display(Markdown(agentic))

The authors report the results of a study on the training of a hybrid model for reinforcement learning. The model was trained on the Megatron framework. The model was trained on the Megatron framework. The model was trained on the Megatron framework.

In [43]:
from evaluate import load

# Load evaluation metrics
bleu = load("bleu")
rouge = load("rouge")

# Get summaries
baseline = summary
rag = rag_summary
agentic = agentic

# Use original abstract or gold summary as reference
_, reference = extract_and_chunk_pdf("my_pdf.pdf")
references = [reference]

# Format predictions
baseline_pred = [baseline]
rag_pred = [rag]
agentic_pred = [agentic]

# Evaluate
print("== BLEU Scores ==")
print("Baseline:", bleu.compute(predictions=baseline_pred, references=references))
print("RAG:", bleu.compute(predictions=rag_pred, references=references))
print("Agentic RAG:", bleu.compute(predictions=agentic_pred, references=references))

print("\n== ROUGE Scores ==")
print("Baseline:", rouge.compute(predictions=baseline_pred, references=references))
print("RAG:", rouge.compute(predictions=rag_pred, references=references))
print("Agentic RAG:", rouge.compute(predictions=agentic_pred, references=references))

== BLEU Scores ==
Baseline: {'bleu': 0.028994708470058906, 'precisions': [0.08644032493230577, 0.03395833333333333, 0.01833715357366118, 0.013130471029595666], 'brevity_penalty': 1.0, 'length_ratio': 7.896381578947368, 'translation_length': 4801, 'reference_length': 608}
RAG: {'bleu': 0.0, 'precisions': [0.6122448979591837, 0.08333333333333333, 0.0, 0.0], 'brevity_penalty': 1.11044651440178e-05, 'length_ratio': 0.0805921052631579, 'translation_length': 49, 'reference_length': 608}
Agentic RAG: {'bleu': 0.0, 'precisions': [0.5869565217391305, 0.044444444444444446, 0.0, 0.0], 'brevity_penalty': 4.943725781036531e-06, 'length_ratio': 0.0756578947368421, 'translation_length': 46, 'reference_length': 608}

== ROUGE Scores ==
Baseline: {'rouge1': np.float64(0.16701853155055127), 'rouge2': np.float64(0.0699366345928186), 'rougeL': np.float64(0.08069434670419892), 'rougeLsum': np.float64(0.1482524044100399)}
RAG: {'rouge1': np.float64(0.12099644128113879), 'rouge2': np.float64(0.00714285714285