## Import Required Libraries


In [28]:
from transformers import T5ForConditionalGeneration, T5Tokenizer
import requests
import xml.etree.ElementTree as ET
from pdfminer.high_level import extract_text
import os
from peft import PeftModel, PeftConfig
from transformers import (
    AutoModelForSeq2SeqLM,
    AutoTokenizer
)

## Set Search Query

In [29]:
# Search term passed to fetch_arxiv_papers, which places it in the arXiv API's
# search_query parameter.
query = "finance"

## Fetching Papers
Function to Fetch ArXiv Papers

In [30]:
def fetch_arxiv_papers(query, max_results=1):
    """
    Fetches PDF links for the arXiv papers matching the given query

    Args:
        query (str): The query to search arXiv for papers
        max_results (int): Maximum number of entries to request from the API.
            Defaults to 1, the value that used to be hard-coded in the URL.

    Returns:
        pdf_links (list): List of pdf links for the papers. Empty when the
            request fails, returns a non-200 status, or no entry has a pdf link.
    """
    url = "http://export.arxiv.org/api/query"
    # Let requests build the query string so characters such as spaces, '&'
    # or '#' inside the query are percent-encoded instead of corrupting the URL.
    params = {"search_query": query, "start": 0, "max_results": max_results}

    try:
        # A timeout keeps the cell from hanging forever on a stalled connection.
        response = requests.get(url, params=params, timeout=30)
    except requests.RequestException as exc:
        # Same reporting style as the non-200 branch below: print and return [].
        print(f"Failed to fetch data from arXiv: {exc}")
        return []

    if response.status_code != 200:
        print("Failed to fetch data from arXiv")
        return []

    # Parse the raw bytes so the XML parser applies the document's own
    # encoding instead of relying on requests' guess for `response.text`.
    root = ET.fromstring(response.content)

    atom_ns = "{http://www.w3.org/2005/Atom}"
    # Each Atom <entry> carries several <link> elements; keep those titled "pdf".
    return [
        link.attrib["href"]
        for entry in root.findall(f"{atom_ns}entry")
        for link in entry.findall(f"{atom_ns}link")
        if link.attrib.get("title") == "pdf"
    ]

Fetch PDF Links

In [31]:
# Run the search and report the first PDF link, if any was returned.
pdf_links = fetch_arxiv_papers(query)
print(f"Found PDF link: {pdf_links[0]}" if pdf_links else "No PDF links found.")

Found PDF link: http://arxiv.org/pdf/1710.03211v2


Download PDF File


In [32]:
# Local file name the downloaded paper is written to.
pdf_file = "1.pdf"

# Indexing an empty list below would raise a bare IndexError; stop with a
# message that says what actually went wrong instead.
if not pdf_links:
    raise RuntimeError("No PDF links found; nothing to download.")

# A timeout keeps the cell from hanging forever on a stalled connection.
response = requests.get(pdf_links[0], timeout=60)

if response.status_code == 200:
    with open(pdf_file, "wb") as f:
        f.write(response.content)
    print(f"PDF saved to {pdf_file}")
else:
    print("Failed to fetch PDF")

PDF saved to 1.pdf


Extract Text from PDF

In [33]:
# Reuse the download target instead of repeating the file name literal, so the
# extraction always reads the file that was just written.
pdf_path = pdf_file
text = extract_text(pdf_path)

## Model 1: T5-small
Load the fine-tuned T5-small model for summarization

In [34]:
# The tokenizer is loaded by the checkpoint name "t5-small"; the model weights
# are loaded from a local directory.
model_checkpoint1 = "t5-small"
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint1)
model1 = T5ForConditionalGeneration.from_pretrained("../practice/t5-small-finetuned-arxiv")

Define Summarization Function for T5-small

In [35]:
def test_summary_t5_small(input_text):
    """
    Generates a summary for the given input text using the T5 small model

    Args:
        input_text (str): The input text to summarize

    Returns:
        str: The generated summary
    """
    # The notebook-level name `tokenizer` is reassigned when the FLAN-T5 model
    # is loaded, so at call time it no longer refers to the tokenizer loaded
    # for `model1`. Resolve the tokenizer for `model_checkpoint1` here and
    # cache it on the function object so it is only loaded once.
    t5_tokenizer = getattr(test_summary_t5_small, "_tokenizer", None)
    if t5_tokenizer is None:
        t5_tokenizer = AutoTokenizer.from_pretrained(model_checkpoint1)
        test_summary_t5_small._tokenizer = t5_tokenizer

    # Input longer than 512 tokens is truncated.
    inputs = t5_tokenizer(
        "summarize: " + input_text,
        max_length=512,
        truncation=True,
        return_tensors="pt"
    )

    # Sampling is enabled, so repeated calls can return different summaries.
    outputs = model1.generate(
        inputs.input_ids,
        max_new_tokens=512,
        do_sample=True,
        top_p=0.9,
    )

    return t5_tokenizer.decode(outputs[0], skip_special_tokens=True)

## Model 2: FLAN-T5-base
Load the PEFT-tuned FLAN-T5-base model

In [36]:
# Directory holding the PEFT adapter; its config names the base model to load.
peft_model_id = "../practice/results" 
config = PeftConfig.from_pretrained(peft_model_id)

# Note: this rebinds the notebook-level `tokenizer` to the base model's tokenizer.
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)

# Load the base model, then wrap it with the adapter weights.
model2 = PeftModel.from_pretrained(
    AutoModelForSeq2SeqLM.from_pretrained(
        config.base_model_name_or_path,
        device_map="auto"
    ),
    peft_model_id
)

Define Summarization Function for FLAN-T5-base

In [37]:
def test_summary_FLAN_T5_base(input_text):
    """
    Summarizes the given text with the FLAN T5 base model

    Args:
        input_text (str): Text to be summarized

    Returns:
        str: Summary produced by the model
    """
    # Prefix the text with the summarization instruction; input longer than
    # 1024 tokens is truncated.
    prompt = "summarize: " + input_text
    encoded = tokenizer(prompt, max_length=1024, truncation=True, return_tensors="pt")

    # Sampling is enabled, so repeated calls can return different summaries.
    generation_kwargs = {"max_new_tokens": 512, "do_sample": True, "top_p": 0.9}
    generated = model2.generate(input_ids=encoded.input_ids, **generation_kwargs)

    first_sequence = generated[0]
    return tokenizer.decode(first_sequence, skip_special_tokens=True)

Generate Summaries Using Both Models

In [38]:
# Summarize the text extracted from the PDF with the T5-small function.
summary_t5_small = test_summary_t5_small(text)

In [39]:
# Summarize the same extracted text with the FLAN-T5-base function.
summary_FLAN_T5_base = test_summary_FLAN_T5_base(text)

Print Generated Summaries

In [40]:
# Display the summary produced by test_summary_t5_small.
print(f"Summary from T5-small: {summary_t5_small}")

Summary from T5-small: behavioral finance has three main objections to the theory of rational finance . the rational finance does not explain the predictability of asset returns . the theories are considered as anomalies and they are available to determine the difference between rationalistic and behavioralists . the problems relating to the theory of rational finance will be addressed in a multigenerational economy .


In [41]:
# Display the summary produced by test_summary_FLAN_T5_base.
print(f"Summary from FLAN-T5-base: {summary_FLAN_T5_base}")

Summary from FLAN-T5-base: **Main Research Objective:** To address the three main objections of behavioral finance to the theory of rational finance: (i) Predictability of asset returns; (ii) The Equity Premium; and (iii) The Volatility Puzzle. **Methodology:** * Developed a systematic approach to address all three objections: * Provided an explanation for the "anomalies" in rational finance utilizing statistical models. * Introduced statistical tools in the rational theory of finance to provide empirical evidence of non-rational chooses. * Provided a statistical framework to analyze whether and how behavioral finance can be applied in such situations. **Key Findings:** * Behavioral finance is a structural model of financial finance that is widely used and often criticized. * The behavioral finance model is based on the behavioral finance principles of the financial world. * Behavioral finance is the principal tool for generating financial data, and is commonly used as a method to help

Cleanup: Remove Downloaded PDF

In [42]:
# Only delete the file if it is present, so this cell can be re-run (or run
# after a failed download) without raising FileNotFoundError.
if os.path.exists(pdf_file):
    os.remove(pdf_file)