In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import os
# Walk the Kaggle input directory tree and print the full path of every file in it.
for dirname, _, filenames in os.walk('/kaggle/input'):
    full_paths = (os.path.join(dirname, entry) for entry in filenames)
    for full_path in full_paths:
        print(full_path)

In [None]:
!pip install langchain_huggingface
!pip install langchain_community
!pip install gputil
!pip install PyMuPDF
!pip install sentence-transformers
!pip install ctransformers
!pip install faiss-cpu
!pip install numpy==1.26.4
!pip install langchain_milvus
!pip install pymilvus

In [None]:
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.prompts import PromptTemplate
from langchain_community.llms import LlamaCpp
from langchain_community.chat_models import ChatLlamaCpp
from langchain_core.callbacks import CallbackManager, StreamingStdOutCallbackHandler
import os
import json
import warnings
import logging
import time
import psutil
import GPUtil

# Suppress every warning category for the rest of the session.
warnings.simplefilter("ignore")

# Request INFO-level logging on the root logger (basicConfig has no effect if
# the root logger already has handlers) and create this module's own logger.
logging.basicConfig(level=logging.INFO)  # Adjust level as needed
logger = logging.getLogger(__name__)


In [None]:
# Install the arXiv API client and the llama-cpp-python package.
# NOTE(review): a plain pip build of llama-cpp-python may be CPU-only, in which
# case the `n_gpu_layers=-1` passed to LlamaCpp later in this notebook would
# not offload anything to the GPU — confirm the build used on this machine.
%pip install arxiv  llama-cpp-python

In [None]:
from arxiv import Search



# Settings for the local Llama 3 8B Instruct GGUF model served through LlamaCpp.
llm_settings = dict(
    model_path="/kaggle/input/meta-llama-3-8b-gguf/llama3-8b-instruct-Q5_K_M.gguf",
    n_ctx=2048,        # context size handed to the model
    n_gpu_layers=-1,   # presumably "offload all layers" (llama.cpp convention) — confirm
    verbose=True,
    seed=1000,         # fixed seed so repeated runs are comparable
    temperature=0,
)

# Shared LLM handle used by the summarisation functions in this notebook.
llm = LlamaCpp(**llm_settings)


def fetch_arxiv_data(query, max_results=5):
    """Fetch papers matching ``query`` from arXiv, newest first.

    Args:
        query: Search string forwarded to the arXiv API.
        max_results: Maximum number of papers to request. Defaults to 5,
            the value that used to be hard-coded.

    Returns:
        list: The result objects yielded by the arXiv client, sorted by
        their ``published`` attribute in descending order.
    """
    # Function-scope import: the notebook's top-level import only brings in `Search`.
    from arxiv import Client

    search = Search(query=query, max_results=max_results)

    # `Search.results()` is deprecated since arxiv 2.0; running the search
    # through a `Client` is the supported equivalent.
    papers = list(Client().results(search))

    # Re-order whatever the API returned so the most recent publication comes first.
    return sorted(papers, key=lambda paper: paper.published, reverse=True)

    

def generate_summary(query):
    """Fetch arXiv papers for ``query`` and summarise each one with the LLM.

    Two LLM calls are made per paper: the first asks for a detailed overview
    based on the title, the second asks for a summary of the title, the
    original abstract and that generated overview combined.

    Args:
        query: Search string passed to ``fetch_arxiv_data``.

    Returns:
        list[dict]: One dict per paper with the keys ``'title'``,
        ``'original_abstract'`` and ``'generated_summary'``. Empty when the
        search yields no papers.
    """
    papers = fetch_arxiv_data(query)

    # Both templates are independent of the paper being processed, so build
    # them once instead of on every loop iteration.
    overview_template = PromptTemplate(
        template="Provide a detailed overview based on the title: {title}",
        input_variables=['title'],
    )
    summary_template = PromptTemplate(
        template="Summarize the following text: {title}. {abstract}. {detailed_content}",
        input_variables=['title', 'abstract', 'detailed_content'],
    )

    summaries = []
    for paper in papers:
        title = paper.title
        abstract = paper.summary

        # NOTE(review): the overview is generated from the title alone, so it
        # may contain statements that are not in the paper — confirm this is intended.
        detailed_content = llm.invoke(overview_template.format(title=title))

        # Second pass: condense title + abstract + generated overview.
        summary = llm.invoke(
            summary_template.format(
                title=title,
                abstract=abstract,
                detailed_content=detailed_content,
            )
        )

        summaries.append({
            'title': title,
            'original_abstract': abstract,
            'generated_summary': summary
        })

    return summaries

# Example usage:
# Demo run: summarise the papers returned for a sample query and print each result.
query = "Naruto Uzumaki"
summaries = generate_summary(query)

for entry in summaries:
    # A single print call; the trailing "\n" item plus print's own newline
    # leave two blank lines between consecutive papers.
    print(
        f"Title: {entry['title']}",
        f"Original Abstract: {entry['original_abstract']}",
        f"Generated Summary: {entry['generated_summary']}",
        "\n",
        sep="\n",
    )