# Imports

In [None]:
%%capture --no-display
!pip install PyPDF2
!pip install python-docx
!pip install langchain-community sentence-transformers pinecone langchain_huggingface faiss-cpu groq scrapegraph_py dotenv

In [None]:
import pandas as pd
from dotenv import load_dotenv
import os
from groq import Groq
import re
import numpy as np
from sentence_transformers import SentenceTransformer
import faiss
from PyPDF2 import PdfReader
from docx import Document
import textwrap
from transformers import AutoModelForCausalLM, AutoTokenizer
from langchain_text_splitters import RecursiveCharacterTextSplitter
from transformers import pipeline
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
from scrapegraph_py import Client
from scrapegraph_py.logger import sgai_logger

In [None]:
# Load variables from the env file into the process environment before any client
# is constructed. scrape_url() later reads SCRAPE_API_KEY via os.getenv.
# NOTE(review): Groq() is built without an explicit key, so it presumably relies on
# a key from this file as well — confirm the expected variable name.
load_dotenv("/content/Agents.env")

True

# LLM

In [None]:
class ChatAssistant:
    """Minimal wrapper around the Groq chat-completions endpoint.

    Args:
        system_prompt: Instruction sent as the ``system`` message on every call.
        model: Groq model identifier used for completions.
    """

    def __init__(self, system_prompt="You are a helpful assistant", model="llama-3.3-70b-versatile"):
        # No API key is passed explicitly.
        # NOTE(review): presumably the Groq client reads it from the environment — confirm.
        self.client = Groq()
        self.model = model
        self.base_prompt = f"{system_prompt}"

    def invoke(self, user_input: str) -> str:
        """Send ``user_input`` to the model and return the reply text.

        The stored system prompt is sent ahead of the user message so that it
        actually steers the model (it was previously stored but never used).

        Args:
            user_input: The user message / fully formatted prompt.

        Returns:
            The content of the first completion choice.
        """
        messages = []
        # Skip the system message entirely when the prompt is empty.
        if self.base_prompt:
            messages.append({"role": "system", "content": self.base_prompt})
        messages.append({"role": "user", "content": user_input})

        response = self.client.chat.completions.create(
            model=self.model,
            messages=messages,
            temperature=0,  # deterministic-leaning output for repeatable answers
            max_completion_tokens=1024,
        )
        return response.choices[0].message.content


# Retrieval

In [None]:
class Retreival:
    """Text extraction, chunking and FAISS-backed similarity search.

    The instance owns an initially empty LangChain ``FAISS`` vector store built on
    the ``sentence-transformers/all-MiniLM-L6-v2`` embedding model.
    """

    def __init__(self):
        embedder = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
        # Embed a throwaway query once to discover the embedding dimension
        # that the flat L2 index must be created with.
        index = faiss.IndexFlatL2(len(embedder.embed_query("hello world")))
        self.vector_store = FAISS(
            embedding_function=embedder,
            index=index,
            docstore=InMemoryDocstore(),
            index_to_docstore_id={},
        )

    def dict_to_string(self, data, indent=0):
        """Recursively flatten nested dicts/lists into an indented plain string.

        Dict keys are written as ``key:`` lines with their values indented four
        more spaces; list items are written at the current indent; any other
        value is written via ``str()`` on its own line.

        Args:
            data: A dict, list, or scalar value.
            indent: Number of leading spaces for the current nesting level.

        Returns:
            The rendered text; every emitted line ends with a newline.
        """
        pad = " " * indent
        if isinstance(data, dict):
            parts = []
            for key, value in data.items():
                parts.append(f"{pad}{key}:\n")
                parts.append(self.dict_to_string(value, indent + 4))
            return "".join(parts)
        if isinstance(data, list):
            return "".join(self.dict_to_string(item, indent) for item in data)
        return f"{pad}{data}\n"

    def scrape_url(self, website_url):
        """Fetch the content of ``website_url`` through the ScrapeGraph SmartScraper.

        Args:
            website_url: URL of the page to scrape.

        Returns:
            The ``result`` field of the scraper response rendered as plain text.
        """
        sgai_logger.set_logging(level="INFO")

        # The API key is read from the SCRAPE_API_KEY environment variable.
        sgai_client = Client(api_key=os.getenv('SCRAPE_API_KEY'))
        try:
            response = sgai_client.smartscraper(
                website_url=website_url,
                user_prompt="Extract webpage Content only forget about headlines just go to the content"
            )
            return self.dict_to_string(response['result'])
        finally:
            # Always release the client session, even when the request fails.
            sgai_client.close()

    def extract_text(self, file_path=None, url=None):
        """Return the raw text of a URL or of a local .txt/.pdf/.docx/.csv file.

        ``url`` takes precedence when both arguments are supplied.

        Args:
            file_path: Path to a local file; its extension selects the reader.
            url: Web page to scrape instead of reading a file.

        Returns:
            The extracted text as a single string.

        Raises:
            ValueError: If neither argument is given, or the file extension
                is not one of the supported formats.
        """
        if url is not None:
            return self.scrape_url(url)
        if file_path is None:
            raise ValueError("Either file_path or url must be provided")

        ext = os.path.splitext(file_path)[1].lower()
        if ext == '.txt':
            with open(file_path, 'r', encoding='utf-8') as file:
                return file.read()
        if ext == '.pdf':
            reader = PdfReader(file_path)
            # Guard against pages for which no text is returned.
            return ' '.join((page.extract_text() or '') for page in reader.pages)
        if ext == '.docx':
            doc = Document(file_path)
            return '\n'.join(para.text for para in doc.paragraphs)
        if ext == '.csv':
            df = pd.read_csv(file_path)
            return df.to_string()
        raise ValueError(f"Unsupported file format: {ext}")

    def chunking(self, text, chunk_size=100, chunk_overlap=20):
        """Split ``text`` into overlapping chunks.

        Args:
            text: The text to split.
            chunk_size: Passed to ``RecursiveCharacterTextSplitter`` as ``chunk_size``.
            chunk_overlap: Passed to the splitter as ``chunk_overlap``.

        Returns:
            The list of chunk strings produced by the splitter.
        """
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=chunk_size,
            chunk_overlap=chunk_overlap,
        )
        return text_splitter.split_text(text)

    def create_db(self, text):
        """Embed the given chunks and add them to the vector store.

        Args:
            text: An iterable of chunk strings. A single string is treated as
                one chunk rather than being iterated character by character.
        """
        if isinstance(text, str):
            text = [text]
        chunks = list(text)
        if not chunks:
            # Nothing to index; avoid handing an empty batch to the index.
            return
        self.vector_store.add_texts(chunks)

    def retrieve_relevant_chunks(self, query, k=3):
        """Return the ``k`` chunks most similar to ``query`` with their scores.

        Args:
            query: The search query.
            k: Number of chunks to request from the vector store.

        Returns:
            A ``(chunks, scores)`` tuple of two parallel lists: the chunk texts
            and their relevance scores rounded to three decimals.
        """
        hits = self.vector_store.similarity_search_with_relevance_scores(query, k=k)
        scores = [round(score, 3) for _, score in hits]
        results = [doc.page_content for doc, _ in hits]
        return results, scores

# Generation

In [None]:
class Generation:
    """Turns a query plus retrieved context into a prompt and asks the chat model."""

    def __init__(self):
        # A ChatAssistant with its default arguments does the actual LLM call.
        self.generator = ChatAssistant()

    def format_input(self, query, retrieved_context):
        """Build the prompt: context first, then the query, then a ``Response:`` cue."""
        prompt = f"Given this Context: {retrieved_context}\n You have to Answer this Query: {query}\nResponse:"
        return prompt

    def generate_response(self, query, retrieved_context):
        """Format the prompt for ``query`` and return the model's answer to it."""
        return self.generator.invoke(self.format_input(query, retrieved_context))


# RAG

In [None]:
class RAG:
    """End-to-end pipeline: extract text, chunk it, index it, then answer queries.

    Args:
        file_path: Local file to read (used when ``url`` is not given).
        url: Web page to scrape instead of reading a file.
        chunk_size: Chunk size forwarded to the chunker (default 100).
        chunk_overlap: Chunk overlap forwarded to the chunker (default 20).
    """

    def __init__(self, file_path=None, url=None, chunk_size=100, chunk_overlap=20):
        self.r1 = Retreival()
        self.g1 = Generation()
        # Set up front so print() works even before generate() has been called.
        self.top_chunks = None
        self.text = self.r1.extract_text(file_path=file_path, url=url)
        self.chunks = self.r1.chunking(self.text, chunk_size=chunk_size, chunk_overlap=chunk_overlap)
        self.r1.create_db(self.chunks)

    def generate(self, query, k=3):
        """Answer ``query`` using the ``k`` most similar chunks as context.

        The retrieval result (a ``(chunks, scores)`` pair) is kept on
        ``self.top_chunks`` for later inspection.

        Args:
            query: The user question.
            k: Number of chunks to retrieve.

        Returns:
            The generated answer.
        """
        self.top_chunks = self.r1.retrieve_relevant_chunks(query, k=k)
        # Separate chunks with a newline so words at chunk boundaries are not fused.
        context = "\n".join(self.top_chunks[0])
        return self.g1.generate_response(query, context)

    def print(self):
        """Print the full source text, a separator line, and the last retrieval result."""
        print(self.text)
        print("*"*100)
        print(self.top_chunks)

# Test

In [None]:
# Build the pipeline over a local text file (extract -> chunk -> index), then
# answer a question using the 3 most similar chunks as context.
rag_faiss = RAG(file_path = '/content/test.txt')
faiss_response = rag_faiss.generate("what is philosophy?", k = 3)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [None]:
# Show the generated answer.
print(faiss_response)

Philosophy is a systematic study of general and fundamental questions concerning various topics, characterized by critical questioning and the development of principles, theories, and methods. It is an interdisciplinary field that is related to many other areas of study, including the sciences, and encompasses various branches and schools of thought that promote different approaches and perspectives.


In [None]:
# Dump the full source text, a separator line, and the retrieved (chunks, scores) pair.
rag_faiss.print()

Philosophy is a systematic study of general and fundamental questions concerning topics like existence, reason, knowledge, value, mind, and language. It is a rational and critical inquiry that reflects on its methods and assumptions.

Historically, many of the individual sciences, such as physics and psychology, formed part of philosophy. However, they are considered separate academic disciplines in the modern sense of the term. Influential traditions in the history of philosophy include Western, Arabic–Persian, Indian, and Chinese philosophy. Western philosophy originated in Ancient Greece and covers a wide area of philosophical subfields. A central topic in Arabic–Persian philosophy is the relation between reason and revelation. Indian philosophy combines the spiritual problem of how to reach enlightenment with the exploration of the nature of reality and the ways of arriving at knowledge. Chinese philosophy focuses principally on practical issues about right social conduct, governme

In [None]:
# Same pipeline, but the text is scraped from a web page instead of read from disk.
# NOTE: this rebinds rag_faiss / faiss_response, replacing the file-based objects.
# In the recorded run below, the scrape request took about nine minutes to complete.
rag_faiss = RAG(url = 'https://en.wikipedia.org/wiki/Philosophy')
faiss_response = rag_faiss.generate("what is philosophy?", k = 3)

💬 2025-03-22 20:32:33,168 🔑 Initializing Client
INFO:scrapegraph:🔑 Initializing Client
💬 2025-03-22 20:32:33,170 ✅ Client initialized successfully
INFO:scrapegraph:✅ Client initialized successfully
💬 2025-03-22 20:32:33,172 🔍 Starting smartscraper request
INFO:scrapegraph:🔍 Starting smartscraper request
💬 2025-03-22 20:32:33,174 🚀 Making POST request to https://api.scrapegraphai.com/v1/smartscraper
INFO:scrapegraph:🚀 Making POST request to https://api.scrapegraphai.com/v1/smartscraper
💬 2025-03-22 20:41:34,325 ✅ Request completed successfully: POST https://api.scrapegraphai.com/v1/smartscraper
INFO:scrapegraph:✅ Request completed successfully: POST https://api.scrapegraphai.com/v1/smartscraper
💬 2025-03-22 20:41:34,330 ✨ Smartscraper request completed successfully
INFO:scrapegraph:✨ Smartscraper request completed successfully
💬 2025-03-22 20:41:34,333 🔒 Closing Client session
INFO:scrapegraph:🔒 Closing Client session


In [None]:
# Show the generated answer.
print(faiss_response)

Philosophy is the study of the nature and scope of knowledge, reality, and existence. It involves the examination of fundamental questions about the universe, human experience, and the nature of reality, including the methods and goals of intellectual inquiry. Philosophy encompasses various branches, such as metaphysics, epistemology, ethics, logic, and aesthetics, and seeks to provide a deeper understanding of the world and our place within it.


In [None]:
# Dump the full source text, a separator line, and the retrieved (chunks, scores) pair.
rag_faiss.print()

branches:
    name:
        Applied Philosophy
    description:
        The application of philosophical principles to real-world problems and issues.
    name:
        Logic
    description:
        The study of reasoning and argumentation, including the principles of inference and the structure of arguments.
    name:
        Metaphilosophy
    description:
        The study of the nature and scope of philosophy, including the methods and goals of philosophical inquiry.
    name:
        Philosophy of Education
    description:
        The study of the nature and goals of education, including the role of philosophy in educational theory and practice.
    name:
        Philosophy of Information
    description:
        The study of the nature and role of information in society, including the ethics of information and communication.
    name:
        Philosophy of Language
    description:
        The study of the nature and role of language in society, including the philosophy of mean