Install Libraries

In [None]:
# !pip install langchain langchain-core langchain-community langgraph pydantic PyYAML pdfplumber faiss-cpu sentence-transformers streamlit pandas

Setup Imports & Configuration

In [None]:
# !pip install langchain-groq

In [3]:
import getpass
import os

import faiss
import numpy as np
import pdfplumber
import yaml
from google.colab import files
from langchain.prompts.chat import ChatPromptTemplate
from langchain.text_splitter import CharacterTextSplitter
from langchain_groq import ChatGroq
from sentence_transformers import SentenceTransformer

# Load the sentence-embedding model used to encode both the PDF chunks and the user queries
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

# On-disk locations of the persisted FAISS index and of the chunk texts (one chunk per line)
index_file = "index.faiss"
chunks_file = "chunks.txt"

# Cryptocurrency-related keywords used to decide whether an uploaded PDF is on topic.
# Entries must be lowercase: they are matched as substrings of the lowercased PDF text,
# so an uppercase entry could never match.
# TODO: extend this list with more terms.
CRYPTO_KEYWORDS = {"crypto", "cryptocurrency", "bitcoin", "ethereum", "blockchain", "web3",
                   "decentralized", "mining", "token", "nft", "stablecoin", "defi", "ledger"}

# Load the system prompt from the advisor_prompts.yml file
def load_system_prompt(path="advisor_prompts.yml"):
    """Read the advisor LLM's system prompt from a YAML prompt file.

    Args:
        path: Location of the YAML file. Defaults to ``advisor_prompts.yml``
            in the current working directory.

    Returns:
        The value stored under ``system_prompts -> advisor_llm -> description``.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
        KeyError: If one of the expected nested keys is missing.
    """
    # Decode explicitly as UTF-8 instead of relying on the platform's default encoding.
    with open(path, "r", encoding="utf-8") as file:
        config = yaml.safe_load(file)
    return config["system_prompts"]["advisor_llm"]["description"]

# Fetch the advisor-specific system prompt
ADVISOR_SYS_PROMPT = load_system_prompt()
print("Advisor System Prompt Loaded Successfully!")

# Groq API key: never hardcode it in the notebook. Keep a key that is already
# present in the environment; otherwise ask for it without echoing the input.
if not os.environ.get("GROQ_API_KEY"):
    os.environ["GROQ_API_KEY"] = getpass.getpass("Enter your Groq API key: ")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Advisor System Prompt Loaded Successfully!


PDF Extraction & Processing

In [4]:
def extract_and_store_text(pdf_path):
    """Extract a PDF's text, index it with FAISS, and persist index and chunks.

    The text of every page is concatenated and checked against
    ``CRYPTO_KEYWORDS``. On-topic text is split into chunks, embedded with
    ``embedding_model``, added to a flat L2 FAISS index written to
    ``index_file``, and the chunk texts are written to ``chunks_file``.

    Each chunk is stored on exactly one line, so line ``i`` of ``chunks_file``
    corresponds to vector ``i`` of the index.

    Args:
        pdf_path: Path of the PDF file to process.

    Raises:
        ValueError: If the text contains none of the keywords, or if no
            non-empty chunk could be produced from it.
    """
    page_texts = []
    with pdfplumber.open(pdf_path) as pdf:
        for page in pdf.pages:
            page_text = page.extract_text()
            # extract_text() may yield nothing for a page (e.g. image-only pages)
            if page_text:
                page_texts.append(page_text)
    text = "\n".join(page_texts)

    # Check for cryptocurrency-related keywords (case-insensitive on both sides;
    # the lowercased text is computed once rather than once per keyword)
    lowered_text = text.lower()
    if not any(keyword.lower() in lowered_text for keyword in CRYPTO_KEYWORDS):
        raise ValueError("PDF content does not match the required topic: Cryptocurrency")

    # Split text into chunks. Extracted lines are separated by single newlines,
    # so split on "\n"; the splitter's default "\n\n" separator would rarely
    # occur and could leave the whole document as one oversized chunk.
    text_splitter = CharacterTextSplitter(separator="\n", chunk_size=500, chunk_overlap=50)
    raw_chunks = text_splitter.split_text(text)

    # Collapse internal whitespace/newlines so each chunk fits on one line of
    # chunks_file; otherwise line numbers would drift from FAISS vector ids.
    chunks = [" ".join(chunk.split()) for chunk in raw_chunks]
    chunks = [chunk for chunk in chunks if chunk]
    if not chunks:
        raise ValueError("No text chunks could be produced from the PDF.")

    # Generate embeddings (FAISS expects float32)
    embeddings = embedding_model.encode(chunks)
    embeddings = np.array(embeddings, dtype=np.float32)

    # Create FAISS index
    dimension = embeddings.shape[1]
    index = faiss.IndexFlatL2(dimension)
    index.add(embeddings)

    # Save the FAISS index
    faiss.write_index(index, index_file)

    # Save the text chunks, one per line, in the same order as the vectors
    with open(chunks_file, "w") as f:
        for chunk in chunks:
            f.write(chunk + "\n")

    print("Cryptocurrency-related PDF processed successfully.")

Upload & Process PDF

In [5]:
print("Please upload a PDF related to cryptocurrency.")
uploaded = files.upload()

# `uploaded` is a mapping keyed by file name; presumably it is empty when the
# upload dialog is cancelled (confirm against the Colab docs). Indexing into an
# empty key list would raise an uncaught IndexError, so handle that case here.
if not uploaded:
    print("Error: No file was uploaded. Please re-run this cell and choose a PDF.")
else:
    # Process the uploaded PDF (only the first file if several were selected)
    pdf_path = next(iter(uploaded))
    try:
        extract_and_store_text(pdf_path)
    except ValueError as ve:
        print(f"Error: {ve}")
    except Exception as e:
        print(f"Unexpected error: {e}")

Please upload a PDF related to cryptocurrency.


Saving s40854-021-00321-6.pdf to s40854-021-00321-6.pdf
Cryptocurrency-related PDF processed successfully.


Retrieve Relevant Context

In [6]:
def retrieve_relevant_chunks(query, top_k=3):
    """Retrieve the top-k most relevant text chunks from FAISS.

    Args:
        query: The user's question; it is embedded with ``embedding_model``.
        top_k: Maximum number of chunks to return.

    Returns:
        A list of at most ``top_k`` chunk strings, in the order FAISS returned
        them. Fewer are returned when the index holds fewer vectors.

    Raises:
        FileNotFoundError: If the FAISS index or the chunk file does not exist.
    """
    if not os.path.exists(index_file):
        raise FileNotFoundError("FAISS index not found. Please upload a valid PDF first.")
    if not os.path.exists(chunks_file):
        raise FileNotFoundError("Chunk file not found. Please upload a valid PDF first.")

    # Load FAISS index
    index = faiss.read_index(index_file)

    # Generate the query embedding (FAISS expects float32)
    query_embedding = np.array(embedding_model.encode([query]), dtype=np.float32)

    # Search the FAISS index; distances are not needed here
    _distances, indices = index.search(query_embedding, top_k)

    # Line i of the chunk file holds the text for vector i
    with open(chunks_file, "r") as f:
        text_chunks = f.readlines()

    # FAISS pads the result with -1 when fewer than top_k vectors exist. A bare
    # `i < len(...)` check would let -1 through and return the LAST chunk via
    # negative indexing, so the lower bound must be checked as well.
    return [text_chunks[i].strip() for i in indices[0] if 0 <= i < len(text_chunks)]

Query the Advisor Agent (Groq API)

In [7]:
def query_advisor(query, model="mixtral-8x7b-32768"):
    """Generate a response from the advisor agent using FAISS-based context and Groq API.

    Args:
        query: The user's question.
        model: Groq model identifier passed to ``ChatGroq``.
            NOTE(review): verify that the default id is still served by Groq.

    Returns:
        The model's answer as a stripped string, a fixed refusal message when
        the answer looks like an off-topic apology, or a string of the form
        ``"An error occurred: ..."`` if anything fails.
    """
    try:
        # Retrieve context
        retrieved_chunks = retrieve_relevant_chunks(query)
        context = "\n".join(retrieved_chunks) if retrieved_chunks else "No relevant context found."

        # Construct the chat prompt. Context and query are passed as template
        # VARIABLES rather than interpolated into the template string: PDF text
        # or a question containing "{" or "}" would otherwise be parsed as
        # template placeholders and make the invocation fail.
        advisor_prompt = ChatPromptTemplate.from_messages([
            ("system", ADVISOR_SYS_PROMPT),
            ("human", "Context: {context}\n\nUser Query: {query}")
        ])

        # Initialize the Groq LLM
        groq_api_key = os.getenv("GROQ_API_KEY")
        llm = ChatGroq(model=model, api_key=groq_api_key)

        # Pipe the prompt into the model and run it with the actual values
        chain = advisor_prompt | llm
        result = chain.invoke({"context": context, "query": query})

        # If the model apologised without using the expected refusal wording,
        # replace the answer with the fixed refusal message
        response_text = result.content.strip()
        if "I'm sorry" in response_text and "cryptocurrencies, digital assets" not in response_text:
            return "I'm sorry, I can only answer questions related to cryptocurrencies, or digital assets. Please ask a relevant question."

        return response_text

    except Exception as e:
        # Best-effort: surface the failure as text so the notebook cell keeps running
        return f"An error occurred: {e}"


Run the Advisor Agent

In [11]:
# Read the question, ignoring surrounding whitespace
user_question = input("Ask the Advisor Agent a question: ").strip()

# Skip retrieval and the LLM call entirely when nothing was typed
if user_question:
    response = query_advisor(user_question)
else:
    response = "Please enter a question for the Advisor Agent."
print(response)

Ask the Advisor Agent a question: How should a beginner allocate their crypto portfolio?
Allocating a crypto portfolio depends on individual risk tolerance, investment goals, and financial situation. However, a common beginner strategy is the "60-30-10 rule":

1. 60% in large-cap cryptocurrencies (e.g., Bitcoin, Ethereum) for stability and growth potential.
2. 30% in mid-cap and small-cap cryptocurrencies for higher growth potential and diversification.
3. 10% in stablecoins (e.g., USDC, DAI) or savings accounts for short-term liquidity and reducing portfolio volatility.

This allocation can be adjusted based on market conditions and personal preferences. It's crucial for beginners to perform thorough research, use reputable exchanges, and follow sound risk management practices.

Note: This response does not constitute personal financial advice and should be used as a starting point for further research and personalized portfolio allocation.
