In [None]:
import requests
from bs4 import BeautifulSoup
import json
from urllib.parse import urljoin

class NvidiaDocsSpider:
    def __init__(self):
        self.allowed_domains = ["docs.nvidia.com"]
        self.start_urls = ["https://docs.nvidia.com/cuda/"]
        self.visited_urls = set()
        self.max_depth = 5

    def parse(self, url, depth):
        if url in self.visited_urls or depth > self.max_depth:
            return []
        self.visited_urls.add(url)
        print(f"Scraping: {url}")
        response = requests.get(url)
        soup = BeautifulSoup(response.text, 'html.parser')
        page_content = soup.get_text()

        data = [{
            'url': url,
            'content': page_content
        }]

        # Find and follow sub-links
        if depth < self.max_depth:
            for link in soup.find_all('a', href=True):
                next_url = urljoin(url, link['href'])
                if self.is_allowed_domain(next_url):
                    data.extend(self.parse(next_url, depth + 1))

        return data

    def is_allowed_domain(self, url):
        return any(domain in url for domain in self.allowed_domains)

    def run(self):
        data = []
        for url in self.start_urls:
            data.extend(self.parse(url, 0))
        with open('nvidia_docs.json', 'w') as f:
            json.dump(data, f)

scraper = NvidiaDocsSpider()
scraper.run()


In [None]:
import json
from sentence_transformers import SentenceTransformer
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_experimental.text_splitter import SemanticChunker
import os
from dotenv import load_dotenv
load_dotenv()

# Initialize Google Generative AI Embeddings
embeddings = GoogleGenerativeAIEmbeddings(
    model="models/text-embedding-004",
    google_api_key=os.environ['GOOGLE_API_KEY'],
    task_type="retrieval_document"
)

# Load the scraped data
with open('nvidia_docs.json') as f:
    data = json.load(f)

# Initialize Semantic Chunker
splitter = SemanticChunker(embeddings=embeddings)

# Chunk data
chunks = []
for entry in data:
    if entry['content'].strip():  # Ensure the content is not empty
        docs = splitter.create_documents([entry['content']])
        for doc in docs:
            content = doc.page_content.strip()
            if content:  # Ensure each chunk is not empty
                chunks.append({
                    'url': entry['url'],
                    'content': content,
                    'embedding': embeddings.embed_query(content)
                })

# Save the chunks
with open('nvidia_chunks.json', 'w') as f:
    json.dump(chunks, f)


In [None]:
from milvus import default_server
default_server.start()

In [None]:
from pymilvus import connections

connections.connect(
   host='127.0.0.1',
   port=default_server.listen_port)

In [None]:
from pymilvus import FieldSchema, CollectionSchema, DataType, Collection

fields = [
    FieldSchema(name="url", dtype=DataType.VARCHAR, max_length=500, is_primary=True),
    FieldSchema(name="content", dtype=DataType.VARCHAR, max_length =65535),
    FieldSchema(name="embedding", dtype=DataType.FLOAT_VECTOR, dim=768)
]
collection_schema = CollectionSchema(fields=fields, schema="DenseVector")
collection_name_ivf = "ivf_embeddings"

# Define IVF parameters
nlist = 128
metric = "L2" 

collection = Collection(name=collection_name_ivf, schema=collection_schema, use_index="IVF_FLAT", params={"nlist": nlist, "metric": metric})

entity = []
for chunk in chunks:
    dic = {}
    dic['url'] = chunk['url']
    dic['content'] = chunk['content']
    dic['embedding'] = chunk['embedding']
    entity.append(dic)

collection.insert(entity)

In [1]:
import os
from dotenv import load_dotenv
load_dotenv()

import google.generativeai as genai
genai.configure(api_key=os.environ['GOOGLE_API_KEY'])
llm = genai.GenerativeModel('models/gemini-1.5-pro')

Query Expansion

In [7]:
def query_expansion(query):
    prompt = f"""
    System: You are a helpful expert technical research assistant. Provide an example answer to the given question, that might be found in a document like an web scraped data. 
    
    User: {query}
    """

    return llm.generate_content(prompt).text

modified_query = query_expansion("How to install CUDA on Linux?")

In [9]:
modified_query

"## Installing CUDA on Linux - Example using Ubuntu 20.04 and CUDA 11.4\n\nWhile specifics vary depending on your Linux distribution and chosen CUDA version, here's a common installation method:\n\n**1. Verify GPU Compatibility:**\n\n    * Run `lspci | grep -i nvidia` to confirm your GPU is NVIDIA and supported. \n    * Cross-reference your GPU model with the CUDA Toolkit documentation for compatibility.\n\n**2. Download CUDA Toolkit:**\n\n    * Visit the NVIDIA Developer website: [https://developer.nvidia.com/cuda-downloads](https://developer.nvidia.com/cuda-downloads)\n    * Select your Linux distribution, version (e.g., Ubuntu 20.04), architecture (x86_64), installer type (runfile recommended), and click Download.\n\n**3. Install CUDA Toolkit:**\n\n    * Open a terminal.\n    * Navigate to the download directory: `cd Downloads`\n    * Make the file executable: `chmod +x cuda_11.4.1_470.57.05_linux.run`\n    * Run the installer: `sudo ./cuda_11.4.1_470.57.05_linux.run`\n    * Follow 

In [10]:
import json
import torch
from transformers import DPRContextEncoder, DPRContextEncoderTokenizer, DPRQuestionEncoder, DPRQuestionEncoderTokenizer
from rank_bm25 import BM25Okapi

# Load passages and embeddings from JSON
with open('nvidia_chunks.json', 'r') as f:
    data = json.load(f)

# Extract passages and embeddings into separate lists
passages = [entry['content'] for entry in data]
embeddings = torch.tensor([entry['embedding'] for entry in data])

# Initialize BM25
bm25 = BM25Okapi([passage.split() for passage in passages])

# Load DPR models and tokenizers
context_encoder = DPRContextEncoder.from_pretrained('facebook/dpr-ctx_encoder-single-nq-base')
context_tokenizer = DPRContextEncoderTokenizer.from_pretrained('facebook/dpr-ctx_encoder-single-nq-base')
question_encoder = DPRQuestionEncoder.from_pretrained('facebook/dpr-question_encoder-single-nq-base')
question_tokenizer = DPRQuestionEncoderTokenizer.from_pretrained('facebook/dpr-question_encoder-single-nq-base')

def encode_query(query):
    inputs = context_tokenizer(query, return_tensors='pt', padding=True, truncation=True, max_length=512)
    with torch.no_grad():
        query_embedding = context_encoder(**inputs).pooler_output
    return query_embedding

def retrieve_passages_dpr(query_embedding, embeddings, passages, top_k=3):
    similarities = torch.matmul(query_embedding, embeddings.T).squeeze(0)
    top_k_indices = torch.topk(similarities, k=top_k).indices
    return [(passages[idx], similarities[idx].item()) for idx in top_k_indices]

def retrieve_passages_bm25(query, passages, top_k=3):
    bm25_scores = bm25.get_scores(query.split())
    top_k_indices = torch.topk(torch.tensor(bm25_scores), k=top_k).indices
    return [(passages[idx], bm25_scores[idx]) for idx in top_k_indices]

def hybrid_retriever(query, embeddings, passages, top_k=3, alpha=0.5):
    query_embedding = encode_query(query)
    
    assert query_embedding.shape[1] == embeddings.shape[1], f"Query embedding size {query_embedding.shape} does not match passage embedding size {embeddings.shape}"
    
    dpr_results = retrieve_passages_dpr(query_embedding, embeddings, passages, top_k)
    bm25_results = retrieve_passages_bm25(query, passages, top_k)

    # Combine DPR and BM25 results
    combined_scores = {}
    for passage, score in dpr_results:
        combined_scores[passage] = combined_scores.get(passage, 0) + alpha * score
    for passage, score in bm25_results:
        combined_scores[passage] = combined_scores.get(passage, 0) + (1 - alpha) * score

    sorted_passages = sorted(combined_scores.items(), key=lambda item: item[1], reverse=True)
    return sorted_passages[:top_k]

top_passages = hybrid_retriever(modified_query, embeddings, passages)
print(top_passages)


Some weights of the model checkpoint at facebook/dpr-ctx_encoder-single-nq-base were not used when initializing DPRContextEncoder: ['ctx_encoder.bert_model.pooler.dense.bias', 'ctx_encoder.bert_model.pooler.dense.weight']
- This IS expected if you are initializing DPRContextEncoder from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DPRContextEncoder from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'DPRQuestionEncoderTokenizer'. 
The class this function is called from is 'DPRContextEncoderTokeniz

[('Runfile Installationï\x83\x81\n\nBasic instructions can be found in the Quick Start Guide. Read on for more detailed instructions. This section describes the installation and configuration of CUDA when using the standalone installer. The standalone installer is a â\x80\x9c.runâ\x80\x9d file and is completely self-contained. 8.1. Runfile Overviewï\x83\x81\n\nThe Runfile installation installs the NVIDIA Driver and CUDA Toolkit via an interactive ncurses-based interface. The installation steps are listed below. Distribution-specific instructions on disabling the Nouveau drivers as well as steps for verifying device node creation are also provided. Finally, advanced options for the installer and uninstallation steps are detailed below. The Runfile installation does not include support for cross-platform development. For cross-platform development, see the CUDA Cross-Platform Environment section. 8.2. Installationï\x83\x81\n\n\nPerform the pre-installation actions. Disable the Nouveau dr

In [11]:
def generate_answer(query, context):
    prompt = f"""
    You are given this context and a query, based on the context and your own knowledge, answer the query.

    Context:
    {context}

    Query:
    {query}
    """

    answer = llm.generate_content(prompt).text
    print(answer)

In [12]:
print(generate_answer("How to install CUDA on linux?", top_passages))

This document provides a pretty thorough guide on installing CUDA on Linux. Here's a summarized breakdown based on the information:

**Installation Methods**

CUDA on Linux can be installed using various methods depending on your Linux distribution:

* **RPM Package:** Suitable for distributions like Red Hat, CentOS, Fedora, SUSE Linux Enterprise Server, and OpenSUSE.
* **Debian Package:** Ideal for Debian and Ubuntu-based systems.
* **Runfile:** A self-contained installer that works across various distributions. It offers an interactive, ncurses-based installation process. 
* **Conda:** Available if you use the Conda package manager. 

**Common Steps (Especially Relevant for Runfile Installation)**

1. **Prerequisites:**
   * **GCC Compiler:** Ensure you have it installed as it's essential for CUDA development.
   * **Kernel Headers and Development Packages:**  Match these to your running kernel version for driver compatibility. 

2. **Disable Nouveau Drivers:**
   * Nouveau is an ope