In [2]:
pip install bs4

Collecting bs4
  Using cached bs4-0.0.2-py2.py3-none-any.whl.metadata (411 bytes)
Collecting beautifulsoup4 (from bs4)
  Using cached beautifulsoup4-4.12.3-py3-none-any.whl.metadata (3.8 kB)
Collecting soupsieve>1.2 (from beautifulsoup4->bs4)
  Using cached soupsieve-2.6-py3-none-any.whl.metadata (4.6 kB)
Using cached bs4-0.0.2-py2.py3-none-any.whl (1.2 kB)
Using cached beautifulsoup4-4.12.3-py3-none-any.whl (147 kB)
Using cached soupsieve-2.6-py3-none-any.whl (36 kB)
Installing collected packages: soupsieve, beautifulsoup4, bs4
Successfully installed beautifulsoup4-4.12.3 bs4-0.0.2 soupsieve-2.6
Note: you may need to restart the kernel to use updated packages.


In [1]:
import os
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin
from langchain.vectorstores import Chroma
from langchain.schema import Document
from langchain_community.embeddings import OllamaEmbeddings

def scrape_and_store_embeddings(start_url, max_depth=5):
    embedding = OllamaEmbeddings(model="nomic-embed-text")
    visited_urls = set()
    documents = []

    def recursive_scrape(url, depth):
        if depth > max_depth or url in visited_urls:
            return

        visited_urls.add(url)
        print(f"Scraping (Depth {depth}): {url}")  # Print the current URL being scraped
        
        try:
            response = requests.get(url)
            soup = BeautifulSoup(response.text, 'html.parser')
            
            # Extract text content
            text = soup.get_text(separator=' ', strip=True)
            
            # Create a Document object
            doc = Document(page_content=text, metadata={"source": url})
            documents.append(doc)

            # Find all links and recursively scrape them
            for link in soup.find_all('a', href=True):
                next_url = urljoin(url, link['href'])
                if next_url.startswith(start_url):  # Only follow links within the same domain
                    print(f"Found link: {next_url}")  # Print each found link
                    recursive_scrape(next_url, depth + 1)
        except Exception as e:
            print(f"Error scraping {url}: {e}")

    # Start the recursive scraping
    print(f"Starting scrape from: {start_url}")
    recursive_scrape(start_url, 1)

    # Create Chroma vector store
    persist_directory = os.path.join(os.getcwd(), "course-logistics-retriever")
    vectorstore = Chroma.from_documents(documents, embedding, persist_directory=persist_directory)

    # Create a retriever from the Chroma vector store
    retriever = vectorstore.as_retriever()

    print(f"\nScraping completed.")
    print(f"Chroma DB is stored in: {persist_directory}")
    print(f"Directory exists: {os.path.exists(persist_directory)}")
    print(f"Contents of the directory: {os.listdir(persist_directory)}")
    print(f"Number of documents scraped and stored: {len(documents)}")
    print(f"Total unique URLs visited: {len(visited_urls)}")

    return retriever

# Usage
start_url = "https://canvas.illinois.edu/courses/49537"
retriever_course_logistics = scrape_and_store_embeddings(start_url)

Starting scrape from: https://canvas.illinois.edu/courses/49537
Scraping (Depth 1): https://canvas.illinois.edu/courses/49537
Found link: https://canvas.illinois.edu/courses/49537
Found link: https://canvas.illinois.edu/courses/49537#content
Scraping (Depth 2): https://canvas.illinois.edu/courses/49537#content
Found link: https://canvas.illinois.edu/courses/49537
Found link: https://canvas.illinois.edu/courses/49537#content
Found link: https://canvas.illinois.edu/courses/49537
Found link: https://canvas.illinois.edu/courses/49537
Found link: https://canvas.illinois.edu/courses/49537
Found link: https://canvas.illinois.edu/courses/49537
Found link: https://canvas.illinois.edu/courses/49537
Found link: https://canvas.illinois.edu/courses/49537/external_tools/13687
Scraping (Depth 3): https://canvas.illinois.edu/courses/49537/external_tools/13687
Found link: https://canvas.illinois.edu/courses/49537/external_tools/13687
Found link: https://canvas.illinois.edu/courses/49537/external_tools/

In [1]:
import torch
a = torch.tensor([[0, 2, 4], [1, 3, 5]])
b = torch.tensor([[0, 7], [8, 9], [10, 11]])
c = torch.matmul(a, b)  # or alternatively: c = a @ b
print("c=", c)

c= tensor([[56, 62],
        [74, 89]])


In [1]:
import torch

# Define matrix A
a = torch.tensor([[2.0, 4.0, 3.0],
                  [9.0, 6.0, 8.0],
                  [11.0, 13.0, 10.0]], dtype=torch.float32)
print("Matrix A:")
print(a)

# Compute the inverse of A
a_inversed = torch.inverse(a)
print("\nInverse of A:")
print(a_inversed)

Matrix A:
tensor([[ 2.,  4.,  3.],
        [ 9.,  6.,  8.],
        [11., 13., 10.]])

Inverse of A:
tensor([[-0.7719, -0.0175,  0.2456],
        [-0.0351, -0.2281,  0.1930],
        [ 0.8947,  0.3158, -0.4211]])
