In [1]:
import torch
print(torch.cuda.is_available())
print(torch.cuda.get_device_name(0))

True
NVIDIA GeForce GTX 1650


In [1]:
import numpy as np
print(np.__version__)


1.26.4


In [3]:
import torch
import time

print("Is CUDA available?", torch.cuda.is_available())

x = torch.rand(10000, 10000).to("cuda")
print("Tensor is on:", x.device)

y = x @ x
print("Computation done on GPU")

time.sleep(10)  # Time to check nvidia-smi



Is CUDA available? True
Tensor is on: cuda:0
Computation done on GPU


In [4]:
import torch

# Check available CUDA devices
print("CUDA available:", torch.cuda.is_available())
print("CUDA device count:", torch.cuda.device_count())
print("Current CUDA device index:", torch.cuda.current_device())
print("Current CUDA device name:", torch.cuda.get_device_name(torch.cuda.current_device()))


CUDA available: True
CUDA device count: 1
Current CUDA device index: 0
Current CUDA device name: NVIDIA GeForce GTX 1650


In [5]:
import torch
import time

# Set matrix size (increase for heavier computation)
MATRIX_SIZE = 10000

# CPU Benchmark
print("Running on CPU...")
x_cpu = torch.rand(MATRIX_SIZE, MATRIX_SIZE)
y_cpu = torch.rand(MATRIX_SIZE, MATRIX_SIZE)

start_cpu = time.time()
z_cpu = x_cpu @ y_cpu
end_cpu = time.time()

cpu_time = end_cpu - start_cpu
print(f"CPU Time: {cpu_time:.4f} seconds\n")

# GPU Benchmark
if torch.cuda.is_available():
    print("Running on GPU...")
    device = torch.device("cuda")

    x_gpu = torch.rand(MATRIX_SIZE, MATRIX_SIZE, device=device)
    y_gpu = torch.rand(MATRIX_SIZE, MATRIX_SIZE, device=device)

    # Warm-up (optional but helps with fair timing)
    _ = x_gpu @ y_gpu

    # Wait for GPU to finish previous ops
    torch.cuda.synchronize()

    start_gpu = time.time()
    z_gpu = x_gpu @ y_gpu
    torch.cuda.synchronize()  # Ensure GPU finished
    end_gpu = time.time()

    gpu_time = end_gpu - start_gpu
    print(f"GPU Time: {gpu_time:.4f} seconds")

    # Speedup
    speedup = cpu_time / gpu_time
    print(f"\n💡 Speedup (CPU / GPU): {speedup:.2f}x")
else:
    print("CUDA not available.")


Running on CPU...
CPU Time: 33.5799 seconds

Running on GPU...
GPU Time: 1.5834 seconds

💡 Speedup (CPU / GPU): 21.21x


In [7]:
pip install ipywidgets

Collecting ipywidgetsNote: you may need to restart the kernel to use updated packages.

  Downloading ipywidgets-8.1.7-py3-none-any.whl.metadata (2.4 kB)
Collecting widgetsnbextension~=4.0.14 (from ipywidgets)
  Downloading widgetsnbextension-4.0.14-py3-none-any.whl.metadata (1.6 kB)
Collecting jupyterlab_widgets~=3.0.15 (from ipywidgets)
  Downloading jupyterlab_widgets-3.0.15-py3-none-any.whl.metadata (20 kB)
Downloading ipywidgets-8.1.7-py3-none-any.whl (139 kB)
Downloading jupyterlab_widgets-3.0.15-py3-none-any.whl (216 kB)
Downloading widgetsnbextension-4.0.14-py3-none-any.whl (2.2 MB)
   ---------------------------------------- 0.0/2.2 MB ? eta -:--:--
   --------- ------------------------------ 0.5/2.2 MB 2.4 MB/s eta 0:00:01
   ------------------- -------------------- 1.0/2.2 MB 2.6 MB/s eta 0:00:01
   ---------------------------- ----------- 1.6/2.2 MB 2.6 MB/s eta 0:00:01
   ---------------------------------------- 2.2/2.2 MB 2.6 MB/s eta 0:00:00
Installing collected packages

In [9]:
from sentence_transformers import SentenceTransformer
import torch
import time

# Generate 100 dummy documents, each ~100 words
def generate_dummy_documents(num_docs=100, words_per_doc=100):
    documents = []
    for i in range(num_docs):
        doc = " ".join([f"word{i}_{j}" for j in range(words_per_doc)])
        documents.append(doc)
    return documents

docs = generate_dummy_documents()

# ---- CPU Embedding ----
print("\n======================")
print("🧠 Starting CPU Embedding")
print("======================")
device = "cpu"
model_cpu = SentenceTransformer("all-MiniLM-L6-v2", device=device)
start_cpu = time.time()
print("🔄 Progress on CPU:")
embeddings_cpu = model_cpu.encode(docs, show_progress_bar=True)
end_cpu = time.time()
print(f"✅ CPU Embedding Completed in {end_cpu - start_cpu:.2f} seconds")

# ---- GPU Embedding ----
if torch.cuda.is_available():
    print("\n======================")
    print("⚡ Starting GPU Embedding")
    print("======================")
    device = "cuda"
    model_gpu = SentenceTransformer("all-MiniLM-L6-v2", device=device)
    start_gpu = time.time()
    print("🔄 Progress on GPU:")
    embeddings_gpu = model_gpu.encode(docs, show_progress_bar=True)
    end_gpu = time.time()
    print(f"✅ GPU Embedding Completed in {end_gpu - start_gpu:.2f} seconds")
else:
    print("\n❌ CUDA is not available. Skipping GPU embedding.")



🧠 Starting CPU Embedding
🔄 Progress on CPU:


Batches: 100%|██████████| 4/4 [00:16<00:00,  4.12s/it]


✅ CPU Embedding Completed in 16.56 seconds

⚡ Starting GPU Embedding
🔄 Progress on GPU:


  attn_output = torch.nn.functional.scaled_dot_product_attention(
Batches: 100%|██████████| 4/4 [00:03<00:00,  1.29it/s]

✅ GPU Embedding Completed in 3.12 seconds





In [11]:
docs

['word0_0 word0_1 word0_2 word0_3 word0_4 word0_5 word0_6 word0_7 word0_8 word0_9 word0_10 word0_11 word0_12 word0_13 word0_14 word0_15 word0_16 word0_17 word0_18 word0_19 word0_20 word0_21 word0_22 word0_23 word0_24 word0_25 word0_26 word0_27 word0_28 word0_29 word0_30 word0_31 word0_32 word0_33 word0_34 word0_35 word0_36 word0_37 word0_38 word0_39 word0_40 word0_41 word0_42 word0_43 word0_44 word0_45 word0_46 word0_47 word0_48 word0_49 word0_50 word0_51 word0_52 word0_53 word0_54 word0_55 word0_56 word0_57 word0_58 word0_59 word0_60 word0_61 word0_62 word0_63 word0_64 word0_65 word0_66 word0_67 word0_68 word0_69 word0_70 word0_71 word0_72 word0_73 word0_74 word0_75 word0_76 word0_77 word0_78 word0_79 word0_80 word0_81 word0_82 word0_83 word0_84 word0_85 word0_86 word0_87 word0_88 word0_89 word0_90 word0_91 word0_92 word0_93 word0_94 word0_95 word0_96 word0_97 word0_98 word0_99',
 'word1_0 word1_1 word1_2 word1_3 word1_4 word1_5 word1_6 word1_7 word1_8 word1_9 word1_10 word1_11 word1_

In [21]:
from langchain_huggingface import HuggingFaceEmbeddings
import torch
import time

# Dummy document for testing
docs1 = ["This is a test document for embedding performance comparison."] * 100
docs2 = ["This is a test document for embedding performance comparison."] * 100000
# ---- CPU ----
print("\n======================")
print("🧠 Starting CPU Embedding")
print("======================")
cpu_embedder = HuggingFaceEmbeddings(
    model_name="all-MiniLM-L6-v2",
    model_kwargs={"device": "cpu"}
)
start_cpu = time.time()
embeddings_cpu = cpu_embedder.embed_documents(docs1)
end_cpu = time.time()
print(f"✅ CPU Time Taken: {end_cpu - start_cpu:.2f} seconds")

# ---- GPU ----
if torch.cuda.is_available():
    print("\n======================")
    print("⚡ Starting GPU Embedding")
    print("======================")
    gpu_embedder = HuggingFaceEmbeddings(
        model_name="all-MiniLM-L6-v2",
        model_kwargs={"device": "cuda"}
    )
    start_gpu = time.time()
    embeddings_gpu = gpu_embedder.embed_documents(docs2)  # Increase workload for GPU
    end_gpu = time.time()
    print(f"✅ GPU Time Taken: {end_gpu - start_gpu:.2f} seconds")
else:
    print("\n❌ CUDA is not available. Skipping GPU test.")
# Compare embeddings
print("\nEmbedding Comparison:")    


🧠 Starting CPU Embedding
✅ CPU Time Taken: 0.93 seconds

⚡ Starting GPU Embedding
✅ GPU Time Taken: 52.27 seconds

Embedding Comparison:
