In [1]:
import torch
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModelForCausalLM
import warnings
warnings.filterwarnings('ignore')

# --- Device selection -------------------------------------------------------
# Use CUDA when PyTorch reports a usable GPU; otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(f"Using device: {device}")

# --- Embedding model --------------------------------------------------------
# all-MiniLM-L6-v2 is a compact sentence-embedding checkpoint, picked so the
# notebook stays usable on modest hardware (e.g. a Colab T4).
print("Loading embedding model...")
sentence_model = SentenceTransformer('all-MiniLM-L6-v2')
print("✓ Embedding model loaded successfully!")

# --- Generation model -------------------------------------------------------
# One checkpoint name drives both the tokenizer and the causal LM so the two
# can never drift apart.
GENERATION_MODEL_NAME = 'gpt2-medium'

print("Loading generation model...")
tokenizer = AutoTokenizer.from_pretrained(GENERATION_MODEL_NAME)
if tokenizer.pad_token is None:
    # No padding token is configured: reuse the end-of-sequence token so that
    # padded/batched tokenization has something to pad with.
    tokenizer.pad_token = tokenizer.eos_token

generation_model = AutoModelForCausalLM.from_pretrained(GENERATION_MODEL_NAME)
generation_model = generation_model.to(device)  # move weights to the chosen device
print("✓ Generation model loaded successfully!")

# --- Summary ----------------------------------------------------------------
# A single print call with a newline separator emits the same four lines
# (preceded by a blank line) as four consecutive prints would.
print(
    "\nModel setup complete:",
    "- Embedding model: all-MiniLM-L6-v2 (384 dimensions)",
    "- Generation model: GPT-2 Medium (~355M parameters)",
    "- Memory efficient for T4 GPU in Google Colab",
    sep="\n",
)

Using device: cpu
Loading embedding model...


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

✓ Embedding model loaded successfully!
Loading generation model...


tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/718 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/1.52G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

✓ Generation model loaded successfully!

Model setup complete:
- Embedding model: all-MiniLM-L6-v2 (384 dimensions)
- Generation model: GPT-2 Medium (~355M parameters)
- Memory efficient for T4 GPU in Google Colab
