In [None]:
pip install langchain openai faiss-cpu



In [None]:
# Simple RAG Q&A System using LangChain
# A basic document Q&A system that can answer questions about your documents

import os
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.llms import OpenAI
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate

class SimpleRAGQA:
    def __init__(self, openai_api_key):
        """Initialize the RAG Q&A system"""
        os.environ["OPENAI_API_KEY"] = openai_api_key
        self.embeddings = OpenAIEmbeddings()
        self.llm = OpenAI(temperature=0)
        self.vectorstore = None
        self.qa_chain = None

    def load_and_process_documents(self, file_path):
        """Load documents and create vector store"""
        print("Loading documents...")

        # Load document
        loader = TextLoader(file_path)
        documents = loader.load()

        # Split documents into chunks
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200
        )
        texts = text_splitter.split_documents(documents)

        print(f"Created {len(texts)} document chunks")

        # Create vector store
        print("Creating embeddings...")
        self.vectorstore = FAISS.from_documents(texts, self.embeddings)

        # Create QA chain
        self.qa_chain = RetrievalQA.from_chain_type(
            llm=self.llm,
            chain_type="stuff",
            retriever=self.vectorstore.as_retriever(search_kwargs={"k": 3})
        )

        print("RAG system ready!")

    def ask_question(self, question):
        """Ask a question and get an answer"""
        if not self.qa_chain:
            return "Please load documents first!"

        print(f"\nQuestion: {question}")
        result = self.qa_chain.run(question)
        print(f"Answer: {result}")
        return result

def main():
    # Replace with your OpenAI API key
    API_KEY = "your-openai-api-key-here"

    # Create sample document (you can replace this with your own file)
    sample_text = """
    Artificial Intelligence (AI) is a branch of computer science that aims to create intelligent machines.
    Machine Learning is a subset of AI that enables computers to learn without being explicitly programmed.
    Deep Learning is a subset of Machine Learning that uses neural networks with multiple layers.

    Natural Language Processing (NLP) is a field of AI that helps computers understand human language.
    Computer Vision is another field of AI that enables computers to interpret visual information.

    Transformers are a type of neural network architecture that has revolutionized NLP.
    BERT and GPT are popular transformer models used for various NLP tasks.
    """

    # Save sample document
    with open("sample_document.txt", "w") as f:
        f.write(sample_text)

    # Initialize RAG system
    rag_system = SimpleRAGQA(API_KEY)

    # Load documents
    rag_system.load_and_process_documents("sample_document.txt")

    # Ask questions
    questions = [
        "What is Machine Learning?",
        "What are Transformers?",
        "What is the difference between AI and Machine Learning?",
        "What are some popular transformer models?"
    ]

    for question in questions:
        rag_system.ask_question(question)
        print("-" * 50)

if __name__ == "__main__":
    main()

ModuleNotFoundError: Module langchain_community.document_loaders not found. Please install langchain-community to access this module. You can install it using `pip install -U langchain-community`

In [None]:
!pip install -q langchain transformers accelerate bitsandbytes sentence-transformers faiss-cpu pypdf

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.0/67.0 MB[0m [31m8.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m31.3/31.3 MB[0m [31m31.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m304.6/304.6 kB[0m [31m14.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m100.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m29.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m45.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
pip install -U langchain-community

Collecting langchain-community
  Downloading langchain_community-0.3.26-py3-none-any.whl.metadata (2.9 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain-community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain-community)
  Downloading pydantic_settings-2.10.1-py3-none-any.whl.metadata (3.4 kB)
Collecting httpx-sse<1.0.0,>=0.4.0 (from langchain-community)
  Downloading httpx_sse-0.4.1-py3-none-any.whl.metadata (9.4 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community)
  Downloading marshmallow-3.26.1-py3-none-any.whl.metadata (7.3 kB)
Collecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community)
  Downloading typing_inspect-0.9.0-py3-none-any.whl.metadata (1.5 kB)
Collecting python-dotenv>=0.21.0 (from pydantic-settings<3.0.0,>=2.4.0->langchain-community)
  Downloading python_dotenv-1.1.1-py3-none-any.whl.metadata (24 k

In [None]:
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Upload your PDF (click the folder icon on the left)
loader = PyPDFLoader("/content/sample_data/AL.pdf")  # Update path if needed
pages = loader.load()

# Split into chunks
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200
)
docs = text_splitter.split_documents(pages)

In [None]:
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

# Use free local embeddings
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# Store in FAISS vector DB
db = FAISS.from_documents(docs, embeddings)
db.save_local("faiss_index")  # Save for later queries

  embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [None]:
from langchain.llms import HuggingFacePipeline
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import torch

# Load Llama 2 (7B quantized for low GPU memory)
model_id = "TheBloke/Llama-2-7B-Chat-GGML"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype=torch.float16
)

# Create LangChain pipeline
llm_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=512,
    temperature=0.7
)

llm = HuggingFacePipeline(pipeline=llm_pipeline)

config.json:   0%|          | 0.00/29.0 [00:00<?, ?B/s]

You are using the default legacy behaviour of the <class 'transformers.models.llama.tokenization_llama.LlamaTokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565 - if you loaded a llama tokenizer from a GGUF file you can ignore this message


TypeError: not a string

In [None]:
from langchain.chains import RetrievalQA

# Load FAISS index
retriever = db.as_retriever(search_kwargs={"k": 3})

# RAG chain
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever
)

# Ask a question
query = "What is the main idea of this document?"
answer = qa_chain.run(query)
print(answer)

In [None]:
# Example query
question = "Summarize this document in 3 bullet points."
result = qa_chain({"query": question})
print(result["result"])

In [None]:
!pip install -q torch transformers sentencepiece

In [None]:

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

In [None]:
tokenizer = AutoTokenizer.from_pretrained("gpt2")  # Small model for learning

text = "welcome to baytech innovation in kanyakumari dist,i am working here"
tokens = tokenizer(text, return_tensors="pt")  # Convert to PyTorch tensors
print("Tokens:", tokens)
print("Decoded:", tokenizer.decode(tokens["input_ids"][0]))

Tokens: {'input_ids': tensor([[   86,  9571,   284, 15489, 13670, 11044,   287,   479,  1092,   461,
           388,  2743,  1233,    11,    72,   716,  3111,   994]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])}
Decoded: welcome to baytech innovation in kanyakumari dist,i am worked here


In [None]:
model = AutoModelForCausalLM.from_pretrained("gpt2")

# Inspect model architecture
print("Model layers:", model)
print("Total parameters:", sum(p.numel() for p in model.parameters()))

Model layers: GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-11): 12 x GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D(nf=2304, nx=768)
          (c_proj): Conv1D(nf=768, nx=768)
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D(nf=3072, nx=768)
          (c_proj): Conv1D(nf=768, nx=3072)
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=768, out_features=50257, bias=False)
)
Total parameters: 124439808


In [None]:
output = model.generate(
    tokens["input_ids"],
    max_length=50,
    do_sample=True,
    temperature=0.7
)
print("Generated:", tokenizer.decode(output[0]))

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Generated: welcome to baytech innovation in kanyakumari dist,i am worked here for over 2 years," said the driver.

"This is the first time we are working with you in kanyakumari dist.i and


#next

In [None]:
!pip install -q torch transformers datasets
from transformers import AutoTokenizer, GPT2LMHeadModel, GPT2Config, Trainer, TrainingArguments
from datasets import Dataset
import torch

# Download Shakespeare dataset
!wget https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt -O shakespeare.txt
with open("shakespeare.txt", "r") as f:
    text = f.read()

# Split text into chunks for training
text_chunks = [text[i:i+1024] for i in range(0, len(text), 1024)]

--2025-06-26 09:11:41--  https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1115394 (1.1M) [text/plain]
Saving to: ‘shakespeare.txt’


2025-06-26 09:11:41 (27.5 MB/s) - ‘shakespeare.txt’ saved [1115394/1115394]



In [None]:
tokenizer = AutoTokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token  # Set padding token

def tokenize_function(examples):
    return tokenizer(examples["text"], truncation=True, max_length=512, padding="max_length")

# Create Hugging Face Dataset
dataset = Dataset.from_dict({"text": text_chunks})
dataset = dataset.map(tokenize_function, batched=True)
dataset = dataset.remove_columns(["text"])  # Remove raw text column

# Format for causal LM training
dataset = dataset.map(lambda x: {"labels": x["input_ids"]}, batched=True)

Map:   0%|          | 0/1090 [00:00<?, ? examples/s]

Map:   0%|          | 0/1090 [00:00<?, ? examples/s]

In [None]:
config = GPT2Config(
    vocab_size=tokenizer.vocab_size,
    n_embd=128,  # Increased from 64 for better learning
    n_layer=4,
    n_head=4,
    n_positions=512
)
model = GPT2LMHeadModel(config)

In [None]:
training_args = TrainingArguments(
    output_dir="/content/sample_data/results",
    per_device_train_batch_size=2,
    num_train_epochs=3,  # Increased from 1
    logging_steps=100,
    save_steps=500,
    eval_strategy="steps",
    eval_steps=500,
    fp16=True,  # Enable mixed precision training
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset,
    eval_dataset=dataset.select(range(10)),  # Small eval set
)

In [None]:
trainer.train()

`loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`.


Step,Training Loss,Validation Loss
500,6.3401,5.919342
1000,4.6838,4.374318
1500,4.2074,3.953655


TrainOutput(global_step=1635, training_loss=5.61755651071531, metrics={'train_runtime': 3323.976, 'train_samples_per_second': 0.984, 'train_steps_per_second': 0.492, 'total_flos': 7969489551360.0, 'train_loss': 5.61755651071531, 'epoch': 3.0})

#llm basic

In [3]:
import nltk
from sklearn.feature_extraction.text import TfidfVectorizer

nltk.download('punkt')
nltk.download('stopwords')
import nltk
nltk.download('punkt_tab')
text = "Learning Large Language Models is fun and powerful!"
from nltk.tokenize import word_tokenize
tokens = word_tokenize(text.lower())
print(tokens)


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...


['learning', 'large', 'language', 'models', 'is', 'fun', 'and', 'powerful', '!']


[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


In [4]:
import torch
import torch.nn.functional as F

def scaled_dot_product_attention(q, k, v):
    d_k = q.size(-1)
    scores = torch.matmul(q, k.transpose(-2, -1)) / d_k**0.5
    weights = F.softmax(scores, dim=-1)
    return torch.matmul(weights, v)

q = torch.rand(1, 5, 64)
k = torch.rand(1, 5, 64)
v = torch.rand(1, 5, 64)

out = scaled_dot_product_attention(q, k, v)
print(out.shape)


torch.Size([1, 5, 64])


In [11]:
!rm -rf ~/.cache/huggingface/datasets
!rm -rf ~/.cache/huggingface/hub
!rm -rf ~/.cache/huggingface/transformers




In [12]:
!pip install -U datasets transformers


Collecting datasets
  Downloading datasets-3.6.0-py3-none-any.whl.metadata (19 kB)
Collecting transformers
  Downloading transformers-4.53.0-py3-none-any.whl.metadata (39 kB)
Collecting fsspec<=2025.3.0,>=2023.1.0 (from fsspec[http]<=2025.3.0,>=2023.1.0->datasets)
  Downloading fsspec-2025.3.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.6.0-py3-none-any.whl (491 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m491.5/491.5 kB[0m [31m8.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading transformers-4.53.0-py3-none-any.whl (10.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.8/10.8 MB[0m [31m56.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fsspec-2025.3.0-py3-none-any.whl (193 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m193.6/193.6 kB[0m [31m15.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: fsspec, transformers, datasets
  Attempting uninstall: fsspec
    Found existing installation: f

In [1]:
from datasets import load_dataset

# Force reload to prevent cache issues
raw_dataset = load_dataset("imdb", download_mode="force_redownload")
dataset = raw_dataset["train"].shuffle(seed=42).select(range(500))


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md: 0.00B [00:00, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/21.0M [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/20.5M [00:00<?, ?B/s]

unsupervised-00000-of-00001.parquet:   0%|          | 0.00/42.0M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/25000 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/25000 [00:00<?, ? examples/s]

Generating unsupervised split:   0%|          | 0/50000 [00:00<?, ? examples/s]

In [2]:
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments

# Tokenizer
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

# Tokenize
def tokenize(batch):
    return tokenizer(batch["text"], padding=True, truncation=True)

dataset = dataset.map(tokenize, batched=True)
dataset.set_format("torch", columns=["input_ids", "attention_mask", "label"])

# Load model
model = BertForSequenceClassification.from_pretrained("bert-base-uncased")

# Training setup
training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=1,
    per_device_train_batch_size=8,
    logging_steps=10,
    logging_dir="./logs",
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset,
)

trainer.train()


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

Map:   0%|          | 0/500 [00:00<?, ? examples/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize?ref=models
wandb: Paste an API key from your profile and hit enter:

 ··········




ValueError: API key must be 40 characters long, yours was 164

In [10]:
# Step 3: Load IMDb dataset and take a small subset (for demo)
raw_dataset = load_dataset("imdb")
dataset = raw_dataset["train"].shuffle(seed=42).select(range(500))

# Step 4: Load tokenizer
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

# Step 5: Tokenization function
def tokenize(batch):
    return tokenizer(batch["text"], padding=True, truncation=True)

dataset = dataset.map(tokenize, batched=True)
dataset.set_format("torch", columns=["input_ids", "attention_mask", "label"])

# Step 6: Load pre-trained BERT model for classification
model = BertForSequenceClassification.from_pretrained("bert-base-uncased")

# Step 7: Define training arguments
training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=1,
    per_device_train_batch_size=8,
    logging_steps=10,
    logging_dir="./logs",
)

# Step 8: Create Trainer and start training
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset,
)

trainer.train()

ValueError: Invalid pattern: '**' can only be an entire path component

In [1]:
# Step 1: Install Transformers (if not installed)
!pip install -q transformers

# Step 2: Import and download model
from transformers import AutoTokenizer, AutoModel
import os
import shutil

# Step 3: Define local save path
save_dir = "/content/all-MiniLM-L6-v2"

# Step 4: Load and save model + tokenizer
tokenizer = AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")
model = AutoModel.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")

tokenizer.save_pretrained(save_dir)
model.save_pretrained(save_dir)

# Step 5: Zip the folder
shutil.make_archive("/content/all-MiniLM-L6-v2", 'zip', save_dir)

# Step 6: Provide download link
from google.colab import files
files.download("/content/all-MiniLM-L6-v2.zip")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

#static data

In [3]:
import torch
import numpy as np
from transformers import AutoTokenizer, AutoModel
from sklearn.metrics.pairwise import cosine_similarity

# Load local model
model_path = "/content/all-MiniLM-L6-v2"
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModel.from_pretrained(model_path)

# Predefined FAQs (question → answer mapping)
faqs = [
    {"question": "How do I reset my password?", "answer": "Go to the login page and click on 'Forgot Password'."},
    {"question": "How can I contact support?", "answer": "Email us at support@example.com."},
    {"question": "What is your refund policy?", "answer": "We offer a 30-day money-back guarantee."},
    {"question": "How to upgrade my account?", "answer": "Go to your profile settings and click on 'Upgrade'."},
]


In [4]:
def get_sentence_embedding(text):
    inputs = tokenizer(text, return_tensors='pt', truncation=True, padding=True)
    with torch.no_grad():
        output = model(**inputs)
    embeddings = output.last_hidden_state
    attention_mask = inputs['attention_mask']
    mask = attention_mask.unsqueeze(-1).expand(embeddings.size())
    masked_embeddings = embeddings * mask
    summed = torch.sum(masked_embeddings, 1)
    counts = torch.clamp(mask.sum(1), min=1e-9)
    mean_pooled = summed / counts
    return mean_pooled.squeeze().numpy()


In [5]:
# Compute embeddings for all FAQs
faq_embeddings = [get_sentence_embedding(faq["question"]) for faq in faqs]


In [6]:
def answer_question(user_question):
    query_embedding = get_sentence_embedding(user_question)
    similarities = cosine_similarity([query_embedding], faq_embeddings)[0]
    best_match_idx = np.argmax(similarities)

    print(f"\nUser question: {user_question}")
    print(f"Best matched FAQ: {faqs[best_match_idx]['question']}")
    print(f"Bot Answer: {faqs[best_match_idx]['answer']}")


In [10]:
# Example user questions
answer_question("How do I get a food?")
#answer_question("How do I contact customer support
# answer_question("Is there a way to get my money refunded?")
# answer_question("Need help upgrading plan")



User question: How do I get a food?
Best matched FAQ: How to upgrade my account?
Bot Answer: Go to your profile settings and click on 'Upgrade'.


#dynmaic answer(worked good)

In [95]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Use a small, fast version of FLAN-T5
model_name = "google/flan-t5-large"

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)


tokenizer_config.json: 0.00B [00:00, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json: 0.00B [00:00, ?B/s]

config.json:   0%|          | 0.00/662 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/3.13G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

In [96]:
def generate_answer(question, context=None):
    # Optional: provide extra context (e.g. document or FAQ paragraph)
    if context:
        prompt = f"Context: {context}\nQuestion: {question}\nAnswer:"
    else:
        prompt = f"Question: {question}\nAnswer:"

    inputs = tokenizer(prompt, return_tensors="pt", truncation=True)

    outputs = model.generate(
        **inputs,
        max_length=512,
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
        num_return_sequences=1,
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)


In [97]:
# Try asking dynamic questions
#print(generate_answer("what is your name?"))
#print(generate_answer("What is the capital of India"))
print(generate_answer("How can I get a refund on my order?"))


You can return the product within 30 days from receipt of your order for a full refund.


#Build a Simple RAG Pipeline with MiniLM + Flan-T5

In [76]:
!pip install -q transformers sentence-transformers


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m1.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m75.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m53.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m35.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m1.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m6.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m13.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m127.9/127.9 MB[0m [31m7.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [98]:
# Sample documents (can be FAQs, docs, website text)
documents = [
    "To reset your password, go to the login page and click on 'Forgot Password'.",
    "For refund requests, please contact our support team within 30 days of purchase.",
    "You can upgrade your plan by going to your account settings.",
    "Support is available 24/7 through email and live chat."
]


In [99]:
from sentence_transformers import SentenceTransformer
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

retriever = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
doc_embeddings = retriever.encode(documents)


In [100]:
def retrieve_context(user_question, k=1):
    query_embedding = retriever.encode([user_question])
    sims = cosine_similarity(query_embedding, doc_embeddings)[0]
    top_k_idx = sims.argsort()[-k:][::-1]
    top_k_contexts = [documents[i] for i in top_k_idx]
    return " ".join(top_k_contexts)


In [101]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

model_name = "google/flan-t5-large"
tokenizer = AutoTokenizer.from_pretrained(model_name)
generator = AutoModelForSeq2SeqLM.from_pretrained(model_name)

def generate_answer(question):
    context = retrieve_context(question)
    prompt = f"Context: {context}\nQuestion: {question}\nAnswer:"

    inputs = tokenizer(prompt, return_tensors="pt", truncation=True)
    outputs = generator.generate(
        **inputs,
        max_length=128,
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
        num_return_sequences=1,
    )

    return tokenizer.decode(outputs[0], skip_special_tokens=True)


In [102]:
print(generate_answer("How can I upgrade my subscription?"))
#print(generate_answer("How can I upgrade my subscription?"))


by going to your account settings
