In [1]:
# Install required libraries
!pip install faiss-cpu sentence-transformers
!pip install transformers accelerate
# Import required libraries
import faiss
import json
import pandas as pd
from sentence_transformers import SentenceTransformer
import numpy as np
from huggingface_hub import login
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline


Collecting faiss-cpu
  Downloading faiss_cpu-1.10.0-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (4.4 kB)
Downloading faiss_cpu-1.10.0-cp310-cp310-manylinux_2_28_x86_64.whl (30.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m30.7/30.7 MB[0m [31m61.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.10.0


**FAISS for Vector Search**
This section embeds documents and uses FAISS for similarity search in our RAG pipeline.
The data that we are using to train the RAG is set of 1000 blogs written by Seth Godin. I have generated this dataset which updates every month with the help of a Kaggle scheduled runs of python scripts.

**Data Cleaning**

In [2]:
csv_url = "https://raw.githubusercontent.com/AnantShinde/sethsblog_genai/main/blog_posts_df_wo_null_val_1_to_1000.csv"
df = pd.read_csv(csv_url)
df = df.dropna(subset=["Content"])  # Remove rows without content

**Reorganize data**

Rearrange data to hold title and content pairs

In [3]:
# Combine Title + Content for better semantic representation
documents = (df['Title'].fillna('') + ". " + df['Content'].fillna('')).tolist()

**Generate embeddings**

In [4]:
# Load the embedding model
embedder = SentenceTransformer('all-MiniLM-L6-v2')

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [5]:
model = SentenceTransformer('all-MiniLM-L6-v2')
embeddings = model.encode(documents, show_progress_bar=True)

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

**Rank Embeddings**

Creating a FAISS index for fast vector similarity search — which is a core step in a RAG (Retrieval-Augmented Generation) pipeline

In [6]:
embedding_dim = embeddings.shape[1]
index = faiss.IndexFlatL2(embedding_dim)
index.add(np.array(embeddings))

**Search relevant blogs**

Define a function to search top 5 most similar blog to the query and return
1. Title
2. Content
3. Date

In [7]:
def search_blog(query, k=5):
    query_embedding = embedder.encode([query])  # ✅ CORRECT — 'embedder' is your sentence-transformer
#    query_embedding = model.encode([query])
    distances, indices = index.search(query_embedding, k)
    results = df.iloc[indices[0]]
    return results[['Title', 'Content', 'Date']]

# Example use case
Demonstrate output of the search_blog fucntion

In [8]:
search_blog("Marketing insights about human behavior")

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Unnamed: 0,Title,Content,Date
949,But it’s not that hard!,Mark Hurstsent me over to:tangentialism: Shake...,"July 20, 2005"
874,What Every Good Marketer Knows,“Godin reinforces what good marketers know.”Th...,
740,The Tolstoy Rule,All marketing failures are alike; every market...,"March 3, 2005"
126,Marketers are lying scum,Here’s an interview I did with India Times. I ...,
900,Buzz marketing,Ron McDaniel would like you to check out:Buzz ...,


**Setup Llama model for text generation**

In [9]:
# Load LLaMA 3 
from huggingface_hub import login
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

# Load HF token
with open("/kaggle/input/hf-rag-key/HF_RAG_KEY.txt", "r") as f:
    hf_token = f.read().strip()

login(token=hf_token)

# Load LLaMA model
#model_id = "meta-llama/Meta-Llama-3-8B"
#tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token)
#model = AutoModelForCausalLM.from_pretrained(model_id, token=hf_token)
#llama_chat = pipeline("text-generation", model=model, tokenizer=tokenizer)

In [10]:
# Load Timy LLaMA to work with limited memory 
model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token)
model = AutoModelForCausalLM.from_pretrained(model_id, token=hf_token)
llama_chat = pipeline("text-generation", model=model, tokenizer=tokenizer)

tokenizer_config.json:   0%|          | 0.00/1.29k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/551 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/608 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.20G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

Device set to use cuda:0


**A function to generate a response in chatbot**

In [11]:
def ask_with_llama(query, k=3):
    context_df = search_blog(query, k)
    context = ""
    for _, row in context_df.iterrows():
        context += f"Title: {row['Title']}\nContent: {row['Content']}\nDate: {row['Date']}\n\n"

    prompt = f"""Use the blog excerpts below to answer the question.

Context:
********************************************
{context}
********************************************
Question: {query}
Answer:"""
    response = llama_chat(prompt, max_new_tokens=300, do_sample=True, temperature=0.7)
    return response[0]['generated_text']

In [12]:
#!pip install streamlit

In [13]:
#import streamlit as st

#st.title("🧠 Seth Godin Blog Chatbot")
#st.write("Ask a question, and I'll answer using Seth's blog posts!")

#user_query = st.text_input("Your question:", placeholder="e.g., What does Seth say about fear and creativity?")

#if user_query:
 #   with st.spinner("Thinking..."):
  #      answer = ask_with_llama(user_query)
   #     st.subheader("📘 Answer")
    #    st.write(answer)

In [14]:
user_query = "What does Seth say about fear and creativity?"
answer = ask_with_llama(user_query)
print(answer)

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Use the blog excerpts below to answer the question.

Context:
********************************************
Title: “I just write this stuff down”
Content: A new interview for you.One Degree – Five Questions For Seth Godin.
Date: nan

Title: [the] Jason Murphy Show reviews Free Prize Inside
Content: …Once Seth has established the importance of a Free Prize he then describes how to Sell The Idea, or present it, to the company. This isn’t just your boss or the suits in the boardroom. This includes your whole company. Your boss, your peers, your reports, your department, your other department, the marketing team, the frontline…everybody. He has a long list of example Tactics that he gives to help you do just that. In my opinion, this is the strongest and most helpful chapter of the book. Everything from the Really Bad Powerpoint to the Painting a Portrait. Seth knocks out a lot of myths about idea presentations and hones in an the most overlooked and underestimated tactics of getting your p