## Model: distilgpt2

In [7]:
import os
import logging
import requests
from pinecone import Pinecone
from dotenv import load_dotenv
from sentence_transformers import SentenceTransformer
from langchain.prompts import PromptTemplate
from transformers import pipeline

verify Pinecone integration

In [None]:
import os
import logging
import requests
from pinecone import Pinecone
from dotenv import load_dotenv
from sentence_transformers import SentenceTransformer
from langchain.prompts import PromptTemplate
from transformers import pipeline
# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

def verify_pinecone_vectors(index_name='youtube-transcripts', expected_count=615):
    """Verify vectors in Pinecone index."""
    load_dotenv()
    pinecone_api_key = os.getenv('PINECONE_API_KEY')
    if not pinecone_api_key:
        logging.error('PINECONE_API_KEY not found in .env')
        raise ValueError('PINECONE_API_KEY not found')

    # Initialize Pinecone
    pc = Pinecone(api_key=pinecone_api_key)
    if index_name not in [idx['name'] for idx in pc.list_indexes()]:
        logging.error(f'Index {index_name} not found')
        raise ValueError(f'Index {index_name} not found')

    # Connect to index
    index = pc.Index(index_name)
    
    # Get index stats
    stats = index.describe_index_stats()
    total_vectors = stats['total_vector_count']
    
    # Verify vector count
    logging.info(f'Total vectors in index {index_name}: {total_vectors}')
    if total_vectors == expected_count:
        logging.info(f'Verification successful: {total_vectors} vectors match expected count {expected_count}')
    else:
        logging.warning(f'Verification failed: Found {total_vectors} vectors, expected {expected_count}')

    # Sample a few vector IDs to confirm format
    sample_ids = [f'{i}_0' for i in range(1, 6)]  # Check first chunk of videos 1-5
    for sample_id in sample_ids:
        try:
            result = index.fetch(ids=[sample_id])
            if sample_id in result['vectors']:
                logging.info(f'Found vector for chunk_id {sample_id}')
            else:
                logging.warning(f'No vector found for chunk_id {sample_id}')
        except Exception as e:
            logging.error(f'Error fetching chunk_id {sample_id}: {e}')

    return total_vectors

# Run verification
verify_pinecone_vectors()

2025-06-26 14:13:27,125 - INFO - Total vectors in index youtube-transcripts: 615
2025-06-26 14:13:27,125 - INFO - Verification successful: 615 vectors match expected count 615
2025-06-26 14:13:27,326 - INFO - Found vector for chunk_id 1_0
2025-06-26 14:13:27,528 - INFO - Found vector for chunk_id 2_0
2025-06-26 14:13:27,707 - INFO - Found vector for chunk_id 3_0
2025-06-26 14:13:27,887 - INFO - Found vector for chunk_id 4_0
2025-06-26 14:13:28,065 - INFO - Found vector for chunk_id 5_0


615

Implement Retrieval-Augmented Generation (RAG) Pipeline

In [None]:

# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

def retrieve_chunks(query, index_name='youtube-transcripts', top_k=5):
    """Retrieve top-k transcript chunks from Pinecone."""
    load_dotenv()
    pinecone_api_key = os.getenv('PINECONE_API_KEY')
    if not pinecone_api_key:
        logging.error('PINECONE_API_KEY not found in .env')
        raise ValueError('PINECONE_API_KEY not found')

    # Initialize Pinecone and SentenceTransformer
    pc = Pinecone(api_key=pinecone_api_key)
    index = pc.Index(index_name)
    embedder = SentenceTransformer('all-MiniLM-L6-v2')

    # Encode query
    query_embedding = embedder.encode(query, show_progress_bar=False).tolist()

    # Query Pinecone
    results = index.query(vector=query_embedding, top_k=top_k, include_metadata=True)
    chunks = [match['metadata']['text'] for match in results['matches']]
    
    logging.info(f'Retrieved {len(chunks)} chunks for query: {query}')
    return chunks

In [10]:
def generate_response(query, chunks):
    """Generate response using retrieved chunks and a lightweight LLM."""
    # Combine chunks into context
    context = "\n".join(chunks) if chunks else "No relevant information found."
    
    # Define prompt template
    prompt_template = PromptTemplate(
        input_variables=["context", "query"],
        template="Based on the following context, answer the query concisely:\nContext: {context}\nQuery: {query}\nAnswer:"
    )
    
    # Format prompt
    prompt = prompt_template.format(context=context, query=query)
    
    # Placeholder for lightweight LLM (e.g., Grok via xAI API)
    # Replace with actual API call if available, or use HuggingFace model
    try:
        llm = pipeline('text-generation', model='distilgpt2', device=-1)
        response = llm(prompt, max_length=150, truncation=True, do_sample=True, num_return_sequences=1)[0]['generated_text']
        answer = response.split('Answer:')[-1].strip() if 'Answer:' in response else response.strip()
    except Exception as e:
        logging.error(f'LLM request failed: {e}')
        answer = "Error generating response. Using context directly:\n" + context[:200]
    
    logging.info(f'Generated response for query: {query}')
    return answer

# Example usage
query = "What is an AI agent in ServiceNow?"
chunks = retrieve_chunks(query)
response = generate_response(query, chunks)
print(f"Query: {query}")
print(f"Response: {response}")

2025-06-26 14:13:28,136 - INFO - Use pytorch device_name: cpu
2025-06-26 14:13:28,137 - INFO - Load pretrained SentenceTransformer: all-MiniLM-L6-v2
2025-06-26 14:13:30,870 - INFO - Retrieved 5 chunks for query: What is an AI agent in ServiceNow?
Device set to use cpu
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Both `max_new_tokens` (=256) and `max_length`(=150) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
2025-06-26 14:13:46,144 - INFO - Generated response for query: What is an AI agent in ServiceNow?


Query: What is an AI agent in ServiceNow?
Response: that we are going to go the other direction, maybe you can have some kind of ai agent in your team to help you figure out what to do with that.
So let me just say that you are interested in working with ai agency in this area and you are a really strong supporter of the A-Team. There are a number of organizations that have been involved with the A-Team and we all want to see some of our new A-Team members and they are very excited to work with us.
So it is truly interesting to see how this can be a really good way for us to get more people to come and work with us.
Now that we have been getting a lot of people involved in the A-Team there is a lot of interest in the A-Team. We have the backing of a few people who are really passionate about this and we are looking forward to working with them as well.
The A-Team has been working on a lot of A-Team members and it seems to be the very first step in understanding the A-Team and the A-Tea

Built Chatbot interface

In [13]:
test_queries = [
    "What is ITSM in ServiceNow?",
    "Explain CMDB relationships.",
    "How does Incident Management work?"
]

for query in test_queries:
    chunks = retrieve_chunks(query)
    result = generate_response(query, chunks)
    print(f"Q: {query}\nA: {result}\n")

2025-06-26 14:14:57,731 - INFO - Use pytorch device_name: cpu
2025-06-26 14:14:57,734 - INFO - Load pretrained SentenceTransformer: all-MiniLM-L6-v2


2025-06-26 14:14:59,910 - INFO - Retrieved 5 chunks for query: What is ITSM in ServiceNow?
Device set to use cpu
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Both `max_new_tokens` (=256) and `max_length`(=150) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
2025-06-26 14:15:13,096 - INFO - Generated response for query: What is ITSM in ServiceNow?
2025-06-26 14:15:13,147 - INFO - Use pytorch device_name: cpu
2025-06-26 14:15:13,148 - INFO - Load pretrained SentenceTransformer: all-MiniLM-L6-v2


Q: What is ITSM in ServiceNow?
A: it is a command line system and it is easy to use for this.
So if you want to use service operation work face if you want to use service operation work face if you want to use service operation work face if you would like to use service operation work face if you would like to use service operation work face if you would like to use service operation work face if you would like to use service operation work face if you would like to use service operation work face if you would like to use service operation work face if you would like to use service operation work face if you would like to use service operation work face if you would like to use service operation work face if you would like to use service operation work face if you would like to use service operation work face if you would like to use service operation work face if you would like to use service operation work face if you would like to use service operation work face if you would like to

2025-06-26 14:15:15,839 - INFO - Retrieved 5 chunks for query: Explain CMDB relationships.
Device set to use cpu
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Both `max_new_tokens` (=256) and `max_length`(=150) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
2025-06-26 14:15:30,655 - INFO - Generated response for query: Explain CMDB relationships.
2025-06-26 14:15:30,704 - INFO - Use pytorch device_name: cpu
2025-06-26 14:15:30,705 - INFO - Load pretrained SentenceTransformer: all-MiniLM-L6-v2


Q: Explain CMDB relationships.
A: b this approach can still work right i mean they dont have to be bound down to say that you know i need to get my cmdb like 70 to ready state exactly it pretty much decoupled yeah and i think i mentioned this very briefly in the beginning that whole data strategy the service now ha around work for data fabric and the rapridb which is kind of pretty advanced way of ive been talking about it this past week, i think i mentioned this very briefly in the beginning that whole data strategy the service now ha around work for data fabric and the rapridb which is kind of pretty advanced way of ive been talking about it this past week, i think i mentioned this very briefly in the beginning that whole data strategy the service now ha around work for data fabric and the rapridb which is kind of pretty advanced way of ive been talking about it this past week, i think i mentioned this very briefly in the beginning that whole data strategy the service now ha around w

2025-06-26 14:15:33,689 - INFO - Retrieved 5 chunks for query: How does Incident Management work?
Device set to use cpu
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Both `max_new_tokens` (=256) and `max_length`(=150) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
2025-06-26 14:15:48,844 - INFO - Generated response for query: How does Incident Management work?


Q: How does Incident Management work?
A: knowledge that they already have.
You also mentioned that a local solution which uses a local service provider with the same customer base is a great solution that can be used for multiple business processes.
To summarize, this solution will be more than just one. It will be a great solution that will solve the biggest problem of all for you and your team and the team. You will continue to be able to get the whole team involved and if you can find something that will help you get the whole business involved and if you can help with the solution, you can be the best solution in the world.
Now that you‡ have been through that journey and you are ready to head back to your normal role of business management you will be looking back to the work you did for the first time, your next step is to get started and start looking forward to making your work more manageable.
Now that you have been through that journey and you are ready to head back to your n