## Model: distilgpt2

In [1]:
%pip install pinecone-client==3.0.0 --quiet

Note: you may need to restart the kernel to use updated packages.


In [2]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from sentence_transformers import SentenceTransformer
from dotenv import load_dotenv

from dotenv import load_dotenv

from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import sent_tokenize

from langchain_core.documents.base import Document
from langchain_text_splitters.character import RecursiveCharacterTextSplitter

from pinecone import Pinecone, ServerlessSpec
import os
import re
import logging
import pandas as pd
import nltk

In [3]:
load_dotenv()
logging.basicConfig(level=logging.INFO)
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
PINECONE_INDEX_NAME = "youtube-transcripts"

In [4]:
embedder = SentenceTransformer("all-MiniLM-L6-v2")
tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-large")
model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-large")

INFO:sentence_transformers.SentenceTransformer:Use pytorch device_name: cpu
INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: all-MiniLM-L6-v2


In [5]:
from pinecone import Pinecone

pc = Pinecone(api_key=PINECONE_API_KEY)
index = pc.Index(PINECONE_INDEX_NAME)

In [6]:
def retrieve_chunks(query, top_k=10):
    query_embedding = embedder.encode(query, show_progress_bar=False).tolist()
    results = index.query(vector=query_embedding, top_k=top_k, include_metadata=True)
    seen = set()
    filtered = []
    for match in results["matches"]:
        text = match["metadata"].get("text", "").strip().lower()
        if text and text not in seen and len(text.split()) > 10:
            seen.add(text)
            filtered.append(match["metadata"]["text"])
    return filtered

In [9]:
# Example: set your query string here if not already defined
query = "What is ITSM in ServiceNow?"

query_keywords = query.lower().split()
chunks = retrieve_chunks(query)
chunks = [c for c in chunks if any(kw in c.lower() for kw in query_keywords)]

In [10]:
chunks = retrieve_chunks(query)
print("\n🔍 Retrieved Context Chunks:\n" + "-"*50)
for c in chunks:
    print(c[:250] + "\n" + "-"*50)


🔍 Retrieved Context Chunks:
--------------------------------------------------
the one used by servicenow to begin this process amelia paced json code shes copied from pingdum certain mandatory field must be configured at a minimum amelia finish up by configuring any additional field mapping needed based on what pingdum is prov
--------------------------------------------------
sure so a i said service operation work face which strongly recommend that you use service operation work face if you would like to use task intelligence for itsm because you would be able to use the complete power of both the product in service oper
--------------------------------------------------
available for service operation workspace so if you could repeat maybe what you said about word available sure so a i said service operation work face which strongly recommend that you use service operation work face if you would like to use task int
--------------------------------------------------
evans from the

In [11]:
def summarize_chunks(chunks, query):
    combined = " ".join(chunks)
    prompt = (
        f"You are an expert in ServiceNow. Based on the content below, answer the question in a well-structured, professional way.\n\n"
        f"Question: {query}\n\n"
        f"Context:\n{combined}\n\n"
        f"Answer (in 4-6 detailed sentences):"
    )
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1024)
    outputs = model.generate(**inputs, max_new_tokens=400)
    summary = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return summary

In [12]:
def generate_answer_from_summary(query, summary):
    prompt = (
        "You are a professional ServiceNow consultant. "
        "Based on the summary below, answer the question in a clear, complete, and professional tone. "
        "Your answer should be at least 4–6 sentences and include key terminology or examples when applicable.\n\n"
        f"Summary:\n{summary}\n\n"
        f"Question: {query}\nAnswer:"
    )
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1024)
    outputs = model.generate(**inputs, max_new_tokens=300)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

In [13]:
test_queries = [
    "What is ITSM in ServiceNow?",
    "Explain CMDB relationships.",
    "How does Incident Management work?",
    "what are the AI capabilities in ServiceNow?"
]

In [14]:
for query in test_queries:
    try:
        chunks = retrieve_chunks(query)
        if not chunks:
            print(f"⚠️ No relevant context found for query: {query}\n")
            continue

        answer = summarize_chunks(chunks, query)
        print(f"Q: {query}\nA: {answer}\n{'='*80}\n")

    except Exception as e:
        print(f"❌ Error for query '{query}': {e}")

Q: What is ITSM in ServiceNow?
A: event management

Q: Explain CMDB relationships.
A: data fabric and the rapridb

Q: How does Incident Management work?
A: the key is being proactive versus reactive and having the right supporting system to be able to achieve that in part one we saw how an issue with a company missioncritical internal ordering service wa causing a flood of new ticket in our service engineer queue we then showed how in parallel the operation engineer wa able to quickly determine probable root cause of the outage and trigger a service

Q: what are the AI capabilities in ServiceNow?
A: event apps and ai agent to optimize business process resulting in more efficient operation and better employee productivity

