# NLP Analysis of the Book of Isaiah: Techniques and Applications



In [None]:
# .env.example
# Copy this file to .env and fill in your credentials.
# NEVER commit your real .env to version control.

API_KEY=YOUR_API_KEY
BIBLE_ID=de4e12af7f28f599-02
BASE_URL=https://api.scripture.api.bible/v1/bibles

GEMINI_API_KEY=YOUR_GEMINI_KEY


Install Required Packages

In [None]:
# Environment bootstrap cell: swaps out numpy/gensim/scipy, installs the
# notebook's dependencies, then terminates the kernel process.
# Uninstall conflicting packages
!pip uninstall -y numpy gensim scipy -q

# Install required packages. Only numpy, gensim, scipy and google-generativeai
# are pinned to exact versions; every other package resolves to whatever pip
# selects at install time.
!pip install -q python-dotenv requests pandas sentence-transformers umap-learn plotly spacy vaderSentiment beautifulsoup4 transformers torch numpy==1.26.4 matplotlib tqdm scikit-learn gensim==4.3.2 scipy==1.11.4 seaborn google-generativeai==0.8.4

# Install spaCy language model
!python -m spacy download en_core_web_sm -q

# Verify installations (optional)
!pip list | grep -E 'numpy|gensim|scipy|google-generativeai|spacy'

# Restart the kernel to reload dependencies
# os._exit() ends the interpreter process immediately, so nothing after this
# line runs in the current session and the remaining cells must be executed
# once a new kernel is up.
# NOTE(review): presumably relies on the hosting environment (e.g. Colab)
# restarting the kernel automatically - confirm for other environments.
import os
os._exit(00)

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.0/61.0 kB[0m [31m738.2 kB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.4/60.4 kB[0m [31m1.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m18.3/18.3 MB[0m [31m53.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m26.7/26.7 MB[0m [31m14.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m36.4/36.4 MB[0m [31m29.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m126.0/126.0 kB[0m [31m7.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m1.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m88.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
# Environment and API
import os
from dotenv import load_dotenv
import requests
import time
import concurrent.futures

# Text Processing
import re
from bs4 import BeautifulSoup
import spacy

# Data Handling
import pandas as pd
import numpy as np
from tqdm import tqdm

# NLP and Machine Learning
from transformers import BertTokenizer, BertModel
from transformers import BartTokenizer, BartForConditionalGeneration
import torch
from sklearn.cluster import KMeans
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.decomposition import LatentDirichletAllocation
from sklearn.feature_extraction import text
from sentence_transformers import SentenceTransformer
from gensim import corpora
from gensim.models import CoherenceModel
import umap

# Visualization
import matplotlib.pyplot as plt

# Google Gemini SDK
import google.generativeai as genai

Load Environment Variables

In [None]:
# Read the API settings from the process environment; load_dotenv() first
# merges in any values found in a local .env file.
load_dotenv()
API_KEY = os.getenv("API_KEY")
BIBLE_ID = os.getenv("BIBLE_ID")
BASE_URL = os.getenv("BASE_URL")
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")

# Fail fast on missing Bible API settings. Otherwise the ingestion cell would
# build URLs such as "None/None/books" and fail with a much less obvious error.
# GEMINI_API_KEY is checked separately in the summarization cell, so the
# analysis cells can still run without it.
_missing_settings = [
    name
    for name, value in (("API_KEY", API_KEY), ("BIBLE_ID", BIBLE_ID), ("BASE_URL", BASE_URL))
    if not value
]
if _missing_settings:
    raise ValueError(f"{', '.join(_missing_settings)} not set in .env file")

API Ingestion Functions

In [None]:
# API headers
# Passed as the `headers` argument of every requests.get() call made by the
# fetch helpers in this cell; the key value comes from the API_KEY setting.
headers = {"api-key": API_KEY}

# Seconds to wait for the API before giving up. Without an explicit timeout,
# requests.get() can block indefinitely on a stalled connection, which would
# hang the whole verse-by-verse ingestion loop.
API_TIMEOUT_SECONDS = 30


def _get_api_data(path):
    """GET ``{BASE_URL}/{BIBLE_ID}/{path}`` and return the payload's "data" field.

    Args:
        path: Resource path below the Bible root, e.g. ``"books"``.

    Returns:
        The value stored under the ``"data"`` key of the JSON response.

    Raises:
        requests.HTTPError: If the response status is 4xx/5xx.
        requests.Timeout: If the server does not answer within
            ``API_TIMEOUT_SECONDS``.
    """
    url = f"{BASE_URL}/{BIBLE_ID}/{path}"
    response = requests.get(url, headers=headers, timeout=API_TIMEOUT_SECONDS)
    response.raise_for_status()
    return response.json()["data"]


# Fetch books from API
def get_books():
    """Return the "data" payload of the books endpoint for the configured Bible."""
    return _get_api_data("books")


# Fetch chapters for a book
def get_chapters(book_id):
    """Return the "data" payload of the chapters endpoint for ``book_id``."""
    return _get_api_data(f"books/{book_id}/chapters")


# Fetch verses for a chapter
def get_verses(chapter_id):
    """Return the "data" payload of the verses endpoint for ``chapter_id``."""
    return _get_api_data(f"chapters/{chapter_id}/verses")


# Fetch verse details
def get_verse_detail(verse_id):
    """Return the "data" payload of the single-verse endpoint for ``verse_id``."""
    return _get_api_data(f"verses/{verse_id}")

def _split_reference(reference):
    """Split a verse reference such as ``"Isaiah 1:2"`` into its parts.

    The reference is split on its LAST whitespace run so multi-word book names
    stay intact, and on the FIRST colon so an unexpected extra colon cannot
    raise.

    Returns:
        ``(book, chapter, verse)`` as strings. All three are ``""`` when the
        reference has fewer than two whitespace-separated parts; ``verse`` is
        ``""`` when the chapter/verse part has no colon.
    """
    parts = (reference or "").rsplit(maxsplit=1)
    if len(parts) < 2:
        return "", "", ""
    book_name, chapter_verse = parts
    chapter_num, _, verse_num = chapter_verse.partition(":")
    return book_name, chapter_num, verse_num


# Ingest Isaiah verses
def ingest_isaiah():
    """Download every verse of the Book of Isaiah from the Bible API.

    Looks up the first book whose name contains "isaiah" (case-insensitive),
    walks each of its chapters and fetches every verse individually, pausing
    0.1 s between verse requests.

    Returns:
        A list of dicts with the string keys ``"book"``, ``"chapter"``,
        ``"verse"`` (parsed from the verse's ``reference``) and ``"text"``
        (the verse's ``content`` field, ``""`` if absent).

    Raises:
        ValueError: If no book matching "isaiah" is returned by the API.
    """
    books = get_books()
    isaiah_book = next((book for book in books if "isaiah" in book["name"].lower()), None)
    if not isaiah_book:
        raise ValueError("Book of Isaiah not found.")
    isaiah_book_id = isaiah_book["id"]
    print(f"Found Isaiah with book ID: {isaiah_book_id}")

    chapters = get_chapters(isaiah_book_id)
    print(f"Found {len(chapters)} chapters in Isaiah.")

    isaiah_verses = []
    for chapter in chapters:
        chapter_id = chapter["id"]
        print(f"Processing Chapter: {chapter.get('reference', chapter_id)}")
        for verse in get_verses(chapter_id):
            verse_detail = get_verse_detail(verse["id"])
            book_name, chapter_num, verse_num = _split_reference(verse_detail.get("reference", ""))
            isaiah_verses.append({
                "book": book_name,
                "chapter": chapter_num,
                "verse": verse_num,
                "text": verse_detail.get("content", "")
            })
            time.sleep(0.1)  # Rate limiting
    print(f"Ingested {len(isaiah_verses)} verses from Isaiah.")
    return isaiah_verses

Ingest verses

In [None]:
# Download all verses (ingest_isaiah issues one request per verse and sleeps
# 0.1 s after each, so this cell is slow) and load the resulting list of
# dicts into a DataFrame with the columns book / chapter / verse / text.
verses = ingest_isaiah()
df_verses = pd.DataFrame(verses)

Found Isaiah with book ID: ISA
Found 67 chapters in Isaiah.
Processing Chapter: Isaiah
Processing Chapter: Isaiah 1
Processing Chapter: Isaiah 2
Processing Chapter: Isaiah 3
Processing Chapter: Isaiah 4
Processing Chapter: Isaiah 5
Processing Chapter: Isaiah 6
Processing Chapter: Isaiah 7
Processing Chapter: Isaiah 8
Processing Chapter: Isaiah 9
Processing Chapter: Isaiah 10
Processing Chapter: Isaiah 11
Processing Chapter: Isaiah 12
Processing Chapter: Isaiah 13
Processing Chapter: Isaiah 14
Processing Chapter: Isaiah 15
Processing Chapter: Isaiah 16
Processing Chapter: Isaiah 17
Processing Chapter: Isaiah 18
Processing Chapter: Isaiah 19
Processing Chapter: Isaiah 20
Processing Chapter: Isaiah 21
Processing Chapter: Isaiah 22
Processing Chapter: Isaiah 23
Processing Chapter: Isaiah 24
Processing Chapter: Isaiah 25
Processing Chapter: Isaiah 26
Processing Chapter: Isaiah 27
Processing Chapter: Isaiah 28
Processing Chapter: Isaiah 29
Processing Chapter: Isaiah 30
Processing Chapter: Is

Text Cleaning

In [None]:
# Load spaCy model for text cleaning
# `nlp` is read as a global by enhanced_clean_text(), which uses the pipeline
# for stop-word flags and lemmas.
nlp = spacy.load("en_core_web_sm")

# Clean text from HTML and preprocess
def enhanced_clean_text(html_text):
    """Turn an HTML verse fragment into a space-joined string of lemmas.

    Steps, in order: strip markup, collapse whitespace, delete digit runs,
    delete every character that is neither a word character nor whitespace,
    lowercase and trim, then keep the lemma of each alphabetic non-stop-word
    token produced by the global spaCy pipeline ``nlp``.

    Args:
        html_text: Raw verse content; anything that is not a ``str`` yields "".

    Returns:
        The cleaned, lemmatised text (possibly empty).
    """
    if not isinstance(html_text, str):
        return ""

    plain = BeautifulSoup(html_text, "html.parser").get_text(separator=" ")

    # Substitutions are applied in this exact order.
    substitutions = (
        (r'\s+', ' '),
        (r'\d+', ''),
        (r'[^\w\s]', ''),
    )
    for pattern, replacement in substitutions:
        plain = re.sub(pattern, replacement, plain)

    lemmas = []
    for token in nlp(plain.lower().strip()):
        if token.is_stop or not token.is_alpha:
            continue
        lemmas.append(token.lemma_)
    return " ".join(lemmas).strip()

# Apply cleaning
df_verses["text"] = df_verses["text"].apply(enhanced_clean_text)

# ingest_isaiah() stores "" (not NaN) in `book` when a reference cannot be
# parsed (e.g. the book's introduction entry), so dropna() alone would keep
# those rows. Drop rows whose book is missing OR blank.
has_book = df_verses["book"].notna() & df_verses["book"].astype(str).str.strip().ne("")
df_verses = df_verses[has_book].reset_index(drop=True)

print("Preview of processed data:")
print(df_verses[["book", "chapter", "verse", "text"]].head(3))
df_verses.to_csv("isaiah_cleaned_text.csv", index=False)

Preview of processed data:
     book chapter verse                                               text
0                                                      book prophet isaiah
1  Isaiah       1     1  vision isaiah son amoz see concern judah jerus...
2  Isaiah       1     2  hear o heaven ear o earth lord hath speak nour...


Word Embeddings

In [None]:
# Set up BERT for embeddings
# `device`, `tokenizer` and `model` are read as globals by
# get_word_embeddings(). The model is moved to the GPU when one is available
# and switched to eval mode.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertModel.from_pretrained('bert-base-uncased').to(device)
model.eval()

# Generate word embeddings
def get_word_embeddings(text, max_length=128):
    """Return one contextual embedding per word of ``text``.

    The text is encoded with the global ``tokenizer`` (truncated to
    ``max_length`` tokens) and run through the global ``model``. Word-piece
    tokens (those starting with ``"##"``) are merged back into the word they
    continue, and the word's embedding is the arithmetic mean of the last
    hidden states of all of its pieces.

    The tokenizer's special tokens (``tokenizer.all_special_tokens``) are
    skipped so they do not appear as "words" in downstream clustering.

    Args:
        text: The (already cleaned) verse text.
        max_length: Maximum number of tokens passed to the model.

    Returns:
        A list of ``(word, embedding)`` tuples in token order, where
        ``embedding`` is a 1-D NumPy array. Empty if the text yields no
        non-special tokens.
    """
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=max_length).to(device)
    with torch.no_grad():
        outputs = model(**inputs)
    hidden_states = outputs.last_hidden_state[0]
    tokens = tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])
    special_tokens = set(tokenizer.all_special_tokens)

    # Each entry is [word_string, [piece_embedding, ...]]. Every word keeps ITS
    # OWN piece embeddings; pairing a word with the embedding of the token that
    # follows it would shift all vectors by one position.
    word_groups = []
    for token, emb in zip(tokens, hidden_states):
        if token in special_tokens:
            continue
        is_continuation = token.startswith("##")
        if is_continuation and word_groups:
            word_groups[-1][0] += token[2:]
            word_groups[-1][1].append(emb)
        else:
            # A continuation piece with nothing to attach to starts a new word.
            word_groups.append([token[2:] if is_continuation else token, [emb]])

    # True mean over all pieces (a pairwise running average would over-weight
    # later pieces for words split into three or more tokens).
    return [
        (word, torch.stack(pieces).mean(dim=0).cpu().numpy())
        for word, pieces in word_groups
    ]

print("Generating word embeddings for Isaiah verses...")
# NOTE: the loop variable is deliberately not called `text`, which is the
# name of the sklearn module imported at the top of the notebook.
all_word_embeddings = []
for verse_text in tqdm(df_verses["text"].tolist()):
    all_word_embeddings.append(get_word_embeddings(verse_text))

# Group every occurrence of a word as (verse position, embedding) ...
word_to_embeddings = {}
for verse_idx, verse_embs in enumerate(all_word_embeddings):
    for word, emb in verse_embs:
        word_to_embeddings.setdefault(word, []).append((verse_idx, emb))

# ... then average the embeddings of each word across all its occurrences.
word_embeddings_avg = {}
for word, occurrences in word_to_embeddings.items():
    vectors = [vector for _, vector in occurrences]
    word_embeddings_avg[word] = np.mean(vectors, axis=0)
print("Average word embeddings calculated.")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Generating word embeddings for Isaiah verses...


100%|██████████| 1293/1293 [00:12<00:00, 104.77it/s]


Average word embeddings calculated.


Clustering and Visualization

In [None]:
# Prepare word embeddings for clustering
# `words` fixes the row order of `embeddings`: row i is the averaged vector of
# words[i]. Requires KMeans and matplotlib.pyplot (as `plt`) to be in scope.
words = list(word_embeddings_avg.keys())
embeddings = np.array([word_embeddings_avg[word] for word in words])

# Step 1: Compute WCSS for a range of cluster numbers to find the optimal k
k_range = range(2, 16)  # Test 2 to 15 clusters
wcss = []
for k in k_range:
    kmeans = KMeans(n_clusters=k, random_state=42, n_init=10)
    kmeans.fit(embeddings)
    wcss.append(kmeans.inertia_)
    print(f"WCSS for k={k}: {wcss[-1]:.2f}")

# Step 2: Plot the Elbow Curve to identify the optimal k
# The figure is written to disk and closed rather than displayed inline.
plt.figure(figsize=(8, 5))
plt.plot(k_range, wcss, marker='o')
plt.xlabel('Number of Clusters (k)')
plt.ylabel('Within-Cluster Sum of Squares (WCSS)')
plt.title('Elbow Method for Optimal Number of Clusters')
plt.savefig('elbow_curve.png', dpi=300)
plt.close()
print("Elbow curve plot saved to 'elbow_curve.png'")

# Step 3: Select the optimal number of clusters (elbow point)
# diff2[i] is the second difference centred on wcss[i + 1], so the k with the
# largest second difference sits at index argmax + 1 of k_range.
diff = np.diff(wcss)  # First derivative
diff2 = np.diff(diff)  # Second derivative
optimal_k = list(k_range)[np.argmax(diff2) + 1]  # Elbow point
print(f"Optimal number of clusters: {optimal_k}")

# Step 4: Fit the final K-Means model with the selected k and label each word
kmeans = KMeans(n_clusters=optimal_k, random_state=42, n_init=10)
clusters = kmeans.fit_predict(embeddings)

# Step 5: For every cluster, list the 10 member words nearest to its centroid
print("\nTop 10 words per cluster (closest to centroid):")
cluster_centers = kmeans.cluster_centers_
for cluster_id, centroid in enumerate(cluster_centers):
    # Row positions (into `words` / `embeddings`) of this cluster's members
    cluster_indices = np.flatnonzero(clusters == cluster_id)

    # Euclidean distance of each member to the centroid
    distances = np.linalg.norm(embeddings[cluster_indices] - centroid, axis=1)

    # Map the 10 smallest distances back to global row positions
    nearest = cluster_indices[np.argsort(distances)[:10]]
    top_words = [words[i] for i in nearest]

    print(f"Cluster {cluster_id}: {', '.join(top_words)}")

# Step 6: Reduce to 3D with UMAP and visualize
# Requires `umap` and matplotlib.pyplot (as `plt`) to be in scope.
reducer = umap.UMAP(n_components=3, random_state=42, n_jobs=1)
embeddings_3d = reducer.fit_transform(embeddings)
df_vis = pd.DataFrame({'word': words, 'x': embeddings_3d[:, 0], 'y': embeddings_3d[:, 1], 'z': embeddings_3d[:, 2], 'cluster': clusters})
fig = plt.figure(figsize=(12, 10))
ax = fig.add_subplot(111, projection='3d')
# plt.get_cmap replaces plt.cm.get_cmap, which is deprecated and emits a
# warning (and is absent from newer Matplotlib releases).
cmap = plt.get_cmap('tab10', optimal_k)
for cluster_id in range(optimal_k):
    cluster_points = df_vis[df_vis['cluster'] == cluster_id]
    ax.scatter(cluster_points['x'], cluster_points['y'], cluster_points['z'], label=f'Cluster {cluster_id}', s=15, color=cmap(cluster_id))
# Clamp each axis to the data range so the points fill the plot
ax.set_xlim(df_vis['x'].min(), df_vis['x'].max())
ax.set_ylim(df_vis['y'].min(), df_vis['y'].max())
ax.set_zlim(df_vis['z'].min(), df_vis['z'].max())
ax.set_xlabel('UMAP1')
ax.set_ylabel('UMAP2')
ax.set_zlabel('UMAP3')
ax.set_title(f'3D UMAP of Word Clusters (k={optimal_k})')
ax.view_init(elev=15, azim=45)
# NOTE(review): presumably intended as a camera-distance (zoom) setting;
# confirm it still has an effect on the installed Matplotlib version.
ax.dist = 8
ax.legend(loc='upper left', bbox_to_anchor=(1.05, 1))
plt.savefig('isaiah_clusters_zoomed.png', dpi=300, bbox_inches='tight')
plt.close()
print("3D UMAP visualization saved to 'isaiah_clusters_zoomed.png'")

WCSS for k=2: 154200.89
WCSS for k=3: 144756.42
WCSS for k=4: 139926.20
WCSS for k=5: 135475.44
WCSS for k=6: 132080.62
WCSS for k=7: 129706.39
WCSS for k=8: 127501.47
WCSS for k=9: 126340.32
WCSS for k=10: 125626.02
WCSS for k=11: 123430.36
WCSS for k=12: 123310.77
WCSS for k=13: 122242.59
WCSS for k=14: 121852.41
WCSS for k=15: 120142.55
Elbow curve plot saved to 'elbow_curve.png'
Optimal number of clusters: 3

Top 10 words per cluster (closest to centroid):
Cluster 0: forth, away, water, place, land, people, way, hand, work, mountain
Cluster 1: prevail, [SEP], heal, blaspheme, gomorrah, bank, languish, rereward, immanuel, knee
Cluster 2: tongue, fear, isle, form, help, hungry, shame, stream, rebuke, bowel


  cmap = plt.cm.get_cmap('tab10', optimal_k)


3D UMAP visualization saved to 'isaiah_clusters_zoomed.png'


TF-IDF and LDA

In [None]:

# Define stop words
# `text` here is the sklearn.feature_extraction.text module; its English stop
# word set is extended with archaic pronouns/auxiliaries listed below.
default_stop_words = text.ENGLISH_STOP_WORDS
custom_stop_words = list(default_stop_words.union({"shall", "ye", "thee", "thy", "thou", "unto", "hath"}))

# Set up CountVectorizer with custom stop words
# max_df=0.8 drops terms present in more than 80% of verses; min_df=2 drops
# terms present in fewer than two verses.
vectorizer = CountVectorizer(stop_words=custom_stop_words, max_df=0.8, min_df=2)
dtm = vectorizer.fit_transform(df_verses['text'].tolist())

# Prepare texts and dictionary for coherence calculation
# Each verse is tokenised by plain whitespace splitting for gensim.
texts = [text.split() for text in df_verses['text']]
from gensim import corpora
dictionary = corpora.Dictionary(texts)

# Function to get top words for each topic
def get_top_words(lda_model, feature_names, N=10):
    """Return the ``N`` highest-weighted feature names of every topic.

    Args:
        lda_model: Fitted model exposing ``components_`` (one weight row per
            topic).
        feature_names: Sequence mapping a column index to its term.
        N: How many terms to keep per topic.

    Returns:
        A list with one entry per topic; each entry is a list of up to ``N``
        terms ordered from highest to lowest weight.
    """
    return [
        [feature_names[idx] for idx in weights.argsort()[::-1][:N]]
        for weights in lda_model.components_
    ]

# Test a range of topic numbers and compute coherence scores
# For each candidate topic count an LDA model is fitted on `dtm` and scored
# with gensim's c_v coherence over its top-10 words per topic.
topic_range = range(2, 16)  # Testing 2 to 15 topics
coherence_scores = []
feature_names = vectorizer.get_feature_names_out()

for num_topics in topic_range:
    lda_model = LatentDirichletAllocation(n_components=num_topics, random_state=42, n_jobs=-1, verbose=0)
    lda_model.fit(dtm)
    top_words = get_top_words(lda_model, feature_names, N=10)
    coherence_model = CoherenceModel(topics=top_words, texts=texts, dictionary=dictionary, coherence='c_v')
    coherence_scores.append(coherence_model.get_coherence())
    print(f"Coherence for {num_topics} topics: {coherence_scores[-1]:.4f}")

# Plot coherence scores
# The figure is written to disk and closed rather than displayed inline.
plt.figure(figsize=(8, 5))
plt.plot(topic_range, coherence_scores, marker='o')
plt.xlabel('Number of Topics')
plt.ylabel('Coherence Score')
plt.title('Coherence Score vs. Number of Topics')
plt.savefig('coherence_scores.png', dpi=300)
plt.close()
print("Coherence scores plot saved to 'coherence_scores.png'")

# Select the optimal number of topics
# The topic count with the highest coherence score wins (first one on ties,
# per np.argmax).
optimal_num_topics = topic_range[np.argmax(coherence_scores)]
print(f"Optimal number of topics: {optimal_num_topics}")

# Fit the final LDA model with the optimal number of topics
lda = LatentDirichletAllocation(n_components=optimal_num_topics, random_state=42, n_jobs=-1, verbose=1)
lda.fit(dtm)

# Add topic distributions to DataFrame
# Adds one `topic_<i>` column per topic plus `dominant_topic`, the index of
# the highest-valued topic column for each verse. This mutates df_verses.
topic_distributions = lda.transform(dtm)
for i in range(optimal_num_topics):
    df_verses[f'topic_{i}'] = topic_distributions[:, i]
df_verses['dominant_topic'] = topic_distributions.argmax(axis=1)

# Preview and save DataFrame with topics
print("\nSample verses with dominant topics:")
print(df_verses[['book', 'chapter', 'verse', 'text', 'dominant_topic']].head(5))
df_verses.to_csv("isaiah_with_topics.csv", index=False)
print("Data with topic distributions saved to 'isaiah_with_topics.csv'")

# Extract and print top words per topic
# Reuses get_top_words() so the final model's ranking matches the one used
# during the coherence sweep instead of duplicating the argsort logic.
top_words_per_topic = get_top_words(lda, feature_names, N=10)
for topic_idx, top_features in enumerate(top_words_per_topic):
    print(f"Topic {topic_idx}: {', '.join(top_features)}")

# Create and save topic table
# Transposed so each column is a topic and each row a rank.
df_top_words = pd.DataFrame(top_words_per_topic).T
df_top_words.columns = [f'Topic {i}' for i in range(optimal_num_topics)]
# plt.get_cmap replaces plt.cm.get_cmap, which is deprecated and emits a
# warning (and is absent from newer Matplotlib releases). `colors` is reused
# by the bar chart that follows.
colors = plt.get_cmap('tab10', optimal_num_topics)
fig, ax = plt.subplots(figsize=(12, 6))
ax.axis('off')
table = ax.table(cellText=df_top_words.values, colLabels=df_top_words.columns, loc='center', cellLoc='center',
                 colColours=[colors(i) for i in range(optimal_num_topics)])
table.auto_set_font_size(False)
table.set_fontsize(10)
table.scale(1.2, 1.2)
fig.suptitle(f'Top 10 Words per Topic in the Book of Isaiah ({optimal_num_topics} Topics)', fontsize=14)
plt.savefig('isaiah_top_words_per_topic.png', dpi=300, bbox_inches='tight')
plt.close()
print("Top words per topic table saved to 'isaiah_top_words_per_topic.png'")

# Bar chart of the mean probability of each topic across all verses
topic_columns = [f'topic_{i}' for i in range(optimal_num_topics)]
topic_means = df_verses[topic_columns].mean()
topic_positions = range(optimal_num_topics)

fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(topic_positions, topic_means, color=[colors(i) for i in topic_positions])
ax.set_xlabel('Topic')
ax.set_ylabel('Average Probability')
ax.set_title(f'Average Topic Probability Across Verses in the Book of Isaiah ({optimal_num_topics} Topics)')
ax.set_xticks(list(topic_positions))
ax.set_xticklabels([f'Topic {i}' for i in topic_positions])
fig.savefig('isaiah_topic_distribution.png', dpi=300)
plt.close(fig)
print("Topic distribution bar chart saved to 'isaiah_topic_distribution.png'")

Coherence for 2 topics: 0.3530
Coherence for 3 topics: 0.3639
Coherence for 4 topics: 0.3886
Coherence for 5 topics: 0.3469
Coherence for 6 topics: 0.4045
Coherence for 7 topics: 0.3602
Coherence for 8 topics: 0.3243
Coherence for 9 topics: 0.3578
Coherence for 10 topics: 0.3689
Coherence for 11 topics: 0.3683
Coherence for 12 topics: 0.3668
Coherence for 13 topics: 0.3527
Coherence for 14 topics: 0.3549
Coherence for 15 topics: 0.3457
Coherence scores plot saved to 'coherence_scores.png'
Optimal number of topics: 6
iteration: 1 of max_iter: 10
iteration: 2 of max_iter: 10
iteration: 3 of max_iter: 10
iteration: 4 of max_iter: 10
iteration: 5 of max_iter: 10
iteration: 6 of max_iter: 10
iteration: 7 of max_iter: 10
iteration: 8 of max_iter: 10
iteration: 9 of max_iter: 10
iteration: 10 of max_iter: 10

Sample verses with dominant topics:
     book  chapter  verse                                               text  \
0     NaN      NaN    NaN                                book prophet 

  colors = plt.cm.get_cmap('tab10', optimal_num_topics)


Top words per topic table saved to 'isaiah_top_words_per_topic.png'
Topic distribution bar chart saved to 'isaiah_topic_distribution.png'


Agentic Summarization

In [None]:
import os
from dotenv import load_dotenv
import pandas as pd
import google.generativeai as genai
import time

# Load environment variables from .env
# Stops the cell immediately when the Gemini key is absent or empty, before
# any model call is attempted.
load_dotenv()
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
if not GEMINI_API_KEY:
    raise ValueError("GEMINI_API_KEY not set in .env file")

# Configure Google Gemini API
genai.configure(api_key=GEMINI_API_KEY)

# Define topic names
# Keys are matched against the `dominant_topic` values read from the CSV in
# main(); values are the human-readable labels used in prompts and printouts.
# NOTE(review): the labels are hard-coded for exactly six topics (0-5) -
# presumably chosen by inspecting one particular LDA run; re-check them
# whenever the topic model is refitted.
topic_names = {
    0: "God’s Guidance and Glory",
    1: "Rebellion and False Worship",
    2: "Israel’s Redemption and Mission",
    3: "Judgment and Deliverance",
    4: "Peace and Everlasting Joy",
    5: "Righteous Suffering and Salvation"
}

# Base model initialization function
def get_model(model_name="gemini-2.0-flash", temperature=0.5, max_output_tokens=1024):
    """Create a Gemini ``GenerativeModel`` with the given generation settings.

    Called with no arguments it produces the notebook's standard
    configuration; the parameters allow swapping the model or tuning
    generation without editing every agent.

    Args:
        model_name: Gemini model identifier.
        temperature: Sampling temperature placed in ``generation_config``.
        max_output_tokens: Output token cap placed in ``generation_config``.

    Returns:
        The object returned by ``genai.GenerativeModel``.
    """
    generation_config = {"temperature": temperature, "max_output_tokens": max_output_tokens}
    return genai.GenerativeModel(model_name, generation_config=generation_config)

# Agent Functions for Each Topic
def _summarize_topic(text: str, topic_title: str, instruction: str, agent_label: str) -> str:
    """Shared implementation behind every topic agent.

    Builds the topic-specific prompt, sends it together with ``text`` to a
    model from ``get_model()`` and returns the stripped response text.

    Args:
        text: Concatenated verses to summarise.
        topic_title: Topic label quoted inside the prompt.
        instruction: Sentence (without trailing colon) telling the model which
            themes to stress.
        agent_label: Name used in the error message if the call fails.

    Returns:
        The model's summary, or the literal ``"Summarization failed."`` when
        generating or reading the response raises. Errors are printed, not
        re-raised, so one failing topic does not abort the run.
    """
    # Model creation stays outside the try block: a failure to build the model
    # is a configuration error and should propagate.
    model = get_model()
    prompt = (
        "As an expert in biblical analysis, summarize the following verses from the Book of Isaiah "
        f"that focus on '{topic_title}'. {instruction}:\n\n"
    )
    try:
        response = model.generate_content(f"{prompt}{text}")
        return response.text.strip()
    except Exception as e:
        print(f"Error in {agent_label} Agent: {e}")
        return "Summarization failed."


def agent_guidance_glory(text: str) -> str:
    """Summarise verses whose dominant topic is 'God’s Guidance and Glory'."""
    return _summarize_topic(
        text,
        "God’s Guidance and Glory",
        "Highlight themes of divine leadership and splendor",
        "Guidance and Glory",
    )


def agent_rebellion_worship(text: str) -> str:
    """Summarise verses whose dominant topic is 'Rebellion and False Worship'."""
    return _summarize_topic(
        text,
        "Rebellion and False Worship",
        "Emphasize themes of disobedience and idolatry",
        "Rebellion and Worship",
    )


def agent_redemption_mission(text: str) -> str:
    """Summarise verses whose dominant topic is 'Israel’s Redemption and Mission'."""
    return _summarize_topic(
        text,
        "Israel’s Redemption and Mission",
        "Focus on themes of salvation and purpose",
        "Redemption and Mission",
    )


def agent_judgment_deliverance(text: str) -> str:
    """Summarise verses whose dominant topic is 'Judgment and Deliverance'."""
    return _summarize_topic(
        text,
        "Judgment and Deliverance",
        "Highlight themes of divine justice and rescue",
        "Judgment and Deliverance",
    )


def agent_peace_joy(text: str) -> str:
    """Summarise verses whose dominant topic is 'Peace and Everlasting Joy'."""
    return _summarize_topic(
        text,
        "Peace and Everlasting Joy",
        "Emphasize themes of harmony and eternal happiness",
        "Peace and Joy",
    )


def agent_suffering_salvation(text: str) -> str:
    """Summarise verses whose dominant topic is 'Righteous Suffering and Salvation'."""
    return _summarize_topic(
        text,
        "Righteous Suffering and Salvation",
        "Focus on themes of affliction and redemption",
        "Suffering and Salvation",
    )

# Synthesis Agent
def agent_synthesis(summaries: dict) -> str:
    """Merge the per-topic summaries into one overall summary of Isaiah.

    Args:
        summaries: Mapping of topic id to that topic's summary text; every id
            must be a key of the global ``topic_names``.

    Returns:
        The model's stripped response, or the literal ``"Synthesis failed."``
        when generating or reading the response raises (the error is printed).
    """
    model = get_model()
    sections = [
        f"Summary for '{topic_names[topic_id]}':\n{summary}\n\n"
        for topic_id, summary in summaries.items()
    ]
    instruction = "As an expert in biblical synthesis, take the following topic-specific summaries from the Book of Isaiah and create a coherent overall summary that ties together the key themes:\n\n"
    full_prompt = instruction + "".join(sections)
    try:
        return model.generate_content(full_prompt).text.strip()
    except Exception as e:
        print(f"Error in Synthesis Agent: {e}")
        return "Synthesis failed."

# Main Function to Orchestrate Agents
def main(csv_path="/content/isaiah_with_topics.csv"):
    """Summarise each dominant topic with its agent, then synthesise the results.

    Reads the verse table written by the topic-modelling cell, concatenates
    the text of all verses sharing a ``dominant_topic``, sends each group to
    the matching topic agent and finally asks the synthesis agent to combine
    the per-topic summaries. Everything is reported via ``print``.

    Args:
        csv_path: Location of the CSV with ``text`` and ``dominant_topic``
            columns. Defaults to the Colab path used by this notebook.

    Returns:
        None. Returns early (after printing an error) when the CSV is missing.
    """
    # Load the CSV file
    try:
        df = pd.read_csv(csv_path)
    except FileNotFoundError:
        csv_dir, csv_name = os.path.split(os.path.abspath(csv_path))
        print(f"Error: '{csv_name}' not found in {csv_dir}/. Please upload the file.")
        return

    # Map topic IDs to agent functions
    topic_agents = {
        0: agent_guidance_glory,
        1: agent_rebellion_worship,
        2: agent_redemption_mission,
        3: agent_judgment_deliverance,
        4: agent_peace_joy,
        5: agent_suffering_salvation
    }

    # Summarize each topic
    print("Summarizing topics with Google Gemini agents...")
    topic_summaries = {}
    for topic_id, group in df.groupby("dominant_topic"):
        # A verse whose cleaned text is empty is written to CSV as a blank
        # field and read back as NaN (a float); str.join would raise TypeError
        # on it, so missing texts are dropped before joining.
        verses = " ".join(group['text'].dropna().astype(str).tolist())
        if topic_id in topic_agents:
            summary = topic_agents[topic_id](verses)
            topic_summaries[topic_id] = summary
            print(f"Summary for '{topic_names[topic_id]}':\n{summary}\n")
            time.sleep(0.1)  # Rate limit delay
        else:
            print(f"No agent defined for topic ID {topic_id}")

    # Synthesize the summaries
    print("Synthesizing summaries into a coherent overview...")
    final_summary_text = agent_synthesis(topic_summaries)
    print("Final Combined Summary of the Book of Isaiah:\n", final_summary_text)

# Run the summarization pipeline when this code is executed as the top-level
# program; importing it from another module does not trigger main().
if __name__ == "__main__":
    main()

Summarizing topics with Google Gemini agents...
Summary for 'God’s Guidance and Glory':
Isaiah's prophecies, as presented in this excerpt, consistently highlight God's guiding hand and glorious reign, emphasizing themes of divine leadership and splendor.  Several key strands weave together to express this:

**1. Divine Judgment and Restoration:**  God's judgment is not an end in itself but a prelude to restoration and redemption.  The verses depict a corrupt Jerusalem, characterized by injustice, oppression of the poor and vulnerable (widows, orphans), and moral decay ("harlot," "murderer," "rebellious prince"). God's judgment, however, is aimed at cleansing this corruption, paving the way for a righteous and flourishing Zion ("Zion shall redeem judgment," "convert righteousness"). This judgment is not arbitrary but a necessary step towards establishing a just society.

**2. God as Shepherd and Protector:** God is portrayed as a shepherd guiding his people ("Lord…doth away…stay staff…s