In [1]:
import os
import pandas as pd
import re
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex, Settings
from llama_index.core.node_parser import SentenceSplitter
from llama_index.llms.nvidia import NVIDIA
from llama_index.embeddings.nvidia import NVIDIAEmbedding


[nltk_data] Downloading package punkt_tab to
[nltk_data]     /home/polabs2/venv_nvidia_llamaindex/lib/python3.10/si
[nltk_data]     te-packages/llama_index/core/_static/nltk_cache...
[nltk_data]   Package punkt_tab is already up-to-date!


In [2]:
df = pd.read_csv('/home/polabs2/Code/RPG_teacher/data/chapter_summary_notes.csv')
df

Unnamed: 0,document_type,document,chapter,data_type,text,key
0,text_book,digital_marketing,7,chapter_name,WEB DEVELOPMENT AND DESIGN,
1,text_book,digital_marketing,7,summary_note,Designing your website according to best pract...,
2,text_book,digital_marketing,7,summary_note,Developing a strong stable and usable websit...,
3,text_book,digital_marketing,7,summary_note,Creating a suitable mobile web experience for ...,
4,text_book,digital_marketing,7,summary_note,Enhancing user experience through design and g...,
...,...,...,...,...,...,...
62,fantasy_novel,hobbit,17,summary_note,"In the morning, as Bard returns to bargain wit...",
63,fantasy_novel,hobbit,18,chapter_name,The Return Journey,
64,fantasy_novel,hobbit,18,summary_note,"When Bilbo awakes, he learns that Thorin is mo...",
65,fantasy_novel,hobbit,19,chapter_name,The Last Stage,


In [3]:
# Separate the textbook and fantasy novel data
textbook_df = df[df['document_type'] == 'text_book']
fantasy_df = df[df['document_type'] == 'fantasy_novel']

# Get unique chapters from both datasets
textbook_chapters = textbook_df['chapter'].unique()
fantasy_chapters = fantasy_df['chapter'].unique()

# For simplicity, we'll assume chapters correspond by their chapter numbers
common_chapters = set(textbook_chapters).intersection(set(fantasy_chapters))

print(f"Common chapters: {common_chapters}")


Common chapters: {12, 13, 7}


In [4]:
# Directory to save combined documents
COMBINED_DOCS_DIR = '/home/polabs2/Code/RPG_teacher/data/combined'
os.makedirs(COMBINED_DOCS_DIR, exist_ok=True)

for chapter in common_chapters:
    # Extract textbook data for the chapter
    chapter_textbook = textbook_df[textbook_df['chapter'] == chapter]
    chapter_fantasy = fantasy_df[fantasy_df['chapter'] == chapter]
    
    # Get the chapter names (assuming one per chapter)
    textbook_chapter_name = chapter_textbook[chapter_textbook['data_type'] == 'chapter_name']['text'].iloc[0]
    fantasy_chapter_name = chapter_fantasy[chapter_fantasy['data_type'] == 'chapter_name']['text'].iloc[0]
    
    # Combine summary notes
    textbook_summaries = chapter_textbook[chapter_textbook['data_type'] == 'summary_note']['text'].tolist()
    fantasy_summaries = chapter_fantasy[chapter_fantasy['data_type'] == 'summary_note']['text'].tolist()
    
    # Combine questions
    textbook_questions = chapter_textbook[chapter_textbook['data_type'] == 'question']['text'].tolist()
    
    # Format the combined content
    combined_content = f"""
**Fantasy Chapter Name:**
{fantasy_chapter_name}

**Fantasy Chapter Summary:**
{' '.join(fantasy_summaries)}

**Textbook Chapter Name:**
{textbook_chapter_name}

**Textbook Chapter Summary:**
{' '.join(textbook_summaries)}

**Sample Questions:**
{' '.join(textbook_questions)}
"""
    # Save the combined content to a file
    combined_doc_path = os.path.join(COMBINED_DOCS_DIR, f'chapter_{chapter}_combined.txt')
    with open(combined_doc_path, 'w', encoding='utf-8') as f:
        f.write(combined_content)


In [5]:
# Load combined documents
combined_documents = SimpleDirectoryReader(COMBINED_DOCS_DIR).load_data()


In [11]:
# Initialize the LLM
Settings.llm = NVIDIA(model="meta/llama-3.1-405b-instruct")

# Initialize the embedding model
Settings.embed_model = NVIDIAEmbedding(model="NV-Embed-QA", truncate="END")

import os
import getpass

#nvidia_api_key = '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'

if not os.environ.get("NVIDIA_API_KEY", "").startswith("nvapi-"):
    nvidia_api_key = getpass.getpass("Enter your NVIDIA API key: ")
    assert nvidia_api_key.startswith("nvapi-"), f"{nvidia_api_key[:5]}... is not a valid key"
    os.environ["NVIDIA_API_KEY"] = nvidia_api_key



In [12]:
# Set up the text splitter
Settings.text_splitter = SentenceSplitter(chunk_size=400)

# Create the index
index = VectorStoreIndex.from_documents(combined_documents)


In [23]:
from llama_index.prompts import PromptTemplate
custom_prompt_template = PromptTemplate(template="""
You are a storyteller who combines fantasy adventures with educational content.

Given the following information:

**Fantasy Chapter Name:**
{fantasy_chapter_name}

**Fantasy Chapter Summary:**
{fantasy_summary}

**Textbook Chapter Name:**
{textbook_chapter_name}

**Textbook Chapter Summary:**
{textbook_summary}

**Sample Questions:**
{sample_questions}

Create a fun and immersive RPG setting that blends the fantasy narrative with the educational topics. Incorporate the sample questions into the storyline as challenges or puzzles the characters must solve.

Please provide the RPG setting in a descriptive and engaging manner.
""")


ModuleNotFoundError: No module named 'llama_index.prompts'

In [24]:
from llama_index.core import Settings
from llama_index.llms.nvidia import NVIDIA
from llama_index.embeddings.nvidia import NVIDIAEmbedding
from llama_index.core.node_parser import SentenceSplitter

# Initialize the LLM
Settings.llm = NVIDIA(model="meta/llama-3.1-405b-instruct")

# Initialize the embedding model
Settings.embed_model = NVIDIAEmbedding(model="NV-Embed-QA", truncate="END")

# Set up the text splitter
Settings.text_splitter = SentenceSplitter(chunk_size=400)

# (Optional) Set other global settings if needed
# Settings.num_output = 512
# Settings.context_window = 3900

from llama_index.core import VectorStoreIndex

# Load combined documents
combined_documents = SimpleDirectoryReader(COMBINED_DOCS_DIR).load_data()

# Create the index
index = VectorStoreIndex.from_documents(combined_documents)


In [25]:
def extract_section(text, section_name):
    pattern = rf"\*\*{section_name}:\*\*(.*?)(?=\n\*\*|$)"
    match = re.search(pattern, text, re.DOTALL)
    return match.group(1).strip() if match else ''

def generate_rpg_setting(chapter):
    # Create a query engine
    query_engine = index.as_query_engine(similarity_top_k=1)
    
    # Formulate the query to retrieve the specific chapter's combined document
    query_str = f"chapter_{chapter}_combined.txt"
    
    # Retrieve the relevant document
    response = query_engine.query(query_str)
    
    # Check if the response contains source nodes
    if not response or not response.source_nodes:
        print(f"No document found for chapter {chapter}")
        return None
    
    # Extract the content from the retrieved document
    doc_content = response.source_nodes[0].node.get_content()
    
    # Extract sections using the helper function
    fantasy_chapter_name = extract_section(doc_content, 'Fantasy Chapter Name')
    fantasy_summary = extract_section(doc_content, 'Fantasy Chapter Summary')
    textbook_chapter_name = extract_section(doc_content, 'Textbook Chapter Name')
    textbook_summary = extract_section(doc_content, 'Textbook Chapter Summary')
    sample_questions = extract_section(doc_content, 'Sample Questions')
    
    # Generate the response using the LLM from Settings
    response_text = Settings.llm.predict(
        prompt=custom_prompt_template,
        fantasy_chapter_name=fantasy_chapter_name,
        fantasy_summary=fantasy_summary,
        textbook_chapter_name=textbook_chapter_name,
        textbook_summary=textbook_summary,
        sample_questions=sample_questions
    )
    
    return response_text



In [26]:
# Test with a specific chapter
chapter_to_test = 7  # Replace with the chapter you want to test
rpg_setting = generate_rpg_setting(chapter_to_test)

if rpg_setting:
    print("Generated RPG Setting:")
    print(rpg_setting)


ValidationError: 1 validation error for LLMPredictStartEvent
template
  value is not a valid dict (type=type_error.dict)

In [30]:
import os
import pandas as pd
import re
from llama_index.core import Settings
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
from llama_index.core.node_parser import SentenceSplitter
from llama_index.llms.nvidia import NVIDIA
from llama_index.embeddings.nvidia import NVIDIAEmbedding
from llama_index.core import PromptTemplate

# Set NVIDIA API Key
if not os.environ.get("NVIDIA_API_KEY", "").startswith("nvapi-"):
    nvidia_api_key = getpass.getpass("Enter your NVIDIA API key: ")
    assert nvidia_api_key.startswith("nvapi-"), f"{nvidia_api_key[:5]}... is not a valid key"
    os.environ["NVIDIA_API_KEY"] = nvidia_api_key

# Initialize the LLM using the Settings object
Settings.llm = NVIDIA(model="meta/llama-3.1-405b-instruct")

# Initialize the embedding model
Settings.embed_model = NVIDIAEmbedding(model="NV-Embed-QA", truncate="END")

# Set up the text splitter
Settings.text_splitter = SentenceSplitter(chunk_size=400)

# Load combined documents
COMBINED_DOCS_DIR = '/home/polabs2/Code/RPG_teacher/data/combined'  # Replace with your actual path
combined_documents = SimpleDirectoryReader(COMBINED_DOCS_DIR).load_data()

# Create the index
index = VectorStoreIndex.from_documents(combined_documents)

# Define the PromptTemplate
custom_prompt_template = PromptTemplate(template="""
You are a storyteller who combines fantasy adventures with educational content.

Given the following information:

**Fantasy Chapter Name:**
{fantasy_chapter_name}

**Fantasy Chapter Summary:**
{fantasy_summary}

**Textbook Chapter Name:**
{textbook_chapter_name}

**Textbook Chapter Summary:**
{textbook_summary}

**Sample Questions:**
{sample_questions}

Create a fun and immersive RPG setting that blends the fantasy narrative with the educational topics. Incorporate the sample questions into the storyline as challenges or puzzles the characters must solve.

Please provide the RPG setting in a descriptive and engaging manner.
""")

# Helper function to extract sections
def extract_section(text, section_name):
    pattern = rf"\*\*{section_name}:\*\*(.*?)(?=\n\*\*|$)"
    match = re.search(pattern, text, re.DOTALL)
    return match.group(1).strip() if match else ''

# Function to generate the RPG setting
def generate_rpg_setting(chapter):
    # Create a query engine
    query_engine = index.as_query_engine(similarity_top_k=10)
    
    # Formulate the query to retrieve the specific chapter's combined document
    query_str = f"chapter_{chapter}_combined.txt"
    
    # Retrieve the relevant document
    response = query_engine.query(query_str)
    
    # Check if the response contains source nodes
    if not response or not response.source_nodes:
        print(f"No document found for chapter {chapter}")
        return None
    
    # Extract the content from the retrieved document
    doc_content = response.source_nodes[0].node.get_content()
    
    # Extract sections using the helper function
    fantasy_chapter_name = extract_section(doc_content, 'Fantasy Chapter Name')
    fantasy_summary = extract_section(doc_content, 'Fantasy Chapter Summary')
    textbook_chapter_name = extract_section(doc_content, 'Textbook Chapter Name')
    textbook_summary = extract_section(doc_content, 'Textbook Chapter Summary')
    sample_questions = extract_section(doc_content, 'Sample Questions')
    
    # Generate the response using the LLM from Settings
    response_text = Settings.llm.predict(
        prompt=custom_prompt_template,
        fantasy_chapter_name=fantasy_chapter_name,
        fantasy_summary=fantasy_summary,
        textbook_chapter_name=textbook_chapter_name,
        textbook_summary=textbook_summary,
        sample_questions=sample_questions
    )
    
    return response_text



Generated RPG Setting:
**Welcome to "Quest for the Lonely Mountain: A Web Development Odyssey"**

You find yourself standing in front of the cozy lodge of Beorn, a half-man, half-bear creature. The warm glow of the setting sun casts a golden light on the lush green surroundings, and the sound of gentle chirping fills the air. Gandalf, your wise and powerful guide, leads you to the entrance of the lodge, where Beorn greets you with a hearty welcome.

As you enter the lodge, you notice a peculiar sight: a large, wooden table in the center of the room, adorned with strange objects and contraptions. Beorn explains that this is his "Web Development Workshop," where he crafts and maintains his own website, showcasing his remarkable skills as a master woodworker and beekeeper.

Beorn approaches you with a concerned expression. "Friends, I sense that you are not only weary travelers but also seekers of knowledge. Your quest to reclaim the Lonely Mountain will require more than bravery and stre

In [None]:
# Test the function
chapter_to_test = 7  # Replace with the chapter you want to test
rpg_setting = generate_rpg_setting(chapter_to_test)

if rpg_setting:
    print("Generated RPG Setting:")
    print(rpg_setting)
