<a href="https://colab.research.google.com/github/SubnetSage/ai-coding-journey/blob/main/E_book_Converter.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Run this cell first
!pip install PyPDF2
!pip install crewai
# crewai-tools provides the `crewai_tools` package that the code cell imports
# (`from crewai_tools import tool`); it is not pulled in by plain `crewai`.
!pip install crewai-tools
!pip install langchain
!pip install langchain-community
!pip install ollama

In [None]:
import time
from crewai import Agent, Task, Crew
from crewai_tools import tool
from langchain_community.llms import Ollama
from PyPDF2 import PdfReader

# Initialize the LLM model
# One Ollama-backed handle for the "llama3.1" model; the same object is passed
# to the direct PDF conversion call, used inside the PDF_to_Module tool, and
# handed to the agent further down.
# NOTE(review): presumably needs a reachable Ollama server with this model
# already pulled — confirm for the target environment.
ollama_llm = Ollama(model="llama3.1")

# Function to read and convert each page of the PDF into a course module
def convert_pdf_to_module(file_path, llm_model):
    """Convert every text-bearing page of a PDF into a course module.

    Each page's extracted text is sent to the LLM in its own request, and each
    reply is labelled with the 1-based number of the page it came from.

    Args:
        file_path: Location of the PDF; passed straight to ``PdfReader``.
        llm_model: LLM object exposing ``generate(prompts=[...])`` whose result
            offers ``generations[0][0].text``.

    Returns:
        All generated modules joined into one string, each introduced by
        ``"Module <page number>:"``. An empty string when no page has text.
    """
    reader = PdfReader(file_path)
    modules = []
    for page_number, page in enumerate(reader.pages, start=1):
        text = page.extract_text()
        # Pages without a usable text layer may come back empty or as pure
        # whitespace. Prompting the LLM with nothing wastes a slow call and
        # invites invented content, so such pages are skipped. Numbering still
        # follows the page position, exactly as before.
        if not text or not text.strip():
            continue
        response = llm_model.generate(
            prompts=[f"Convert the following text into a course module:\n\n{text}"]
        )
        module = response.generations[0][0].text
        modules.append(f"Module {page_number}:\n{module}\n")
    return "\n".join(modules)

# Time the up-front conversion of the whole PDF.
# perf_counter() is a monotonic, high-resolution clock, so the measured
# duration cannot be distorted by system clock adjustments the way a
# difference of time.time() readings can. Its absolute value has no meaning;
# only the difference between the two readings is used.
start_time = time.perf_counter()

# Load the document and convert each page into a module
pdf_modules = convert_pdf_to_module("marketing_materials.pdf", ollama_llm)

# Second reading, taken right after the conversion returns
end_time = time.perf_counter()
elapsed_time = end_time - start_time  # seconds
elapsed_minutes = elapsed_time / 60

# Print the elapsed time
print(f"Script took {elapsed_minutes:.2f} minutes to run.")

# Expose the PDF conversion to the agent as a tool named "PDF_to_Module"
@tool("PDF_to_Module")
def pdf_to_module_tool(pdf_path: str) -> str:
    """This tool converts the content of a given PDF file into course modules."""
    # NOTE(review): the docstring above is presumably what the agent is shown
    # as the tool description — confirm before rewording it.
    # The conversion always runs against the module-level ollama_llm.
    course_modules = convert_pdf_to_module(pdf_path, ollama_llm)
    return course_modules

# Template for agent creation
def create_agent(role, goal, backstory, tools, llm, verbose=True, allow_delegation=True):
    """Build an ``Agent`` from the given settings.

    Every argument is forwarded to ``Agent`` under the keyword of the same
    name; nothing is validated or altered here.

    Args:
        role: Value for the agent's ``role``.
        goal: Value for the agent's ``goal``.
        backstory: Value for the agent's ``backstory``.
        tools: Value for the agent's ``tools``.
        llm: Value for the agent's ``llm``.
        verbose: Value for the agent's ``verbose`` (defaults to True).
        allow_delegation: Value for the agent's ``allow_delegation``
            (defaults to True).

    Returns:
        The newly constructed ``Agent``.
    """
    agent_settings = {
        "role": role,
        "goal": goal,
        "backstory": backstory,
        "tools": tools,
        "llm": llm,
        "verbose": verbose,
        "allow_delegation": allow_delegation,
    }
    return Agent(**agent_settings)

# Template for task creation
def create_task(agent, description, expected_output):
    """Build a ``Task`` from the given settings.

    Args:
        agent: Passed to ``Task`` as ``agent``.
        description: Passed to ``Task`` as ``description``.
        expected_output: Passed to ``Task`` as ``expected_output``.

    Returns:
        The newly constructed ``Task``.
    """
    task_settings = {
        "agent": agent,
        "description": description,
        "expected_output": expected_output,
    }
    return Task(**task_settings)

# Tools the instructional-designer agent may call
pdf_to_module_tools = [pdf_to_module_tool]

# Persona of the agent: who it is and what it is trying to achieve
pdf_to_module_role = "Instructional Designer"
pdf_to_module_goal = "Convert each page of the provided PDF into a course module."

# The backstory embeds the first 500 characters of the modules generated
# earlier (pdf_modules) as reference material for the agent.
pdf_to_module_backstory = f"""
    You are an instructional designer with extensive experience in creating educational content.
    Your task is to read and rewrite each page of the given PDF document, converting it into a structured and coherent course module.
    Ensure that the content is clear, educational, and engaging, with practical examples and exercises to reinforce learning.
    Here is the guide for reference: {pdf_modules[:500]}... (truncated)
"""

# Build the course module agent through the shared template
pdf_to_module_agent = create_agent(
    role=pdf_to_module_role,
    goal=pdf_to_module_goal,
    backstory=pdf_to_module_backstory,
    tools=pdf_to_module_tools,
    llm=ollama_llm,
)

# Build the course module task through the shared template
module_task = create_task(
    agent=pdf_to_module_agent,
    description="Convert each page of the provided PDF document into a course module.",
    expected_output="A course module for each page of the PDF document.",
)

# Instantiate the Crew with the module creation process
crew = Crew(
    agents=[pdf_to_module_agent],
    tasks=[module_task],
    # A boolean works across CrewAI releases: older ones also accepted the
    # numeric levels 1/2, while newer ones validate `verbose` as a bool and
    # reject 2.
    verbose=True,
)

# Get the crew to start working on the module creation task
results = crew.kickoff()

# kickoff() hands back the crew's final answer as a single object (a plain
# string in older CrewAI releases, a CrewOutput in newer ones), not a list of
# per-task results, so indexing it with [0] and reading `.output` fails.
# str() gives the final text in both cases.
final_modules = str(results)

# Print the final course modules
print(final_modules)

In [None]:
'''
YOU DO NOT NEED TO RUN THIS CELL!!!
YOU DO NOT NEED TO RUN THIS CELL!!!
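You'll need Google Colab Pro to access the console and install Phi3 mini. Once you have console access, type "ollama pull phi3".
This will download Microsoft's Phi3 mini model. Note that the code above is configured with model="llama3.1", so either pull that model instead ("ollama pull llama3.1") or change the model name in the code to "phi3".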
After you have installed Phi3 successfully, right-click anywhere in the blank space under "sample_data", upload your PDF, and run the code above.
I cannot make any promises about how fast this will work on large documents, as I can't account for the hardware you choose to use.
A normal PDF with fewer than 20 pages might be all right to work with in this notebook.

'''