In [4]:
# Subject matter of the book; substituted into the prompts' {context} placeholder.
BOOK_CONTEXT = "Techniques used in NLP"
# Genre of the book; substituted into the prompts' {genre} placeholder.
GENRE = "Artificial Intelligence"

In [5]:
from langchain.llms import Ollama
from langchain.prompts import  PromptTemplate

# Initialize the LLM: the 'llama3.2' model accessed through Ollama (not an OpenAI GPT model).
llm = Ollama(model='llama3.2',temperature=0.7)

# Define a prompt template to generate the book structure in JSON.
# {context} and {genre} are placeholders filled in by generate_book_structure.
# The doubled braces ({{ and }}) are escapes so the example JSON is emitted with
# literal braces — assuming PromptTemplate's default str.format-style templating.
structure_prompt_template = """
Book Context: {context}
Book Genre: {genre}

Based on the book context and above provided information, create a book structure in JSON format with the following details:
1. Book title (at least 4 titles).
2. Chapters, each with headings and subheadings.



Return the structure in JSON format like this:
{{
  "title": ["Book Title1", "Book Title2", ...],
  "chapters": [
    {{
      "chapter_title": "Chapter 1",
      "headings": [
        {{
          "heading": "Heading 1",
          "subheadings": ["Subheading 1", "Subheading 2"]
        }}
      ]
    }},
    ...
  ]
}}

don't return anything except the JSON structure.
"""

# Function to generate book structure
def generate_book_structure(context,genre):
    """Ask the LLM for a book outline and return its raw response.

    Args:
        context: Text substituted for ``{context}`` in ``structure_prompt_template``.
        genre: Text substituted for ``{genre}`` in ``structure_prompt_template``.

    Returns:
        Whatever ``llm`` returns for the rendered prompt; the response is not
        parsed or validated here.
    """
    template = PromptTemplate(
        input_variables=["context","genre"],
        template=structure_prompt_template,
    )
    rendered_prompt = template.format(context=context, genre=genre)
    return llm(rendered_prompt)


In [6]:
# Request the book outline from the LLM; `p` holds the raw, unparsed response.
p = generate_book_structure(context=BOOK_CONTEXT, genre=GENRE)
# Print the raw response for inspection.
print(p)

{
  "title": [
    "Introduction to NLP with AI",
    "Text Preprocessing and Tokenization Techniques",
    "Machine Learning for NLP Applications",
    "Deep Learning Models in Natural Language Processing"
  ],
  "chapters": [
    {
      "chapter_title": "Chapter 1: Introduction to NLP",
      "headings": [
        {
          "heading": "1.1 Overview of NLP",
          "subheadings": ["Subheading 1.1", "Subheading 1.2"]
        },
        {
          "heading": "1.2 Types of NLP",
          "subheadings": ["Subheading 1.3", "Subheading 1.4"]
        }
      ]
    },
    {
      "chapter_title": "Chapter 2: Text Preprocessing and Tokenization",
      "headings": [
        {
          "heading": "2.1 Text Cleaning Techniques",
          "subheadings": ["Subheading 2.1", "Subheading 2.2"]
        },
        {
          "heading": "2.2 Tokenization Methods",
          "subheadings": ["Subheading 2.3", "Subheading 2.4"]
        }
      ]
    },
    {
      "chapter_title": "Chapter 3: Ma

In [7]:
# convert text to json
import json
import re
# data = json.loads(p)
def parse_json(input_string):
    """Parse JSON out of an LLM response, tolerating common wrapping.

    Accepted shapes, tried in this order:

    1. Input that starts with a Markdown code fence (```` ``` ```` or
       ```` ```json ````, tag matched case-insensitively): the fenced content
       is parsed.
    2. Plain JSON: the whole (stripped) input is parsed.
    3. JSON surrounded by prose: the first fenced block anywhere in the text,
       then the span from the first ``{``/``[`` to the last ``}``/``]``.

    Args:
        input_string: Raw text that is expected to contain one JSON document.

    Returns:
        The decoded Python object (dict, list, ...).

    Raises:
        ValueError: If the input opens a code fence that is never closed, or
            if no candidate decodes as JSON. In the latter case the original
            ``json.JSONDecodeError`` is attached as ``__cause__``.
    """
    # Remove leading and trailing whitespace
    text = input_string.strip()

    # Optional "json" language tag, any case; DOTALL so the body may span lines.
    fence_pattern = re.compile(r'```(?:json)?\s*(.*?)\s*```', re.DOTALL | re.IGNORECASE)

    if text.startswith('```'):
        fenced = fence_pattern.search(text)
        if fenced is None:
            # Opening fence without a closing one (e.g. a truncated response).
            raise ValueError("Invalid JSON format")
        candidates = [fenced.group(1)]
    else:
        # The whole text goes first so input that is already valid JSON is
        # never reinterpreted (it may legitimately contain backticks or braces
        # inside string values).
        candidates = [text]

        fenced = fence_pattern.search(text)
        if fenced is not None:
            candidates.append(fenced.group(1))

        # Last resort: cut away prose before/after the outermost JSON container.
        openers = [pos for pos in (text.find('{'), text.find('[')) if pos != -1]
        closer = max(text.rfind('}'), text.rfind(']'))
        if openers and closer > min(openers):
            candidates.append(text[min(openers):closer + 1])

    first_error = None
    for candidate in candidates:
        try:
            return json.loads(candidate)
        except json.JSONDecodeError as e:
            # Report the error for the most literal interpretation of the input.
            if first_error is None:
                first_error = e

    raise ValueError(f"Failed to decode JSON: {first_error}") from first_error

In [8]:
import pypandoc

# Function to convert markdown to a .docx file
def markdown_to_docx(markdown_content, output_file):
    """Convert a markdown string to a .docx file using pypandoc.

    The outcome is reported by printing a message; pandoc-related failures are
    printed rather than raised.

    Args:
        markdown_content: Markdown source text to convert.
        output_file: Path of the .docx file to create.
    """
    try:
        # Attempt to convert the markdown content to docx and save it
        output = pypandoc.convert_text(markdown_content, 'docx', format='md', outputfile=output_file)
    except (OSError, RuntimeError) as e:
        # Per the pypandoc docs (TODO confirm for the installed version): OSError
        # means pandoc was not found, RuntimeError means the conversion failed.
        # Catching only OSError let conversion failures escape despite the
        # stated intent of reporting them here.
        print(f"Conversion failed. Error: {e}")
        return

    # pypandoc is documented to return an empty string when writing to
    # `outputfile`; the `== 0` comparison is kept from the original check.
    if output == 0 or output == "":
        print(f"File '{output_file}' created successfully!")
    else:
        print(f"Failed to create file '{output_file}'. Output: {output}")




In [9]:
import subprocess
import tempfile
import os

def append_markdown_to_docx(large_docx_path, markdown_content, output_docx_path):
    """Append in-memory markdown to an existing .docx by running pandoc twice.

    The markdown is first converted to an intermediate .docx, then pandoc is
    run again with the existing document and the intermediate as inputs to
    produce ``output_docx_path``.

    Args:
        large_docx_path: Path of the existing .docx to append to.
        markdown_content: Markdown text to append.
        output_docx_path: Path of the merged .docx to write.

    Raises:
        subprocess.CalledProcessError: If either pandoc invocation exits with
            a non-zero status.
        FileNotFoundError: If the ``pandoc`` executable cannot be found.
    """
    # A private temporary directory replaces the deprecated, race-prone
    # tempfile.mktemp() and guarantees both intermediates are removed even
    # when pandoc fails part-way through.
    with tempfile.TemporaryDirectory() as work_dir:
        fragment_md_path = os.path.join(work_dir, "fragment.md")
        fragment_docx_path = os.path.join(work_dir, "fragment.docx")

        # Step 1: Write the in-memory markdown to disk as UTF-8 bytes
        with open(fragment_md_path, "wb") as fragment_md_file:
            fragment_md_file.write(markdown_content.encode('utf-8'))

        # Step 2: Convert the markdown file to a temporary docx file using Pandoc.
        # check=True surfaces pandoc failures here instead of as a misleading
        # error later on.
        subprocess.run(['pandoc', fragment_md_path, '-o', fragment_docx_path], check=True)

        # Step 3: Append the converted docx to the large docx file
        subprocess.run(
            ['pandoc', large_docx_path, fragment_docx_path, '-o', output_docx_path],
            check=True,
        )
    # Step 4: temporary files are cleaned up when the `with` block exits

# # Example usage
# large_docx_path = 'Chapter_1_Introduction.docx'  # Path to the large docx file
# markdown_content = "# Chapter 2\nThis is the content of chapter 2 in markdown format."  # Your in-memory markdown
# output_docx_path = 'merged_output.docx'  # Output docx file after merging

# append_markdown_to_docx(large_docx_path, markdown_content, output_docx_path)


In [10]:
def generate_doc_file(file_path,content):
    """Write markdown ``content`` into the .docx at ``file_path``.

    If nothing exists at ``file_path`` yet, the file is created from the
    markdown; otherwise the markdown is appended and the result is written
    back to the same path.

    Args:
        file_path: Path of the .docx file to create or extend.
        content: Markdown text to write.
    """
    if not os.path.exists(file_path):
        # First write: create the document from scratch.
        markdown_to_docx(content, file_path)
        return

    # Later writes: merge into the existing document in place.
    append_markdown_to_docx(file_path, content, file_path)

In [None]:
# Prompt asking the LLM for a markdown introduction to a single chapter.
# Placeholders: {context}, {genre}, {chapter}, {headings}.
chapter_brief_template = """
Imagine you are an author crafting a book. You need to write a compelling introduction for a specific chapter, emphasizing its overall theme and significance. You have the following details:

Book Context: {context}
Book Genre: {genre}

Chapter Title: {chapter}
Headings to be Covered: {headings}

Please provide a markdown-formatted introduction that outlines the main ideas and themes of the chapter. The introduction should capture the essence of what will be discussed, including relevant examples or a narrative, while avoiding any specific mention of the headings or subheadings. Focus on setting the stage for the reader, highlighting the chapter's importance and intriguing aspects without breaking it down into bullet points, as those will be addressed later in detail.

"""

chapter_prompt = PromptTemplate(input_variables=["context","genre","chapter","headings"], template=chapter_brief_template)

# `data` was never assigned anywhere above (the only assignment was commented
# out), so this cell raised NameError on a fresh kernel. Parse the raw LLM
# response `p` into the book-structure object here.
data = parse_json(p)

# Preview the rendered prompt for the first chapter.
first_chapter = data['chapters'][0]
t = chapter_prompt.format(context=BOOK_CONTEXT, genre=GENRE, chapter=first_chapter['chapter_title'], headings=first_chapter['headings'],)
print(t)