In [None]:
import openai
import tiktoken
import tempfile
import IPython
import structlog
import random
import enum
import re
import requests
import subprocess
import concurrent.futures
import retrying
from github import Github
import string, os
import markdown2
from ebooklib import epub
# Module-wide structured logger used by every helper in this notebook.
logger = structlog.getLogger()
# File that holds the OpenAI API key. The location can be overridden with the
# OPENAI_API_KEY_PATH environment variable so the notebook is not tied to one
# user's home directory; the original path is kept as the fallback.
openai.api_key_path = os.environ.get('OPENAI_API_KEY_PATH', '/home/jong/.openai_key')

In [3]:
class Chat:
    """Stateful conversation with the OpenAI chat-completion endpoint.

    The conversation starts with a single system message. Every successful
    call to :meth:`message` records the user message (if any) and the
    assistant reply, so later calls see the earlier turns.
    """

    def __init__(self, system, max_length=4096//2):
        """
        Args:
            system: Content of the system message that opens the conversation.
            max_length: Token budget for the stored history; the oldest
                non-system messages are dropped while the history exceeds it.
        """
        self._system = system
        self._max_length = max_length
        self._history = [
            {"role": "system", "content": self._system},
        ]

    @classmethod
    def num_tokens_from_messages(cls, messages, model="gpt-3.5-turbo"):
        """Returns the number of tokens used by a list of messages."""
        encoding = tiktoken.encoding_for_model(model)
        num_tokens = 0
        for message in messages:
            num_tokens += 4  # every message follows <im_start>{role/name}\n{content}<im_end>\n
            for key, value in message.items():
                num_tokens += len(encoding.encode(value))
                if key == "name":  # if there's a name, the role is omitted
                    num_tokens += -1  # role is always required and always 1 token
        num_tokens += 2  # every reply is primed with <im_start>assistant
        return num_tokens

    @retrying.retry(stop_max_attempt_number=5, wait_fixed=2000)
    def message(self, next_msg=None):
        """Send ``next_msg`` (if given) and return the assistant's reply text.

        The whole method is retried up to 5 times, 2 seconds apart, on any
        exception. The request is therefore built on a copy of the history and
        the history is only replaced after the API call succeeded; otherwise
        every failed attempt would leave one more duplicate user message in
        the conversation.

        Args:
            next_msg: User message to append before requesting a completion,
                or ``None`` to request a completion for the history as-is.

        Returns:
            The content of the first choice returned by the API.
        """
        # TODO: Optimize this if slow through easy caching
        # Drop the oldest non-system messages until the history fits the budget.
        while len(self._history) > 1 and self.num_tokens_from_messages(self._history) > self._max_length:
            logger.info(f'Popping message: {self._history.pop(1)}')
        pending = list(self._history)
        if next_msg is not None:
            pending.append({"role": "user", "content": next_msg})
        logger.info('requesting openai.Chat...')
        resp = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=pending,
        )
        logger.info('received openai.Chat...')
        text = resp.choices[0].message.content
        pending.append({"role": "assistant", "content": text})
        # Commit only now, so a retried attempt starts from a clean history.
        self._history = pending
        return text

In [4]:
class Image:
    """Thin wrapper around the OpenAI image-generation endpoint."""

    class Size(enum.Enum):
        """Image dimensions; each value is the string passed as ``size``."""
        SMALL = "256x256"
        MEDIUM = "512x512"
        LARGE = "1024x1024"

    @classmethod
    @retrying.retry(stop_max_attempt_number=5, wait_fixed=2000)
    def create(cls, prompt, n=1, size=Size.SMALL):
        """Generate ``n`` images for ``prompt``.

        Retried up to 5 times, 2 seconds apart, on any exception.

        Returns:
            The first entry of the response's ``data`` list when ``n == 1``,
            otherwise the whole ``data`` list.
        """
        logger.info('requesting openai.Image...')
        response = openai.Image.create(prompt=prompt, n=n, size=size.value)
        logger.info('received openai.Image...')
        images = response["data"]
        return images[0] if n == 1 else images

In [210]:
# Manual check of the image wrapper: with the default n=1, create() returns
# the first entry of the response's data list (shown below with its "url").
Image.create('Detective Yoda')

2023-04-13 18:06:21 [info     ] requesting openai.Image...
2023-04-13 18:06:26 [info     ] received openai.Image...


<OpenAIObject at 0x7efc28c114f0> JSON: {
  "url": "https://oaidalleapiprodscus.blob.core.windows.net/private/org-ct6DYQ3FHyJcnH1h6OA3fR35/user-qvFBAhW3klZpvcEY1psIUyDK/img-v4XrxHZVPaa43agYj2g3PM40.png?st=2023-04-13T23%3A38%3A04Z&se=2023-04-14T01%3A38%3A04Z&sp=r&sv=2021-08-06&sr=b&rscd=inline&rsct=image/png&skoid=6aaadede-4fb3-4698-a8f6-684d7786b067&sktid=a48cca56-e6da-484e-a814-9c849652bcb3&skt=2023-04-13T17%3A15%3A26Z&ske=2023-04-14T17%3A15%3A26Z&sks=b&skv=2021-08-06&sig=HqHMc9GFzXuqWd04QhJoRAn8HApywj7jnC3HQ32lBbI%3D"
}

In [5]:
def convert_md_files_to_epub(book_id, book_title, md_files, output_file):
    """Bundle markdown files into a single EPUB, one chapter per file.

    Args:
        book_id: Identifier stored in the EPUB metadata.
        book_title: Title stored in the EPUB metadata.
        md_files: Paths of the markdown files, in reading order. The n-th file
            becomes ``chapter_<n>.xhtml`` titled ``Chapter <n>`` (1-based).
        output_file: Path the EPUB is written to.
    """
    # Create a new EPUB book
    book = epub.EpubBook()

    # Set metadata
    book.set_identifier(book_id)
    book.set_title(book_title)
    book.set_language('en')
    book.add_author("Jonathan Grant")

    # Create a list to store the EPUB chapters
    epub_chapters = []

    # Define a CSS style for code blocks
    code_style = '''
    pre {
        background-color: #f5f5f5;
        border: 1px solid #ccc;
        padding: 10px;
        overflow-x: auto;
    }
    '''

    # Add the CSS style to the book
    style = epub.EpubItem(
        uid="code_style",
        file_name="styles/code_style.css",
        media_type="text/css",
        content=code_style,
    )
    book.add_item(style)

    for chapter_no, md_file in enumerate(md_files, start=1):
        # Read the markdown as UTF-8 explicitly: the locale's default encoding
        # may not be able to decode non-ASCII text in the chapters.
        with open(md_file, 'r', encoding='utf-8') as file:
            md_content = file.read()

        # Convert the .md content to HTML, enabling 'fenced-code-blocks' extra
        html_content = markdown2.markdown(md_content, extras=["fenced-code-blocks"])

        # Create an EPUB chapter
        epub_chapter = epub.EpubHtml(
            title=f'Chapter {chapter_no}',
            file_name=f'chapter_{chapter_no}.xhtml',
            content=html_content,
        )

        # Link the CSS style to the chapter exactly once. The original also
        # called epub_chapter.add_item(style), which registers the same
        # stylesheet link a second time.
        epub_chapter.add_link(href="styles/code_style.css", rel="stylesheet", type="text/css")

        # Add the chapter to the book and the list of chapters
        book.add_item(epub_chapter)
        epub_chapters.append(epub_chapter)

    # Create a book spine (required for the EPUB format)
    book.spine = ['nav'] + epub_chapters

    # Create and add a Table of Contents
    book.toc = epub_chapters
    book.add_item(epub.EpubNcx())
    book.add_item(epub.EpubNav())

    # Write the EPUB file
    epub.write_epub(output_file, book)

In [24]:
class EBookWriter:
    """Writes a themed textbook with ChatGPT and optionally publishes it.

    ``run()`` asks for a table of contents, writes every chapter in a thread
    pool, stores the pages as markdown files and hands them to the configured
    publishers (a GitHub repository and/or an EPUB file).
    """

    theme_description = "Make sure it's engaging, relevant, and educational. Incorporate references to published journals when relevant. Include entertaining facts or jokes when appropriate."

    # Built-in themes, mapped to the extra instructions added to the system prompt.
    themes = {
        "Sherlock Holmes mystery": theme_description,
        "Dracula story": theme_description,
        "Frankenstein's Monster story": theme_description,
        "Robin Hood story": theme_description,
        "King Arthur and the Knights of the Round Table story": theme_description,
        "Greek Mythology epic": theme_description,
        "Alice in Wonderland trippy story": theme_description,
        "Wizard of Oz parable": theme_description,
    }

    def __init__(self, topic, nchapters=16, include_code=True, theme=None, theme_extra=None, author_style=None, include_next=True, publish={'gh', 'amazon'}):
        """
        Args:
            topic: Subject of the book.
            nchapters: Number of chapters requested for the table of contents.
            include_code: Ask for code samples and a code explanation section.
            theme: Story form of the chapters. When ``None`` every chapter
                draws a random entry from ``themes``.
            theme_extra: Extra instructions for the theme. When ``None`` the
                entry from ``themes`` is used, falling back to
                ``theme_description`` for a theme that is not in ``themes``.
            author_style: Optional author(s) whose style should be imitated.
            include_next: Append a "Next Chapter" link to each chapter.
            publish: Collection of publishers to run: ``'gh'`` and/or ``'amazon'``.
        """
        self.topic = topic
        self.nchapters = nchapters
        self.include_code = include_code
        self.theme = theme
        self.theme_extra = theme_extra
        self.author_style = author_style
        self.include_next = include_next
        # Copy, so neither the shared default set nor the caller's collection
        # is aliased by this instance.
        self.publish = {publish} if isinstance(publish, str) else set(publish)

    def get_chapters(self):
        """Ask the model for the table of contents.

        Returns:
            The ``"<number>. <title>"`` lines found in the reply; an empty
            list (after logging a warning) when none could be parsed.
        """
        chat = Chat("You are EBookGPT. Generate chapters for a textbook topic.")
        resp = chat.message(f'Write the table of contents for a textbook about {self.topic} involving {self.nchapters} chapters. Just return the ordered list of chapters and nothing else. Do not include a conclusion.')
        chapter_pattern = re.compile(r'\d+\.\s+.*')
        chapters = chapter_pattern.findall(resp)
        if not chapters:
            logger.warning(f'Could not parse message for chapters! Message:\n{resp}')
        return chapters

    def get_special_guest_for_chapter(self, chapter):
        """Ask the model to name a special guest for ``chapter``."""
        chat = Chat(f'''You are EBookGPT. You write chapters for textbooks on {self.topic}.
You will respond with just the name of a special guest who should appear in a chapter given to you.
Only respond with the name. Do not say anything else.''')
        return chat.message(f'Who is a good special guest for a chapter on {chapter}?')

    def write_chapter(self, all_chapters, curr_chapter, chapter_idx, guest_chance=0.5, image_chance=0.0):
        """Write one chapter: introduction, themed story, then explanation/conclusion.

        Args:
            all_chapters: Every chapter title of the book in order, or ``None``.
            curr_chapter: Title of the chapter to write.
            chapter_idx: 1-based position of the chapter, as passed by
                ``run()``. It equals the number of the chapter's markdown
                file, because file 00 holds the table of contents.
            guest_chance: Probability of including a special guest.
            image_chance: Probability of prepending a generated image.

        Returns:
            The chapter as one markdown string.
        """
        text = []
        theme = self.theme if self.theme is not None else random.choice(list(self.themes.keys()))
        # .get(): a custom theme given without theme_extra used to raise KeyError.
        theme_extra = self.theme_extra if self.theme_extra is not None else self.themes.get(theme, self.theme_description)
        system = f'''You are EBookGPT. You write chapters for textbooks on {self.topic} in the form of a {theme}.
The {theme} must teach and be solved by {self.topic}{" code. Make sure to include code samples." if self.include_code else ""}.
{theme_extra}
Write all responses in fancy github md format.
Do not say responses to the user such as "sure".'''
        if self.author_style is not None:
            system += f' Write in the style of {self.author_style}.'
        chat = Chat(system)
        msg = f'You are writing a book about {self.topic}. Write the introduction to the chapter about {curr_chapter}. Write in fancy github md format.'
        guest = None
        # random() is in [0, 1), so a chance of 0 never fires and 1 always does.
        if random.random() < guest_chance:
            guest = self.get_special_guest_for_chapter(curr_chapter)
            msg += f' Include special guest {guest} in this chapter.'
        # chapter_idx is 1-based, so the previous chapter sits at index
        # chapter_idx-2. The original read chapter_idx-1, i.e. the current one.
        if all_chapters is not None and chapter_idx > 1:
            msg += f' The last chapter was {all_chapters[chapter_idx-2]}.'
        resp = chat.message(msg)
        text.append(resp)
        msg = f'Write the {theme} to the chapter teaching {curr_chapter}. Write in fancy github md format.'
        if guest is not None:
            msg += f' Include special guest {guest}'
        resp = chat.message(msg)
        text.append(resp)
        if self.include_code:
            msg = f'Explain the code used to resolve the {theme}. Write in fancy github md format.'
        else:
            msg = "Write a conclusion for the above."
        resp = chat.message(msg)
        text.append(resp)
        # Add image
        if random.random() < image_chance:
            img_prompt = chat.message("Write a DALL-E image generation prompt for this chapter in less than 1000 characters.").replace("\n", " ")[:1000]
            img = Image.create(img_prompt)["url"]
            # [![name](link to image on GH)](link to your URL)
            img_md = f"![{img_prompt}]({img})\n\n"
            text.insert(0, img_md)
        # The final chapter has no successor, so it gets no link. Without the
        # chapter list the position is unknown and the link is always added.
        has_next = all_chapters is None or chapter_idx < len(all_chapters)
        if self.include_next and has_next:
            # Add link to next page
            text.append(f'\n\n[Next Chapter]({chapter_idx+1:02d}_Chapter{chapter_idx+1:02d}.md)')
        return '\n'.join(text)

    def write_book(self, book):
        """Write the pages to a temporary directory and run the publishers.

        Args:
            book: Page strings; page ``i`` is written to ``<i>_Chapter<i>.md``
                (zero-padded to two digits).
        """
        topic_normal = self.topic.translate(str.maketrans('', '', string.punctuation+' '))
        with tempfile.TemporaryDirectory() as tmpdir:
            outdir = f'{tmpdir}/{topic_normal}/'
            os.makedirs(outdir, exist_ok=True)
            for i, page in enumerate(book):
                page_title = f'{i:02d}_Chapter{i:02d}.md'
                # Explicit UTF-8: generated text frequently contains
                # characters the locale's default encoding cannot represent.
                with open(os.path.join(outdir, page_title), 'w', encoding='utf-8') as f:
                    f.write(page)
            if 'gh' in self.publish:
                self.publish_book_gh(outdir, topic_normal)
            if 'amazon' in self.publish:
                self.publish_book_kdp(outdir, topic_normal)

    def publish_book_gh(self, bookdir, topic_normal):
        """Create a repository in the EBookGPT organization and push the book.

        Reads the access token from the ``GITHUB_TOKEN`` environment variable.
        Git is invoked with argument lists instead of shell strings, so the
        repository name is never interpreted by a shell.
        """
        gtoken = os.environ["GITHUB_TOKEN"]
        g = Github(gtoken)
        org = g.get_organization("EBookGPT")
        org.create_repo(topic_normal)

        def git(*args):
            subprocess.check_output(["git", *args], cwd=bookdir)

        git("init")
        # README.md -> first page (replaces the shell `ln -sfn 00* README.md`).
        first_pages = sorted(name for name in os.listdir(bookdir) if name.startswith('00'))
        if first_pages:
            readme = os.path.join(bookdir, 'README.md')
            if os.path.lexists(readme):
                os.remove(readme)
            os.symlink(first_pages[0], readme)
        git("add", ".")
        git("remote", "add", "origin", f"git@github.com:EBookGPT/{topic_normal}.git")
        git("commit", "-am", "Book")
        # `branch -M` also works when the initial branch is already "main",
        # where `checkout -b main` fails.
        git("branch", "-M", "main")
        git("push", "origin", "main", "-u")

    def publish_book_kdp(self, bookdir, topic_normal):
        """Convert the chapter files in ``bookdir`` to an EPUB.

        The table-of-contents page (``00_...``) is excluded.

        Returns:
            Path of the generated EPUB file.
        """
        # Make epub
        # mkstemp reserves the path; the original derived it from the name of
        # a NamedTemporaryFile that was discarded immediately.
        fd, epub_file = tempfile.mkstemp(suffix='.epub')
        os.close(fd)
        chapter_files = sorted(
            os.path.join(bookdir, x)
            for x in os.listdir(bookdir)
            if 'Chapter' in x and not x.startswith('00_')
        )
        convert_md_files_to_epub(topic_normal, self.topic, chapter_files, epub_file)
        logger.info(f"EPUB ready for upload: {epub_file}")
        return epub_file

    def make_cover(self):
        """Generate cover art and return it as a base64 string.

        ``Image.create`` does not request base64 output, so its result is
        expected to carry a ``url`` rather than the ``b64_json`` key the
        original indexed. ``b64_json`` is used when present; otherwise the
        image is downloaded from ``url`` and encoded here.
        """
        import base64

        img = Image.create(f"""{self.topic}, 4k Award Winning, Concept, Digital Art""")
        encoded = img.get("b64_json")
        if encoded is not None:
            return encoded
        resp = requests.get(img["url"], timeout=60)
        resp.raise_for_status()
        return base64.b64encode(resp.content).decode("ascii")

    def run(self, nthreads=None):
        """Generate the whole book and publish it.

        Args:
            nthreads: Size of the thread pool; defaults to one thread per chapter.

        Returns:
            List of page strings: the table of contents followed by one entry
            per chapter, in order.

        Raises:
            ValueError: If no chapter titles could be parsed from the model's
                reply (the original failed with an IndexError here).
        """
        # get chapters of book
        chapters = list(self.get_chapters())
        if not chapters:
            raise ValueError(f'No chapters could be generated for topic {self.topic!r}')
        # Add conclusion/summary
        if "conclusion" not in chapters[-1].lower():
            chapters.append(f'{len(chapters)+1}. Conclusion of {" ".join(chapters)}')
        # Write chapters
        text = [None] * len(chapters)
        with concurrent.futures.ThreadPoolExecutor(max_workers=nthreads or len(chapters)) as thread_pool:
            # Chapters are numbered from 1 because page 0 is the table of contents.
            tasks = {
                thread_pool.submit(self.write_chapter, chapters, curr_chapter, page_no): page_no - 1
                for page_no, curr_chapter in enumerate(chapters, start=1)
            }
            for future in concurrent.futures.as_completed(tasks):
                text[tasks[future]] = future.result()
        book = ['Table Of Contents:\n\n'+'\n'.join(chapters[:-1] + [f"{len(chapters)}. Conclusion"])] + text
        try:
            self.write_book(book)
        except Exception as e:
            # Best effort: publishing problems must not discard the generated text.
            logger.exception(e)
        return book

In [25]:
# Book configuration for this run.
topic = "Frankenstein's Neural Network: Deep Learning with Victor Frankenstein and TensorFlow"
nchapters = 42
include_code = True
theme = "Frankenstein story"
theme_extra = "Make sure it's engaging, relevant, and educational. Incorporate references to published journals when relevant. Include entertaining facts or jokes when appropriate."
author_style = "Mary Shelley and Andrew Ng"

# Only the EPUB ("amazon") publisher is enabled; chapters get no "Next Chapter" link.
writer = EBookWriter(
    topic=topic,
    nchapters=nchapters,
    include_code=include_code,
    theme=theme,
    theme_extra=theme_extra,
    author_style=author_style,
    include_next=False,
    publish={'amazon'},
)
book = writer.run()

2023-04-25 23:34:46 [info     ] requesting openai.Chat...
2023-04-25 23:35:21 [info     ] received openai.Chat...
2023-04-25 23:35:27 [info     ] received openai.Chat...
2023-04-25 23:35:27 [info     ] requesting openai.Chat...
2023-04-25 23:35:27 [info     ] requesting openai.Chat...
2023-04-25 23:35:27 [info     ] requesting openai.Chat...
2023-04-25 23:35:27 [info     ] requesting openai.Chat...
2023-04-25 23:35:27 [info     ] requesting openai.Chat...
2023-04-25 23:35:27 [info     ] requesting openai.Chat...
2023-04-25 23:35:27 [info     ] requesting openai.Chat...
2023-04-25 23:35:27 [info     ] requesting openai.Chat...
2023-04-25 23:35:27 [info     ] requesting openai.Chat...
2023-04-25 23:35:27 [info     ] requesting openai.Chat...
2023-04-25 23:35:27 [info     ] requesting openai.Chat...
2023-04-25 23:35:27 [info     ] requesting openai.Chat...
2023-04-25 23:35:27 [info     ] requesting openai.Chat...
2023-04-25 23:35:27 [info     ] requesting openai.Chat...
2023-04-25 23:35:2

In [27]:
# Inspect the first page of the generated book: run() puts the table of
# contents at index 0, ahead of the chapter texts.
book[0]

"Table Of Contents:\n\n1. Introduction to Artificial Intelligence and Neural Networks\n2. Understanding TensorFlow and its Applications\n3. Overview of Frankenstein's Neural Network\n4. Victor Frankenstein's Contribution to Neural Networks\n5. Historical Development of Deep Learning\n6. Key Principles and Concepts in Deep Learning\n7. Preprocessing of Data for Neural Networks\n8. Designing a Neural Network Architecture\n9. Supervised Learning with Frankenstein's Neural Network\n10. Unsupervised Learning with Frankenstein's Neural Network\n11. Reinforcement Learning with Frankenstein's Neural Network\n12. Convolutional Neural Networks for Image Recognition\n13. Natural Language Processing with Frankenstein's Neural Network\n14. Recurrent Neural Networks for Sequential Data\n15. Autoencoders and Generative Models\n16. Deep Q-Learning for Reinforcement Learning\n17. Transfer Learning and Fine-Tuning in Neural Networks\n18. Hyperparameter Tuning and Optimization\n19. Visualization and Inte

In [9]:
# Manual check of the GitHub credentials: authenticates with the token from
# the GITHUB_TOKEN environment variable and creates a repository named "Test".
g = Github(os.environ["GITHUB_TOKEN"])
org = g.get_organization("EBookGPT")
# Create the repository under the organization
repo = org.create_repo("Test")

In [20]:
# Show the extra instructions shared by every built-in theme.
EBookWriter.theme_description

"Make sure it's engaging, relevant, and educational. Incorporate references to published journals when relevant. Include entertaining facts or jokes when appropriate."

In [12]:
# TODO: Cover Art from Dall-e/other as part of repo
# TODO: Amazon KDP integration

In [13]:
# e = EBookWriter('Fine Tuning Large Language Models in PyTorch', include_code=True, nchapters=16)
# book = e.run()

In [10]:
# Convert a previously generated book on disk to an EPUB. Every file whose
# name contains 'Chapter' is included, in sorted order.
bdir = "/home/jong/bookout/SurveyofLossFunctionsforDeepLearning"
convert_md_files_to_epub(
    'id1234567890',
    "Survey of Loss Functions for Deep Learning: A Sherlock Holmes Mystery",
    sorted(os.path.join(bdir, x) for x in os.listdir(bdir) if 'Chapter' in x),
    "/home/jong/loss.epub",
)