In [165]:
import openai
import tiktoken
import tempfile
import IPython
import structlog
import random
import enum
import re
import requests
import subprocess
import concurrent.futures
import retrying
import string, os
# Shared structured logger used by every class in this notebook.
logger = structlog.getLogger()
# File that holds the OpenAI API key. Set the OPENAI_API_KEY_PATH environment
# variable to point somewhere else; the original location is the fallback.
openai.api_key_path = os.environ.get('OPENAI_API_KEY_PATH', '/home/jong/.openai_key')

In [188]:
class Chat:
    """A single chat conversation with the OpenAI chat-completion endpoint.

    The conversation is kept in ``self._history`` as a list of
    ``{"role": ..., "content": ...}`` dicts whose first entry is always the
    system prompt. Before each request the oldest non-system messages are
    dropped until the history fits within ``max_length`` tokens.
    """

    def __init__(self, system, max_length=4096//2):
        """Start a conversation.

        Args:
            system: Text of the system prompt (kept as the first message).
            max_length: Token budget for the stored history before a request.
        """
        self._system = system
        self._max_length = max_length
        self._history = [
            {"role": "system", "content": self._system},
        ]

    @classmethod
    def num_tokens_from_messages(cls, messages, model="gpt-3.5-turbo"):
        """Returns the number of tokens used by a list of messages."""
        encoding = tiktoken.encoding_for_model(model)
        num_tokens = 0
        for message in messages:
            num_tokens += 4  # every message follows <im_start>{role/name}\n{content}<im_end>\n
            for key, value in message.items():
                num_tokens += len(encoding.encode(value))
                if key == "name":  # if there's a name, the role is omitted
                    num_tokens += -1  # role is always required and always 1 token
        num_tokens += 2  # every reply is primed with <im_start>assistant
        return num_tokens

    def _trim_history(self):
        """Drop the oldest non-system messages until the history fits the budget."""
        # TODO: Optimize this if slow through easy caching
        while len(self._history) > 1 and self.num_tokens_from_messages(self._history) > self._max_length:
            dropped = self._history.pop(1)
            logger.info(f'Popping message: {dropped}')

    @retrying.retry(stop_max_attempt_number=5, wait_fixed=2000)
    def _request_reply(self, pending):
        """Trim the history and request one completion (retried on failure).

        ``pending`` holds the not-yet-committed user message(s). They are sent
        with the request but NOT stored in ``self._history`` here, so a retry
        never duplicates them. Trimming is idempotent, so repeating it on a
        retry is harmless.
        """
        self._trim_history()
        logger.info('requesting openai.Chat...')
        resp = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=self._history + pending,
        )
        logger.info('received openai.Chat...')
        return resp.choices[0].message.content

    def message(self, next_msg=None):
        """Send ``next_msg`` (if given) and return the assistant's reply text.

        The request is attempted up to 5 times with a 2 second pause between
        attempts. The user message and the reply are appended to the history
        only after a request succeeds, so failed attempts leave no duplicate
        or dangling messages behind.

        Args:
            next_msg: User message to send, or ``None`` to request a
                completion of the history as it stands.

        Returns:
            The content of the first choice returned by the API.
        """
        pending = [] if next_msg is None else [{"role": "user", "content": next_msg}]
        text = self._request_reply(pending)
        self._history.extend(pending)
        self._history.append({"role": "assistant", "content": text})
        return text

In [209]:
class Image:
    """Thin convenience wrapper around ``openai.Image.create``."""

    class Size(enum.Enum):
        """Image resolutions accepted by ``create``."""
        SMALL = "256x256"
        MEDIUM = "512x512"
        LARGE = "1024x1024"

    @classmethod
    @retrying.retry(stop_max_attempt_number=5, wait_fixed=2000)
    def create(cls, prompt, n=1, size=Size.SMALL):
        """Generate ``n`` images for ``prompt``.

        The call is attempted up to 5 times with a 2 second pause in between.

        Returns:
            The single entry of the response's ``"data"`` list when ``n == 1``,
            otherwise the whole ``"data"`` list.
        """
        logger.info('requesting openai.Image...')
        response = openai.Image.create(prompt=prompt, n=n, size=size.value)
        logger.info('received openai.Image...')
        images = response["data"]
        # A single image is unwrapped for convenience.
        return images[0] if n == 1 else images

In [210]:
# Smoke test: request one small image and display the raw API entry.
Image.create(prompt='Detective Yoda')

2023-04-13 18:06:21 [info     ] requesting openai.Image...
2023-04-13 18:06:26 [info     ] received openai.Image...


<OpenAIObject at 0x7efc28c114f0> JSON: {
  "url": "https://oaidalleapiprodscus.blob.core.windows.net/private/org-ct6DYQ3FHyJcnH1h6OA3fR35/user-qvFBAhW3klZpvcEY1psIUyDK/img-v4XrxHZVPaa43agYj2g3PM40.png?st=2023-04-13T23%3A38%3A04Z&se=2023-04-14T01%3A38%3A04Z&sp=r&sv=2021-08-06&sr=b&rscd=inline&rsct=image/png&skoid=6aaadede-4fb3-4698-a8f6-684d7786b067&sktid=a48cca56-e6da-484e-a814-9c849652bcb3&skt=2023-04-13T17%3A15%3A26Z&ske=2023-04-14T17%3A15%3A26Z&sks=b&skv=2021-08-06&sig=HqHMc9GFzXuqWd04QhJoRAn8HApywj7jnC3HQ32lBbI%3D"
}

In [230]:
class EBookWriter:
    """Writes a themed, multi-chapter markdown e-book and publishes it.

    The chapter list and the chapter texts are produced through ``Chat``;
    an optional illustration per chapter is produced through ``Image``.
    ``run`` drives the whole pipeline: outline, chapters (in parallel),
    writing the files to disk and pushing them to a new GitHub repository.
    """

    theme_description = "Make sure it's engaging, relevant, and educational. Incorporate references to published journals when relevant. Include entertaining facts or jokes when appropriate."

    # Known story themes mapped to the extra instructions sent with them.
    themes = {
        "Sherlock Holmes mystery": theme_description,
        "Dracula story": theme_description,
        "Frankenstein's Monster story": theme_description,
        "Robin Hood story": theme_description,
        "King Arthur and the Knights of the Round Table story": theme_description,
        "Greek Mythology epic": theme_description,
        "Alice in Wonderland trippy story": theme_description,
        "Wizard of Oz parable": theme_description,
    }

    # Directory under which every book gets its own sub-directory.
    # Override on the class or an instance to write somewhere else.
    output_root = '/home/jong/bookout/'

    def __init__(self, topic, nchapters=16, include_code=True, theme=None, theme_extra=None, author_style=None):
        """Configure a book.

        Args:
            topic: Subject of the book.
            nchapters: Number of chapters requested for the outline.
            include_code: Whether chapters are asked to contain code samples.
            theme: Story theme; ``None`` picks a random entry of ``themes``
                for each chapter.
            theme_extra: Extra instructions for the theme; ``None`` uses the
                entry in ``themes`` (or ``theme_description`` for a theme
                that is not listed there).
            author_style: Optional author whose style should be imitated.
        """
        self.topic = topic
        self.nchapters = nchapters
        self.include_code = include_code
        self.theme = theme
        self.theme_extra = theme_extra
        self.author_style = author_style

    def get_chapters(self):
        """Ask the model for a table of contents and return the parsed lines.

        Returns:
            List of strings of the form ``"<number>. <title>"``; empty (with a
            logged warning) when no such line is found in the reply.
        """
        chat = Chat("You are EBookGPT. Generate chapters for a textbook topic.")
        resp = chat.message(f'Write the table of contents for a textbook about {self.topic} involving {self.nchapters} chapters. Just return the ordered list of chapters and nothing else. Do not include a conclusion.')
        chapter_pattern = re.compile(r'\d+\.\s+.*')
        chapters = chapter_pattern.findall(resp)
        if not chapters:
            logger.warning(f'Could not parse message for chapters! Message:\n{resp}')
        return chapters

    def get_special_guest_for_chapter(self, chapter):
        """Ask the model to name a special guest for ``chapter``; returns its reply."""
        chat = Chat(f'''You are EBookGPT. You write chapters for textbooks on {self.topic}.
You will respond with just the name of a special guest who should appear in a chapter given to you.
Only respond with the name. Do not say anything else.''')
        return chat.message(f'Who is a good special guest for a chapter on {chapter}?')

    def write_chapter(self, last_chapter, curr_chapter, chapter_idx, guest_chance=0.5, image_chance=1.0, has_next=True):
        """Write one chapter and return it as a markdown string.

        Args:
            last_chapter: Title of the previous chapter, or ``None``.
            curr_chapter: Title of the chapter to write.
            chapter_idx: Page number of this chapter; the "Next Chapter" link
                points at page ``chapter_idx + 1``.
            guest_chance: Probability (0..1) of including a special guest.
            image_chance: Probability (0..1) of adding a generated image.
            has_next: When ``False`` the "Next Chapter" link is omitted
                (used for the final chapter, which has no following page).

        Returns:
            The chapter text: optional image, introduction, story, closing
            section and (unless ``has_next`` is ``False``) the next-page link.
        """
        text = []
        theme = self.theme if self.theme is not None else random.choice(list(self.themes.keys()))
        if self.theme_extra is not None:
            theme_extra = self.theme_extra
        else:
            # A custom theme that is not in `themes` falls back to the generic
            # description instead of raising KeyError.
            theme_extra = self.themes.get(theme, self.theme_description)
        system = f'''You are EBookGPT. You write chapters for textbooks on {self.topic} in the form of a {theme}.
The {theme} must teach and be solved by {self.topic}{" code. Make sure to include code samples." if self.include_code else ""}.
{theme_extra}
Write all responses in fancy github md format.
Do not say responses to the user such as "sure".'''
        if self.author_style is not None:
            system += f' Write in the style of {self.author_style}.'
        chat = Chat(system)

        # 1) Introduction
        msg = f'You are writing a book about {self.topic}. Write the introduction to the chapter about {curr_chapter}. Write in fancy github md format.'
        guest = None
        if random.uniform(0, 1) <= guest_chance:
            guest = self.get_special_guest_for_chapter(curr_chapter)
            msg += f' Include special guest {guest} in this chapter.'
        if last_chapter is not None:
            msg += f' Last chapter was about {last_chapter}.'
        text.append(chat.message(msg))

        # 2) Story and resolution
        msg = f'Write the {theme} and resolution to the chapter teaching {curr_chapter}. Write in fancy github md format.'
        if guest is not None:
            msg += f' Include special guest {guest}'
        text.append(chat.message(msg))

        # 3) Code walkthrough or conclusion
        if self.include_code:
            msg = f'Explain the code used to resolve the {theme}. Write in fancy github md format.'
        else:
            msg = "Write a conclusion for the above."
        text.append(chat.message(msg))

        # Add image
        if random.uniform(0, 1) <= image_chance:
            img_prompt = chat.message("Write a DALL-E image generation prompt for this chapter in less than 1000 characters.").replace("\n", " ")[:1000]
            # NOTE(review): the sample URL returned by the image API carries
            # expiry query parameters, so this link is presumably short-lived;
            # consider downloading the image into the book directory.
            img = Image.create(img_prompt)["url"]
            # [![name](link to image on GH)](link to your URL)
            img_md = f"![{img_prompt}]({img})\n\n"
            text.insert(0, img_md)
        # Add link to next page
        if has_next:
            text.append(f'\n\n[Next Chapter]({chapter_idx+1:02d}_Chapter{chapter_idx+1:02d}.md)')
        return '\n'.join(text)

    def write_book(self, book):
        """Write every page of ``book`` to disk, then publish the directory.

        Pages are written as ``NN_ChapterNN.md`` under
        ``<output_root>/<topic without punctuation and spaces>/``.

        Raises:
            ValueError: if the topic contains nothing but punctuation/spaces,
                which would yield an empty directory and repository name.
        """
        topic_normal = self.topic.translate(str.maketrans('', '', string.punctuation+' '))
        if not topic_normal:
            raise ValueError(f'Topic {self.topic!r} yields an empty directory/repository name')
        # The trailing '' keeps the trailing separator on the directory path.
        outdir = os.path.join(self.output_root, topic_normal, '')
        os.makedirs(outdir, exist_ok=True)
        for i, page in enumerate(book):
            page_title = f'{i:02d}_Chapter{i:02d}.md'
            # Model output routinely contains non-ASCII text; do not depend on
            # the platform's default encoding.
            with open(os.path.join(outdir, page_title), 'w', encoding='utf-8') as f:
                f.write(page)
        self.publish_book(outdir, topic_normal)

    def publish_book(self, bookdir, topic_normal):
        """Create the public GitHub repository for the book and push ``bookdir``.

        Commands run in ``bookdir`` and raise
        ``subprocess.CalledProcessError`` on the first failure.
        """
        def run_cmd(*argv):
            # Argument lists (no shell) keep the topic-derived name from being
            # interpreted by a shell.
            return subprocess.check_output(list(argv), cwd=bookdir)

        run_cmd("gh", "repo", "create", "--public", f"EBookGPT/{topic_normal}")
        run_cmd("git", "init")
        # Needs the shell for the `00*` glob; the command has no interpolated input.
        subprocess.check_output("ln -sfn 00* README.md", cwd=bookdir, shell=True)
        run_cmd("git", "add", ".")
        run_cmd("git", "remote", "add", "origin", f"git@github.com:EBookGPT/{topic_normal}.git")
        run_cmd("git", "commit", "-am", "Book")
        run_cmd("git", "checkout", "-b", "main")
        run_cmd("git", "push", "origin", "main", "-u")

    def make_cover(self):
        """Generate cover art and return it as a base64-encoded string.

        ``Image.create`` yields an entry holding a ``"url"``, so the image is
        downloaded and encoded here rather than read from a ``"b64_json"``
        field that the entry does not contain.
        """
        import base64

        url = Image.create(f"""{self.topic}, 4k Award Winning, Concept, Digital Art""")["url"]
        resp = requests.get(url, timeout=60)
        resp.raise_for_status()
        return base64.b64encode(resp.content).decode("ascii")

    def run(self, nthreads=None):
        """Generate the whole book, write/publish it, and return its pages.

        Args:
            nthreads: Worker threads for chapter writing; defaults to one per
                chapter.

        Returns:
            List of page strings: the table of contents followed by one entry
            per chapter. Failures while writing/publishing are logged and do
            not prevent the pages from being returned.
        """
        # get chapters of book
        chapters = self.get_chapters()
        # Add conclusion/summary
        chapters.append(f'{len(chapters)+1}. Conclusion of {" ".join(chapters)}')
        # Write chapters
        text = [None] * len(chapters)
        last_idx = len(chapters) - 1
        with concurrent.futures.ThreadPoolExecutor(max_workers=nthreads or len(chapters)) as thread_pool:
            tasks = {}
            for i, curr_chapter in enumerate(chapters):
                prev_chapter = chapters[i - 1] if i > 0 else None
                # Page 0 is the table of contents, so chapter i lives on page i+1.
                future = thread_pool.submit(
                    self.write_chapter, prev_chapter, curr_chapter, i+1, has_next=i < last_idx,
                )
                tasks[future] = i
            for future in concurrent.futures.as_completed(tasks):
                idx = tasks[future]
                text[idx] = future.result()
        book = ['Table Of Contents:\n\n'+'\n'.join(chapters[:-1] + [f"{len(chapters)}. Conclusion"])] + text
        try:
            self.write_book(book)
        except Exception as e:
            # Best effort: the generated pages are still returned to the caller.
            logger.exception(e)
        return book

In [231]:
# Settings for the first book; None lets EBookWriter fall back to its defaults.
topic = "AI Alignment"
nchapters = 8
include_code = True
theme = theme_extra = None
author_style = "Arthur Conan Doyle"
writer = EBookWriter(
    topic=topic,
    nchapters=nchapters,
    include_code=include_code,
    theme=theme,
    theme_extra=theme_extra,
    author_style=author_style,
)
book = writer.run()

2023-04-13 18:41:08 [info     ] requesting openai.Chat...
2023-04-13 18:41:11 [info     ] received openai.Chat...
2023-04-13 18:41:11 [info     ] requesting openai.Chat...
2023-04-13 18:41:11 [info     ] requesting openai.Chat...
2023-04-13 18:41:11 [info     ] requesting openai.Chat...
2023-04-13 18:41:11 [info     ] requesting openai.Chat...
2023-04-13 18:41:11 [info     ] requesting openai.Chat...
2023-04-13 18:41:11 [info     ] requesting openai.Chat...
2023-04-13 18:41:11 [info     ] requesting openai.Chat...
2023-04-13 18:41:11 [info     ] requesting openai.Chat...
2023-04-13 18:41:11 [info     ] requesting openai.Chat...
2023-04-13 18:41:11 [info     ] received openai.Chat...
2023-04-13 18:41:11 [info     ] requesting openai.Chat...
2023-04-13 18:41:11 [info     ] received openai.Chat...
2023-04-13 18:41:11 [info     ] received openai.Chat...
2023-04-13 18:41:11 [info     ] requesting openai.Chat...
2023-04-13 18:41:11 [info     ] requesting openai.Chat...
2023-04-13 18:41:11 [i

Switched to a new branch 'main'
To github.com:EBookGPT/AIAlignment.git
 * [new branch]      main -> main


In [222]:
# TODO: Cover Art from Dall-e
# TODO: LeanPub integration

In [232]:
# Second book: 16 chapters with code samples, random theme per chapter.
e = EBookWriter(
    topic='Fine Tuning Large Language Models in PyTorch',
    nchapters=16,
    include_code=True,
)
book = e.run()
# IPython.display.HTML(f'<img src="data:image/png;base64,{e.make_cover()}" />')

2023-04-13 18:43:27 [info     ] requesting openai.Chat...
2023-04-13 18:43:29 [info     ] requesting openai.Chat...
2023-04-13 18:43:37 [info     ] received openai.Chat...
2023-04-13 18:43:37 [info     ] requesting openai.Chat...
2023-04-13 18:43:37 [info     ] requesting openai.Chat...
2023-04-13 18:43:37 [info     ] requesting openai.Chat...
2023-04-13 18:43:37 [info     ] requesting openai.Chat...
2023-04-13 18:43:37 [info     ] requesting openai.Chat...
2023-04-13 18:43:37 [info     ] requesting openai.Chat...
2023-04-13 18:43:37 [info     ] requesting openai.Chat...
2023-04-13 18:43:37 [info     ] requesting openai.Chat...
2023-04-13 18:43:37 [info     ] requesting openai.Chat...
2023-04-13 18:43:37 [info     ] requesting openai.Chat...
2023-04-13 18:43:37 [info     ] requesting openai.Chat...
2023-04-13 18:43:37 [info     ] requesting openai.Chat...
2023-04-13 18:43:37 [info     ] requesting openai.Chat...
2023-04-13 18:43:37 [info     ] requesting openai.Chat...
2023-04-13 18:43

Switched to a new branch 'main'
To github.com:EBookGPT/FineTuningLargeLanguageModelsinPyTorch.git
 * [new branch]      main -> main
