# The idea is to take a paper (PDF) and convert it into a lecture video that explains it intuitively.

### Steps:
1. Download PDF and get text
2. Ask ChatGPT what concepts (in order) need to be understood to understand the paper
3. Create video script
4. Create video images
5. Add audio on top of images
6. Join and done

### What to display in video?
* Given a paragraph of the script, ask Chat for the slide text to show (e.g. in Markdown format)

### Video Outline:
1. Concepts / Building blocks
2. Paper summary
3. Each part of the paper (ask Chat for parts)
4. Conclusion and Implications

In [1]:
from ChatPodcastGPT import Chat, PodcastChat, OpenAITTS
import collections
import concurrent.futures
import os
import feedparser
import structlog
import itertools
import enum
import io
import re
import json
import tempfile
import PyPDF2
import sys
from bs4 import BeautifulSoup
import requests
import retrying
import openai
import random
import IPython.display
import datetime
import PIL
import PIL.Image
import PIL.ImageDraw
import PIL.ImageFont
import base64
from pydub import AudioSegment
import tempfile
from moviepy.editor import ImageClip, concatenate_videoclips, AudioFileClip
import numpy as np
import io
import subprocess
import os
import functools
import logging
import librosa
import soundfile as sf
import threading
import traceback
import inspect


# Chat model passed as ``model=`` to every ``chat.message`` call in this notebook.
# MODEL = 'gpt-3.5-turbo-16k'
MODEL = 'gpt-4-1106-preview'
# Token budget: default chunk size for text_into_token_chunks and the
# ``max_length`` handed to most Chat instances.
MAX_TOKENS = 120_000
# MAX_TOKENS = 2_000
# NOTE(review): not referenced anywhere in the visible part of this notebook.
JOIN_NUM_DEFAULT = 300
# Speaker names used in the script prompts; index-aligned with SPEAKER_VOICES
# (script2speech picks ``voices[names.index(speaker)]``).
SPEAKER_NAMES = ['Alfred', 'Alice']
SPEAKER_VOICES = [OpenAITTS(OpenAITTS.MAN), OpenAITTS(OpenAITTS.WOMAN)]
# Upper bound for the thread pools below; some pools use a fraction of it
# (``MAX_WORKERS//2`` or ``MAX_WORKERS//4``).
MAX_WORKERS = 4
def flatten_list(a):
    """Flatten exactly one level of nesting.

    Args:
        a: an iterable whose elements are themselves iterable.

    Returns:
        A new list holding the items of every element of ``a``, in order.
        Strings are iterables too, so ``["ab"]`` flattens to ``["a", "b"]``.
    """
    return list(itertools.chain.from_iterable(a))

In [2]:
def add_thread_id(logger, log_method, event_dict):
    """
    structlog processor that tags each event with the emitting thread.

    On the main thread ``thread_id`` is set to the literal "[Main]"; on any
    other thread ``thread_id`` is the numeric identifier and ``thread_name``
    is the thread's name. ``logger`` and ``log_method`` are unused. The same
    ``event_dict`` object is mutated and returned.
    """
    current = threading.current_thread()
    if current == threading.main_thread():
        event_dict["thread_id"] = "[Main]"
        return event_dict

    event_dict.update(
        thread_id=threading.get_ident(),
        thread_name=current.name,
    )
    return event_dict

# Configure structlog: route events through the stdlib logging module and
# render them for the console. Processors are listed in the order given here;
# add_thread_id sits before the renderer so its keys appear in the output.
structlog.configure(
    processors=[
        structlog.stdlib.add_log_level,
        structlog.stdlib.add_logger_name,
        add_thread_id,
        structlog.stdlib.PositionalArgumentsFormatter(),
        structlog.processors.TimeStamper(fmt="iso"),
        structlog.dev.ConsoleRenderer()
    ],
    context_class=dict,
    logger_factory=structlog.stdlib.LoggerFactory(),
    wrapper_class=structlog.stdlib.BoundLogger,
    cache_logger_on_first_use=True,
)
# Only CRITICAL records from the 'moviepy' logger get through.
logging.getLogger('moviepy').setLevel(logging.CRITICAL)
# stdlib root logger: bare message text to stdout at INFO and above.
logging.basicConfig(stream=sys.stdout, format="%(message)s", level=logging.INFO)
# NOTE(review): keyword arguments to structlog.get_logger look like initial
# context values rather than a level filter - confirm against the structlog docs.
logger = structlog.get_logger(level=logging.INFO)

In [3]:
# Smoke test: emit one record to check how the configured logger renders it.
logger.info("hello")

[2m2023-11-10T16:25:23.712777Z[0m [[32m[1minfo     [0m] [1mhello                         [0m [[34m[1m__main__[0m] [36mthread_id[0m=[35m[Main][0m


In [4]:
def log_exception(exception, func_name):
    """Log the exception with its stack trace and the function name.

    Used as the ``retry_on_exception`` predicate in retry_with_logging.

    Args:
        exception: the exception instance that was raised.
        func_name: name of the function that raised it (used in the message).

    Returns:
        Always True, so every exception is treated as retryable.
    """
    # Build the trace from the exception object itself. traceback.format_exc()
    # only works while an exception is being handled; when this predicate is
    # invoked after the ``except`` block has exited it yields "NoneType: None".
    stack_trace = ''.join(
        traceback.format_exception(type(exception), exception, exception.__traceback__)
    )
    logger.error(f"Exception in {func_name}: {exception}\n{stack_trace}")
    return True

def retry_with_logging(stop_max_attempt_number=5, wait_fixed=2000):
    """Build a decorator that retries a function and logs every failure.

    Args:
        stop_max_attempt_number: forwarded to ``retrying.retry``; maximum
            number of attempts.
        wait_fixed: forwarded to ``retrying.retry``; fixed wait between
            attempts (milliseconds, per the retrying documentation).

    Returns:
        A decorator. Each exception raised by the decorated function is
        passed to log_exception, which logs it and requests a retry.
    """
    def decorator(func):
        @retrying.retry(stop_max_attempt_number=stop_max_attempt_number, wait_fixed=wait_fixed,
               retry_on_exception=lambda exception: log_exception(exception, func.__name__))
        # Keep the decorated function's name and docstring visible to callers.
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            return func(*args, **kwargs)
        return wrapper
    return decorator

# 1. PDF to Text

In [5]:
def extract_text_from_pdf(pdf_path):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        pdf_path: filesystem path of the PDF to read.

    Returns:
        One string with the extracted text of all pages, no separator added.
    """
    with open(pdf_path, 'rb') as pdf_file:
        pdf = PyPDF2.PdfReader(pdf_file)
        # ``or ''`` keeps the join safe should a page yield no text object.
        return ''.join(page.extract_text() or '' for page in pdf.pages)

In [6]:
# paper_path = '/Users/jong/Downloads/covid_garbage.pdf'
# paper_text = extract_text_from_pdf(paper_path)
# len(paper_text), paper_text[:1000]

In [7]:
def text_into_token_chunks(text, max_tokens=MAX_TOKENS, smoothing=0):
    """Split the text into parts based on tokens.

    The text is cut into sentences at every "." (newlines are removed first)
    and sentences are accumulated until the running part exceeds
    ``max_tokens`` as measured by ``Chat.num_tokens_from_text``.

    Args:
        text: the full text to split.
        max_tokens: token budget per part.
        smoothing: number of sentences from the end of a finished part that
            are repeated at the start of the next one, as overlap.

    Returns:
        A list of non-empty part strings, sentences joined by a space. A
        single sentence larger than ``max_tokens`` is still emitted whole.
    """
    *complete, tail = text.replace('\n', '').split(".")
    sentences = [fragment + '.' for fragment in complete]
    # Whatever follows the last period is kept as-is; it is only dropped when
    # blank, i.e. when the text ended with a period.
    if tail.strip():
        sentences.append(tail)

    all_parts = []
    current_part = []
    for sentence in sentences:
        current_part.append(sentence)
        if Chat.num_tokens_from_text(' '.join(current_part)) > max_tokens:
            # The newest sentence caused the overflow: flush everything before it.
            part_text = ' '.join(current_part[:-1])
            if part_text:
                all_parts.append(part_text)
            # Carry over the overflowing sentence plus ``smoothing`` earlier ones.
            current_part = current_part[-(smoothing+1):]

    if current_part:
        all_parts.append(' '.join(current_part))
    return all_parts

In [8]:
# paper_sections = text_into_token_chunks(paper_text, smoothing=3)
# len(paper_sections)

# 2. Concepts needed for understanding

In [9]:
def help_load_json(txt):
    """Strip a Markdown code fence from a model reply so it can be parsed as JSON.

    Handles replies wrapped in ```json ... ``` or plain ``` ... ```, with
    optional surrounding whitespace. Only the final closing fence is removed,
    so backticks inside the JSON payload survive.

    Args:
        txt: raw reply text.

    Returns:
        The fenced payload with surrounding whitespace stripped, or ``txt``
        unchanged when it does not start with a fence.
    """
    stripped = txt.strip()
    for opener in ("```json", "```"):
        if stripped.startswith(opener):
            body = stripped[len(opener):]
            return body.rsplit("```", maxsplit=1)[0].strip()
    return txt

@retry_with_logging()
def get_concepts(paper_section):
    """Ask the chat model which prerequisite concepts a paper section needs.

    Args:
        paper_section: text of one chunk of the paper.

    Returns:
        The list parsed from the model's JSON reply.

    Raises:
        json.JSONDecodeError: the reply is not valid JSON.
        ValueError: the reply is valid JSON but not a list.
        Either failure is logged and re-raised so the retry decorator can
        try again.
    """
    chat = Chat('''Given some text from a scientific journal, return a JSON formatted list containing a few prerequisite concepts needed for understanding the paper.
Respond only a JSON list and nothing else.'''.replace('\n', ' '), max_length=MAX_TOKENS)
    resp = chat.message(paper_section, model=MODEL)
    try:
        data = json.loads(help_load_json(resp))
        # Explicit check instead of ``assert`` so it still runs under ``python -O``.
        if not isinstance(data, list):
            raise ValueError(f"expected a JSON list, got {type(data).__name__}")
    except Exception:
        logger.critical(f"get_concepts Cannot parse resp: {resp}")
        raise
    return data

@retry_with_logging()
def merge_concepts(concepts):
    """Reduce a long list of prerequisite concepts to a short one via the chat model.

    Args:
        concepts: list of concept descriptions.

    Returns:
        ``concepts`` itself when it has 6 or fewer entries; otherwise the
        list parsed from the model's JSON reply.

    Raises:
        json.JSONDecodeError: the reply is not valid JSON.
        ValueError: the reply is valid JSON but not a list.
        Either failure is logged and re-raised so the retry decorator can
        try again.
    """
    if len(concepts) <= 6:
        return concepts
    chat = Chat('''Given a list of concepts needed to understand a paper, reduce them to just 5 or fewer prerequisite concepts.
Only respond as a valid JSON list, and nothing else. Order the list from least to most complex.'''.replace('\n', ' '))
    resp = chat.message(str(concepts), model=MODEL)
    try:
        data = json.loads(help_load_json(resp))
        # Explicit check instead of ``assert`` so it still runs under ``python -O``.
        if not isinstance(data, list):
            raise ValueError(f"expected a JSON list, got {type(data).__name__}")
    except Exception:
        logger.critical(f"merge_concepts Cannot parse resp: {resp}")
        raise
    return data

def get_all_concepts(paper_sections):
    """Collect prerequisite concepts for every section, then merge them.

    get_concepts runs for each section on a small thread pool
    (``MAX_WORKERS//4`` workers, at least one). The per-section lists are
    concatenated in section order and handed to merge_concepts.
    """
    pool_size = max(MAX_WORKERS//4, 1)
    gathered = []
    with concurrent.futures.ThreadPoolExecutor(max_workers=pool_size) as tpe:
        for section_concepts in tpe.map(get_concepts, paper_sections):
            gathered.extend(section_concepts)
    return merge_concepts(gathered)

In [10]:
# paper_prereqs = get_all_concepts(paper_sections)
# paper_prereqs

# 3. Create Video Script

In [11]:
# Create video script
def get_script_for_concepts(concepts):
    """Return a two-speaker video script explaining the prerequisite concepts.

    The system prompt names the speakers from SPEAKER_NAMES and asks for
    "Name: line" formatting with no conclusion; ``str(concepts)`` is sent as
    the user message.
    """
    system_prompt = f'''Given the following prerequisite concepts needed to understand a scientific paper, write a script for a video that explains them in an intuitive way.
Assume there's two speakers, {' and '.join(SPEAKER_NAMES)}.
Prefix each character's lines with their name and a :, like the following.
{SPEAKER_NAMES[0]}: Hello everyone.
{SPEAKER_NAMES[1]}: Indeed, hello!
Do not include any other script syntax.
Do not include a conclusion.'''.replace('\n', ' ')
    writer = Chat(system_prompt, max_length=MAX_TOKENS)
    return writer.message(str(concepts), model=MODEL)

def get_script_for_paper_section(paper_section):
    """Return a two-speaker educational video script for one section of the paper.

    The system prompt names the speakers from SPEAKER_NAMES and asks for
    "Name: line" formatting; ``str(paper_section)`` is sent as the user
    message.
    """
    system_prompt = f'''Given the following section of a scientific paper, write an educational script for a video that explains this in an intuitive way.
Assume there's two speakers, {' and '.join(SPEAKER_NAMES)}.
Prefix each character's lines with their name and a :, like the following.
{SPEAKER_NAMES[0]}: Hello everyone.
{SPEAKER_NAMES[1]}: Indeed, hello!
Do not include any other script syntax.'''.replace('\n', ' ')
    writer = Chat(system_prompt, max_length=MAX_TOKENS)
    return writer.message(str(paper_section), model=MODEL)

def get_entire_script(paper_prereqs, paper_sections, consolidate=False):
    """Generate the scripts for the whole video.

    One script is generated for the prerequisite concepts (when
    ``paper_prereqs`` is truthy) and one per paper section, concurrently on a
    pool of ``MAX_WORKERS//2`` workers (at least one).

    Args:
        paper_prereqs: prerequisite concepts, or a falsy value to skip them.
        paper_sections: list of paper text chunks.
        consolidate: when True, ask the model to merge all scripts into one.

    Returns:
        A list of script strings: prerequisites first (if any), then one per
        section in order. With ``consolidate=True`` the list holds the single
        merged script.
    """
    with concurrent.futures.ThreadPoolExecutor(max_workers=max(MAX_WORKERS//2, 1)) as tpe:
        runs = []
        if paper_prereqs:
            runs.append(tpe.submit(get_script_for_concepts, paper_prereqs))
        runs.extend(tpe.submit(get_script_for_paper_section, section) for section in paper_sections)
        # Map each future to its submission slot so results land in order
        # without a linear ``runs.index`` search per completion.
        position = {future: idx for idx, future in enumerate(runs)}
        # Sized by the jobs actually submitted, so no ``None`` slot is left
        # behind when the prerequisites are skipped.
        all_scripts = [None] * len(runs)
        for done, future in enumerate(concurrent.futures.as_completed(runs), start=1):
            all_scripts[position[future]] = future.result()
            logger.info(f'Done with {done} / {len(runs)}')
    if consolidate:
        chat = Chat(f'''Consolidate the following scripts that go over a scientific paper in an intuitive way.
Make it less redundant, more fun, and only include one intro and outro.
Assume there's two speakers, {' and '.join(SPEAKER_NAMES)}.
Prefix each character's lines with their name and a :, like the following.
{SPEAKER_NAMES[0]}: Hello everyone.
{SPEAKER_NAMES[1]}: Indeed, hello!
Do not include any other script syntax.'''.replace('\n', ' '), max_length=MAX_TOKENS)
        # Send the scripts as text. Flattening the list of strings would
        # split every script into single characters.
        text = chat.message('\n\n'.join(all_scripts), model=MODEL)
        all_scripts = [text]
    return all_scripts

In [12]:
# paper_script = get_entire_script(paper_prereqs, paper_sections)
# len(paper_script)

# 4. Video images

In [13]:
import time
import threading

class RateLimited:
    """Decorator that allows at most ``max_per_minute`` calls per clock minute.

    The counter is shared by all threads calling the decorated function and
    resets whenever the wall-clock minute (``time.strftime('%M')``) changes.
    A call that finds the budget used up sleeps 15 seconds and tries again.
    """

    def __init__(self, max_per_minute):
        self.max_per_minute = max_per_minute
        self.current_minute = time.strftime('%M')
        self.lock = threading.Lock()
        self.calls = 0

    def _try_acquire(self):
        """Claim one call slot for the current minute; return True on success."""
        with self.lock:
            current_minute = time.strftime('%M')
            if current_minute != self.current_minute:
                self.current_minute = current_minute
                self.calls = 0
            if self.calls < self.max_per_minute:
                self.calls += 1
                return True
            return False

    def __call__(self, fn):
        # Preserve the wrapped function's name and docstring.
        @functools.wraps(fn)
        def wrapper(*args, **kwargs):
            # Wait in a loop rather than by recursion, so a long wait cannot
            # grow the call stack. The lock is not held while sleeping.
            while not self._try_acquire():
                time.sleep(15)
            return fn(*args, **kwargs)

        return wrapper

In [50]:
class AIImage:
    """Image generation through the OpenAI images endpoint."""

    class Size(enum.Enum):
        # The member value is sent as the ``size`` argument of the request.
        LARGE = "1024x1024"
        LONG  = "1792x1024"

    @classmethod
    @RateLimited(12)
    @retry_with_logging()
    def create(cls, prompt, n=1, size=Size.LARGE):
        """Generate an image for ``prompt`` and return it base64-encoded.

        Args:
            prompt: text description of the image.
            n: number of images requested; only the first one is returned.
            size: an ``AIImage.Size`` member.

        Returns:
            The ``b64_json`` payload of the first image in the response.
        """
        logger.info(f'asking openai.image {prompt}')
        client = openai.OpenAI(api_key=openai.api_key)
        response = client.images.generate(
            prompt=prompt,
            n=n,
            size=size.value,
            model="dall-e-3",
            response_format='b64_json',
            timeout=45,
        )
        logger.info('received openai.Image...')
        first_image = response.data[0]
        return first_image.b64_json

In [15]:
# PIL.Image.open(io.BytesIO(base64.b64decode(AIImage.create('cute snail on a park bench'))))

In [16]:
@retry_with_logging()
def get_image_from_text(sentence):
    """Create an illustration for one line of the script.

    The chat model first turns the line into a short image description,
    which is then passed to ``AIImage.create``.

    Returns:
        A tuple ``(sentence, image, prompt)``: the input line, the value
        returned by ``AIImage.create``, and the description used to make it.
    """
    instructions = f'''Given
the following sentence in a script, write a concise description of an image to display while this script is read.
Only write the short description and nothing else.
Do not include specific numbers or the character names.'''.replace('\n', ' ')
    describer = Chat(instructions, max_length=MAX_TOKENS)
    image_prompt = describer.message(sentence, model=MODEL)
    encoded_image = AIImage.create(image_prompt)
    return sentence, encoded_image, image_prompt

def get_images_from_text(text):
    """Generate one image per non-blank line of ``text``.

    Lines are processed concurrently on a pool of ``MAX_WORKERS//2`` workers
    (at least one).

    Args:
        text: script text, one spoken line per newline-separated row.

    Returns:
        The get_image_from_text result for each non-blank line, in line order.
    """
    # Whitespace-only lines carry nothing to illustrate or narrate.
    sentences = [line for line in text.split('\n') if line.strip()]

    with concurrent.futures.ThreadPoolExecutor(max_workers=max(MAX_WORKERS//2, 1)) as tpe:
        runs = [tpe.submit(get_image_from_text, sentence) for sentence in sentences]
        # All jobs are already running; collecting in submission order keeps
        # the results aligned with the lines without any index lookups.
        return [run.result() for run in runs]

In [17]:
# images = get_images_from_text(paper_prereqs_script)
# len(images)

In [18]:
# for txt, img, prompt in images:
#     img = PIL.Image.open(io.BytesIO(base64.b64decode(img["b64_json"])))
#     IPython.display.display(txt)
#     IPython.display.display(prompt)
#     IPython.display.display(img)

# 5. Audio: Script to Speech

In [19]:
def speaker_sentence(sentence, names):
    """Split a script line into its speaker and the spoken text.

    Args:
        sentence: one script line, expected to look like "Name: text".
        names: the known speaker names.

    Returns:
        ``(name, text)`` when the line starts with "<name>:" for a known
        name, ignoring leading whitespace; ``text`` has the prefix and the
        whitespace after the colon removed. Otherwise
        ``(names[0], sentence)`` with the line unchanged.
    """
    candidate = sentence.lstrip()
    for name in names:
        prefix = f'{name}:'
        if candidate.startswith(prefix):
            # Strip whitespace rather than skipping exactly one character,
            # so "Name:text" does not lose its first letter.
            return name, candidate[len(prefix):].lstrip()
    return names[0], sentence

def script2speech(sentences, names, voices):
    """Convert script lines to speech, one audio result per line.

    Each line is attributed to a speaker with speaker_sentence and
    synthesised by calling ``tts`` on the voice at the same index as that
    speaker in ``names``. Lines are processed concurrently on ``MAX_WORKERS``
    threads.

    Args:
        sentences: script lines, optionally prefixed with "Name:".
        names: speaker names.
        voices: voice objects exposing ``tts(text)``, index-aligned with ``names``.

    Returns:
        The ``tts`` results in the same order as ``sentences``.
    """
    with concurrent.futures.ThreadPoolExecutor(max_workers=MAX_WORKERS) as tpe:
        jobs = []
        for sentence in sentences:
            speaker, spoken = speaker_sentence(sentence, names)
            jobs.append(tpe.submit(voices[names.index(speaker)].tts, spoken))
        # All jobs are already running; collecting in submission order keeps
        # the audio aligned with the lines without any index lookups.
        return [job.result() for job in jobs]

In [20]:
# audios = script2speech([x[0] for x in images], SPEAKER_NAMES, SPEAKER_VOICES)

In [21]:
# IPython.display.Audio(audios[2])

# 6. Join audio and video

In [22]:
def concatenate_videos(directory, output_file):
    """Join every ``*.mp4`` clip in ``directory`` into ``output_file`` with FFmpeg.

    Clips are ordered by the integer after the first underscore in the file
    name (e.g. ``clip_007.mp4``). A list file ``_files.txt`` is written into
    ``directory`` and fed to FFmpeg's concat demuxer with stream copy.

    Args:
        directory: folder holding the clips.
        output_file: path of the combined video; an existing file is removed first.

    Raises:
        subprocess.CalledProcessError: FFmpeg exited with a non-zero status.
    """
    # get list of video files in directory
    files = sorted(
        (f for f in os.listdir(directory) if f.endswith(".mp4")),
        key=lambda x: int(x.split('.mp4')[0].split('_')[1]),
    )

    # create a file that contains the list of all video files
    filenames_f = f'{directory}/_files.txt'
    with open(filenames_f, 'w') as f:
        f.writelines(f"file '{directory}/{video_file}'\n" for video_file in files)

    # concatenate all videos using FFmpeg; an argument list (no shell) keeps
    # paths containing spaces or shell metacharacters intact
    command = ["ffmpeg", "-f", "concat", "-safe", "0", "-i", filenames_f, "-c", "copy", output_file]
    print(' '.join(command))
    try:
        os.remove(output_file)
    except OSError:
        # Best effort: usually the file simply does not exist yet.
        pass
    subprocess.check_call(command, stderr=subprocess.DEVNULL)


def process_one_clip(tmpdir, i, img, audio):
    """Render one still image plus its narration as ``{tmpdir}/clip_{i:0>3}.mp4``.

    Args:
        tmpdir: working directory for the audio file, the encoder's temporary
            audio file and the resulting clip.
        i: clip index, used in the file names.
        img: image to display (converted with ``np.array``).
        audio: encoded audio bytes, written to ``audio_{i}.mp3``.
    """
    audio_path = f'{tmpdir}/audio_{i}.mp3'
    with open(audio_path, 'wb') as f:
        f.write(audio)
    audio_segment = AudioSegment.from_file(audio_path)

    # Create an ImageClip for this image and audio, with duration matching the audio
    duration = len(audio_segment) / 1000.0  # len(AudioSegment) is in milliseconds
    # Convert PIL Image to numpy array
    np_image = np.array(img)
    video_clip = ImageClip(np_image, duration=duration)
    video_clip.fps = 30
    audio_clip = AudioFileClip(audio_path)
    try:
        video_clip = video_clip.set_audio(audio_clip)
        video_clip.write_videofile(
            f"{tmpdir}/clip_{i:0>3}.mp4", codec='libx264', audio_codec='aac',
            # Keep the encoder's temporary audio next to the clip rather than
            # in the current working directory.
            temp_audiofile=f'{tmpdir}/temp-audio-{i}.m4a', remove_temp=True,
            verbose=False, logger=None,
        )
    finally:
        # Release the audio reader even when encoding fails.
        audio_clip.close()

def create_video(images, audios, outpath):
    """Render one clip per (image, audio) pair and join them into ``outpath``.

    Clips are written to a temporary directory by process_one_clip on
    ``MAX_WORKERS`` threads, then combined with concatenate_videos before
    the directory is removed.
    """
    total = len(images)
    with tempfile.TemporaryDirectory() as tmpdir:

        def render(index, image, audio):
            return process_one_clip(tmpdir, index, image, audio)

        with concurrent.futures.ThreadPoolExecutor(max_workers=MAX_WORKERS) as tpe:
            finished = 0
            for _ in tpe.map(render, range(total), images, audios):
                logger.info(f'Done with {finished} / {total}')
                finished += 1
        # Concatenate all video clips
        concatenate_videos(tmpdir, outpath)

In [23]:
# outpath = '/Users/jong/Downloads/Cell_20230725/final_video.mp4'
# create_video([PIL.Image.open(io.BytesIO(base64.b64decode(img[1]["b64_json"]))) for img in images], audios, outpath)

In [24]:
class Runner:
    """End-to-end pipeline: paper (or ready-made script) to narrated video.

    After ``run`` the intermediate results stay available as attributes:
    ``paper_script``, ``images`` (tuples of sentence, base64 image, prompt)
    and ``audios``; when the script was generated from the PDF also
    ``paper_text``, ``paper_sections`` and ``paper_prereqs``.
    """

    def __init__(self, paper_path, outpath, skip_prereqs=False):
        """Store the configuration; no work is done here.

        Args:
            paper_path: path of the source PDF (unused when ``run`` is given a script).
            outpath: path of the video file to create.
            skip_prereqs: when True, no prerequisite-concepts script is generated.
        """
        self.paper_path = paper_path
        self.outpath = outpath
        self.skip_prereqs = skip_prereqs

    def run(self, script=None):
        """Produce the video at ``self.outpath``.

        Args:
            script: optional list of script parts to use as-is. When None,
                the script is generated from the PDF at ``self.paper_path``.
        """
        if script is None:
            self.paper_text = extract_text_from_pdf(self.paper_path)
            self.paper_sections = text_into_token_chunks(self.paper_text, smoothing=3)
            if self.skip_prereqs:
                self.paper_prereqs = None
            else:
                self.paper_prereqs = get_all_concepts(self.paper_sections)
            self.paper_script = get_entire_script(self.paper_prereqs, self.paper_sections)
        else:
            self.paper_script = script

        def process_one_part(part_script):
            # Images first: their sentence field drives the narration, which
            # keeps images and audio index-aligned.
            images = get_images_from_text(part_script)
            audios = script2speech([x[0] for x in images], SPEAKER_NAMES, SPEAKER_VOICES)
            return images, audios

        with concurrent.futures.ThreadPoolExecutor(max_workers=MAX_WORKERS) as tpe:
            runs = [tpe.submit(process_one_part, part) for part in self.paper_script if part]
            # Map each future to its submission slot so results land in order
            # without a linear ``runs.index`` search per completion.
            position = {future: idx for idx, future in enumerate(runs)}
            images, audios = [None] * len(runs), [None] * len(runs)
            for done, future in enumerate(concurrent.futures.as_completed(runs), start=1):
                idx = position[future]
                images[idx], audios[idx] = future.result()
                logger.info(f'Got images and audio for {done} / {len(runs)}')

        self.images = flatten_list(images)
        self.audios = flatten_list(audios)
        create_video([PIL.Image.open(io.BytesIO(base64.b64decode(img))) for _txt, img, _prompt in self.images], self.audios, self.outpath)

In [47]:
%%time
# Rebind the speakers for this run; the reversed order makes 'George' the
# first speaker. Names stay index-aligned with SPEAKER_VOICES.
SPEAKER_NAMES = ['Jerry', 'George'][::-1]
# Fragments interpolated into the system prompt built further down.
adjectives = 'intuitive, educational, and funny'
style = ' in the style of a hilarious conversation between Jerry Seinfeld and George Costanza'
# Video title and outline; sent verbatim as the user message that the chat
# model turns into a script.
title = """Title: Navigating MLR Model Evaluation - Mastering Hypothesis Tests in Multiple Regression!
Outline:
Introduction
Kickoff Remarks
Introduction to the importance of model evaluation in Multiple Linear Regression (MLR).
Overview of the different types of hypothesis tests in MLR.
Section 1: Hypothesis Testing in MLR
Basics of Hypothesis Testing
Refreshing the concept of hypothesis testing in statistical analysis.
Transition from Simple Linear Regression to MLR in the context of hypothesis testing.
Three Types of Hypothesis Tests in MLR
Test for a single slope parameter being 0.
Test for all slope parameters being 0.
Test for a subset of slope parameters being 0.
Section 2: The General Linear F-Test
Understanding the General Linear F-Test
Introduction and explanation of the General Linear F-test.
Its role and significance in performing hypothesis tests in MLR.
Applying the F-Test in MLR
Step-by-step guide on conducting the General Linear F-test.
Interpreting the results of the F-test in the context of MLR.
Section 3: Sequential Sums of Squares
Exploring Sequential Sums of Squares
What are Sequential Sums of Squares and their relevance in MLR?
How Sequential Sums of Squares tie into hypothesis testing.
Computational Approach
Demonstration of calculating Sequential Sums of Squares.
Practical examples and case studies.
Section 4: Performing Hypothesis Tests in MLR
Detailed Walkthrough
Step-by-step instructions on performing each of the three types of hypothesis tests in MLR.
Using software tools and coding examples for conducting these tests.
Interpretation of Test Results
How to interpret the outcomes of these hypothesis tests.
Understanding the implications of these tests for MLR model evaluation.
Section 5: Practical Application and Common Missteps
Real-World Applications
Applying these tests in different data scenarios.
Case studies showcasing the use of hypothesis testing in MLR.
Avoiding Pitfalls
Common errors to avoid in MLR hypothesis testing.
Best practices for robust and accurate model evaluation.
Conclusion
Summing Up the Lesson
Recap of the key points and their practical importance.
Preview of what's next in the course.
"""
# System prompt: turn the outline into a two-speaker script in the chosen
# style; newlines are collapsed to spaces before it is handed to Chat.
chat = Chat(f'''Turn a video's info into a long script{style}.
Make it {adjectives}.
There are two speakers, {' and '.join(SPEAKER_NAMES)}.
Prefix each speaker's lines with their name and a :, like the following.
{SPEAKER_NAMES[0]}: 
{SPEAKER_NAMES[1]}: 
Do not include any other script syntax.'''.replace('\n', ' '), max_length=MAX_TOKENS)
# Generate the full script from the outline in one request.
text = chat.message(
    title,
    model=MODEL,
)
outpath = '/Users/jong/Downloads/stats06_20231109.mp4'
# Rebinds the module-level setting; the pipeline functions read MAX_WORKERS
# when they are called, so the pools created from here on use 8.
MAX_WORKERS = 8
# No PDF is involved: the script is supplied directly, so paper_path is None.
runner = Runner(None, outpath, skip_prereqs=True)
# Every non-empty line of the generated script becomes its own part.
runner.run(script=[x for x in text.split('\n') if x])

[2m2023-11-10T21:08:42.060322Z[0m [[32m[1minfo     [0m] [1mrequesting openai...          [0m [[34m[1mChatPodcastGPT[0m] [36mthread_id[0m=[35m[Main][0m
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
[2m2023-11-10T21:09:02.350873Z[0m [[32m[1minfo     [0m] [1mreceived openai...            [0m [[34m[1mChatPodcastGPT[0m] [36mthread_id[0m=[35m[Main][0m
[2m2023-11-10T21:09:02.364563Z[0m [[32m[1minfo     [0m] [1mrequesting openai...          [0m [[34m[1mChatPodcastGPT[0m] [36mthread_id[0m=[35m123145683095552[0m [36mthread_name[0m=[35mThreadPoolExecutor-250_0[0m
[2m2023-11-10T21:09:02.366536Z[0m [[32m[1minfo     [0m] [1mrequesting openai...          [0m [[34m[1mChatPodcastGPT[0m] [36mthread_id[0m=[35m123145699885056[0m [36mthread_name[0m=[35mThreadPoolExecutor-247_0[0m
[2m2023-11-10T21:09:02.367279Z[0m [[32m[1minfo     [0m] [1mrequesting openai...          [0m [[34m[1mChatPodcastGPT[0m] [

In [None]:
# Print the narration without the "Name: " prefixes. Index -1 keeps lines
# that have no prefix, where index 1 would raise IndexError.
print("\n".join([t.split(': ', maxsplit=1)[-1] for t, *_ in runner.images]))

In [48]:
# %%time
# # paper_path = '/Users/jong/Downloads/covid_garbage.pdf'
# paper_path  = '/Users/jong/Downloads/2311.02745.pdf'
# outpath = (paper_path[:-4] + '.mp4').replace(' ', '_')
# MAX_WORKERS = 8
# runner = Runner(paper_path, outpath, skip_prereqs=True)
# runner.run()

In [54]:
# for txt, img, prompt in runner.images:
#     img = PIL.Image.open(io.BytesIO(base64.b64decode(img)))
#     IPython.display.display(txt)
#     IPython.display.display(prompt)
#     IPython.display.display(img)

In [55]:
# create_video([PIL.Image.open(io.BytesIO(base64.b64decode(img))) for _txt, img, _prompt in runner.images], runner.audios, runner.outpath)