In [40]:
import gradio as gr
import re
import feedparser
import requests
from milvus import default_server
from pymilvus import connections, utility
from langchain.chains.summarize import load_summarize_chain
from langchain.document_loaders import PyPDFLoader
from langchain_openai import ChatOpenAI
from langchain.chains.combine_documents.stuff import StuffDocumentsChain
from langchain.chains.llm import LLMChain
from langchain.prompts import PromptTemplate
from langchain_openai import ChatOpenAI
from dotenv import load_dotenv
import os
import requests
from datetime import datetime
from pdf2image import convert_from_path
from openai import OpenAI
import re
import base64
from PIL import Image
from pydub import AudioSegment
from moviepy.editor import ImageSequenceClip, AudioFileClip, concatenate_videoclips
from moviepy.config import change_settings
from moviepy.editor import concatenate_videoclips

In [3]:
# Tell MoviePy which ffmpeg executable to use.
change_settings({"FFMPEG_BINARY": "/opt/homebrew/bin/ffmpeg"})

# Start the default Milvus server, then connect to it on its listening port.
default_server.start()
host = "127.0.0.1"
port = default_server.listen_port
connections.connect(host=host, port=port)

# Show the HTTP address of the running server.
my_uri = f"http://localhost:{port}"
print(my_uri)

http://localhost:19530


In [51]:
def get_env_variables():
    """Collect the API keys this notebook needs from the process environment.

    Returns:
        dict: maps each variable name to its value, or to None when the
        variable is not set.
    """
    config = {}
    for variable in ('OPENAI_API_KEY', 'ELEVEN_LABS_API_KEY'):
        config[variable] = os.getenv(variable)
    return config

def download_and_save_pdf(url):
    """Download the arXiv paper referenced by ``url`` and save it as ``<arxiv_id>.pdf``.

    The ID is extracted with ``arxiv_id_from_url``, the arXiv API is queried
    for it, and the PDF of the first returned entry is written to the current
    working directory.

    Args:
        url: arXiv PDF URL.

    Returns:
        The arXiv ID (str) on success. On any failure a human-readable
        message string is returned instead (no exception is raised), so the
        caller must check the result before using it as an ID.
    """
    arxiv_id = arxiv_id_from_url(url)
    if not arxiv_id:
        return "Invalid arXiv PDF URL format. Please enter a valid URL."

    try:
        # Make a request to the arXiv API
        feed = feedparser.parse(f'http://export.arxiv.org/api/query?id_list={arxiv_id}')

        # Test for an empty entry list, not just for the key: a key-presence
        # check lets an unknown ID fall through and return None.
        entries = feed.get('entries')
        if not entries:
            return f"No entries found for arXiv ID {arxiv_id}"

        # Only the first entry is used (the query is for a single ID).
        entry = entries[0]
        pdf_link = entry.link.replace('/abs/', '/pdf/') + '.pdf'

        # Download the PDF; fail on HTTP errors instead of saving an error
        # page under a .pdf name, and do not hang forever on a dead connection.
        response = requests.get(pdf_link, timeout=60)
        response.raise_for_status()

        # Save the PDF in the local directory with the name based on the arXiv ID
        with open(f'{arxiv_id}.pdf', 'wb') as pdf_file:
            pdf_file.write(response.content)

        print(f"PDF downloaded and saved as {arxiv_id}.pdf")
        return arxiv_id

    except Exception as e:
        # Best-effort contract: report the problem as a message string.
        return f"Error extracting information: {e}"

def arxiv_id_from_url(url):
    """Return the numeric arXiv ID embedded in an ``arxiv.org/pdf/...`` URL.

    Only the ``<digits>.<digits>`` part is captured; any version suffix or
    ``.pdf`` extension is dropped. Returns None when the URL does not match.
    """
    found = re.search(r'arxiv\.org/pdf/(\d+\.\d+)', url)
    return found.group(1) if found else None

def create_folder(folder_name):
    """Create ``folder_name`` (including parents) unless it already exists.

    Prints which of the two cases happened; returns nothing.
    """
    if os.path.exists(folder_name):
        print(f"The folder '{folder_name}' already exists.")
        return

    os.makedirs(folder_name)
    print(f"The folder '{folder_name}' has been created.")


def text_to_speech(text_for_TTS, arxiv_id):
    """Send ``text_for_TTS`` to the ElevenLabs text-to-speech endpoint and save the MP3.

    The audio is written to ``audio_voiceover/output_<arxiv_id>.mp3``; the
    folder is created when missing. The API key is read from the
    ``ELEVEN_LABS_API_KEY`` environment variable.

    Args:
        text_for_TTS: narration text to synthesize.
        arxiv_id: identifier used to build the output file name.

    Returns:
        Path of the saved MP3, or None when the API answers with a non-200
        status (the status and response body are printed in that case).
    """
    # Specify the folder name
    folder_name = "audio_voiceover"
    ELEVEN_LABS_API_KEY = os.environ.get("ELEVEN_LABS_API_KEY")

    CHUNK_SIZE = 1024
    # NOTE(review): the last path segment is presumably the ElevenLabs voice ID — confirm.
    url = "https://api.elevenlabs.io/v1/text-to-speech/bVMeCyTHy58xNoL34h3p"

    headers = {
        "Accept": "audio/mpeg",
        "Content-Type": "application/json",
        "xi-api-key": ELEVEN_LABS_API_KEY
    }

    data = {
        "text": text_for_TTS,
        "model_id": "eleven_monolingual_v1",
        "voice_settings": {
            "stability": 0.5,
            "similarity_boost": 0.5
        }
    }

    response = requests.post(url, json=data, headers=headers)

    if response.status_code != 200:
        print(f"Error: {response.status_code} - {response.text}")
        return None

    # The output folder is not created anywhere else, so make sure it exists
    # before opening the file (otherwise a fresh run fails with FileNotFoundError).
    os.makedirs(folder_name, exist_ok=True)

    # One file per paper: the name is derived from the arXiv ID
    filename = f'output_{arxiv_id}.mp3'
    output_file = f"{folder_name}/{filename}"

    with open(output_file, 'wb') as f:
        for chunk in response.iter_content(chunk_size=CHUNK_SIZE):
            if chunk:
                f.write(chunk)

    print(f"Recording saved in {folder_name}/{filename}")
    return output_file

def convert_pdf_to_png(pdf_path):
    """Render every page of ``pdf_path`` to a PNG inside ``<pdf stem>_pngs``.

    Pages are saved as ``<folder>_page_<n>.png`` with ``n`` starting at 1.
    Afterwards every ``.ppm`` file found in the folder is deleted
    (presumably intermediates written by ``convert_from_path`` — confirm).
    """
    pdf_stem = os.path.splitext(os.path.basename(pdf_path))[0]
    folder_name = pdf_stem + "_pngs"
    if not os.path.exists(folder_name):
        os.makedirs(folder_name)

    # Rasterize the PDF; the page images come back as a list.
    pages = convert_from_path(pdf_path, output_folder=folder_name)

    for page_number, page in enumerate(pages, start=1):
        target = os.path.join(folder_name, f"{folder_name}_page_{page_number}.png")
        page.save(target, "PNG")

    print(f"All pages converted and saved in the folder: {folder_name}")

    # Remove the .ppm files only after all PNGs have been written.
    leftovers = [name for name in os.listdir(folder_name) if name.endswith(".ppm")]
    for name in leftovers:
        os.remove(os.path.join(folder_name, name))

    print(f".ppm files deleted in the folder: {folder_name}")

# Function to encode the image
def encode_image(image_path):
    """Read the file at ``image_path`` and return its bytes as a base64 text string."""
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode('utf-8')

def generate_image_responses(image_folder):
    """Ask the OpenAI vision model which page images contain a diagram or visual.

    Every ``.jpg``/``.jpeg``/``.png`` file in ``image_folder`` is base64-encoded
    and attached to a single user message. The returned choices are printed;
    nothing is returned.

    Args:
        image_folder: folder containing the page images.
    """
    import mimetypes  # local import: only needed to label the data URLs

    # Initialize the OpenAI client
    client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

    # Content parts of the single user message: the question first, then the images
    content = [
        {
            "type": "text",
            "text": "If the image has a diagram or visual, output the file name in a list format and whether it is at the top or bottom of the page?",
        },
    ]

    # Sort the listing so the images are sent in a stable order
    # (os.listdir returns entries in arbitrary order).
    for image_filename in sorted(os.listdir(image_folder)):
        if not image_filename.endswith((".jpg", ".jpeg", ".png")):
            continue

        image_path = f"{image_folder}/{image_filename}"
        base64_image = encode_image(image_path)

        # Label the data URL with the file's real type; PNG pages were
        # previously declared as image/jpeg.
        mime_type = mimetypes.guess_type(image_filename)[0] or "image/jpeg"

        content.append({
            "type": "image_url",
            "image_url": {
                "url": f"data:{mime_type};base64,{base64_image}",
                "detail": "low"},
        })

    messages = [{"role": "user", "content": content}]

    # Make the OpenAI API call
    response = client.chat.completions.create(
        model="gpt-4-vision-preview",
        messages=messages,
        max_tokens=100,
    )

    # Print the generated responses
    for choice in response.choices:
        print(choice)

def cut_pngs_in_half(directory_path):
    """Split every PNG in ``directory_path`` into a top and a bottom half.

    For ``name.png`` the halves are written next to it as
    ``name_cropped_1.png`` (top) and ``name_cropped_2.png`` (bottom). Files
    that are themselves such halves are skipped, so running the function again
    on the same folder does not crop the halves a second time.

    Args:
        directory_path: folder to process. When it does not exist, an error is
            printed and the function returns without doing anything.
    """
    # Ensure the directory path is valid
    if not os.path.exists(directory_path):
        print(f"Error: Directory '{directory_path}' does not exist.")
        return

    # Snapshot the file list first so the halves written below are not picked up in this run
    files = [f for f in os.listdir(directory_path) if os.path.isfile(os.path.join(directory_path, f))]

    for file_name in files:
        if not file_name.lower().endswith('.png'):
            continue

        stem = os.path.splitext(file_name)[0]

        # Skip halves produced by an earlier run
        if stem.endswith(('_cropped_1', '_cropped_2')):
            continue

        top_name = f"{stem}_cropped_1.png"
        bottom_name = f"{stem}_cropped_2.png"

        with Image.open(os.path.join(directory_path, file_name)) as img:
            width, height = img.size
            middle = height // 2

            # Cut the image in half (top and bottom)
            top_half = img.crop((0, 0, width, middle))
            bottom_half = img.crop((0, middle, width, height))

            top_half.save(os.path.join(directory_path, top_name), 'PNG')
            bottom_half.save(os.path.join(directory_path, bottom_name), 'PNG')

        # Report the names that were actually written
        print(f"Images saved: {top_name} (top) and {bottom_name} (bottom)")


def analyze_mp3_length(mp3_path):
    """Return the length of the audio file at ``mp3_path`` in seconds (float)."""
    segment = AudioSegment.from_file(mp3_path)
    # len() of the segment is divided by 1000 to obtain seconds
    segment_length = len(segment)
    return segment_length / 1000.0

def fetch_cropped_images(image_folder):
    """Keep only the cropped PNGs in ``image_folder`` and return them in page order.

    WARNING: destructive. Every regular file in the folder that is not a
    ``.png`` with "cropped" in its name is deleted. Subdirectories are left
    alone.

    Args:
        image_folder: folder holding page images and their cropped halves.

    Returns:
        list[str]: file names (not paths) of the cropped images, ordered by
        the numbers in their names compared as integers, so ``page_2`` sorts
        before ``page_10``. Names without digits sort first.
    """
    all_entries = os.listdir(image_folder)

    # Files to keep: PNGs with the word "cropped" in their names
    cropped_images = [
        name for name in all_entries
        if name.lower().endswith('.png') and 'cropped' in name.lower()
    ]
    keep = set(cropped_images)

    # Delete everything else, but only regular files: os.remove fails on directories
    for name in all_entries:
        entry_path = os.path.join(image_folder, name)
        if name not in keep and os.path.isfile(entry_path):
            os.remove(entry_path)

    def numeric_key(name):
        # Compare each run of digits as its own integer. Joining all digits
        # into one number raises ValueError for a name without digits.
        return [int(number) for number in re.findall(r'\d+', name)], name

    return sorted(cropped_images, key=numeric_key)

def create_video_with_audio(mp3_path, image_folder, output_path):
    """Build a slideshow video from the images in ``image_folder`` with ``mp3_path`` as soundtrack.

    Each image is shown for an equal share of the audio duration. Images are
    ordered by the numbers in their file names compared as integers, so
    ``page_2`` comes before ``page_10``.

    Args:
        mp3_path: path of the narration audio file.
        image_folder: folder with the ``.png``/``.jpg``/``.jpeg`` slides;
            other files (e.g. ``.DS_Store``) are ignored.
        output_path: path of the video file to write; its parent folder is
            created when missing.

    Raises:
        ValueError: when ``image_folder`` contains no usable image.
    """
    def numeric_key(name):
        # Plain string sorting would put "page_10" before "page_2"
        return [int(number) for number in re.findall(r'\d+', name)], name

    image_files = sorted(
        (name for name in os.listdir(image_folder)
         if name.lower().endswith((".png", ".jpg", ".jpeg"))),
        key=numeric_key,
    )
    # Fail with a clear message instead of dividing by zero below
    if not image_files:
        raise ValueError(f"No images found in '{image_folder}'.")

    # Make sure the destination folder exists before the video is written
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    audio_clip = AudioFileClip(mp3_path)
    try:
        # Split the audio duration evenly across the images
        image_duration = audio_clip.duration / len(image_files)

        clips = [
            ImageSequenceClip([os.path.join(image_folder, image_file)], fps=24).set_duration(image_duration)
            for image_file in image_files
        ]

        # Concatenate the image clips and attach the narration
        final_clip = concatenate_videoclips(clips, method="compose")
        final_clip = final_clip.set_audio(audio_clip)

        # Write the final video with audio
        final_clip.write_videofile(output_path, codec="libx264", audio_codec="aac", fps=24, verbose=False)
    finally:
        # Release the audio reader even when writing fails
        audio_clip.close()


In [5]:
# Prompt for the summarization chain. The only template variable is {text},
# which is filled with the content of the loaded paper. The instructions ask
# the model for an outline first, then for third-person narration suitable
# for a voiceover, and finally for an editing pass over the whole script.
prompt_template = """
You will be provided a research paper and your task is to summarize the research paper into a 5 minute video as follows:
- Outline the key points of the paper
- Edit the outline into a voiceover script for a 5 minute video
- Clearly state why was the research done, what are the technologies that were previously known involved,
how is the technique or actions performed advancing the field, what are the key metrics that define the success of the work 
and what are future directions that lie ahead.
- Do not write any fact which is not present in the paper
- The final script should contain 4000 words

- First, pretend you are a research scientist who has won accolates for being able to explain expert information to a high-schooler and is giving your dissertation defense.
- Write a clearly organized and to-the-point outline summary of the following research:
"{text}",
- The outline should have 3000 words and objectives should be clearly defined for each section of the paper while preserving the specifics address in the technology used or methods tried that have advanced the particular field.
- Introduce the research scientists involved and the institutions involved if known.
- Every single line in the outline should be in complete sentences, talk with dignity and sophistication. 
- Use phrases such as "Our research presents", "This paper details the", do not use words such as realm, or start the sentence with "In the"
- Assume the audience is asking why and how about the reasoning and logic of the content. 
- Use present tense and do not use past tense.
- Do not use phrases such as "x has been discussed, x has been highlighted", be as specific on the details as possible.
- Make sure to answer clearly what is the major contribution of this body of work.
- The outline should answer to the point and in specific detail why was the research done, what are the technologies that were previously known involved,
how is the technique or actions performed advancing the field, what are the key metrics that define the success of the work 
and what are future directions that lie ahead.

- After you have produced the outline, next convert each point in the outline to be one or more complete sentences in third person point of view, going into detail especially
- regarding the technicalities and key concepts of the research. Make sure that it is absolutely clear in specific detail why was the research done, what are the technologies that were previously known involved,
how is the technique or actions performed advancing the field, what are the key metrics that define the success of the work 
and what are future directions that lie ahead.
- Assume the role of the editor of the best ranking tv production company in the world. 
- Format into a script but not screenplay to be broadcasted publicly in a 5 minute production of 4000 words for higher education consumption.
- Introduce yourself to assume the role of a third party and do not assume the time of day, do not say good evening you are not the researcher but you represent
the researcher in advocating for their work. Provide the narration only, do not format as a screenplay.
Spend at least 6 sentences delving deep into the research key findings and evaluation.

- Lastly edit the entire script to make sure that it is obviously stated to the video viewer why was the research done, what are the technologies that were previously known involved,
how is the technique or actions performed advancing the field, what are the key metrics that define the success of the work 
and what are future directions that lie ahead."""

In [38]:
def process_url(url):
    """Turn an arXiv PDF URL into a narrated slideshow video.

    Steps: download the PDF, summarize it into a voiceover script with the
    LLM chain, synthesize the script to MP3, render the PDF pages to PNG,
    cut the pages in half, and assemble the images and audio into an MP4.

    Args:
        url: arXiv PDF URL.

    Returns:
        Path of the written video, ``final_videos/<arxiv_id>.mp4``.

    Raises:
        ValueError: when the URL is not a valid arXiv PDF URL or the PDF
            could not be downloaded.
        RuntimeError: when no voiceover MP3 is available after the
            text-to-speech step.
    """
    load_dotenv()
    key = get_env_variables()

    LLM_NAME = "gpt-3.5-turbo"
    TEMPERATURE = 0.1

    # download_and_save_pdf reports failures by returning a message string
    # instead of the ID, so verify the result before building paths from it.
    expected_id = arxiv_id_from_url(url)
    arxiv_id = download_and_save_pdf(url)
    if not expected_id or arxiv_id != expected_id:
        raise ValueError(arxiv_id or f"Could not download a PDF for {url!r}")

    pdf_path = f"{arxiv_id}.pdf"
    loader = PyPDFLoader(pdf_path)
    docs = loader.load()

    num_documents = len(docs)
    print(f"loaded {num_documents} documents")

    llm = ChatOpenAI(api_key=key["OPENAI_API_KEY"], temperature=TEMPERATURE, model_name=LLM_NAME)

    prompt = PromptTemplate.from_template(prompt_template)

    llm_chain = LLMChain(llm=llm, prompt=prompt)

    # Define StuffDocumentsChain
    stuff_chain = StuffDocumentsChain(llm_chain=llm_chain, document_variable_name="text")

    # Run the chain once and reuse the result: a second run costs another
    # LLM call and may narrate a different script than the one printed.
    script = stuff_chain.run(docs)
    print(script)

    # Output folders are not created anywhere else in the pipeline
    os.makedirs("audio_voiceover", exist_ok=True)
    os.makedirs("final_videos", exist_ok=True)

    text_to_speech(script, arxiv_id)
    mp3_path = f"audio_voiceover/output_{arxiv_id}.mp3"
    if not os.path.exists(mp3_path):
        raise RuntimeError(f"Voiceover was not generated: {mp3_path} is missing")

    convert_pdf_to_png(pdf_path)

    image_folder = f"{arxiv_id}_pngs"
    generate_image_responses(image_folder)

    # Call the function to cut PNGs in half
    cut_pngs_in_half(image_folder)

    output_path = f"final_videos/{arxiv_id}.mp4"

    create_video_with_audio(mp3_path, image_folder, output_path)
    return output_path

In [34]:
# Run the whole pipeline on a sample arXiv PDF URL; the cell displays the value returned by process_url.
process_url("https://arxiv.org/pdf/2112.12709.pdf")

PDF downloaded and saved as 2112.12709.pdf
loaded 6 documents
Hello, I am here to present a groundbreaking research paper titled "Data-Driven Safety Verification of Stochastic Systems via Barrier Certificates." This paper is authored by Ali Salamati, Abolfazl Lavaei, Sadegh Soudjani, and Majid Zamani from various prestigious institutions such as Ludwig-Maximilians-Universität München, ETH Zurich, Newcastle University, and the University of Colorado Boulder.

The research paper aims to propose a data-driven approach to formally verify the safety of potentially unknown discrete-time continuous-space stochastic systems. The key technology involved in this research is the use of barrier certificates together with data collected from trajectories of unknown systems. This approach advances the field by providing a safety guarantee over unknown stochastic systems with a priori guaranteed confidence, using a finite number of data points.

The main contribution of this work is the development o

                                                                                

MoviePy - Done.
Moviepy - Writing video final_videos/2112.12709.mp4



                                                                                

Moviepy - Done !
Moviepy - video ready final_videos/2112.12709.mp4


'video saved to final_videos/2112.12709.mp4'

In [53]:
# Gradio interface
# Gradio interface: a URL text box in, the generated video out.
# live=True is intentionally not used: process_url downloads, calls paid APIs
# and renders a video, so it must run once per explicit submit rather than on
# every change of the text box. Without live mode the Interface provides its
# own submit button, so no separate gr.Button is needed.
iface = gr.Interface(
    fn=process_url,
    inputs=gr.Textbox(placeholder="Enter arXiv PDF URL"),
    outputs=gr.Video(),
    theme="sky",
    allow_flagging="never",  # Disable the flag button
    title="Arxiv2Video",
)

# NOTE(review): share=True publishes a public URL to a function that spends API credits — confirm this is intended.
iface.launch(share=True)


Running on local URL:  http://127.0.0.1:7861
Running on public URL: https://ec33e8373a47f55d25.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)




PDF downloaded and saved as 1907.09450.pdf
loaded 7 documents
Good day, esteemed viewers. Today, we delve into a groundbreaking research paper titled "A New Computation Reduction Based Nonlinear Kalman Filter" conducted by M. Behvandi, A.A. Suratgar, and M.A. Khosravi from Amirkabir University of Technology in Tehran, Iran. This research introduces a novel algorithm for nonlinear state estimation that significantly reduces computation costs while maintaining high accuracy compared to existing methods like the Extended Kalman Filter (EKF) and Unscented Kalman Filter (UKF). The key innovation lies in propagating the mean and covariance of the state to a 3rd order Taylor series, resulting in improved accuracy and computational efficiency.

The primary objective of this research was to address the limitations of existing nonlinear state estimation techniques, particularly the EKF and UKF, which are widely used but can be computationally expensive and numerically unstable. By leveraging det

                                                                                

MoviePy - Done.
Moviepy - Writing video final_videos/1907.09450.mp4



t:  27%|███████                   | 1093/3990 [00:21<01:21, 35.54it/s, now=None]Traceback (most recent call last):
  File "/Users/lilysu/anaconda3/envs/condaenv/lib/python3.8/site-packages/moviepy/video/io/ffmpeg_writer.py", line 136, in write_frame
    self.proc.stdin.write(img_array.tobytes())
BrokenPipeError: [Errno 32] Broken pipe

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/lilysu/anaconda3/envs/condaenv/lib/python3.8/site-packages/gradio/queueing.py", line 501, in call_prediction
    output = await route_utils.call_process_api(
  File "/Users/lilysu/anaconda3/envs/condaenv/lib/python3.8/site-packages/gradio/route_utils.py", line 253, in call_process_api
    output = await app.get_blocks().process_api(
  File "/Users/lilysu/anaconda3/envs/condaenv/lib/python3.8/site-packages/gradio/blocks.py", line 1695, in process_api
    result = await self.call_function(
  File "/Users/lilysu/anaconda3/envs/condaenv/lib/

In [52]:
# Shut down the Gradio server that iface.launch() started.
iface.close()

Closing server running on port: 7861
