In [3]:
from moviepy.editor import ImageSequenceClip, AudioFileClip, concatenate_videoclips
from pymilvus import Milvus, MilvusClient, IndexType, connections, utility
from langchain.chains.combine_documents.stuff import StuffDocumentsChain
from pdf2image.exceptions import PDFPageCountError, PDFSyntaxError
from langchain.text_splitter import RecursiveCharacterTextSplitter
from moviepy.editor import concatenate_videoclips, ImageClip
from langchain.chains.summarize import load_summarize_chain
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain.memory import VectorStoreRetrieverMemory
from sentence_transformers import SentenceTransformer
from langchain.document_loaders import PyPDFLoader
from langchain.prompts import PromptTemplate
from moviepy.config import change_settings
from langchain.chains.llm import LLMChain
from langchain.vectorstores import Milvus
from pdf2image import convert_from_path
from milvus import default_server
from dotenv import load_dotenv
from pydub import AudioSegment
from datetime import datetime
from openai import OpenAI
from PIL import Image
import gradio as gr
import numpy as np
import feedparser
import requests
import imageio
import base64
import pprint
import torch
import re
import os

In [2]:
change_settings({"FFMPEG_BINARY": "/opt/homebrew/bin/ffmpeg", "DYLD_LIBRARY_PATH":"/opt/homebrew/bin/convert"})
# Set up a Milvus client
default_server.start()
host="127.0.0.1"
connections.connect(host=host, port=default_server.listen_port)
port=default_server.listen_port
my_uri = "http://localhost:" + str(port)
print(my_uri)

http://localhost:19531


In [4]:
my_uri="http://localhost:19531"

In [5]:
def get_env_variables():
    """Fetch all necessary configurations from environment variables."""
    return {
        'OPENAI_API_KEY': os.getenv('OPENAI_API_KEY'),
        'ELEVEN_LABS_API_KEY': os.getenv('ELEVEN_LABS_API_KEY')
    }


def create_folder(folder_name):
    if not os.path.exists(folder_name):
        os.makedirs(folder_name)
        print(f"The folder '{folder_name}' has been created.")
    else:
        print(f"The folder '{folder_name}' already exists.")


def arxiv_id_from_url(url):
    # Extract the arXiv ID from the URL using a regular expression
    match = re.search(r'arxiv\.org/pdf/(\d+\.\d+)', url)
    if match:
        return match.group(1)
    else:
        return None
        

def download_and_save_pdf(url, folder_pdfs):
    """
    Download and save a PDF file from an arXiv.org URL into local directory.

    Parameters:
    - url (str): The arXiv.org URL of the paper.

    Returns:
    - str: ArXiv ID of the downloaded paper if successful, or an error message.
    """
    # Extract arXiv ID from the URL
    arxiv_id = arxiv_id_from_url(url)

    arxiv_name = arxiv_id.replace(".", "_")
    pdf_path = os.path.join(folder_pdfs, arxiv_name)
    create_folder(pdf_path)

    # Check if a valid arXiv ID was extracted
    if arxiv_id:
        try:
            # Make a request to the arXiv API
            feed = feedparser.parse(f'http://export.arxiv.org/api/query?id_list={arxiv_id}')

            # Check if the response contains entries
            if 'entries' in feed:
                # Iterate over each entry (paper) in the feed
                for entry in feed.entries:
                    # Extract the PDF link from the entry
                    pdf_link = entry.link.replace('/abs/', '/pdf/') + '.pdf'

                    # Download the PDF
                    response = requests.get(pdf_link)

                    # Save the PDF in the local directory with the name based on the arXiv ID
                    with open(f'{pdf_path}/{arxiv_name}.pdf', 'wb') as pdf_file:
                        pdf_file.write(response.content)

                    print(f"\nPDF downloaded and saved as {arxiv_name}.pdf")
                    return arxiv_id

            else:
                return f"\nNo entries found for arXiv ID {arxiv_id}"

        except Exception as e:
            return f"\nError extracting information: {e}"
    else:
        return "Invalid arXiv PDF URL format. Please enter a valid URL."


def download_and_initialize_embedding_model(model_name="WhereIsAI/UAE-Large-V1", device=None):
    """
    Download and initialize the Sentence Transformer model.

    Parameters:
    - model_name (str): The name of the Sentence Transformer model to download.
    - device (str or torch.device): The device to use for the model (e.g., 'cuda:3' or 'cpu').

    Returns:
    - encoder (SentenceTransformer): The initialized Sentence Transformer model.
    - EMBEDDING_DIM (int): The embedding dimension of the model.
    - MAX_SEQ_LENGTH (int): The maximum sequence length.

    Example usage:
    encoder, EMBEDDING_DIM, max_seq_length = download_and_initialize_embedding_model()
    """
    # Initialize torch settings
    torch.backends.cudnn.deterministic = True
    DEVICE = torch.device(device) if device else torch.device('cuda:3' if torch.cuda.is_available() else 'cpu')
    print(f"\ndevice: {DEVICE}")

    # Load the model from the Hugging Face model hub
    encoder = SentenceTransformer(model_name, device=DEVICE)
    print(f"\nDatatype of SentenceTransformer encoded object{type(encoder)}\n")
    print(f"\nWhat the encoder object looks like: {encoder}\n")

    # Get the model parameters and save for later
    EMBEDDING_DIM = encoder.get_sentence_embedding_dimension()
    MAX_SEQ_LENGTH_IN_TOKENS = encoder.get_max_seq_length()
    # Assume tokens are 3 characters long
    MAX_SEQ_LENGTH = MAX_SEQ_LENGTH_IN_TOKENS * 3
    HF_EOS_TOKEN_LENGTH = 1 * 3
    # Test with 512 sequence length
    MAX_SEQ_LENGTH = MAX_SEQ_LENGTH_IN_TOKENS
    HF_EOS_TOKEN_LENGTH = 1

    # Inspect model parameters
    print(f"\nmodel_name: {model_name}")
    print(f"\nEMBEDDING_DIM: {EMBEDDING_DIM}")
    print(f"\nMAX_SEQ_LENGTH: {MAX_SEQ_LENGTH}")

    return encoder, EMBEDDING_DIM, MAX_SEQ_LENGTH


def create_milvus_collection(COLLECTION_NAME, EMBEDDING_DIM, M=16, uri=my_uri):
    """
    Create a no-schema Milvus collection and define the database index.

    Parameters:
    - uri (str): The URI of the Milvus server.
    - COLLECTION_NAME (str): The name of the Milvus collection.
    - EMBEDDING_DIM (int): The dimension of the embedding vectors.
    - M (int): The maximum number of graph connections per layer for the HNSW index. Default is 16.

    Returns:
    - milvus_client (Milvus): The Milvus client instance.


    Example usage:
    my_uri = "tcp://127.0.0.1:19530"
    COLLECTION_NAME = "MilvusDocs"
    my_EMBEDDING_DIM = 1024
    
    milvus_client = create_milvus_collection(COLLECTION_NAME, EMBEDDING_DIM, M=16, uri=my_uri)
    """
    # Add custom HNSW search index to the collection.
    # M = max number graph connections per layer. Large M = denser graph.
    # Choice of M: 4~64, larger M for larger data and larger embedding lengths.
    # M = 16
    # efConstruction = num_candidate_nearest_neighbors per layer. 
    # Use Rule of thumb: int. 8~512, efConstruction = M * 2.
    efConstruction = M * 2
    index_params = {
        "index_type": IndexType.HNSW,
        "metric_type": "COSINE",
        "params": {"M": M, "efConstruction": efConstruction}
    }

    # Use no-schema Milvus client using flexible json key:value format.
    milvus_client = MilvusClient(uri=my_uri)

    # Check if collection already exists, if so drop it.
    if utility.has_collection(COLLECTION_NAME):
        utility.drop_collection(COLLECTION_NAME)
        print(f"\nCollection had previously been created, dropping previous collection to initialize anew: `{COLLECTION_NAME}`")

    # Create the collection.
    milvus_client.create_collection(COLLECTION_NAME, EMBEDDING_DIM,
                                    consistency_level="Eventually",
                                    auto_id=True,
                                    overwrite=True,
                                    params=index_params)

    print(f"\nSuccessfully created collection: `{COLLECTION_NAME}`")
    print(milvus_client.describe_collection(COLLECTION_NAME))

    return milvus_client


def split_documents_to_chunks(docs, max_seq_length, hf_eos_token_length):
    """
    Split documents into smaller recursive chunks using Sentence Transformers' RecursiveCharacterTextSplitter.

    Parameters:
    - docs (list): List of documents to be split.
    - max_seq_length (int): Maximum sequence length.
    - hf_eos_token_length (int): Length of the EOS token.

    Returns:
    - chunks (list): List of chunks.

    Example usage:
    MAX_SEQ_LENGTH = 512
    HF_EOS_TOKEN_LENGTH = 3
    docs = ["Document 1 text.", "Document 2 text.", "Document 3 text."]
    
    resulting_chunks = split_documents_to_chunks(docs, MAX_SEQ_LENGTH, HF_EOS_TOKEN_LENGTH)
    print(resulting_chunks)
    """
    # Calculate chunk size and overlap
    chunk_size = max_seq_length - hf_eos_token_length
    chunk_overlap = int(round(chunk_size * 0.15, 0))

    # Create an instance of the RecursiveCharacterTextSplitter
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
        is_separator_regex=False,
    )

    # Split the documents further into smaller, recursive chunks.
    chunks = text_splitter.split_documents(docs)
    
    return chunks


def insert_chunks_into_milvus(chunks, COLLECTION_NAME, encoder, milvus_client, max_seq_length, hf_eos_token_length):
    """
    Insert document chunks into a Milvus collection.

    Parameters:
    - docs (list): List of documents to be inserted.
    - COLLECTION_NAME (str): Name of the Milvus collection.
    - encoder (SentenceTransformer): SentenceTransformer model for generating embeddings.
    - milvus_client (Milvus): Milvus client instance.
    - max_seq_length (int): Maximum sequence length.
    - hf_eos_token_length (int): Length of the EOS token.

    Returns:
    - insert_time (float): Time taken for the insertion process.

    Example Usage assuming 'chunks' is a list of dictionaries with 'page_content' and 'metadata' keys:
    MAX_SEQ_LENGTH = 512
    HF_EOS_TOKEN_LENGTH = 3
    COLLECTION_NAME = "MilvusDocs"
    ENCODER_MODEL_NAME = "WhereIsAI/UAE-Large-V1"
    # Initialize Milvus client
    # Initialize encoder model
    
    resulting_insert_time = insert_chunks_into_milvus(chunks, COLLECTION_NAME, encoder, milvus_client, MAX_SEQ_LENGTH, HF_EOS_TOKEN_LENGTH)
    """
    # Convert chunks to a list of dictionaries.
    chunk_list = []
    for chunk in chunks:
    
        # Generate embeddings using encoder from HuggingFace.
        embeddings = OpenAIEmbeddings(model="text-embedding-3-large", chunk_size=max_seq_length)
        
        # Assemble embedding vector, original text chunk, metadata.
        # chunk_dict = {
        #     'vector': converted_values,
        #     'chunk': chunk.page_content,
        #     'source': chunk.metadata['page']
        # }
        chunk_list = embeddings.embed_query(chunk)

    # Insert data into the Milvus collection.
    print("Start inserting entities")

    inserted_chunks = milvus_client.insert(
        COLLECTION_NAME,
        data=chunk_list,
        progress_bar=True
    )
    print("Finished inserting entities")

    # After the final entity is inserted, call flush to stop growing segments left in memory.
    utility.drop_collection(COLLECTION_NAME)

    return print(f"\nNumber of chunks inserted into Milvus database: {len(inserted_chunks)} with chunk id starting at number: {inserted_chunks[0]}\n")


def client_assemble_retrieved_context(retrieved_top_k, metadata_fields=[], num_shot_answers=3):
    """ 
    For each question, assemble the context and metadata from the retrieved_top_k chunks.
    retrieved_top_k: list of dicts

    Example Usage:
    formatted_results, context, context_metadata = client_assemble_retrieved_context(results, metadata_fields=metadata_fields, num_shot_answers=top_k)
    """
    # Assemble the context as a stuffed string.
    distances = []
    context = []
    context_metadata = []
    i = 1
    for r in retrieved_top_k[0]:
        distances.append(r['distance'])
        if i <= num_shot_answers:
            if len(metadata_fields) > 0:
                metadata = {}
                for field in metadata_fields:
                    metadata[field] = r['entity'][field]
                context_metadata.append(metadata)
            context.append(r['entity']['chunk'])
        i += 1

    # Assemble formatted results in a zipped list.
    formatted_results = list(zip(distances, context, context_metadata))
    # Return all the things for convenience.
    return formatted_results, context, context_metadata
    

def search_and_generate_response(milvus_client, encoder, COLLECTION_NAME, llm_name, temperature, random_seed, top_k=3, M=16):
    """
    Search Milvus collection for relevant context and generate a response using the OpenAI API.

    Parameters:
    - openai_client (OpenAI): OpenAI client instance.
    - milvus_client (Milvus): Milvus client instance.
    - encoder (SentenceTransformer): SentenceTransformer model for generating embeddings.
    - COLLECTION_NAME (str): Name of the Milvus collection.
    - SAMPLE_QUESTION (str): Sample question for search.
    - llm_name (str): Name of the OpenAI language model.
    - temperature (float): Temperature for response generation.
    - random_seed (int): Random seed for response generation.
    - top_k (int): Top K results to retrieve from Milvus search.
    - M (Milvus): Choice of M: 4~64, larger M for larger data and larger embedding lengths.

    Returns:
    - response_choices (list): List of response choices.

    Example usage:
    
    response_choices = search_and_generate_response(
        milvus_client,
        encoder,
        COLLECTION_NAME,
        LLM_NAME,
        TEMPERATURE,
        RANDOM_SEED
    )
    """
    efConstruction = M * 2
    
    # Return top k results with HNSW index.
    search_params = {"ef": efConstruction}

    # Define output fields to return.
    output_fields = ["source", "chunk"]

    SAMPLE_QUESTION = "What are the key contributions of this paper and the evaluation metrics that prove that this paper advances previously known information?"
    
    # Search Milvus collection
    results = milvus_client.search(
        COLLECTION_NAME,
        data=encoder.encode([SAMPLE_QUESTION]),
        search_params=search_params,
        output_fields=output_fields,
        limit=top_k,
        consistency_level="Eventually"
    )

    # Assemble retrieved context
    metadata_fields = [f for f in output_fields if f != 'chunk']
    formatted_results, context, context_metadata = client_assemble_retrieved_context(results, metadata_fields=metadata_fields, num_shot_answers=top_k)
    
    SYSTEM_PROMPT = f"""Answer in no less than 4000 characters. 
    - It is of utmost importance to use only the information from the Context to answer the user's question. Be clear, factual, complete, concise. Answer the question and follow the instructions to the best of your ability.You will be provided a research paper and your task is to summarize the research paper into a 5 minute video as follows:
    - Create an outline the key points of the paper
    - Clearly state in your outline why was the research done, what are the technologies that were previously known involved,
    how is the technique or actions performed advancing the field, what are the key metrics that define the success of the work 
    and what are future directions that lie ahead.
    - Do not write any fact which is not present in the paper
    
    - Write a clearly organized and to-the-point outline summary of the following research:,
    - The outline should have 3000 words and objectives should be clearly defined for each section of the paper while preserving the specifics address in the technology used or methods tried that have advanced the particular field.
    - Introduce the research scientists involved and the institutions involved if known.
    - Every single line in the outline should be in complete sentences, talk with dignity and sophistication. 
    - Use phrases such as "Our research presents", "This paper details the", do not use words such as realm, or start the sentence with "In the"
    - Assume the audience is asking why and how about the reasoning and logic of the content. 
    - Use present tense and do not use past tense.
    - Do not use phrases such as "x has been discussed, x has been highlighted", be as specific on the details as possible.
    - Make sure to answer clearly what is the major contribution of this body of work.
    - The outline should answer to the point and in specific detail why was the research done, what are the technologies that were previously known involved,
    how is the technique or actions performed advancing the field, what are the key metrics that define the success of the work 
    and what are future directions that lie ahead.
    
    - After you have produced the outline, next convert each point in the outline to be one or more complete sentences in third person point of view, going into detail especially regarding the technicalities and key concepts of the research. Make sure that it is absolutely clear in specific detail why was the research done, what are the technologies that were previously known involved,
    how is the technique or actions performed advancing the field, what are the key metrics that define the success of the work 
    and what are future directions that lie ahead.
    - Always start by stating the title of the paper as the first few words.
    - First, assume the role of a research scientist who has won accolates for being able to explain expert information to a high-schooler and is giving an overview briefing of a research project.
    - Assume the role of the editor of the best ranking tv production company in the world. 
    - Format into a script but not screenplay to be broadcasted publicly in a 5 minute production of 4000 words for higher education consumption.
    - Introduce yourself to assume the role of a third party and do not assume the time of day, do not say good evening you are not the researcher but you represent
    the researcher in advocating for their work. Provide the narration only, do not format as a screenplay.
    - Spend at least 6 sentences delving deep into the research key findings and evaluation.
    - Do not start a paragraph with "Good day, esteemed viewers."
    
    - Lastly edit the entire script to make sure that it is obviously stated to the video viewer why was the research done, what are the technologies that were previously known involved,
    how is the technique or actions performed advancing the field, what are the key metrics that define the success of the work 
    and what are future directions that lie ahead. Cite the grounding sources.
    Context: {context}
    Answer: The answer to the question in no less than 4000 characters in complete sentences as a narration. Do not pretend to be the author, just an instructor.
    Grounding sources: {context_metadata[1]}
    """
    
    # Also create a template for function calls later.
    SYSTEM_PROMPT_TEMPLATE = f"""Answer in no less than 4000 characters. 
    - It is of utmost importance to use only the information from the Context to answer the user's question. Be clear, factual, complete, concise. Answer the question and follow the instructions to the best of your ability.You will be provided a research paper and your task is to summarize the research paper into a 5 minute video as follows:
    - Create an outline the key points of the paper
    - Clearly state in your outline why was the research done, what are the technologies that were previously known involved,
    how is the technique or actions performed advancing the field, what are the key metrics that define the success of the work 
    and what are future directions that lie ahead.
    - Do not write any fact which is not present in the paper
    
    - Write a clearly organized and to-the-point outline summary of the following research:,
    - The outline should have 3000 words and objectives should be clearly defined for each section of the paper while preserving the specifics address in the technology used or methods tried that have advanced the particular field.
    - Introduce the research scientists involved and the institutions involved if known.
    - Every single line in the outline should be in complete sentences, talk with dignity and sophistication. 
    - Use phrases such as "Our research presents", "This paper details the", do not use words such as realm, or start the sentence with "In the"
    - Assume the audience is asking why and how about the reasoning and logic of the content. 
    - Use present tense and do not use past tense.
    - Do not use phrases such as "x has been discussed, x has been highlighted", be as specific on the details as possible.
    - Make sure to answer clearly what is the major contribution of this body of work.
    - The outline should answer to the point and in specific detail why was the research done, what are the technologies that were previously known involved,
    how is the technique or actions performed advancing the field, what are the key metrics that define the success of the work 
    and what are future directions that lie ahead.
    
    - After you have produced the outline, next convert each point in the outline to be one or more complete sentences in third person point of view, going into detail especially regarding the technicalities and key concepts of the research. Make sure that it is absolutely clear in specific detail why was the research done, what are the technologies that were previously known involved,
    how is the technique or actions performed advancing the field, what are the key metrics that define the success of the work 
    and what are future directions that lie ahead.
    - Always start by stating the title of the paper as the first few words.
    - First, assume the role of a research scientist who has won accolates for being able to explain expert information to a high-schooler and is giving an overview briefing of a research project.
    - Assume the role of the editor of the best ranking tv production company in the world. 
    - Format into a script but not screenplay to be broadcasted publicly in a 5 minute production of 4000 words for higher education consumption.
    - Introduce yourself to assume the role of a third party and do not assume the time of day, do not say good evening you are not the researcher but you represent
    the researcher in advocating for their work. Provide the narration only, do not format as a screenplay.
    - Spend at least 6 sentences delving deep into the research key findings and evaluation.
    - Do not start a paragraph with "Good day, esteemed viewers."
    
    - Lastly edit the entire script to make sure that it is obviously stated to the video viewer why was the research done, what are the technologies that were previously known involved,
    how is the technique or actions performed advancing the field, what are the key metrics that define the success of the work 
    and what are future directions that lie ahead. Cite the grounding sources.
    Context: {context}
    Answer: The answer to the question in no less than 4000 characters in complete sentences as a narration. Do not pretend to be the author, just an instructor.
    Grounding sources: {context_metadata[1]}
    """

    
    load_dotenv()
    key = get_env_variables()
    openai_client = OpenAI(api_key=key["OPENAI_API_KEY"])
    
    # Generate response using the OpenAI API
    response = openai_client.chat.completions.create(
        messages=[
            {"role": "system", "content": SYSTEM_PROMPT,},
            {"role": "user", "content": f"question: {SAMPLE_QUESTION}",}
        ],
        model=llm_name,
        temperature=temperature,
        seed=random_seed,
    )

    # Extract and print the contents of the number one ranked response:
    response_choices = [choice.message.content for choice in response.choices]
    for i, choice in enumerate(response_choices, 1):
        pprint.pprint(f"\nAnswer {i}: {choice}\n")

    return response_choices


def save_transcript(response_choices, folder_transcripts, arxiv_name):
    """
    Save the first element of response_choices into a text file in the specified directory.

    Parameters:
    - response_choices (list): A list of choices where the first element is the transcript content.
    - folder_transcripts (str): The directory path where the transcript file will be saved.
    - arxiv_name (str): The name used for generating the transcript file.

    Returns:
    None: The function saves the transcript content to a text file.

    Example:
    save_transcript(["This is the transcript content."], "transcripts", "example_arxiv")
    """
    # Ensure the directory path is valid
    if not os.path.exists(folder_transcripts):
        os.makedirs(folder_transcripts)

    # Generate the file path
    file_path = os.path.join(folder_transcripts, f"{arxiv_name}.txt")

    # Save response_choices[0] to the file
    with open(file_path, 'w', encoding='utf-8') as file:
        file.write(response_choices[0])

    print(f"\nTranscript saved in: {file_path}")


def text_to_speech(text_for_TTS, arxiv_name, folder_audio):

    ELEVEN_LABS_API_KEY = os.environ.get("ELEVEN_LABS_API_KEY")

    CHUNK_SIZE = 1024
    url = "https://api.elevenlabs.io/v1/text-to-speech/bVMeCyTHy58xNoL34h3p"

    headers = {
        "Accept": "audio/mpeg",
        "Content-Type": "application/json",
        "xi-api-key": ELEVEN_LABS_API_KEY
    }

    data = {
        "text": text_for_TTS,
        "model_id": "eleven_monolingual_v1",
        "voice_settings": {
            "stability": 0.5,
            "similarity_boost": 0.5
        }
    }

    # Generate a unique filename based on timestamp
    filename = f"output_{arxiv_name}.mp3"
    target_path = os.path.join(folder_audio, filename)

    # Check if the file already exists
    if os.path.exists(target_path):
        print(f"Recording file {filename} already exists in {folder_audio}. Skipping download.")
        return target_path

    response = requests.post(url, json=data, headers=headers)

    if response.status_code == 200:
        # Save the recording to the unique file
        with open(target_path, 'wb') as f:
            for chunk in response.iter_content(chunk_size=CHUNK_SIZE):
                if chunk:
                    f.write(chunk)

        print(f"\nRecording saved in {target_path}")
        return target_path
    else:
        print(f"\n Error: {response.status_code} - {response.text}")


def convert_pdf_to_png(folder_images, pdf_file_path, arxiv_name):
    try:
        # Create a folder for storing the PNGs
        sub_folder_name = os.path.splitext(os.path.basename(pdf_file_path))[0] + "_pngs"
        full_path = os.path.join(folder_images, sub_folder_name)
        if not os.path.exists(full_path):
            os.makedirs(full_path)
    
        # Convert each page of the PDF to PNG
        images = convert_from_path(pdf_file_path, output_folder=full_path)
        # arxiv_name = sub_folder_name.replace("_pngs", "")
    
        # Save each image as a separate PNG file
        for i, image in enumerate(images):
            png_path = os.path.join(full_path, f"{arxiv_name}_page_{i + 1}.png")
            image.save(png_path, "PNG")
    
        print(f"\nAll pages converted and saved in the folder: {full_path}")
    
        # Clean up: Delete the .ppm files and uncropped files
        for filename in os.listdir(full_path):
            if filename.endswith(".ppm"):
                file_to_remove_path = os.path.join(full_path, filename)
                os.remove(file_to_remove_path)
    
        print(f"\n.ppm artifacts deleted in the folder: {full_path}")
    except (PDFPageCountError, PDFSyntaxError, PermissionError) as e:
        print(f"\nError: {e}")
        print(f"Skipping processing of {pdf_file_path}")
        if isinstance(e, PdfReadError):
            print("PdfReadError: Unable to read PDF file.")
        elif isinstance(e, PermissionError):
            print("PermissionError: Permission issue while processing the PDF file.")


def cut_pngs_in_half(image_folder):
    # Ensure the directory path is valid
    if not os.path.exists(image_folder):
        print(f"\nError: Directory '{image_folder}' does not exist.")
        return

    # Get a list of all files in the directory
    files = [f for f in os.listdir(image_folder) if os.path.isfile(os.path.join(image_folder, f))]

    # Process each file in the directory
    for file_name in files:
        # Check if the file is a PNG and does not contain 'cropped' in the name
        if file_name.lower().endswith('.png') and 'cropped' not in file_name.lower():
            image_path = os.path.join(image_folder, file_name)

            # Open the image
            with Image.open(image_path) as img:
                # Get the dimensions of the image
                width, height = img.size

                # Cut the image in half (top and bottom)
                top_half = img.crop((0, 0, width, height // 2))
                bottom_half = img.crop((0, height // 2, width, height))

                # Save the top and bottom halves with "_cropped_1" and "_cropped_2" suffixes
                top_half.save(os.path.join(image_folder, f"{os.path.splitext(file_name)[0]}_cropped_1.png"), 'PNG')
                bottom_half.save(os.path.join(image_folder, f"{os.path.splitext(file_name)[0]}_cropped_2.png"), 'PNG')

                print(f"\nImages saved: {file_name}_cropped_1.png (top) and {file_name}_cropped_2.png (bottom)")
        else:
            print(f"\nSkipping processing for {file_name} as it contains 'cropped' in the file name.")


def analyze_mp3_length(mp3_path):
    audio = AudioSegment.from_file(mp3_path)
    return len(audio) / 1000.0  # Length in seconds

def fetch_cropped_images(image_folder):
    # List all images in the folder
    all_images = os.listdir(image_folder)
    
    # Identify files to keep (those with the word "cropped" in their filenames)
    cropped_images = [image for image in all_images if image.lower().endswith('.png') and 'cropped' in image.lower()]
    
    # Delete files that do not contain the word "cropped"
    for image in all_images:
        if image not in cropped_images:
            os.remove(os.path.join(image_folder, image))
    
    # List the remaining images after deletion
    remaining_images = os.listdir(image_folder)
    
    # Sort the cropped images based on numeric values in their filenames
    sorted_images = sorted(remaining_images, key=lambda x: int(''.join(filter(str.isdigit, x))))
    return sorted_images


def move_uncropped_files(image_folder):
    try:
        # Create a new folder if it doesn't exist
        uncropped_folder = os.path.join(image_folder, "uncropped_pngs")
        if not os.path.exists(uncropped_folder):
            os.makedirs(uncropped_folder)

        # Loop through all files in the folder
        for filename in os.listdir(image_folder):
            file_path = os.path.join(image_folder, filename)

            # Check if the file name contains the word "cropped"
            if "cropped" not in filename:
                # Move the file to the uncropped folder
                new_path = os.path.join(uncropped_folder, filename)

                try:
                    shutil.move(file_path, new_path)
                    print(f"File moved to uncropped folder: {filename}")
                except Exception as move_error:
                    print(f"Error moving file {filename}: {move_error}")
                    continue

        print(f"All non-cropped files moved to the folder: {uncropped_folder}")
    except Exception as e:
        print(f"Error: {e}")


def create_video(mp3_path, image_folder, output_path):
    try:
        # Loop through all files in the folder
        for filename in os.listdir(image_folder):
            file_path = os.path.join(image_folder, filename)
            
            # Check if the file name contains the word "cropped"
            if "cropped" not in filename:
                # Remove the file
                os.remove(file_path)
                print(f"File removed: {filename}")
                
        print(f"All non-cropped files removed in the folder: {image_folder}")
    except Exception as e:
        print(f"Error: {e}")
    
    # Sort the images by converting the numeric parts of filenames into integers
    image_files = sorted([file for file in os.listdir(image_folder) if 'cropped' in file and file.lower().endswith('.png')],
                         key=lambda x: [int(part) if part.isdigit() else part for part in re.split(r'(\d+)', x)])
    audio_clip = AudioFileClip(mp3_path)
    
    # Calculate the duration of each image based on the total duration of the audio and the number of images
    image_duration = audio_clip.duration / len(image_files)
    
    clips = []
    
    for idx, image_file in enumerate(image_files):
        # Load each image using imageio
        image_path = os.path.join(image_folder, image_file)
        image = imageio.imread(image_path)
    
        if image.sum() == 0:
            print(f"Image {image_file} is blank. Skipping...")
            os.remove(image_file)
            continue
                
        # Create a clip from the image and set its duration
        image_clip = ImageClip(image).set_duration(image_duration)
    
        # Add the image clip to the list of clips
        clips.append(image_clip)
    
    # Concatenate the image clips to create the final video
    final_clip = concatenate_videoclips(clips, method="compose")
    final_clip = final_clip.set_audio(audio_clip)
    
    # Write the final video with audio
    final_clip.write_videofile(output_path, codec="libx264", audio_codec="aac", fps=24, verbose=True)
    print(f"\nFinal video saved at: {output_path}.")   


In [8]:
url = f"https://arxiv.org/pdf/2402.13254.pdf"

MAX_SEQ_LENGTH = 1000
port="19533"
host="127.0.0.1"

COLLECTION_NAME = "MilvusDocs"
HF_EOS_TOKEN_LENGTH = 3
LLM_NAME = "gpt-3.5-turbo"
TEMPERATURE = 0.1
RANDOM_SEED = 415
M=16

folder_pdfs = "pdfs"
folder_images = "images"
folder_final_videos = "final_videos"
folder_audio = "audio_voiceovers"
folder_transcripts = "transcripts"

# Call the function to create the folder
# create_folder(folder_pdfs)
# create_folder(folder_images)
# create_folder(folder_audio)
# create_folder(folder_final_videos)
# create_folder(folder_transcripts)

# Download and save a PDF file from an arXiv.org URL into local directory.
arxiv_id = download_and_save_pdf(url, folder_pdfs)
arxiv_id

arxiv_name = arxiv_id.replace(".", "_")
pdf_path = os.path.join(folder_pdfs, arxiv_name)
pdf_file_path = os.path.join(pdf_path, f"{arxiv_name}.pdf")
image_folder = f"{folder_images}/{arxiv_name}_pngs" 
mp3_path = f"{folder_audio}/output_{arxiv_name}.mp3"
output_path = f"{folder_final_videos}/{arxiv_name}.mp4" 

# # Download open source embedding model "WhereIsAI/UAE-Large-V1" via Huggingface's Sentence Transformers
# encoder, EMBEDDING_DIM, MAX_SEQ_LENGTH = download_and_initialize_embedding_model()

# Create a no-schema milvus collection and define the database index
# milvus_client = create_milvus_collection(COLLECTION_NAME, EMBEDDING_DIM, M, my_uri)

# Load PDF's into a PDF object using LangChain's PyPDFLoader
loader = PyPDFLoader(f"{pdf_path}/{arxiv_name}.pdf")
docs = loader.load()

# Cut text from PDF's into chunks using LangChain's RecursiveCharacterTextSplitter
chunks = split_documents_to_chunks(docs, MAX_SEQ_LENGTH, HF_EOS_TOKEN_LENGTH)


print("chunks: \n")
for i, chunk in enumerate(chunks):
    pprint.pprint(f"chunk num {1} : \n {chunk}")

# # Convert chunks to a list of dictionaries.
# chunk_list = []
# for chunk in chunks:

# Generate embeddings using encoder from HuggingFace.
embeddings = OpenAIEmbeddings(model="text-embedding-3-large")
    
    # Assemble embedding vector, original text chunk, metadata.
    # chunk_dict = {
    #     'vector': converted_values,
    #     'chunk': chunk.page_content,
    #     'source': chunk.metadata['page']
    # }
    # chunk_list = embeddings.embed_query(chunk)

# Creating new collection in Milvus vector store
vector_store = Milvus(
    embedding_function=embeddings,
    connection_args={"host": host, "port": port},
    collection_name=COLLECTION_NAME,
    drop_old=True,
).from_documents(
    chunks,
    embedding=embeddings,
    collection_name=COLLECTION_NAME,
    connection_args={"host": host, "port": port},
)
    

# Retrieving stored collection from Milvus vector store
vector_store = Milvus(
    embedding_function=embeddings,    
    collection_name=COLLECTION_NAME,
    connection_args={"host": host, "port": port}
)

query = "What is the title of the paper?"

# Similarity search
docs_result = vector_store.similarity_search(query)

print(f"Original source document: {docs_result[0].metadata['source']}")
print(f"Original source document page: {docs_result[0].metadata['page']}")
print("--------------------------------------")
print(docs_result[0].page_content)

# # Insert data into the Milvus collection.
# print("Start inserting entities\n")

# inserted_chunks = milvus_client.insert(
#     COLLECTION_NAME,
#     data=chunk_list,
#     progress_bar=True
# )
# print("Finished inserting entities\n")
# print("inserted_chunks")
# print(inserted_chunks)

# # After the final entity is inserted, call flush to stop growing segments left in memory.
# utility.drop_collection(COLLECTION_NAME)

# print("done")

The folder 'pdfs/2402_13254' already exists.

PDF downloaded and saved as 2402_13254.pdf
chunks: 

('chunk num 1 : \n'
 " page_content='CounterCurate: Enhancing Physical and Semantic "
 'Visio-Linguistic\\nCompositional Reasoning via Counterfactual '
 'Examples\\nJianrui Zhang*1Mu Cai∗1Tengyang Xie1,2Yong Jae '
 'Lee1\\njzhang2427@wisc.edu, {mucai,tx,yongjaelee}@cs.wisc.edu\\n1University '
 'of Wisconsin–Madison2Microsoft '
 'Research\\nhttps://countercurate.github.io\\nAbstract\\nWe propose '
 'CounterCurate, a framework to\\ncomprehensively improve the '
 'visio-linguistic\\ncompositional reasoning capability for both\\ncontrastive '
 'and generative multimodal models.\\nIn particular, we identify two critical '
 'under-\\nexplored problems: the neglect of physically\\ngrounded reasoning '
 '(counting and position un-\\nderstanding) and the potential of using '
 'highly\\ncapable text and image generation models for\\nsemantic '
 'counterfactual fine-tuning. Our work\\npioneers an ap

In [14]:
SAMPLE_QUESTION = f"""Answer in no less than 4000 characters. 
    - It is of utmost importance to use only the information from the Context to answer the user's question. Be clear, factual, complete, concise. Answer the question and follow the instructions to the best of your ability.You will be provided a research paper and your task is to summarize the research paper into a 5 minute video as follows:
    - Create an outline the key points of the paper
    - Clearly state in your outline why was the research done, what are the technologies that were previously known involved,
    how is the technique or actions performed advancing the field, what are the key metrics that define the success of the work 
    and what are future directions that lie ahead.
    - Do not write any fact which is not present in the paper
    
    - Write a clearly organized and to-the-point outline summary of the following research:,
    - The outline should have 3000 words and objectives should be clearly defined for each section of the paper while preserving the specifics address in the technology used or methods tried that have advanced the particular field.
    - Introduce the research scientists involved and the institutions involved if known.
    - Every single line in the outline should be in complete sentences, talk with dignity and sophistication. 
    - Use phrases such as "Our research presents", "This paper details the", do not use words such as realm, or start the sentence with "In the"
    - Assume the audience is asking why and how about the reasoning and logic of the content. 
    - Use present tense and do not use past tense.
    - Do not use phrases such as "x has been discussed, x has been highlighted", be as specific on the details as possible.
    - Make sure to answer clearly what is the major contribution of this body of work.
    - The outline should answer to the point and in specific detail why was the research done, what are the technologies that were previously known involved,
    how is the technique or actions performed advancing the field, what are the key metrics that define the success of the work 
    and what are future directions that lie ahead.
    
    - After you have produced the outline, next convert each point in the outline to be one or more complete sentences in third person point of view, going into detail especially regarding the technicalities and key concepts of the research. Make sure that it is absolutely clear in specific detail why was the research done, what are the technologies that were previously known involved,
    how is the technique or actions performed advancing the field, what are the key metrics that define the success of the work 
    and what are future directions that lie ahead.
    - Always start by stating the title of the paper as the first few words.
    - First, assume the role of a research scientist who has won accolates for being able to explain expert information to a high-schooler and is giving an overview briefing of a research project.
    - Assume the role of the editor of the best ranking tv production company in the world. 
    - Format into a script but not screenplay to be broadcasted publicly in a 5 minute production of 4000 words for higher education consumption.
    - Introduce yourself to assume the role of a third party and do not assume the time of day, do not say good evening you are not the researcher but you represent
    the researcher in advocating for their work. Provide the narration only, do not format as a screenplay.
    - Spend at least 6 sentences delving deep into the research key findings and evaluation.
    - Do not start a paragraph with "Good day, esteemed viewers."
    
    - Lastly edit the entire script to make sure that it is obviously stated to the video viewer why was the research done, what are the technologies that were previously known involved,
    how is the technique or actions performed advancing the field, what are the key metrics that define the success of the work 
    and what are future directions that lie ahead. Cite the grounding sources.
    """


docs_result = vector_store.similarity_search(SAMPLE_QUESTION)
docs_result

[Document(page_content='If you cannot generate one, report None.All in all, the new description must meet all of\nthese requirements:\n1. The change of attribute must be sufficiently\ndifferent to make the new description inaccurate,\nbut it should also be somewhat related to be chal-\nlenging to an AI model.\n2. Compared to the original description, the new\ndescription must differ in only one attribute. All\nother details must be kept the same.\n3. The new description must mimic the sentence\nstructure of the original description.\n4. The new description must be fluent, logical,\nand grammatically correct.\n5. Carefully look at the image, and give negative\ncaptions that are reasonable given the objects’ po-\nsition, size, and relationship to the overall setting.\n6. Pose challenging(difficult enough) negative\ncaptions so that a large multimodal text generation\nmodel should struggle to distinguish the original\ncaption v.s. negative caption.\nHere are some examples whose output for

In [9]:
top_k = 16384
M = 16
num_shot_answers = 9999999
efConstruction = M * 2

# Return top k results with HNSW index.
search_params = {"ef": efConstruction}

# Define output fields to return.
output_fields = ["source", "chunk"]

metadata_fields = [f for f in output_fields if f != 'chunk']

SAMPLE_QUESTION = "What is the title of the paper"

# Search Milvus collection
retrieved_top_k = milvus_client.search(
    COLLECTION_NAME,
    data=encoder.encode([SAMPLE_QUESTION]),
    search_params=search_params,
    output_fields=output_fields,
    limit=top_k,
    consistency_level="Eventually"
)

distances = []
context = []
context_metadata = []
i = 1
for r in retrieved_top_k[0]:
    distances.append(r['distance'])
    if i <= num_shot_answers:
        if len(metadata_fields) > 0:
            metadata = {}
            for field in metadata_fields:
                metadata[field] = r['entity'][field]
            context_metadata.append(metadata)
        context.append(r['entity']['chunk'])
    i += 1

# Assemble formatted results in a zipped list.
formatted_results = list(zip(distances, context, context_metadata))
# Return all the things for convenience.
# formatted_results
# , context, context_metadata
context

NameError: name 'milvus_client' is not defined

In [25]:
docs[-1].page_content.replace('\n',' ')

'Model Text Score CLIP (ViT-B/32) (Radford et al., 2021) 25.25 UNITER base(Chen et al., 2020) 32.25 UNITER large(Chen et al., 2020) 38.00 VinVL (Zhang et al., 2021) 37.75 BLIP2 (Zhang et al., 2021) 44.00 PALI (Chen et al., 2023) 46.50 LLaV A-1.5 (Liu et al., 2023a) 65.85 LLaV A-1.5 + CounterCurate 69.15 (+3.30) Table 7: Our fine-tuned model of LLaV A-1.5 with Flickr30k-Attributes shows significant improvements on the difficult visio-linguistic reasoning dataset Winoground. improving models’ counting abilities. Model Setting Accuracy Vanilla 57.50 + CounterCurate (No Neg) 60.85 + CounterCurate (No Group) 65.70 + CounterCurate 68.51 Table 8: More ablations on the CLIP model trained with Flickr30k-Counting. the score without any negatives and the score without grouping. G More Results on SugarCrepe Here we show the performance improvements for every sub-category of SugarCrepe in Table 6. Over- all, CounterCurate shows clear performance gain over the two base models, CLIP (Radford et al., 

In [26]:
docs[-1].metadata

{'source': 'pdfs/2402_13254/2402_13254.pdf', 'page': 12}

In [28]:
docs[0].schema

<bound method BaseModel.schema of <class 'langchain_core.documents.base.Document'>>

In [4]:
def process_url(url):

    COLLECTION_NAME = "MilvusDocs"
    HF_EOS_TOKEN_LENGTH = 3
    LLM_NAME = "gpt-3.5-turbo"
    TEMPERATURE = 0.1
    RANDOM_SEED = 415
    M=16
    
    folder_pdfs = "pdfs"
    folder_images = "images"
    folder_final_videos = "final_videos"
    folder_audio = "audio_voiceovers"
    folder_transcripts = "transcripts"
    
    # Call the function to create the folder
    create_folder(folder_pdfs)
    create_folder(folder_images)
    create_folder(folder_audio)
    create_folder(folder_final_videos)
    create_folder(folder_transcripts)
    
    # Download and save a PDF file from an arXiv.org URL into local directory.
    arxiv_id = download_and_save_pdf(url, folder_pdfs)
    
    arxiv_name = arxiv_id.replace(".", "_")
    pdf_path = os.path.join(folder_pdfs, arxiv_name)
    pdf_file_path = os.path.join(pdf_path, f"{arxiv_name}.pdf")
    image_folder = f"{folder_images}/{arxiv_name}_pngs" 
    mp3_path = f"{folder_audio}/output_{arxiv_name}.mp3"
    output_path = f"{folder_final_videos}/{arxiv_name}.mp4" 
    
    # Download open source embedding model "WhereIsAI/UAE-Large-V1" via Huggingface's Sentence Transformers
    encoder, EMBEDDING_DIM, MAX_SEQ_LENGTH = download_and_initialize_embedding_model()
    
    # Create a no-schema milvus collection and define the database index
    milvus_client = create_milvus_collection(COLLECTION_NAME, EMBEDDING_DIM, M, my_uri)
    
    # Load PDF's into a PDF object using LangChain's PyPDFLoader
    loader = PyPDFLoader(f"{pdf_path}/{arxiv_name}.pdf")
    docs = loader.load()
    
    # Cut text from PDF's into chunks using LangChain's RecursiveCharacterTextSplitter
    chunks = split_documents_to_chunks(docs, MAX_SEQ_LENGTH, HF_EOS_TOKEN_LENGTH)
    
    # Insert text chunks into Milvus vector database using index type HNSW Indexing and Cosine Distance
    insert_chunks_into_milvus(chunks, COLLECTION_NAME, encoder, milvus_client, MAX_SEQ_LENGTH, HF_EOS_TOKEN_LENGTH)
    
    # Generate transcript using OpenAI based on the cosine distance search of the document then using gpt-3.5-turbo's chat completions
    text_for_TTS_list = search_and_generate_response(
        milvus_client, # Running session via docker container on port http://localhost:19531
        encoder, # Sentence Transformer WhereIsAI/UAE-Large-V1
        COLLECTION_NAME, # MilvusDocs by default
        LLM_NAME,
        TEMPERATURE,
        RANDOM_SEED,
        M)
    
    # Save LLM-generated voiceover script to directory
    save_transcript(text_for_TTS_list, folder_transcripts, arxiv_name)
    print("transcript saved")
    
    # # convert text to speech with Elevenlabs
    # audio_path = text_to_speech(text_for_TTS_list[0], arxiv_name, folder_audio)
    
    # # convert each pdf to a png
    # convert_pdf_to_png(folder_images, pdf_file_path, arxiv_name)
    
    # # cut png's in half
    # cut_pngs_in_half(image_folder)

    # move_uncropped_files(image_folder)
    
    # # combine png's with audio to generate an mp4
    # create_video(mp3_path, image_folder, output_path)
    # milvus_client.drop_collection(collection_name=COLLECTION_NAME)
    # return folder_final_videos

In [5]:
# paper_list = ["2402.13254", "2403.07874", "2403.07872", "2403.07870","2403.07869"]
paper_list = ["2402.13254","2308.08079"]

In [6]:
for paper in paper_list:

    url = f"https://arxiv.org/pdf/{paper}.pdf"
    process_url(url)

The folder 'pdfs' already exists.
The folder 'images' already exists.
The folder 'audio_voiceovers' already exists.
The folder 'final_videos' already exists.
The folder 'transcripts' already exists.
The folder 'pdfs/2402_13254' already exists.

PDF downloaded and saved as 2402_13254.pdf

device: cpu


No sentence-transformers model found with name WhereIsAI/UAE-Large-V1. Creating a new one with MEAN pooling.



Datatype of SentenceTransformer encoded object<class 'sentence_transformers.SentenceTransformer.SentenceTransformer'>


What the encoder object looks like: SentenceTransformer(
  (0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 1024, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
)


model_name: WhereIsAI/UAE-Large-V1

EMBEDDING_DIM: 1024

MAX_SEQ_LENGTH: 512

Successfully created collection: `MilvusDocs`
{'collection_name': 'MilvusDocs', 'auto_id': True, 'num_shards': 1, 'description': '', 'fields': [{'field_id': 100, 'name': 'id', 'description': '', 'type': 5, 'params': {}, 'element_type': 0, 'auto_id': True, 'is_primary': True}, {'field_id': 101, 'name': 'vector', 'description': '', 'type': 1

100%|█████████████████████████████████████████████| 1/1 [00:00<00:00, 20.92it/s]

Finished inserting entities






Number of chunks inserted into Milvus database: 122 with chunk id starting at number: 448362022341446549



huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


('\n'
 'Answer 1: Title: "Llama: Open and efficient foundation language models"\n'
 '\n'
 'Introduction:\n'
 '- The research paper titled "Llama: Open and efficient foundation language '
 'models" is authored by Marie-Anne Martinet, Timothée Lachaux, Baptiste '
 'Rozière, Naman Goyal, Eric Hambro, Faisal Azhar, and others in 2023.\n'
 '- The paper aims to introduce Llama, a new language model that is open and '
 'efficient, contributing to the advancement of language models in the field '
 'of natural language processing.\n'
 '\n'
 'Key Contributions:\n'
 '1. Introduction of Llama Language Model:\n'
 '   - The paper introduces the Llama language model, which is designed to be '
 'open and efficient, addressing the need for improved language models in the '
 'field of natural language processing.\n'
 '   - Llama is developed to be a foundational language model that can be '
 'utilized for various NLP tasks, offering a new approach to language '
 'modeling.\n'
 '\n'
 '2. Efficiency and P

No sentence-transformers model found with name WhereIsAI/UAE-Large-V1. Creating a new one with MEAN pooling.



PDF downloaded and saved as 2308_08079.pdf

device: cpu

Datatype of SentenceTransformer encoded object<class 'sentence_transformers.SentenceTransformer.SentenceTransformer'>


What the encoder object looks like: SentenceTransformer(
  (0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 1024, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
)


model_name: WhereIsAI/UAE-Large-V1

EMBEDDING_DIM: 1024

MAX_SEQ_LENGTH: 512

Collection had previously been created, dropping previous collection to initialize anew: `MilvusDocs`

Successfully created collection: `MilvusDocs`
{'collection_name': 'MilvusDocs', 'auto_id': True, 'num_shards': 1, 'description': '', 'fields': [{'field_id': 100, 'name': 'id', 'desc

100%|█████████████████████████████████████████████| 1/1 [00:00<00:00, 31.48it/s]

Finished inserting entities






Number of chunks inserted into Milvus database: 156 with chunk id starting at number: 448362022341446672

('\n'
 'Answer 1: Title: Advancing Lower-Dimensional Space Stabilization through '
 'Rigid Transformations: A Novel Workflow\n'
 '\n'
 'Introduction:\n'
 '- Introduce the research scientists involved: The research conducted by a '
 'team of scientists led by the corresponding author, whose workflow is '
 'publicly available on GitHub.\n'
 '- Briefly mention the institutions involved: The research was conducted '
 'independently by the team of scientists.\n'
 '\n'
 'Key Contributions:\n'
 '1. Development of a novel workflow: \n'
 '   - The research presents a novel workflow for stabilizing '
 'lower-dimensional spaces through rigid transformations.\n'
 '   - The workflow is designed to address stability issues in '
 'lower-dimensional spaces and ensure Euclidean transformation invariance.\n'
 '\n'
 '2. Application of rigid transformations:\n'
 '   - The research applies rigid trans

In [None]:
# Gradio interface
iface = gr.Interface(
    fn=process_url,
    inputs=gr.Textbox(placeholder="Enter arXiv PDF URL"),
    outputs=gr.Video(),
    live=True,
    theme="sky",
    flagging_options=None,  # Disable the flag button
    title="Arxiv2Video",
)

# Add a submit button
submit_button = gr.Button()
iface.launch(share=True)


In [None]:
iface.close()