In [4]:
!pip install mediawikiapi duckduckgo-search html2text urllib3 tqdm
import json
import numpy as np
import glob
import os
import re
import html2text
import urllib3

from duckduckgo_search import DDGS
from pptx import Presentation
from pptx.dml.color import RGBColor
from pptx.util import Cm, Pt, Inches
from pptx.enum.text import MSO_ANCHOR, MSO_AUTO_SIZE
from h2ogpte import H2OGPTE
from mediawikiapi import MediaWikiAPI
from tqdm import tqdm
api = os.environ.get('H2OGPTE_API_KEY')





In [5]:
# Create the folder for searched articles if it doesn't exist
def create_folder(folder_name):
    """Create ``folder_name`` (including missing parents) if it is not there yet.

    Args:
        folder_name: Path of the directory to create.

    Raises:
        FileExistsError: If the path exists but is not a directory.

    Calling this for a directory that already exists is a no-op.
    """
    # exist_ok=True removes the check-then-create race of
    # `if not os.path.exists(...): os.makedirs(...)`.
    os.makedirs(folder_name, exist_ok=True)

# Working directories: downloaded article text and generated decks.
for folder in ('articles', 'presentations'):
    create_folder(folder)

In [6]:
# Client for the hosted h2oGPTe instance, authenticated with the `api` key.
client = H2OGPTE(address="https://h2ogpte.genai.h2o.ai", api_key=api)

# The request that drives every prompt further down.
user_query = 'Create a presentation on dynamic programming'

In [7]:
def clear_all_documents(client):
    """Delete the recent documents (first 1000) and assert that none are left.

    Args:
        client: Object exposing ``list_recent_documents``, ``delete_documents``
            and ``count_documents``.

    Raises:
        AssertionError: If the client still reports documents afterwards.
    """
    recent_docs = client.list_recent_documents(offset=0, limit=1000)
    doc_ids = [doc.id for doc in recent_docs]
    client.delete_documents(doc_ids)
    assert client.count_documents() == 0

def clear_all_collections(client):
    """Delete the recent collections (first 1000) and assert that none are left.

    Args:
        client: Object exposing ``list_recent_collections``,
            ``delete_collections`` and ``count_collections``.

    Raises:
        AssertionError: If the client still reports collections afterwards.
    """
    recent_collections = client.list_recent_collections(0, 1000)
    collection_ids = [collection.id for collection in recent_collections]
    client.delete_collections(collection_ids)
    assert client.count_collections() == 0


def trim(lst, keep=30):
    '''
    Return the first `keep` items taken from a copy of `lst`; the input itself
    is not modified. An earlier version shuffled before trimming; only the
    truncation is left.
    '''
    snapshot = lst.copy()
    return snapshot[:keep]

def format_site_description(ls, start_from=0):
    """Number every entry of ``ls`` and join the entries with blank lines.

    Args:
        ls: Iterable of entries; each one is rendered with ``str`` formatting.
        start_from: Number given to the first entry (default 0).

    Returns:
        A string such as ``"0. first\\n\\n1. second"``; empty for empty input.
    """
    # enumerate() keeps the displayed number separate from the position in the
    # input, so a non-zero start_from no longer indexes past the end of a copy,
    # and one-shot iterables are consumed exactly once.
    return "\n\n".join(
        f"{number}. {entry}" for number, entry in enumerate(ls, start=start_from)
    )


def try_and_parse(user_query, function, failed=0, markdown=False, max_retries=10):
    '''
    Call function(user_query) and parse the `.content` of the reply (usually an
    LLM answer) as JSON, asking again when the reply cannot be parsed.

    Parameters:
        user_query: value handed to `function` on every attempt.
        function: callable returning an object with a string `.content`.
        failed: number of failed attempts counted so far.
        markdown: when True, the JSON is expected inside the first fenced code
            chunk of the reply; the reply is printed before parsing.
        max_retries: number of failed attempts tolerated before giving up.

    Returns:
        The decoded JSON value.

    Raises:
        RuntimeError: when more than `max_retries` attempts have failed; the
            last parsing error is attached as the cause.
    '''
    # First fenced chunk (optionally language-tagged) whose fences start a line.
    pattern = r'^```(?:\w+)?\s*\n(.*?)(?=^```)```'

    # A loop instead of self-recursion: the number of LLM calls is bounded and
    # a persistently malformed reply can no longer end in RecursionError.
    while True:
        chosen = function(user_query)
        try:
            if not markdown:
                return json.loads(chosen.content)

            print(chosen.content)
            chunks = re.findall(pattern, chosen.content, re.DOTALL | re.MULTILINE)
            if not chunks:
                raise ValueError("no fenced code chunk found in the reply")
            return json.loads(chunks[0].strip())
        except Exception as e:
            failed += 1
            print(failed)
            print(e)  # CHANGE TO LOGGING STATEMENT
            if failed > max_retries:
                raise RuntimeError(
                    f"could not parse a JSON value after {failed} failed attempts"
                ) from e


## Step 1. What comes to mind when you think about xyz?

In [8]:
def search(user_query):
    """Ask the LLM for search queries (at most five) about ``user_query``.

    The prompt requests free-text reasoning followed by a JSON array of query
    strings inside a code chunk.
    """
    return client.answer_question(
        question=user_query,
        system_prompt="""You are an assistant whose task is to perform searches on the internet on a specific topic.\
    The user is interested to create a presentation about a topic of interest.\
    Think about what to do, then reply with your thought process and at least one corresponding google query as an array in JSON format,\
    but limit yourself to 5 queries.\
    The JSON array should be contained in a code chunk. Keep strictly to the format in the example below.
    Example:
    Since the presentation is about milk, I will probably want to search up the different types of milk, ...
    ```json
    ["Milk", "Oat Milk", "Plant-based milks", "Cow Milk", "Goat Milk"]
    ```
    """,
        llm='mistralai/Mixtral-8x7B-Instruct-v0.1',
    )


# Parse the fenced JSON array out of the reply (retrying on malformed replies).
searched = try_and_parse(user_query, search, markdown=True)
searched

Sure, I can help you with that! Here's my thought process and some potential Google queries to get started:

1. First, I would want to understand the basics of dynamic programming. A good starting point might be to search for "dynamic programming definition" or "what is dynamic programming". This could lead me to resources that explain the concept in a clear and concise way.
2. Once I have a solid understanding of dynamic programming, I would want to explore some common use cases. Some potential queries here might include "dynamic programming examples" or "common dynamic programming problems". These searches could help me identify specific scenarios where dynamic programming is particularly useful, such as optimization problems or problems with overlapping subproblems.
3. After identifying some common use cases, I would want to dive deeper into the mechanics of dynamic programming. This might involve searching for "dynamic programming algorithm" or "how does dynamic programming work". 

['dynamic programming definition',
 'dynamic programming examples',
 'dynamic programming algorithm',
 'dynamic programming tips',
 'dynamic programming in industry']

## Step 2. Search Wikipedia 

In [9]:
import random
# Clients for Wikipedia lookups and DuckDuckGo web search.
wiki = MediaWikiAPI()
ddgs = DDGS()

# Up to five Wikipedia hits per generated query; the set drops duplicate titles.
articles = list(
    {title for cat in searched for title in wiki.search(cat, results=5)}
)




In [10]:
articles

['Dynamic pricing',
 'Type system',
 'Optimal binary search tree',
 'Stochastic dynamic programming',
 'Edit distance',
 'IP address',
 'Static program analysis',
 'Dynamic time warping',
 'Static web page',
 'Polymorphism (computer science)',
 'Dynamic simulation',
 'Exploratory data analysis',
 'Viterbi algorithm',
 'Comparison of programming languages by type system',
 'Dynamic programming language',
 'Dynamic programming',
 'Python (programming language)',
 'Monkey patch']

In [11]:
# Shuffle the titles in place, then fetch a one-sentence summary for each;
# trim() caps the list at its default length.
random.shuffle(articles)
snippet = trim(
    [wiki.summary(title, auto_suggest=False, sentences=1) for title in articles]
)

In [12]:


# Number the summaries from 0 so the LLM can refer to an entry by its index.
snippet_text = format_site_description(snippet)

print(snippet_text)

0. Dynamic simulation (or dynamic system simulation) is the use of a computer program to model the time-varying behavior of a dynamical system.

1. In computer science,  an optimal binary search tree (Optimal BST), sometimes called a weight-balanced binary tree, is a binary search tree which provides the smallest possible search time (or expected search time) for a given sequence of accesses (or access probabilities).

2. In computational linguistics and computer science, edit distance is a string metric, i.e.

3. The Viterbi algorithm is a dynamic programming algorithm for obtaining the maximum a posteriori probability estimate of the most likely sequence of hidden states—called the Viterbi path—that results in a sequence of observed events.

4. In programming language theory and type theory, polymorphism is the use of a single symbol to represent multiple different types.In object-oriented programming, polymorphism is the provision of a single interface to entities of different types

## Step 2b. Search the Internet (DDG)

In [13]:
# Collect up to three DuckDuckGo hits per query as (title, href, body) tuples.
# Fields are read by key rather than through dict.values(), so the tuple layout
# does not depend on the order in which the library builds each result dict.
ddg_results = list(
    {
        (hit["title"], hit["href"], hit["body"])
        for cat in searched
        for hit in ddgs.text(cat, max_results=3)
    }
)
random.shuffle(ddg_results)
ddg_results = trim(ddg_results, keep=15)

# zip(*[]) yields nothing and would break the three-way unpacking, so fall
# back to empty tuples when no query returned a result.
websites, links, body = zip(*ddg_results) if ddg_results else ((), (), ())

In [14]:
# Only the (link, body) pairs are shown to the LLM, numbered from 0.
ddg_snippet_text = format_site_description(list(zip(links, body)))
print(ddg_snippet_text)

0. ('https://en.wikipedia.org/wiki/Dynamic_programming', 'Dynamic programming is widely used in bioinformatics for tasks such as sequence alignment, protein folding, RNA structure prediction and protein-DNA binding. The first dynamic programming algorithms for protein-DNA binding were developed in the 1970s independently by Charles DeLisi in US and Georgii Gurskii and Alexander Zasedatelev in USSR.')

1. ('https://blog.amphy.com/what-is-dynamic-programming-the-basics-and-its-applications/', 'Dynamic programming means using an algorithmic programming strategy that breaks down problems into sub-problems, then solving each individually and saving the results in a data structure such as an array. This allows future instances of the same problem to be solved more quickly, as the data structure already contains solutions to prior problems.')

2. ('https://simpleprogrammer.com/guide-dynamic-programming/', 'Find the First Solution. The first step to solving any dynamic programming problem usin

## Step 3. "Brainstorm" and filter Wikipedia/DDG searches for useful ones
Wikipedia and DuckDuckGo results are filtered separately. Both prompts use chain-of-thought prompting:
https://www.promptingguide.ai/techniques/cot

In [15]:
def choose_topics(user_query):
    """Ask the LLM which numbered Wikipedia summaries are useful for the deck.

    The prompt asks for a per-entry judgement followed by a code chunk that
    holds a JSON array with the indices of the useful entries. The summaries
    are taken from ``snippet_text`` at call time.
    """
    return client.answer_question(
        question=f"""{user_query}
        Referring to the list of wikipedia entries you have been provided, decide on which topics are useful for the presentation. For each entry, explain, in a few words,\
        whether you think an entry is useful or not and why.
        After that, generate a code chunk. Within the code chunk is an array of integers in JSON, and these integers correspond to the topics you think are useful.
        For each line, think about its usefulness/relevance and make some short deductions, then conclude whether it is useful or not useful.
        Please keep strictly to the format in the following example:
        0. - Sugar irrelevant to Jesus Christ, therefore not useful.
        1. - Christianity is about the topic of Jesus Christ, thus Useful.
        2. - Protestants follow Jesus Christ, therefore useful.
        ```
        [1, 2]
        ```
        """,
        system_prompt=f"""You are an assistant whose task is to help a user in creating a presentation.\
        Here are a list of wikipedia entries, starting from the 0th entry, that may or may not be related to the topic at hand:
        {snippet_text}
        """,
        llm='gpt-4-1106-preview',  # GPT-4 is used for this filtering step
    )


# Indices (into the numbered summaries) that the LLM judged useful.
topics = try_and_parse(user_query, choose_topics, markdown=True)
print(topics)

0. - Dynamic simulation models time-varying behavior but does not specifically pertain to dynamic programming, therefore not useful.
1. - Optimal binary search trees involve a form of optimization, but not directly related to the concept of dynamic programming, therefore not useful.
2. - Edit distance is a string metric and does not relate to dynamic programming, therefore not useful.
3. - The Viterbi algorithm uses dynamic programming for sequence estimation, therefore useful.
4. - Polymorphism in programming is unrelated to dynamic programming, therefore not useful.
5. - Type systems in programming do not directly relate to dynamic programming, therefore not useful.
6. - Monkey patching is a technique for dynamic code changes at runtime, not related to dynamic programming, therefore not useful.
7. - Dynamic pricing is a business strategy and does not relate to dynamic programming, therefore not useful.
8. - Static web pages are unrelated to the algorithmic concept of dynamic programm

In [16]:
articles

['Dynamic simulation',
 'Optimal binary search tree',
 'Edit distance',
 'Viterbi algorithm',
 'Polymorphism (computer science)',
 'Type system',
 'Monkey patch',
 'Dynamic pricing',
 'Static web page',
 'Python (programming language)',
 'Dynamic programming language',
 'IP address',
 'Static program analysis',
 'Dynamic programming',
 'Exploratory data analysis',
 'Stochastic dynamic programming',
 'Dynamic time warping',
 'Comparison of programming languages by type system']

In [17]:
# The indices come from an LLM reply, so keep only integers that address an
# existing summary: an out-of-range value would raise IndexError and a
# negative one would silently select the wrong article.
valid_topics = [
    i for i in topics
    if isinstance(i, int) and not isinstance(i, bool) and 0 <= i < len(snippet)
]
chosen_articles = [articles[i] for i in valid_topics]
chosen_snippets = [snippet[i] for i in valid_topics]
chosen_articles

['Viterbi algorithm',
 'Dynamic programming',
 'Stochastic dynamic programming',
 'Dynamic time warping']

In [18]:
# Look each chosen page up once and read both attributes from the same object,
# instead of calling wiki.page() a second time per article for the images.
chosen_pages = [wiki.page(title, auto_suggest=False) for title in chosen_articles]
chosen_full_articles = [page.content for page in chosen_pages]
chosen_articles_images = [page.images for page in chosen_pages]


## Step 3b. Filter DDG Searches

In [19]:
def choose_ddg_topics(user_query):
    """Ask the LLM which numbered web results are useful and not from Wikipedia.

    The prompt asks for a per-entry judgement followed by a code chunk that
    holds a JSON array with the indices of the useful entries. The entries are
    taken from ``ddg_snippet_text`` at call time.
    """
    return client.answer_question(
        question=f"""{user_query}
        Referring to the list of websites you have been provided, decide on which topics are useful for the presentation. For each entry, explain, in a few words,\
        whether you think an entry is useful or not and why.
        After that, generate a code chunk. Within the code chunk is an array of integers in JSON, and these integers correspond to the topics you think are useful\
        and are NOT from wikipedia.
        For each line, think about its usefulness/relevance and make some short deductions, then conclude whether it is useful or not useful.
        Please keep strictly to the format in the following example:
        0. - Sugar irrelevant to Jesus Christ, therefore not useful.
        1. - Christianity is about the topic of Jesus Christ, thus Useful.
        2. - Protestants follow Jesus Christ, therefore useful.
        3. - Website is a link to a youtube video, which is not suitable for a presentation, thus not useful.
        4. - This is relevant, but it is a Wikipedia link, so it is omitted.
        ```
        [1, 2]
        ```
        """,
        system_prompt=f"""You are an assistant whose task is to help a user in creating a presentation.\
        Here are a list of website body text, starting from the 0th entry, that may or may not be related to the topic at hand:
        {ddg_snippet_text}
        """,
        llm='gpt-4-1106-preview',  # GPT-4 is used for this filtering step
    )


# Indices (into the numbered web results) that the LLM judged useful.
ddg_topics = try_and_parse(user_query, choose_ddg_topics, markdown=True)

ddg_topics

0. - This entry provides specific applications of dynamic programming in bioinformatics, which could be useful for an advanced audience but is from Wikipedia, thus omitted.
1. - Explains the basics of dynamic programming and its applications, which is essential for an introductory presentation, therefore useful.
2. - Offers a step-by-step approach to solving dynamic programming problems, which is practical for understanding the process, thus useful.
3. - Provides a detailed explanation of dynamic programming, but it is a duplicate of entry 5 from the same source, therefore not useful.
4. - Discusses the conceptual approach of dynamic programming, which is important for grasping the fundamental idea, thus useful.
5. - Similar to entry 3, it explains dynamic programming in detail, but as it is a duplicate, it is not useful.
6. - Introduces common terms in dynamic programming, which is helpful for beginners to understand the jargon, therefore useful.
7. - Offers a comparison of top-down a

[1, 2, 4, 6, 7, 8, 9, 11]

In [20]:
# Keep only the DuckDuckGo hits whose index the LLM marked as useful.
chosen_websites = [websites[i] for i in ddg_topics]
chosen_links = [links[i] for i in ddg_topics]
chosen_body = [body[i] for i in ddg_topics]

## Step 4: Store useful ideas in VectorDB (h2oai)

In [21]:

    
# now its time to store them for RAG
import os


# Collection that receives every document uploaded below.
collection_id = client.create_collection(name='Articles', description='Articles for presentation')

# Chosen Wikipedia title -> full page text.
pages = {title: text for title, text in zip(chosen_articles, chosen_full_articles)}



In [22]:

to_ingest = []
for title, content in pages.items():
    # Drop everything except letters and digits to get a file-system-safe name.
    # Raw string: '\W' inside a plain literal is an invalid escape sequence.
    safe_title = re.sub(r'[\W_]+', '', title)
    name = f"./articles/{safe_title}.txt"

    # Context managers close the handles even when the write or upload raises.
    with open(name, "w", encoding="utf-8") as text_file:
        text_file.write(content)

    # The upload call is handed a binary handle on the file just written.
    with open(name, 'rb') as binary_file:
        to_ingest.append(client.upload(name, binary_file))

client.ingest_uploads(collection_id, to_ingest)

Exception: HTTP error: 403 Forbidden

In [20]:


# http = urllib3.PoolManager()
# to_ingest = [] # CLEAR LIST
# for link in tqdm(chosen_links):
#     page = http.request('GET', link)
#     html_content = page.data.decode('utf-8')
#     rendered_content = html2text.html2text(html_content)
    
#     sanitised_fname = re.sub(r'[\W_]+', '_', link)
#     path = f"./articles/{sanitised_fname}.txt"
#     file = open(path, 'w', encoding='utf-8')
#     file.write(rendered_content)
#     file.close()

#     f = open(path, 'rb')
#     to_ingest.append(client.upload(path, f)) 
#     f.close()

# client.ingest_uploads(collection_id, to_ingest)  


import urllib3
import html2text
import re
from tqdm import tqdm
from bs4 import BeautifulSoup



http = urllib3.PoolManager()
h = html2text.HTML2Text()
h.ignore_links = True  # Optionally ignore links in the output

# Start from an empty list so this cell only ingests the uploads it creates.
# Upload ids left over from the Wikipedia cell were already passed to
# ingest_uploads there; resubmitting them is the likely cause of the
# "NoSuchKey" job crash (TODO confirm against the h2oGPTe upload semantics).
to_ingest = []

for link in tqdm(chosen_links):
    try:
        # Fetch the page with a browser-like User-Agent; the timeout keeps one
        # unresponsive host from stalling the whole loop.
        page = http.request(
            'GET',
            link,
            headers={
                "User-Agent":
                "Mozilla/5.0 (iPhone; CPU iPhone OS 17_3_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) CriOS/122.0.6261.89 Mobile/15E148 Safari/604.1"
            },
            timeout=30.0,
        )

        # Skip error pages instead of ingesting their HTML as article text.
        if page.status != 200:
            print(f"HTTP Error: status {page.status} for {link}")
            continue

        # Undecodable bytes are replaced rather than discarding the whole page.
        html_content = page.data.decode('utf-8', errors='replace')

        # Parse the HTML and keep the <body>; find() returns None when there is
        # no such tag, in which case the whole document is used.
        soup = BeautifulSoup(html_content, 'html.parser')
        body_content = soup.find('body')
        if body_content is None:
            body_content = soup

        # Convert the HTML to plain text
        rendered_content = h.handle(str(body_content))

        # Sanitize the filename
        sanitised_fname = re.sub(r'[\W_]+', '_', link)
        path = f"./articles/{sanitised_fname}.txt"

        # Write the rendered content, then upload the file in binary mode
        with open(path, 'w', encoding='utf-8') as file:
            file.write(rendered_content)

        with open(path, 'rb') as f:
            to_ingest.append(client.upload(path, f))

    # Most specific clauses first: the timeout and connection errors derive
    # from urllib3's HTTPError, so they were unreachable when it came first.
    except urllib3.exceptions.TimeoutError as errt:
        print(f"Timeout Error: {errt}")
    except urllib3.exceptions.ConnectionError as errc:
        print(f"Error Connecting: {errc}")
    except urllib3.exceptions.HTTPError as errh:
        print(f"HTTP Error: {errh}")
    except Exception as err:
        # Best effort: one bad page must not abort the remaining downloads.
        print(f"An error occurred: {err}")

client.ingest_uploads(collection_id, to_ingest)

100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:44<00:00,  4.41s/it]


Job(id='fb11990e-fe04-4018-97c0-44b751e3416a', passed=0.5, failed=0.5, progress=1.0, completed=True, canceled=False, date=datetime.datetime(2024, 2, 29, 11, 45, 1, tzinfo=TzInfo(UTC)), kind=<JobKind.IngestUploadsJob: 'IngestUploadsJob'>, statuses=[JobStatus(id='f5706ac2e39d4bba8eb5c8165393a3e5', status='Collecting files...')], errors=['Job crashed: S3 operation failed; code: NoSuchKey, message: Object does not exist, resource: /h2ogpte-web-uploads/e6af1c36-166f-4488-ab30-321fcdf46f27/9c10078f-36e1-478f-8599-83535ade387e, request_id: 17B852CD9FD9FC1C, host_id: 292fc9e0c54138519d4cedfc5e6f652e396c2bf1612c53cfc4cd3bddf4b165a9, bucket_name: h2ogpte-web-uploads, object_name: e6af1c36-166f-4488-ab30-321fcdf46f27/9c10078f-36e1-478f-8599-83535ade387e'], last_update_date=datetime.datetime(2024, 2, 29, 11, 45, 1, tzinfo=TzInfo(UTC)), duration='0s')

In [21]:
# Remove the temporary article text files from ./articles.
for txt_path in glob.glob('./articles/*.txt'):
    os.remove(txt_path)

## Step 5: Plan the slide sections
This is to ensure the entire presentation is a coherent one with a flow/narrative, instead of many disjoint/overlapping generations.
Again, chain of thought prompting is very heavily incorporated

In [22]:
def decide_sections(user_query):
    """Ask the LLM to plan the deck and return its slide titles.

    The prompt requests a short plan, a presentation title, and a code chunk
    holding a JSON array of slide titles whose first element is the title
    slide. The chosen article titles and websites are read at call time.
    """
    return client.answer_question(
        question=f"""{user_query}
        Please plan the presentation by doing the following:
        1. Explain how you would design the presentation slides such that the presentation will flow well.\
        Remember that each slide must contain something different, and content should not overlap.
        2. Think of a good title for the presentation.
        3. Create a code chunk. Inside that code chunk, generate a JSON array consisting of appropriate slide titles starting from the first slide to the last slide, \
        remembering your answer to point 2. Include the title slide, which is the title for the presentation.

        Below is an example reply. Please adhere strictly to the format in the example below and remember to output the JSON array in a code chunk:  
        1. I would introduce the Transformers franchise and provide general information about its history to ease my viewers into the subject. \
        Then, I will think about subtopics, such as the Transformers films, Transformers characters and Transformers in comics, using the wikipedia entry summaries\
        I have been provided. 
            * For my first subtopic, the Transformers films, I would create two additional slides to expand on Bumblebee (2018) and Revenge of the Fallen (2009)\
            as these are popular films within the franchise. I will order the films chronologically.
            * For my second subtopic on Transformers characters, I will have a slide on the cast of the film. 
            * For my next subtopic...
        2. I think a good title for this presentation is "Transformers: An Overview".
        3. Here is the json array of slide titles:
        ```json
        [
        "Transformers: An Overview", 
        "Introduction to Transformers", 
        "Transformers in Film",
        "Transformers: Revenge of the Fallen (2009)", 
        "Bumblebee (2018)", 
        "Characters in the Transformers Universe", 
        "Transformers in comics",
        "Conclusion"
        ]
        ```
        """,
        system_prompt=f"""You are an assistant whose task is to help a user in creating a presentation.\
        Below is a list of wikipedia articles that are selected for the presentation.\
        You will be asked to come up with slide titles for the presentation. Each line is a 1-sentence summary of a wikipedia page.\
        The number of slides should depend on the amount of information/wikipedia entry articles and websites available.
        Articles:
        {chosen_articles}
        Chosen websites:
        {format_site_description(list(zip(chosen_websites, chosen_links)))}
        """,
        llm='mistralai/Mixtral-8x7B-Instruct-v0.1',
    )


# Slide titles in presentation order; element 0 is the title slide.
all_sections = try_and_parse(user_query, decide_sections, markdown=True)

1. For the presentation on dynamic programming, I would start with an introductory slide that explains what dynamic programming is and its importance in computer science. Then, I would create slides for each of the provided articles, starting with a slide on dynamic programming, followed by matrix chain multiplication, and ending with stochastic dynamic programming. I would also include slides on the differences between dynamic programming and greedy algorithms, as well as examples and common problems in dynamic programming.
2. A good title for the presentation could be "Dynamic Programming: Techniques and Applications".
3. Here is the JSON array of slide titles:
```json
[
"Dynamic Programming: Techniques and Applications",
"Introduction to Dynamic Programming",
"Matrix Chain Multiplication",
"Stochastic Dynamic Programming",
"Dynamic Programming vs Greedy Algorithms",
"Dynamic Programming Examples and Common Problems",
"Conclusion"
]
```
Regarding the chosen websites, I would include 

In [23]:
# Everything after the first entry is a content slide; the first entry is
# used separately as the title-slide text.
sections = all_sections[1:]

sections

['Introduction to Dynamic Programming',
 'Matrix Chain Multiplication',
 'Stochastic Dynamic Programming',
 'Dynamic Programming vs Greedy Algorithms',
 'Dynamic Programming Examples and Common Problems',
 'Conclusion']

In [24]:
# Drop the old client and build a fresh one before opening the chat session.
# (Open question from the original author: does this actually reset the client?)
del client
client = H2OGPTE(address="https://h2ogpte.genai.h2o.ai", api_key=api)

# Chat session bound to the collection that holds the uploaded articles.
chat_session_id = client.create_chat_session(collection_id)
chat_session_id

'22d29a4e-1535-409c-a4c5-ec8656ee0692'

#### Ref for slide types:  
0. title and subtitle 
1. title and content 
2. section header 
3. two content 
4. Comparison 
5. Title only  
6. Blank 
7. Content with caption 
8. Pic with caption 


## Step 6: Generate using RAG
LLM chooses colour with chain-of-thought prompting again.

In [25]:
# 16 x 9 inch deck; layout 0 is the "title and subtitle" layout.
prs = Presentation()
prs.slide_width = Inches(16)
prs.slide_height = Inches(9)
title_slide = prs.slides.add_slide(prs.slide_layouts[0])


def decide_slide_format(user_query):
    """Ask the LLM for a background colour and a text colour, both as RGB.

    The prompt requests a short justification followed by a code chunk holding
    a JSON array of two single-key objects ("background" and "text").
    """
    return client.answer_question(
        question=f"""{user_query} Think of a good background colour, in RGB format,\
        for the slides and a good colour, also in RGB format, for the\
        text. Typically, if the text colour is bright (for example RGB [255, 255, 255] is white), then the background colour should be dark
        (RGB [0, 0, 100] is dark blue). Conversely, if the text colour is dark (for example RGB [0, 0, 0] is black), the background colour should be bright\
        . You are free to choose any text and background colour, \
        as long as you follow these rules. Please do not assign grey-scale colours for the text and background (like RGB [50, 50, 50]), as much as possible.

        Explain clearly why you chose the background and text colours. Then, generate a code chunk. Within the code chunk,\
        provide a JSON array containing two colours. Do not say anything else. Adhere strictly to the example reply below:
        I chose blue RGB [0, 35, 140] for the background color and light yellow RGB [255, 234, 0] for the font color. The contrast makes it easy to read.\
        Furthermore, the colours blue and yellow are associated with the Pokémon Franchise.
        ```
        [{{"background": [0, 0, 140]}}, {{"text": [255, 234, 0]}}]
        ```
        """,
        system_prompt=f"""You are an assistant whose task is to help a user in creating a presentation.\
        Here are a list of wikipedia entry summaries that are selected for the presentation:
        {chosen_snippets}
        """,
        llm='mistralai/Mixtral-8x7B-Instruct-v0.1',
    )


# NOTE: `format` shadows the built-in of the same name; the name is kept
# because a later cell reads it.
format = try_and_parse(user_query, decide_slide_format, markdown=True)

I chose a light grey background color RGB [230, 230, 230] and a dark purple text color RGB [50, 0, 80]. The light grey background provides a subtle contrast to make the text easy to read, while the dark purple text color adds a touch of elegance and professionalism to the slides.
```
[
  {"background": [230, 230, 230]},
  {"text": [50, 0, 80]}
]
```
Slide 1: Introduction to Dynamic Programming

Dynamic programming is both a mathematical optimization method and an algorithmic paradigm. It is used to solve complex problems by breaking them down into smaller, more manageable subproblems. The solutions to these subproblems are then combined to find the optimal solution to the original problem.

Slide 2: Matrix Chain Multiplication

Matrix chain multiplication (or the matrix chain ordering problem) is an optimization problem concerning the most efficient way to multiply a given sequence of matrices. This problem can be solved using dynamic programming by calculating the optimal order of mul

In [26]:
format

[{'background': [230, 230, 230]}, {'text': [50, 0, 80]}]

In [27]:
# The LLM reply is expected to be a list of single-key dicts. Merge them and
# look the colours up by name, so a reply that lists "text" before
# "background" (or returns one combined dict) is still applied correctly.
colour_entries = [format] if isinstance(format, dict) else list(format)
colours = {}
for entry in colour_entries:
    colours.update(entry)
# Fall back to the positional reading when an expected key is missing.
if "background" not in colours:
    colours["background"] = list(colour_entries[0].values())[0]
if "text" not in colours:
    colours["text"] = list(colour_entries[1].values())[0]

background = RGBColor(*colours["background"])
font = RGBColor(*colours["text"])

# Solid background fill for the title slide.
fill = title_slide.background.fill
fill.solid()
fill.fore_color.rgb = background

# Title text: the first planned section is the presentation title.
title_slide.shapes.title.text = all_sections[0]
title_paragraph = title_slide.shapes.title.text_frame.paragraphs[0]
title_paragraph.font.color.rgb = font
title_paragraph.font.name = 'Montserrat'
title_paragraph.font.bold = True

# Centre a 12 x 2 inch box horizontally and place it one inch above the
# vertical centre. The right-hand side is evaluated before any assignment, so
# `first_shape.height` below is still the shape's original height.
first_shape = title_slide.shapes[0]
first_shape.left, first_shape.top, first_shape.width, first_shape.height = (
    (prs.slide_width - Inches(12)) // 2,
    (prs.slide_height - first_shape.height) // 2 - Inches(1),
    Inches(12),
    Inches(2),
)

In [28]:

# Generate one content slide per planned section: the slide text comes from a
# query against the chat session opened above.
with client.connect(chat_session_id) as session:

    for section in tqdm(sections):
        # Layout 1 is "title and content"; give the slide the chosen background.
        slide = prs.slides.add_slide(prs.slide_layouts[1])
        fill = slide.background.fill
        fill.solid()
        fill.fore_color.rgb = background

        
        # Body placeholder that will receive the generated text.
        contents = slide.placeholders[1]
        contents.text_frame.word_wrap = True

        # Slide title: the section name in the chosen text colour.
        title = slide.shapes.title
        title.text = section
        title.text_frame.paragraphs[0].font.color.rgb = font
        title.text_frame.paragraphs[0].font.size = Pt(32)
        title.text_frame.paragraphs[0].font.name = 'Karla'
       
       
        # The section title is the user message; the system prompt lists all
        # section titles so the model sees the overall outline.
        content = session.query(
            
            message = section,
            system_prompt=f"""You are an assistant whose task is to help a user in creating a presentation. \
            The slides of the presentation are as follows: {sections}
            You are now tasked with generating the content of one slide, which will be provided by the user.
            You are designing one slide, so do not generate long paragraphs. Instead, summarise your most important points\
            in up to 3 sentences each, and do not generate more than 15 lines of text.
            """,
            pre_prompt_query="You have been provided with the following information, which may be useful in your task.",
            prompt_query="""Decide if the information is relevant, and use it if needed.\
            Generate the content required in the slide provided by the user. You only need to generate the contents of the slide, not the title\
            or anything else. Remember, you are designing one slide, so do not generate long paragraphs. Instead, summarise your most important points\
            in up to 3 sentences each, and do not generate more than 15 lines of text. Between each point, leave a line.

            Here is an example. Please adhere to this example strictly:
            This is the first point I am trying to make. I will explain this first point in the second sentence.
            
            This is the second point I am trying to make. This is the second sentence for the second point. \
            I need a third sentence to fully explain this point. Notice that I will leave a line for the next point.

            This is the third point.
            """,
            llm="mistralai/Mixtral-8x7B-Instruct-v0.1",
            rag_config={
                "rag_type": "hyde1",
            },
        ).content

        contents.text = content
        
        # Apply the same font, size and colour to every generated paragraph.
        for paragraph in contents.text_frame.paragraphs:
            # NOTE(review): these are bare integers, not Pt(...) values, so the
            # spacing is presumably close to zero - confirm whether Pt(1) was meant.
            paragraph.space_after = 1
            paragraph.space_before = 1
            # paragraph.level = 0
           
            paragraph.font.size = Pt(22)  
            paragraph.font.color.rgb = font
            paragraph.font.name = 'Karla'

        contents.text_frame.auto_size = MSO_AUTO_SIZE.SHAPE_TO_FIT_TEXT
        shapes = slide.shapes
        new_width = Inches(14)
        new_height = Inches(7)
        # Widen both shapes to 14 inches and centre them horizontally. The title
        # (shapes[0]) keeps its height and top; the body (shapes[1]) keeps its
        # top and becomes 7 inches tall.
        shapes[0].height, shapes[0].width, shapes[0].top, shapes[0].left = shapes[0].height, new_width, shapes[0].top, (prs.slide_width-new_width)//2
        shapes[1].height, shapes[1].width, shapes[1].top, shapes[1].left = new_height, new_width, shapes[1].top, (prs.slide_width-new_width)//2
        
        

# gpt-4-1106-preview


100%|████████████████████████████████████████████████████████████████████████████████████| 6/6 [02:31<00:00, 25.20s/it]


In [29]:
# WARNING: these helpers delete the recent collections and documents returned
# by the client (first 1000 of each), not only the ones created in this
# notebook, and then assert that nothing is left.
clear_all_collections(client)
clear_all_documents(client)

## Step 7: Enjoy

In [32]:
# Build the file name from the presentation title: every run of non-word
# characters or underscores becomes a single underscore. Then save the deck.
sanitised = re.sub(r'[\W_]+', '_', all_sections[0])
prs.save(f"./presentations/{sanitised}.pptx")


## Appendix: Extra Code that may be useful in the future
```python
# Create a chat session
# chat_session_id = client.create_chat_session(collection_id)

# # Query the collection
# with client.connect(chat_session_id) as session:
#     reply = session.query(
#         'How many paper clips were shipped to Scranton?',
#         llm="gpt-4-0613"
#     )
#     print(reply.content)

#     reply = session.query(
#         'Did David Brent co-sign the contract with Initech?',
#         timeout=60,
#         llm="gpt-4-0613"
#     )
#     print(reply.content)

# # Summarize each document
# documents = client.list_documents_in_collection(collection_id, offset=0, limit=99)
# for doc in documents:
#     summary = client.summarize_document(
#         document_id=doc.id,
#         timeout=60,
#     )
#     print(summary.content)


#client.delete_documents_from_collection
```