In [1]:
import json
import numpy as np
import glob
import os
import re
import html2text
import urllib3

from duckduckgo_search import DDGS
from pptx import Presentation
from pptx.dml.color import RGBColor
from pptx.util import Cm, Pt, Inches
from pptx.enum.text import MSO_ANCHOR, MSO_AUTO_SIZE
from h2ogpte import H2OGPTE
from mediawikiapi import MediaWikiAPI
from tqdm import tqdm
with open('secrets.txt') as f:
    api = f.read()
    f.close()



In [2]:
client = H2OGPTE(
    address="https://h2ogpte.genai.h2o.ai",
    api_key=api
)

user_query = 'Create a presentation on gold'

In [3]:
def clear_all_documents(client):
    docs = client.list_recent_documents(offset=0, limit = 1000)
    client.delete_documents(list(map(lambda x: x.id, docs)))
    assert client.count_documents() == 0

def clear_all_collections(client):
    col = client.list_recent_collections(0, 1000)
    client.delete_collections(list(map(lambda x: x.id, col)))
    assert client.count_collections() == 0


def trim(lst, keep=30):
    '''
    Trims a list. This function was originally used to permute before trimming, but\
    now that functionality is removed, so it appears rather redundant.
    '''
    _ = lst.copy()
    
    return _[0:keep]

def format_site_description(ls, start_from = 0):
    i = start_from
    _ = list(ls).copy()
    for element in ls:
        element = f"{i}. {element}"
        _[i] = element
        i += 1
    return "\n\n".join(_)


def try_and_parse(user_query, function, failed=0, markdown=False):
    '''
    Accepts a function and user_query, an input. Evaluates function(user_query) and 
    converts string output (usually a reply from an llm) into a json value. Use markdown=True
    if the json value is contained within a code chunk.
    '''
    chosen = function(user_query)
    try:
        if not markdown:
            topics = json.loads(chosen.content)
        else:
            print(chosen.content)
            pattern = r'^```(?:\w+)?\s*\n(.*?)(?=^```)```'
            result = re.findall(pattern, chosen.content, re.DOTALL | re.MULTILINE)[0].strip() 
            #print(result)
            topics = json.loads(result)
            
        return topics
    except Exception as e:
        failed+=1
        print(failed)
        print(e)# CHANGE TO LOGGING STATEMENT
        return try_and_parse(user_query, function, failed=failed, markdown=markdown)


## Step 1. What comes to mind when you think about xyz?

In [4]:
search = lambda user_query: client.answer_question(
    question=user_query,
    system_prompt="""You are an assistant whose task is to perform searches on the internet on a specific topic.\
    The user is interested to create a presentation about a topic of interest.\
    Think about what to do, then reply with your thought process and at least one corresponding google query as an array in JSON format,\
    but limit yourself to 5 queries.\
    The JSON array should be contained in a code chunk. Keep strictly to the format in the example below.
    Example:
    Since the presentation is about milk, I will probably want to search up the different types of milk, ...
    ```json
    ["Milk", "Oat Milk", "Plant-based milks", "Cow Milk", "Goat Milk"]
    ```
    """,
    llm='mistralai/Mixtral-8x7B-Instruct-v0.1'
)
searched = try_and_parse(user_query, search, markdown=True)
searched

Sure, I can help you with that. Here are some potential search queries that could be useful for creating a presentation on gold:

```json
["Gold",
 "History of gold",
 "Gold mining",
 "Gold uses and applications",
 "Gold market and economy",
 "Gold jewelry and fashion",
 "Gold recycling and sustainability"]
```
These queries cover a range of topics related to gold, from its historical significance to its modern-day uses and impact on the economy. By searching for information on these topics, you should be able to gather a wealth of knowledge to include in your presentation. Let me know if you have any other questions or if there's anything else I can do to help!


['Gold',
 'History of gold',
 'Gold mining',
 'Gold uses and applications',
 'Gold market and economy',
 'Gold jewelry and fashion',
 'Gold recycling and sustainability']

## Step 2. Search Wikipedia 

In [5]:
import random
wiki = MediaWikiAPI()
ddgs = DDGS()

articles = list(
    set(
        
        [i for j in [wiki.search(cat, results=5) for cat in searched] for i in j]
        
    )
) # remove duplicates with set(list())




In [6]:
articles

['Old gold',
 'Gold',
 'Gold teeth',
 'Gold mining in the United States',
 'Recycling codes',
 'Virtual economy',
 'California gold rush',
 'Gold medal',
 'Platinum',
 'Gold mining in Alaska',
 'History of Gold Coast',
 'Gold mining in Colorado',
 'Sims Metal Management',
 'Titanium',
 'Economy of Dubai',
 'Gallium',
 'Market socialism',
 'Electronic waste',
 'Potassium',
 'Gold mining',
 'GOLD',
 'Elsa Peretti',
 'Recycling',
 'Gold-filled jewelry',
 'Economy of Canada',
 'Gold Coast (British colony)',
 'Live insect jewelry',
 'Land recycling',
 'Socialist market economy',
 'Harmony Gold (mining)',
 'Grill (jewelry)',
 'Costume jewelry',
 'Gold Bar, Washington']

In [7]:
random.shuffle(articles) # random shuffle
snippet = trim(
    list(map(lambda x: wiki.summary(x, auto_suggest=False, sentences=1), articles))
)

In [8]:


snippet_text = format_site_description(snippet)

print(snippet_text)

0. Gold teeth are a form of dental prosthesis where the visible part of a tooth is replaced or capped with a prosthetic molded from gold.

1. Gold-filled jewelry is jewelry composed of a solid layer of gold (typically constituting at least 5% of the item's total weight) mechanically bonded to a base of either sterling silver or some base metal.

2. Live insect jewelry refers to jewelry made from living creatures – usually bejeweled oversized insects – which is worn as a fashion accessory.

3. The California gold rush (1848–1855) was a gold rush that began on January 24, 1848, when gold was found by James W. Marshall at Sutter's Mill in Coloma, California.

4. Gold is a chemical element; it has symbol Au (from Latin  aurum 'gold') and atomic number 79.

5. Gallium is a chemical element; it has symbol Ga and atomic number 31.

6. History of Gold Coast may refer to:

7. Gold Bar is a city in Snohomish County, Washington, United States.

8. A gold medal is a medal awarded for highest achie

## Step 2b. Search the Internet (DDG)

In [9]:
ddg_results = list(
    set(
        [tuple(i.values()) for j in [ddgs.text(cat, max_results=3) for cat in searched] for i in j]
        )
    )
random.shuffle(ddg_results)
ddg_results = trim(ddg_results, keep=15)
websites, links, body = zip(
    *ddg_results
)

In [10]:
ddg_snippet_text = format_site_description(list(zip(links, body)))
print(ddg_snippet_text)

0. ('https://goldfundamentals.org/gold-uses/', 'Introduction Part 1: Gold in Jewelry Part 2: Gold as a Currency Part 3: Gold in Electronics Part 4: Gold in Optics Part 5: Gold in Space Technology Part 6: Gold in Dental Procedures Part 7: Gold in Medical Treatments and Diagnostics Part 8: Gold in Cosmetics Part 9: Gold in Gastronomy Part 10: Gold in Architecture Part 11: Gold in Decorative Arts')

1. ('https://www.gold.org/history-gold', "Learn how gold's role as money and the gold standard changed over time, from the Classical Gold Standard to the Bretton Woods system and beyond. Explore the records of gold demand and supply, market structure and flows, and the role of gold in jewellery and culture.")

2. ('https://www.chicagofed.org/publications/chicago-fed-letter/2021/464', 'The measure of the real gold price is the London PM fixing price for gold (from the London Bullion Market Association) in U.S. dollars per ounce deflated by the U.S. Consumer Price Index, or CPI (from the U.S. Bu

## Step 3. "Brainstorm" and filter Wikipedia/DDG searches for useful ones
I treat them separately. Also: Chain-of-thought prompting
https://www.promptingguide.ai/techniques/cot

In [11]:
choose_topics = lambda user_query: client.answer_question(
        question=f"""{user_query}
        Referring to the list of wikipedia entries you have been provided, decide on which topics are useful for the presentation. For each entry, explain, in a few words,\
        whether you think an entry is useful or not and why.
        After that, generate a code chunk. Within the code chunk is an array of integers in JSON, and these integers correspond to the topics you think are useful.
        For each line, think about its usefulness/relevance and make some short deductions, then conclude whether it is useful or not useful.
        Please keep strictly to the format in the following example:
        0. - Sugar irrelevant to Jesus Christ, therefore not useful.
        1. - Christianity is about the topic of Jesus Christ, thus Useful.
        2. - Protestants follow Jesus Christ, therefore useful.
        ```
        [1, 2]
        ```
        """,
        system_prompt=f"""You are an assistant whose task is to help a user in creating a presentation.\
        Here are a list of wikipedia entries, starting from the 0th entry, that may or may not be related to the topic at hand:
        {snippet_text}
        """,
        llm='gpt-4-1106-preview' # only instance of gpt4 usage. need this for the big brain.
    )

topics = try_and_parse(user_query, choose_topics, markdown=True)
print(topics)

0. - Gold teeth are a specific application of gold in dentistry, relevant to the topic of gold, thus useful.
1. - Gold-filled jewelry is an example of how gold is used in items, relevant to the topic of gold, thus useful.
2. - Live insect jewelry is not directly related to the topic of gold, therefore not useful.
3. - The California gold rush is a significant historical event involving gold, relevant to the topic of gold, thus useful.
4. - Gold as a chemical element is the fundamental aspect of the topic, thus useful.
5. - Gallium is a different chemical element and not related to gold, therefore not useful.
6. - History of Gold Coast may refer to historical aspects involving gold but is too vague without specific context, potentially useful but not clearly so without further information.
7. - Gold Bar is a city and not directly related to the topic of gold, therefore not useful.
8. - A gold medal represents an achievement and is related to the cultural significance of gold, thus usefu

In [12]:
articles

['Gold teeth',
 'Gold-filled jewelry',
 'Live insect jewelry',
 'California gold rush',
 'Gold',
 'Gallium',
 'History of Gold Coast',
 'Gold Bar, Washington',
 'Gold medal',
 'Gold Coast (British colony)',
 'Old gold',
 'Potassium',
 'Socialist market economy',
 'GOLD',
 'Gold mining',
 'Recycling codes',
 'Titanium',
 'Grill (jewelry)',
 'Market socialism',
 'Sims Metal Management',
 'Virtual economy',
 'Economy of Dubai',
 'Electronic waste',
 'Land recycling',
 'Elsa Peretti',
 'Gold mining in Alaska',
 'Recycling',
 'Costume jewelry',
 'Harmony Gold (mining)',
 'Gold mining in Colorado',
 'Gold mining in the United States',
 'Platinum',
 'Economy of Canada']

In [13]:
chosen_articles = [articles[i] for i in topics]
chosen_snippets = [snippet[i] for i in topics]
chosen_articles

['Gold teeth',
 'Gold-filled jewelry',
 'California gold rush',
 'Gold',
 'Gold medal',
 'Gold Coast (British colony)',
 'Gold mining',
 'Grill (jewelry)',
 'Gold mining in Alaska',
 'Harmony Gold (mining)',
 'Gold mining in Colorado']

In [14]:
chosen_full_articles = list(map(lambda x: wiki.page(x, auto_suggest=False).content, chosen_articles))
chosen_articles_images = list(map(lambda x: wiki.page(x, auto_suggest=False).images, chosen_articles))


## Step 3b. Filter DDG Searches

In [15]:
choose_ddg_topics = lambda user_query: client.answer_question(
        question=f"""{user_query}
        Referring to the list of websites you have been provided, decide on which topics are useful for the presentation. For each entry, explain, in a few words,\
        whether you think an entry is useful or not and why.
        After that, generate a code chunk. Within the code chunk is an array of integers in JSON, and these integers correspond to the topics you think are useful\
        and are NOT from wikipedia.
        For each line, think about its usefulness/relevance and make some short deductions, then conclude whether it is useful or not useful.
        Please keep strictly to the format in the following example:
        0. - Sugar irrelevant to Jesus Christ, therefore not useful.
        1. - Christianity is about the topic of Jesus Christ, thus Useful.
        2. - Protestants follow Jesus Christ, therefore useful.
        3. - Website is a link to a youtube video, which is not suitable for a presentation, thus not useful.
        4. - This is relevant, but it is a Wikipedia link, so it is omitted.
        ```
        [1, 2]
        ```
        """,
        system_prompt=f"""You are an assistant whose task is to help a user in creating a presentation.\
        Here are a list of website body text, starting from the 0th entry, that may or may not be related to the topic at hand:
        {ddg_snippet_text}
        """,
        llm='gpt-4-1106-preview' # only instance of gpt4 usage. need this for the big brain.
    )
ddg_topics = try_and_parse(user_query, choose_ddg_topics, markdown=True)

ddg_topics

0. - Comprehensive list of gold uses, relevant for presentation, thus useful.
1. - History of gold as currency, relevant for historical context, thus useful.
2. - Technical data on gold pricing, may be too complex, but relevant for economic aspect, thus useful.
3. - Current gold prices, not relevant for a general presentation on gold, thus not useful.
4. - Current gold prices and historical trends, more relevant for investors, not for general presentation, thus not useful.
5. - Overview of gold uses, relevant and informative, thus useful.
6. - Wikipedia link, informative but not suitable due to source, thus not useful.
7. - Detailed uses of gold, relevant for presentation, thus useful.
8. - Commercial site for jewelry, not informative about gold itself, thus not useful.
9. - Recent gold price fluctuations, relevant for economic trends, but too specific for a general presentation, thus not useful.
10. - Gold recycling, relevant for sustainability aspect, thus useful.
11. - Discussion on

[0, 1, 2, 5, 7, 10, 11, 13, 14]

In [16]:
chosen_websites, chosen_links, chosen_body = ([websites[i] for i in ddg_topics], [links[i] for i in ddg_topics], [body[i] for i in ddg_topics])

## Step 4: Store useful ideas in VectorDB (h2oai)

In [17]:

    
# now its time to store them for RAG
import os


collection_id = client.create_collection(
    name='Articles',
    description='Articles for presentation',
)

pages = dict(zip(chosen_articles, chosen_full_articles))



In [18]:

to_ingest = []
for title, content in tqdm(pages.items()):
    title = re.sub('[\W_]+', '', title)
    name = f"./articles/{title}.txt"
    f = open(name, "w+", encoding="utf-8")
    f.write(content)
    f.close() # dont know why i gotta do this, i think it has to be in binary
    f = open(name, 'rb')
    to_ingest.append(client.upload(name, f))
    
    f.close() 

client.ingest_uploads(collection_id, to_ingest)  

100%|██████████████████████████████████████████████████████████████████████████████████| 11/11 [00:43<00:00,  3.95s/it]


Job(id='873526b7-e730-429d-908f-0eb8d6df2093', passed=1.0, failed=0.0, progress=1.0, completed=True, canceled=False, date=datetime.datetime(2024, 2, 28, 13, 20, 22, tzinfo=TzInfo(UTC)), kind=<JobKind.IngestUploadsJob: 'IngestUploadsJob'>, statuses=[JobStatus(id='164b869012b2498398bf12b1cc3138e2', status='Indexing done.'), JobStatus(id='2518ce5b5eb34ce191d7dbbcbf613118', status='Indexing done.'), JobStatus(id='6533e581ed56426dabccde33b0f1921f', status='Indexing done.'), JobStatus(id='f5706ac2e39d4bba8eb5c8165393a3e5', status='Indexing done.')], errors=[], last_update_date=datetime.datetime(2024, 2, 28, 13, 20, 51, tzinfo=TzInfo(UTC)), duration='29s')

In [19]:


# http = urllib3.PoolManager()
# to_ingest = [] # CLEAR LIST
# for link in tqdm(chosen_links):
#     page = http.request('GET', link)
#     html_content = page.data.decode('utf-8')
#     rendered_content = html2text.html2text(html_content)
    
#     sanitised_fname = re.sub(r'[\W_]+', '_', link)
#     path = f"./articles/{sanitised_fname}.txt"
#     file = open(path, 'w', encoding='utf-8')
#     file.write(rendered_content)
#     file.close()

#     f = open(path, 'rb')
#     to_ingest.append(client.upload(path, f)) 
#     f.close()

# client.ingest_uploads(collection_id, to_ingest)  


import urllib3
import html2text
import re
from tqdm import tqdm
from bs4 import BeautifulSoup



http = urllib3.PoolManager()
h = html2text.HTML2Text()
h.ignore_links = True  # Optionally ignore links in the output

for link in tqdm(chosen_links):
    try:
        # Make a GET request to the URL
        page = http.request('GET', link, headers=\
                            {
                                "User-Agent": 
                                "Mozilla/5.0 (iPhone; CPU iPhone OS 17_3_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) CriOS/122.0.6261.89 Mobile/15E148 Safari/604.1"
                            }
                           )
        
        # Decode the bytes object to a string
        html_content = page.data.decode('utf-8')
        
        # Parse the HTML content with BeautifulSoup
        soup = BeautifulSoup(html_content, 'html.parser')
        
        # Extract the body content
        body_content = soup.find('body')
        
        # Convert the body content to plain text
        rendered_content = h.handle(str(body_content))
        
        # Sanitize the filename
        sanitised_fname = re.sub(r'[\W_]+', '_', link)
        path = f"./articles/{sanitised_fname}.txt"
        
        # Write the rendered content to the file
        with open(path, 'w', encoding='utf-8') as file:
            file.write(rendered_content)

        with open(path, 'rb') as f:
            to_ingest.append(client.upload(path, f)) 
        

        
    except urllib3.exceptions.HTTPError as errh:
        print(f"HTTP Error: {errh}")
    except urllib3.exceptions.ConnectionError as errc:
        print(f"Error Connecting: {errc}")
    except urllib3.exceptions.TimeoutError as errt:
        print(f"Timeout Error: {errt}")
    except Exception as err:
        print(f"An error occurred: {err}")

client.ingest_uploads(collection_id, to_ingest)

100%|████████████████████████████████████████████████████████████████████████████████████| 9/9 [01:23<00:00,  9.27s/it]


Job(id='d5a5da3a-e7d9-4a0f-943f-ac12096320d6', passed=0.5, failed=0.5, progress=1.0, completed=True, canceled=False, date=datetime.datetime(2024, 2, 28, 13, 22, 17, tzinfo=TzInfo(UTC)), kind=<JobKind.IngestUploadsJob: 'IngestUploadsJob'>, statuses=[JobStatus(id='2518ce5b5eb34ce191d7dbbcbf613118', status='Collecting files...')], errors=['Job crashed: S3 operation failed; code: NoSuchKey, message: Object does not exist, resource: /h2ogpte-web-uploads/e6af1c36-166f-4488-ab30-321fcdf46f27/82301020-f54d-4937-a2b6-f56d1e152a6a, request_id: 17B80987E5C2330A, host_id: 129eada3bef6a66733359d8fa2bb39142754799116d6e56bbf39d92d3000c5df, bucket_name: h2ogpte-web-uploads, object_name: e6af1c36-166f-4488-ab30-321fcdf46f27/82301020-f54d-4937-a2b6-f56d1e152a6a'], last_update_date=datetime.datetime(2024, 2, 28, 13, 22, 18, tzinfo=TzInfo(UTC)), duration='1s')

In [20]:
# delete
files_dir = glob.glob('./articles/*.txt')
for f in files_dir:
    os.remove(f)

## Step 5: Plan sections for slide
This is to ensure the entire presentation is a coherent one with a flow/narrative, instead of many disjoint/overlapping generations.
Again, chain of thought prompting is very heavily incorporated

In [21]:
decide_sections = lambda user_query: client.answer_question(
        question=f"""{user_query}
        Please plan the presentation by doing the following:
        1. Explain how you would design the presentation slides such that the presentation will flow well.\
        Remember that each slide must contain something different, and content should not overlap.
        2. Think of a good title for the presentation.
        3. Create a code chunk. Inside that code chunk, generate a JSON array consisting of appropriate slide titles starting from the first slide to the last slide, \
        remembering your answer to point 2. Include the title slide, which is the title for the presentation.

        Below is an example reply. Please adhere strictly to the format in the example below and remember to output the JSON array in a code chunk:  
        1. I would introduce the Transformers franchise and provide general information about its history to ease my viewers into the subject. \
        Then, I will think about subtopics, such as the Transformers films, Transformers characters and Transformers in comics, using the wikipedia entry summaries\
        I have been provided. 
            * For my first subtopic, the Transformers films, I would create two additional slides to expand on Bumblebee (2018) and Revenge of the Fallen (2009)\
            as these are popular films within the franchise. I will order the films chronologically.
            * For my second subtopic on Transformers characters, I will have a slide on the cast of the film. 
            * For my next subtopic...
        2. I think a good title for this presentation is "Transformers: An Overview".
        3. Here is the json array of slide titles:
        ```json
        [
        "Transformers: An Overview", 
        "Introduction to Transformers", 
        "Transformers in Film",
        "Transformers: Revenge of the Fallen (2009)", 
        "Bumblebee (2018)", 
        "Characters in the Transformers Universe", 
        "Transformers in comics",
        "Conclusion"
        ]
        ```
        """,
        system_prompt=f"""You are an assistant whose task is to help a user in creating a presentation.\
        Below is a list of wikipedia articles that are selected for the presentation.\
        You will be asked to come up with slide titles for the presentation. Each line is a 1-sentence summary of a wikipedia page.\
        The number of slides should depend on the amount of information/wikipedia entry articles and websites available.
        Articles:
        {chosen_articles}
        Chosen websites:
        {format_site_description(list(zip(chosen_websites, chosen_links)))}
        """,
        llm='mistralai/Mixtral-8x7B-Instruct-v0.1' 
    )

all_sections = try_and_parse(user_query, decide_sections, markdown=True)

1. I would design the presentation slides to flow as follows:
* Start with a title slide that introduces the topic of gold and the purpose of the presentation.
* Provide a brief overview of the history of gold, including its uses and significance throughout history.
* Discuss the factors that drive gold prices and how they are determined.
* Explore the various uses of gold, including in jewelry, electronics, and other industries.
* Highlight the growing trend of gold recycling and its impact on the sustainability of the gold industry.
* Discuss the role of gold in the economy and its impact on global markets.
* Conclude the presentation with a summary of the key points and a call to action for further learning.
2. A good title for the presentation could be "Gold: History, Uses, and Impact on the Economy".
3. Here is the JSON array of slide titles:
```json
[
"Gold: History, Uses, and Impact on the Economy",
"Introduction to Gold",
"History of Gold",
"Factors that Drive Gold Prices",
"Us

In [22]:
sections = all_sections[1:]

sections

['Introduction to Gold',
 'History of Gold',
 'Factors that Drive Gold Prices',
 'Uses of Gold',
 'Gold Recycling and Sustainability',
 'Gold and the Economy',
 'Conclusion']

In [23]:
del client
client = H2OGPTE(
    address="https://h2ogpte.genai.h2o.ai",
    api_key=api
) # does this reset client?

chat_session_id = client.create_chat_session(collection_id)
chat_session_id

'b67fb8d9-3fe7-4701-a428-658f59eb086f'

#### Ref for slide types:  
0. title and subtitle 
1. title and content 
2. section header 
3. two content 
4. Comparison 
5. Title only  
6. Blank 
7. Content with caption 
8. Pic with caption 


## Step 6: Generate using RAG
LLM chooses colour with chain-of-thought prompting again.

In [24]:
prs = Presentation()
prs.slide_width = Inches(16)
prs.slide_height = Inches(9)
title_slide = prs.slides.add_slide(prs.slide_layouts[0]) 
decide_slide_format = lambda user_query: client.answer_question(
        question=f"""{user_query} Think of a good background colour, in RGB format,\
        for the slides and a good colour, also in RGB format, for the\
        text. Typically, if the text colour is bright (for example RGB [255, 255, 255] is white), then the background colour should be dark
        (RGB [0, 0, 100] is dark blue). Conversely, if the text colour is dark (for example RGB [0, 0, 0] is black), the background colour should be bright\
        . You are free to choose any text and background colour, \
        as long as you follow these rules. Please do not assign grey-scale colours for the text and background (like RGB [50, 50, 50]), as much as possible.

        Explain clearly why you chose the background and text colours. Then, generate a code chunk. Within the code chunk,\
        provide a JSON array containing two colours. Do not say anything else. Adhere strictly to the example reply below:
        I chose blue RGB [0, 35, 140] for the background color and light yellow RGB [255, 234, 0] for the font color. The contrast makes it easy to read.\
        Furthermore, the colours blue and yellow are associated with the Pokémon Franchise.
        ```
        [{{"background": [0, 0, 140]}}, {{"text": [255, 234, 0]}}]
        ```
        """,
    
        system_prompt=f"""You are an assistant whose task is to help a user in creating a presentation.\
        Here are a list of wikipedia entry summaries that are selected for the presentation:
        {chosen_snippets}
        """,
        llm='mistralai/Mixtral-8x7B-Instruct-v0.1' 
)

format = try_and_parse(user_query, decide_slide_format, markdown=True)

I chose a light beige background color with RGB [245, 223, 186] and a dark brown text color with RGB [60, 25, 0]. The contrast between the light background and dark text makes it easy to read. Additionally, these colors are reminiscent of gold's natural appearance, as well as the earthy tones associated with mining and extraction.

```
[{"background": [245, 223, 186]}, {"text": [60, 25, 0]}]
```


In [25]:
format

[{'background': [245, 223, 186]}, {'text': [60, 25, 0]}]

In [26]:
background = RGBColor(*tuple(list(format[0].values())[0])) 
font = RGBColor(*tuple(list(format[1].values())[0])) 
fill = title_slide.background.fill
fill.solid()
fill.fore_color.rgb = background


title_slide.shapes.title.text = all_sections[0]
title_slide.shapes.title.text_frame.paragraphs[0].font.color.rgb =  font
title_slide.shapes.title.text_frame.paragraphs[0].font.name = 'Montserrat'
title_slide.shapes.title.text_frame.paragraphs[0].font.bold = True

first_shape =  title_slide.shapes[0]
first_shape.left, first_shape.top, first_shape.width, first_shape.height = (prs.slide_width - Inches(12))//2, \
(prs.slide_height-first_shape.height)//2 - Inches(1),\
Inches(12),\
Inches(2)

In [27]:

with client.connect(chat_session_id) as session:

    for section in tqdm(sections):
        slide = prs.slides.add_slide(prs.slide_layouts[1])
        fill = slide.background.fill
        fill.solid()
        fill.fore_color.rgb = background

        
        contents = slide.placeholders[1]
        contents.text_frame.word_wrap = True

        title = slide.shapes.title
        title.text = section
        title.text_frame.paragraphs[0].font.color.rgb = font
        title.text_frame.paragraphs[0].font.size = Pt(32)
        title.text_frame.paragraphs[0].font.name = 'Karla'
       
       
        content = session.query(
            
            message = section,
            system_prompt=f"""You are an assistant whose task is to help a user in creating a presentation. \
            The slides of the presentation are as follows: {sections}
            You are now tasked with generating the content of one slide, which will be provided by the user.
            You are designing one slide, so do not generate long paragraphs. Instead, summarise your most important points\
            in up to 3 sentences each, and do not generate more than 15 lines of text.
            """,
            pre_prompt_query="You have been provided with the following information, which may be useful in your task.",
            prompt_query="""Decide if the information is relevant, and use it if needed.\
            Generate the content required in the slide provided by the user. You only need to generate the contents of the slide, not the title\
            or anything else. Remember, you are designing one slide, so do not generate long paragraphs. Instead, summarise your most important points\
            in up to 3 sentences each, and do not generate more than 15 lines of text. Between each point, leave a line.

            Here is an example. Please adhere to this example strictly:
            This is the first point I am trying to make. I will explain this first point in the second sentence.
            
            This is the second point I am trying to make. This is the second sentence for the second point. \
            I need a third sentence to fully explain this point. Notice that I will leave a line for the next point.

            This is the third point.
            """,
            llm="mistralai/Mixtral-8x7B-Instruct-v0.1",
            rag_config={
                "rag_type": "hyde1",
            },
        ).content

        contents.text = content
        
        for paragraph in contents.text_frame.paragraphs:
            paragraph.space_after = 1
            paragraph.space_before = 1
            # paragraph.level = 0
           
            paragraph.font.size = Pt(22)  
            paragraph.font.color.rgb = font
            paragraph.font.name = 'Karla'

        contents.text_frame.auto_size = MSO_AUTO_SIZE.SHAPE_TO_FIT_TEXT
        shapes = slide.shapes
        new_width = Inches(14)
        new_height = Inches(7)
        shapes[0].height, shapes[0].width, shapes[0].top, shapes[0].left = shapes[0].height, new_width, shapes[0].top, (prs.slide_width-new_width)//2
        shapes[1].height, shapes[1].width, shapes[1].top, shapes[1].left = new_height, new_width, shapes[1].top, (prs.slide_width-new_width)//2
        
        

# gpt-4-1106-preview


100%|████████████████████████████████████████████████████████████████████████████████████| 7/7 [03:15<00:00, 27.91s/it]


In [28]:
clear_all_collections(client)
clear_all_documents(client)

## Step 7: Enjoy

In [29]:
sanitised = re.sub(r'[\W_]+', '_', all_sections[0])
prs.save(f"./presentations/{sanitised}.pptx")


## Appendix: Extra Code that may be useful in the future
```python
# Create a chat session
# chat_session_id = client.create_chat_session(collection_id)

# # Query the collection
# with client.connect(chat_session_id) as session:
#     reply = session.query(
#         'How many paper clips were shipped to Scranton?',
#         llm="gpt-4-0613"
#     )
#     print(reply.content)

#     reply = session.query(
#         'Did David Brent co-sign the contract with Initech?',
#         timeout=60,
#         llm="gpt-4-0613"
#     )
#     print(reply.content)

# # Summarize each document
# documents = client.list_documents_in_collection(collection_id, offset=0, limit=99)
# for doc in documents:
#     summary = client.summarize_document(
#         document_id=doc.id,
#         timeout=60,
#     )
#     print(summary.content)


#client.delete_documents_from_collection
```