In [1]:
import json
import numpy as np
import re
from pptx import Presentation
from pptx.dml.color import RGBColor
from pptx.util import Cm, Pt, Inches
from pptx.enum.text import MSO_ANCHOR, MSO_AUTO_SIZE
from h2ogpte import H2OGPTE
from mediawikiapi import MediaWikiAPI
from tqdm import tqdm
with open('secrets.txt') as f:
    api = f.read()
    f.close()



In [2]:
client = H2OGPTE(
    address="https://h2ogpte.genai.h2o.ai",
    api_key=api
)

user_query = 'Create a presentation on the game lethal company'

In [3]:
def trim_randomly(lst, retain=30):
    '''
    Shuffles a list and randomly
    '''
    _ = lst.copy()
    random.shuffle(_)
    return _[0:retain]



def try_and_parse(user_query, function, failed=0, markdown=False):
    '''
    Accepts a function and user_query, an input. Evaluates function(user_query) and 
    converts string output (usually a reply from an llm) into a json value
    '''
    chosen = function(user_query)
    try:
        if not markdown:
            topics = json.loads(chosen.content)
        else:
            print(chosen.content)
            pattern = r'^```(?:\w+)?\s*\n(.*?)(?=^```)```'
            result = re.findall(pattern, chosen.content, re.DOTALL | re.MULTILINE)[0].strip() 
            topics = json.loads(result)
            
        return topics
    except Exception as e:
        failed+=1
        print(failed)
        print(e)# CHANGE TO LOGGING STATEMENT
        try_and_parse(user_query, function, failed=failed)


In [4]:
search = lambda user_query: client.answer_question(
    question=user_query,
    system_prompt="""You are an assistant whose task is to perform Wikipedia searches on a specific topic.\
    The user is interested to create a presentation about a topic of interest.\
    Reply with at least one corresponding Wikipedia query as an array in JSON format.\
    Only reply with the JSON array and nothing else. Here are some examples.
    Example 1.
    User: Create a presentation on the book, Baby Rudin.
    Assistant: ["Real analysis", "Mathematical Analysis"]

    Example 2.
    User: I want to create a ppt about Milk.
    Assistant: ["Milk", "Plant Milk", "Animal Milk", "Almond Milk"]
    """,
    llm='mistralai/Mixtral-8x7B-Instruct-v0.1' # i like this model
)
searched = try_and_parse(user_query, search)
searched

['Lethal Company',
 'Private military company',
 'Military history',
 'Security contracting',
 'Military industry']

In [5]:
wiki = MediaWikiAPI()

articles = list(
    set(
        
        [i for j in [wiki.search(cat, results=5) for cat in searched] for i in j]
        
    )
)

# remove duplicates with set(list())

In [6]:
import random
snippet = trim_randomly(
    list(map(lambda x: wiki.summary(x, auto_suggest=False, sentences=1), articles))
)



snippet_ = snippet.copy()
i = 0
for string in snippet:
    string = f"{i}. {string}"
    snippet[i] = string
    i+=1
snippet_text = "\n\n".join(snippet)
print(snippet_text)

0. Lethal Weapon is an American buddy cop action-comedy media franchise created by Shane Black.

1. The following is a list of notable private military contractors and companies.

2. Academi, formerly known as Blackwater, is an American private military contractor founded on December 26, 1996, by former Navy SEAL officer Erik Prince.

3. Jewish military history focuses on the military aspect of history of the Jewish people from ancient times until the modern age.

4. The military industry of Egypt produces defense and security products that range from "small arms to armored vehicles to naval vessels" for the Egyptian Armed Forces and export.

5. The military history of Japan covers a vast time-period of over three millennia - from the Jōmon (c.

6. Israel Weapon Industries (IWI), formerly the Magen division of the Israel Military Industries Ltd.

7. A private military company (PMC) or  private military and security company (PMSC) is a private company providing armed combat or security 

In [7]:
choose_topics = lambda user_query: client.answer_question(
        question=f"""{user_query}
        Referring to the list of wikipedia entries you have been provided, decide on which topics are useful for the presentation. For each entry, explain, in a few words,\
        whether you think an entry is useful or not and why.
        After that, generate a code chunk. Within the code chunk is an array of integers in JSON, and these integers correspond to the topics you think are useful. 
        Please keep strictly to the format in the following example:
        0. - Not useful, sugar irrelevant to Jesus Christ
        1. - Useful, christianity is about the topic of Jesus Christ
        2. - Useful, protestants follow Jesus Christ
        ```
        [1, 2]
        ```
        """,
        system_prompt=f"""You are an assistant whose task is to help a user in creating a presentation.\
        Here are a list of wikipedia entries, starting from the 0th entry, that may or may not be related to the topic at hand:
        {snippet_text}
        """,
        llm='gpt-4-1106-preview' 
    )

topics = try_and_parse(user_query, choose_topics, markdown=True)
print(topics)

0. - Not useful, relates to a different media franchise with a similar name.
1. - Not useful, private military contractors are not relevant to the video game.
2. - Not useful, specific private military contractor unrelated to the game.
3. - Not useful, historical aspect of Jewish military not relevant to the game.
4. - Not useful, Egypt's military industry does not pertain to the video game.
5. - Not useful, Japanese military history is unrelated to the game.
6. - Not useful, Israel Weapon Industries is not related to the video game.
7. - Not useful, private military companies are not the focus of the game.
8. - Not useful, non-lethal weapons are not the subject of the game.
9. - Not useful, Sudan's defense corporation has no connection to the game.
10. - Not useful, relates to a film with a similar name, not the video game.
11. - Not useful, Russian private military company not related to the game.
12. - Not useful, Israeli weapons manufacturer unrelated to the game.
13. - Not useful,

In [8]:
articles

['Triple Canopy',
 'Israel Weapon Industries',
 'Lethal Weapon',
 'Jewish military history',
 'Security company',
 'The Journal of Military History',
 'Patriot (company)',
 'Private military company',
 'Military industry of Egypt',
 'Lethal Company',
 'IMI Systems',
 'Blackwater (company)',
 'Military history',
 'Military Industry Corporation',
 'Arms industry',
 'List of private military contractors',
 'Lethal Weapon (franchise)',
 'Non-lethal weapon',
 'Contract',
 'Security guard',
 'List of private security companies',
 'Military history of Canada',
 'Military history of Japan',
 'Lethal injection']

In [9]:
chosen_articles = [articles[i] for i in topics]
chosen_snippets = [snippet_[i] for i in topics]
chosen_articles

['Non-lethal weapon']

In [10]:
chosen_full_articles = list(map(lambda x: wiki.page(x, auto_suggest=False).content, chosen_articles))
chosen_articles_images = list(map(lambda x: wiki.page(x, auto_suggest=False).images, chosen_articles))
# # Create documents
# # Note: Done for demonstration purposes only (not usually needed)
# with open('dunder_mifflin.txt', 'w') as f:
#     f.write('There were 55 paper clips shipped, 22 to Scranton and 33 to Filmer.')
    
# with open('initech.txt', 'w') as f:
#     f.write('David Brent did not sign any contract with Initech.')
    
# # Upload documents
# # Many file types are supported: text/image/audio documents and archives
# with open('dunder_mifflin.txt', 'rb') as f:
#     dunder_mifflin = client.upload('Dunder Mifflin.txt', f)
    
# with open('initech.txt', 'rb') as f:
#     initech = client.upload('IniTech.txt', f)

# # Ingest documents (Creates previews, chunks and embeddings)
# client.ingest_uploads(collection_id, [dunder_mifflin, initech])


In [11]:

    
# now its time to store them for RAG
import os


collection_id = client.create_collection(
    name='Articles',
    description='wikipedia articles for presentation',
)

pages = dict(zip(chosen_articles, chosen_full_articles))



In [12]:
import re
to_ingest = []
for title, content in pages.items():
    title = re.sub('[\W_]+', '', title)
    name = f"./articles/{title}.txt"
    f = open(name, "w+", encoding="utf-8")
    f.write(content)
    f.close() # dont know why i gotta do this, i think it has to be in binary
    f = open(name, 'rb')
    to_ingest.append(client.upload(name, f))
    print(f"{name} fed!")
    f.close() 

client.ingest_uploads(collection_id, to_ingest)  

./articles/Nonlethalweapon.txt fed!


Job(id='24ca2f4f-f82e-4b37-b38c-189559ed1b5e', passed=1.0, failed=0.0, progress=1.0, completed=True, canceled=False, date=datetime.datetime(2024, 2, 26, 20, 58, 35, tzinfo=TzInfo(UTC)), kind=<JobKind.IngestUploadsJob: 'IngestUploadsJob'>, statuses=[JobStatus(id='996b15dc29024cf8bfbb2510cccbf0a3', status='Collecting done.'), JobStatus(id='05a2a39c58054a818bf4fb83b921e8cd', status='Indexing done.'), JobStatus(id='5284b5fd5ec4479fb8d0cf789be7e18d', status='Collecting done.')], errors=[], last_update_date=datetime.datetime(2024, 2, 26, 20, 58, 41, tzinfo=TzInfo(UTC)), duration='6s')

In [13]:

# Create a chat session
# chat_session_id = client.create_chat_session(collection_id)

# # Query the collection
# with client.connect(chat_session_id) as session:
#     reply = session.query(
#         'How many paper clips were shipped to Scranton?',
#         llm="gpt-4-0613"
#     )
#     print(reply.content)

#     reply = session.query(
#         'Did David Brent co-sign the contract with Initech?',
#         timeout=60,
#         llm="gpt-4-0613"
#     )
#     print(reply.content)

# # Summarize each document
# documents = client.list_documents_in_collection(collection_id, offset=0, limit=99)
# for doc in documents:
#     summary = client.summarize_document(
#         document_id=doc.id,
#         timeout=60,
#     )
#     print(summary.content)


#client.delete_documents_from_collection

In [14]:
chosen_snippets

['Lethal Company is a cooperative survival horror video game created by Zeekerss.']

In [15]:
decide_sections = lambda user_query: client.answer_question(
        question=f"""{user_query}
        Please plan the presentation by replying a JSON array \
        consisting of slide titles, starting from the first slide to the last slide. Do not include the title slide. Do not say anything else.

        Here is an example reply:
        ["Introduction to Cookies", "History of cookies", "Types of Cookies", "Health Concerns", "Conclusion"]  
        """,
        system_prompt=f"""You are an assistant whose task is to help a user in creating a presentation.\
        Here are a list of wikipedia entry summaries that are selected for the presentation:
        {chosen_snippets}
        You will be asked to come up with slide titles for the presentation.
        """,
        llm='mistralai/Mixtral-8x7B-Instruct-v0.1' 
    )

sections = try_and_parse(user_query, decide_sections)

In [16]:
sections

['Introduction to Lethal Company',
 'Creators of Lethal Company',
 'Gameplay and Features',
 'Cooperative Survival Horror Elements',
 'Critical Reception and Reviews',
 'Future of Lethal Company']

In [17]:
chat_session_id = client.create_chat_session(collection_id)
chat_session_id

'c057b90e-409b-4cde-99e2-ce631a33f7d7'

In [18]:
""" Ref for slide types:  
0 ->  title and subtitle 
1 ->  title and content 
2 ->  section header 
3 ->  two content 
4 ->  Comparison 
5 ->  Title only  
6 ->  Blank 
7 ->  Content with caption 
8 ->  Pic with caption 
"""

' Ref for slide types:  \n0 ->  title and subtitle \n1 ->  title and content \n2 ->  section header \n3 ->  two content \n4 ->  Comparison \n5 ->  Title only  \n6 ->  Blank \n7 ->  Content with caption \n8 ->  Pic with caption \n'

In [19]:
prs = Presentation()
prs.slide_width = Inches(16)
prs.slide_height = Inches(9)
title_slide = prs.slides.add_slide(prs.slide_layouts[0]) 
decide_slide_format = lambda user_query: client.answer_question(
        question=f"""{user_query} Think of a simple title for this presentation.
        
        Also, think of a good background colour, in RGB format,\
        for the slides and a good colour, also in RGB format, for the\
        text. Typically, if the text colour is bright (for example RGB [255, 255, 255] is white), then the background colour should be dark
        (RGB [0, 0, 100] is dark blue). Conversely, if the text colour is dark (for example RGB [0, 0, 0] is black), the background colour should be bright\
        . You are free to choose any text and background colour, \
        as long as you follow these rules. Please do not assign grey-scale colours for the text and background (like RGB [50, 50, 50]), as much as possible.
        
        Format your reply as a JSON array containing the title and the two colours, following the example below. Do not say anything else.
        Example:
        ["Slide Title", {{"background": [100, 0, 0]}}, {{"text": [255, 255, 255]}}]""",
    
        system_prompt=f"""You are an assistant whose task is to help a user in creating a presentation.\
        Here are a list of wikipedia entry summaries that are selected for the presentation:
        {chosen_snippets}
        This should give you an idea of what this presentation should be about.
        """,
        llm='mistralai/Mixtral-8x7B-Instruct-v0.1' 
)

format = try_and_parse(user_query, decide_slide_format)

In [20]:
format

['Lethal Company: A Co-op Survival Horror Experience',
 {'background': [50, 50, 50]},
 {'text': [255, 255, 255]}]

In [21]:
background = RGBColor(*tuple(list(format[1].values())[0])) 
font = RGBColor(*tuple(list(format[2].values())[0])) 
fill = title_slide.background.fill
fill.solid()
fill.fore_color.rgb = background


title_slide.shapes.title.text = format[0]
title_slide.shapes.title.text_frame.paragraphs[0].font.color.rgb =  font
title_slide.shapes.title.text_frame.paragraphs[0].font.name = 'Montserrat'
title_slide.shapes.title.text_frame.paragraphs[0].font.bold = True

first_shape =  title_slide.shapes[0]
first_shape.left, first_shape.top, first_shape.width, first_shape.height = (prs.slide_width - Inches(12))//2, \
(prs.slide_height-first_shape.height)//2 - Inches(1),\
Inches(12),\
Inches(2)

In [22]:

with client.connect(chat_session_id) as session:

    for section in tqdm(sections):
        slide = prs.slides.add_slide(prs.slide_layouts[1])
        fill = slide.background.fill
        fill.solid()
        fill.fore_color.rgb = background

        
        contents = slide.placeholders[1]
        contents.text_frame.word_wrap = True

        title = slide.shapes.title
        title.text = section
        title.text_frame.paragraphs[0].font.color.rgb = font
        title.text_frame.paragraphs[0].font.size = Pt(32)
        title.text_frame.paragraphs[0].font.name = 'Karla'
       
       
        content = session.query(
            
            message = section,
            system_prompt=f"""You are an assistant whose task is to help a user in creating a presentation. \
            The slides of the presentation are as follows: {sections}
            You are now tasked with generating the content of one slide, which will be provided by the user.
            """,
            pre_prompt_query="You have been provided with the following information, which may be useful in your task.",
            prompt_query="Decide if the information is relevant, and use it if needed. \
            Generate the content required in the slide provided by the user. You only need to generate the contents of the slide, not the title\
            or anything else. Generate a maximum of 3 paragraphs of text. Keep to a word limit of 250 words. \
            Do not use numbered lists.",
            llm="mistralai/Mixtral-8x7B-Instruct-v0.1",
            rag_config={
                "rag_type": "rag",
            },
        ).content

        contents.text = content
        
        for paragraph in contents.text_frame.paragraphs:
            paragraph.space_after = 1
            paragraph.space_before = 0
           
            paragraph.font.size = Pt(18)  
            paragraph.font.color.rgb = font
            paragraph.font.name = 'Karla'

        contents.text_frame.auto_size = MSO_AUTO_SIZE.SHAPE_TO_FIT_TEXT
        shapes = slide.shapes
        new_width = Inches(14)
        new_height = Inches(7)
        shapes[0].height, shapes[0].width, shapes[0].top, shapes[0].left = shapes[0].height, new_width, shapes[0].top, (prs.slide_width-new_width)//2
        shapes[1].height, shapes[1].width, shapes[1].top, shapes[1].left = new_height, new_width, shapes[1].top, (prs.slide_width-new_width)//2
        
        

# gpt-4-1106-preview


100%|████████████████████████████████████████████████████████████████████████████████████| 6/6 [01:50<00:00, 18.40s/it]


In [23]:
sanitised = re.sub(r'[\W_]+', '_', format[0])
prs.save(f"./presentations/{sanitised}.pptx")

In [24]:
#contents.text_frame.fit_text(max_size=17)

