In [1]:
import requests
from bs4 import BeautifulSoup
import openai
import IPython.core.getipython
import time
import os
import concurrent.futures
from tqdm import tqdm
from scipy.spatial import distance
from sklearn.cluster import KMeans
import numpy as np

# Set up your OpenAI API key
openai.api_key = "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"
def get_inlink_texts(target_page):
    inlinks = get_wikipedia_inlinks(target_page)
    texts = []
    found_inlinks = []
    for inlink in tqdm(inlinks, total=len(inlinks), desc="Text Extraction", unit="page"):
        text = get_paragraph_with_link(inlink, target_page)
        if text:
            texts.append(text)
            found_inlinks.append(inlink)
    return texts, found_inlinks


def get_wikipedia_inlinks(title, lang='en', limit=5000):
    base_url = f'https://{lang}.wikipedia.org/w/api.php'
    params = {
        'action': 'query',
        'format': 'json',
        'list': 'backlinks',
        'bltitle': title,
        'bllimit': limit,
        'blnamespace': 0,  # Only retrieve links from main namespace
        'continue': ''  # Placeholder for pagination
    }
    inlinks = []
    while True:
        response = requests.get(base_url, params=params)
        data = response.json()
        if 'error' in data:
            print(f"Error: {data['error']['info']}")
            break
        inlink_pages = data['query']['backlinks']
        for page in inlink_pages:
            inlinks.append(page['title'])
        if 'continue' not in data:
            break
        params['continue'] = data['continue']['continue']
        params['blcontinue'] = data['continue']['blcontinue']
    return inlinks
def get_wikipedia_page_content(title, lang='en'):
    base_url = f'https://{lang}.wikipedia.org/w/api.php'
    params = {
        'action': 'parse',
        'page': title,
        'format': 'json',
        'prop': 'text',
        'contentmodel': 'wikitext'
    }
    response = requests.get(base_url, params=params)
    data = response.json()
    if 'error' in data:
        print(f"Error: {data['error']['info']}")
        return None
    html_content = data['parse']['text']['*']
    return html_content
def get_paragraph_with_link(page_title, link_title, lang='en'):
    content = get_wikipedia_page_content(page_title, lang)
    soup = BeautifulSoup(content, 'html.parser')
    
    #elements = soup.find_all(['p', 'li', 'dl'])
    elements = soup.find_all('p')
    
    #element_names = {'p': 'Paragraph', 'li': 'List item', 'dl': 'Description list'}
    
    link_href = "/wiki/" + link_title.replace(' ', '_')
    
    for element in elements:
        a_tags = element.find_all('a', href=True)
        if any(a['href'] == link_href for a in a_tags):
            context_info = "Unknown"
            preceding_header = element.find_previous(['h1', 'h2', 'h3', 'h4', 'h5', 'h6'])
            if preceding_header:
                context_info = preceding_header.text.replace('[edit]', '')  # Remove the '[edit]' portion
            # Handle paragraphs separately from list items and description lists
            if element.name == 'p':
                #return f"\nElement type: {element_names[element.name]}, \nContext: {context_info}, \nText: \n{element.text.strip()}"
                return f"\n{element.text.strip()}\n"
            else:
                fallback_context = None
                current_context = element
                while current_context is not None:
                    if current_context.name not in ['li', 'dl']:
                        fallback_context = current_context
                        break
                    current_context = current_context.find_parent()
                return f"\nElement type: {element_names[element.name]}, \nContext: {context_info}, \nText: \n{element.text.strip()}"
    return None

def get_analyses(target_page, found_inlinks, texts):
    analyses = []
    for found_inlink, text in tqdm(zip(found_inlinks, texts), total=len(found_inlinks), desc="Analyses", unit="page"):
        analysis = hyperlink_analysis(target_page, text, found_inlink)
        analyses.append(analysis)
    return analyses

def hyperlink_analysis(hyperlink, paragraph, page):
    prompt = f"In the context of '{paragraph}' on the Wikipedia page '{page}', the hyperlink '{hyperlink}' appears. The following factors come into consideration:\n\n1) Extent of '{hyperlink}' usage within this context.\n2) Boundaries and limitations regarding this usage.\n3) Any interplay with other concepts or events within this context.\n4) The relevance and necessity of '{hyperlink}' within this specific context.\n\n1) Extent of '{hyperlink}' usage within this context can be described as:\n\n"

    analysis = inference(prompt)

    return f"1) Extent of '{hyperlink}' usage within this context can be described as:\n\n" + analysis


def inference(prompt, retries=5, backoff_factor=0.1):
    for i in range(retries):
        try:
            # Your API call here
            response = openai.Completion.create(
                engine="text-davinci-003",
                prompt=prompt,
                max_tokens=500,
                n=1,
                stop = "",
                temperature=0.1,
            )
            return response.choices[0].text.strip()
        except (openai.error.RateLimitError, openai.error.APIError) as e:
            if i < retries - 1:  # if it's not the last retry attempt
                sleep_time = backoff_factor * (2 ** i)  # exponential backoff
                time.sleep(sleep_time)
            else:  # if it's the last retry attempt
                print(f"Failed to generate inference after {retries} attempts.\n")
                raise

def embed(text, retries=5, backoff_factor=0.1):
    for i in range(retries):
        try:
            response = openai.Embedding.create(
                input=text,
                model="text-embedding-ada-002"
            )
            return response['data'][0]['embedding']
        except (openai.error.RateLimitError, openai.error.APIError) as e:
            if i < retries - 1:  # if it's not the last retry attempt
                sleep_time = backoff_factor * (2 ** i)  # exponential backoff
                time.sleep(sleep_time)
            else:  # if it's the last retry attempt
                print(f"Failed to generate embedding after {retries} attempts.")
                raise               


def get_sorted_cluster_partitions(embeddings, num_clusters):
    # Fit the KMeans model to your data
    kmeans = KMeans(n_clusters=num_clusters)
    kmeans.fit(embeddings)

    # A list to hold the data for each cluster
    cluster_partitions = []

    # After fitting the model...
    clusters = kmeans.labels_

    # For each unique label (which represents a cluster)...
    for cluster_num in np.unique(clusters):
        # Get the data points in this cluster
        cluster_data = embeddings[clusters == cluster_num]
        
        # Calculate the distances from the centroid for each data point
        centroid = kmeans.cluster_centers_[cluster_num]
        distances = distance.cdist([centroid], cluster_data, 'euclidean')[0]
        
        # Get the indices that would sort the distances
        sorted_indices = np.argsort(distances)
        
        # Sort the data points by their distances to the centroid
        sorted_cluster_data = cluster_data[sorted_indices]
        
        # Append the sorted data to our list
        cluster_partitions.append(sorted_cluster_data)
        
    return cluster_partitions
                

def core(num_clusters, num_documents):
    # Fit the KMeans model to your data
    kmeans = KMeans(n_clusters=num_clusters)
    kmeans.fit(embeddings)

    # Now `kmeans.cluster_centers_` contains the centroids of the clusters
    cluster_centroids = kmeans.cluster_centers_

    # And `kmeans.labels_` contains the cluster number for each document
    clusters = kmeans.labels_

    # Get the representative document (the one closest to the centroid) for each cluster
    representative_docs = []

    for i in range(num_clusters):
        # Find the points in this cluster
        idx = np.where(clusters == i)[0]
        cluster_points = np.array(embeddings)[idx]

        # Find the 9 documents closest to the centroid
        closest_indices = cdist([cluster_centroids[i]], cluster_points).argsort()[0][:num_documents]
        representatives = [found_inlinks[idx[closest_idx]] + "\n" + texts[idx[closest_idx]] for closest_idx in closest_indices]
        representative_docs.append(representatives)

    # Now `representative_docs` contains the most representative documents for each cluster
    cores = []
    combined_texts = ""
    for i, docs in enumerate(representative_docs, start=1):
        combined_text = f"Cluster {i} representative documents:\n"
        for j, doc in enumerate(docs, start=1):
            combined_text += f"Document {j}: {doc}\n"
        combined_texts += "\n" + combined_text
        #print(combined_texts)
        prompt = f"In clustering all the occurances of {target_page} in wikipedia, these {num_documents} paragraphs were extracted:\n{combined_texts}\nThe following represents a textual representation for the core of {target_page} which integrates all {num_documents} documents:\nAt the core of {target_page}"
        core = f"At the core of {target_page} {inference(prompt)}"
        cores.append(core)
        print(core + "\n")


def understand(target_page):
    inlinks = get_wikipedia_inlinks(target_page)
    found_inlinks = []

    print(f"Found {len(inlinks)} pages linking to {target_page}.")

    info = []

    def process_inlink(inlink):
        paragraph = get_paragraph_with_link(inlink, target_page)
        if paragraph:
            analysis = hyperlink_analysis(target_page, paragraph, inlink)
            return (inlink, paragraph, analysis)
        return None

    with concurrent.futures.ThreadPoolExecutor() as executor:
        results = list(tqdm(executor.map(process_inlink, inlinks), total=len(inlinks), desc="Analysis", unit="page"))

    paragraphs = []
    gpt_paragraphs = []
    for result in results:
        if result is not None:
            inlink, paragraph, analysis = result
            found_inlinks.append(inlink)
            paragraphs.append(f"\nPage: {inlink}\n{paragraph}")
            gpt_paragraphs.append(f"Analysis:\n {analysis}\n")
    embeddings = []
    for gptparagraph in tqdm(gpt_paragraphs, total=len(gpt_paragraphs), desc="Embedding", unit="page"):
        embedding = embed(gptparagraph)
        embeddings.append(embedding)
    return found_inlinks, paragraphs, gpt_paragraphs, embeddings    

In [None]:
data = understand("Idea")

Found 560 pages linking to Idea.


Analysis:  60%|████████████████████████████████████████▊                           | 336/560 [06:32<08:26,  2.26s/page]

In [None]:
target_page = "Idea"
texts, found_inlinks = get_inlink_texts(target_page)
print(f"Paragraphs found: {len(found_inlinks)}")

analyses = get_analyses(target_page, found_inlinks, texts)

embeddings = []
for analysis in tqdm(analyses, total=len(analyses), desc="Text Extraction", unit="page"):
    embeddings.append(embed(analysis))

Text Extraction: 100%|█████████████████████████████████████████████████████████████| 560/560 [03:46<00:00,  2.48page/s]


Paragraphs found: 234


Analyses:   0%|                                                                              | 0/234 [00:00<?, ?page/s]

In [20]:
from sklearn.metrics.pairwise import cosine_similarity

def search(query, embeddings, texts, found_inlinks, top_n=5):
    query_embedding = embed(query)
    
    # Compute cosine similarity between the query and all embeddings
    similarities = cosine_similarity([query_embedding], embeddings)[0]

    # Get the top N most similar embeddings
    top_indices = similarities.argsort()[-top_n:][::-1]
    top_similarities = similarities[top_indices]
    
    # Get the corresponding texts
    top_texts = [found_inlinks[i] + "\n" + texts[i] for i in top_indices]
    
    return top_texts

search_term = "Shoot"
searches = search(search_term, embeddings, texts, found_inlinks)
for term in searches:
    print(term, "\n")

I Am Alive

The game's most frequently employed mechanic is climbing, which is governed by a stamina meter, whereby physically demanding activities, such as jumping or running exert stamina and drain the meter.[9] Once the protagonist's feet are off the ground, everything exerts stamina, even hanging motionlessly.[7][10] Some actions exert more stamina than others - for example, jumping from one beam to another when climbing uses a lot more stamina than shimmying across a beam.[10] The stamina meter refills automatically when the player is on solid ground, unless the protagonist is outside at ground level, where the dust in the air constantly depletes the stamina meter, and eventually the health meter.[8] If the player runs out of stamina when climbing, they are allowed a few seconds for a final exertion to try to get to solid ground. However, such exertion results in a piece of the stamina meter itself disappearing (thus reducing the maximum stamina the player can have). This loss can

In [3]:
for found_inlink, text, analysis in zip(found_inlinks,texts, analyses):
    print(f"{found_inlink}\n{text}\n{analysis}\n\n")

Dungeons & Dragons

With the launch of Dungeons & Dragons's 3rd Edition, Wizards of the Coast made the d20 System available under the Open Game License (OGL) and d20 System trademark license. Under these licenses, authors were free to use the d20 System when writing games and game supplements.[145] The OGL has allowed a wide range of unofficial commercial derivative work based on the mechanics of Dungeons and Dragons to be produced since 2000;[146] it is credited with increasing the market share of d20 products[147] and leading to a "boom in the RPG industry in the early 2000s".[148]

1) Extent of 'Game mechanics' usage within this context can be described as:

- The game mechanics of Dungeons and Dragons are mentioned in the context of the Open Game License (OGL) and d20 System trademark license, which allowed authors to use the d20 System when writing games and game supplements. 

2) Boundaries and limitations regarding this usage can be described as:

 - The OGL only allowed authors

In [23]:
from sklearn.cluster import KMeans
from scipy.spatial.distance import cdist
import numpy as np
def core(num_clusters, num_documents):
    # Fit the KMeans model to your data
    kmeans = KMeans(n_clusters=num_clusters)
    kmeans.fit(embeddings)

    # Now `kmeans.cluster_centers_` contains the centroids of the clusters
    cluster_centroids = kmeans.cluster_centers_

    # And `kmeans.labels_` contains the cluster number for each document
    clusters = kmeans.labels_

    # Get the representative document (the one closest to the centroid) for each cluster
    representative_docs = []

    for i in range(num_clusters):
        # Find the points in this cluster
        idx = np.where(clusters == i)[0]
        cluster_points = np.array(embeddings)[idx]

        # Find the 9 documents closest to the centroid
        closest_indices = cdist([cluster_centroids[i]], cluster_points).argsort()[0][:num_documents]
        representatives = [found_inlinks[idx[closest_idx]] + "\n" + texts[idx[closest_idx]] for closest_idx in closest_indices]
        representative_docs.append(representatives)

    # Now `representative_docs` contains the most representative documents for each cluster
    cores = []
    combined_texts = ""
    for i, docs in enumerate(representative_docs, start=1):
        combined_text = f"Cluster {i} representative documents:\n"
        for j, doc in enumerate(docs, start=1):
            combined_text += f"Document {j}: {doc}\n"
        combined_texts += "\n" + combined_text
        #print(combined_texts)
        prompt = "In"
        prompt = f"In construclustering all the occurances of {target_page} in wikipedia, these {num_documents} paragraphs are extracted:\n{combined_text}\nThe following is the topic header for the dual"
        core = f"These documents represent {target_page} in the lens of {inference(prompt)}"
        cores.append(core)
        print(core + "\n")
    return cores    

cores = []
for core_count in range(1,5):
    print("\n")
    cores.append(core(core_count, 5))









These documents represent Game mechanics in the lens of a variety of video games. Game mechanics are the rules and systems that govern the game, such as the use of melee and shooting mechanics, platforming, quick time events, puzzles, and platforming elements. These mechanics are used to create a unique and engaging experience for the player, and can include the use of weapons, power-enhancing abilities, and side quests.







These documents represent Game mechanics in the lens of various video games. Game mechanics are the rules and systems that govern the behavior of a game, including the actions of players, the objectives of the game, and the rewards for success. These mechanics can include combat, platforming, shooting, and other elements that make up the game. Game mechanics are used to create an immersive experience for players, and can be used to create a unique and engaging experience.

These documents represent Game mechanics in the lens of video games. Game mechanics are the rules and systems that govern the interactions between players and the game environment. This includes the use of combo-based combat, quick time events, magical attacks, puzzles, platforming elements, downloadable content, and online multiplayer. It also includes the ability to effect the world in a believable way, storytelling techniques, morality systems, and the use of shields, speed shoes, and invincibility. Game mechanics



These documents represent Game mechanics in the lens of a first-person shooter, where players use weapons and other tools to defeat opponents. The mechanics involve using melee and shooting attacks, as well as platforming and physics puzzles. Players must also manage their health and weapons, and explore the environment to find optional areas.

These documents represent Game mechanics in the lens of a variety of different games. In Good Society, game mechanics include spreading rumors and writing letters. In Fallout 2, game mechanics are similar to the first game. Legacy games are designed to change permanently over the course of a series of sessions. MegaTraveller 2 features a character creation system and other elements of game mechanics compatible with previous Traveller products. Iwo Jima is a turn-based strategy game where the player must eliminate all Japanese forces by ground, air, or naval combat.

These documents represent Game mechanics in the lens of video games. Game mechan



These documents represent Game mechanics in the lens of a side-scrolling platform game, a shooting game, a graphic adventure game, a western-style game, and a strategy game. Game mechanics are seen as features such as jumping between platforms, avoiding obstacles, collecting shields, speed shoes, and invincibility, collecting rings, enemy AI, platforming sections, whip mechanics, narrative, script, shooting mechanics, authentic western tone, voice acting, multiplayer, economic micromanagement, technology trees, trade requirements, dynamic Victory Points system, and road networks. Additionally, game mechanics can be seen as strategies and styles of play such as military, technology, and/or trade.

These documents represent Game mechanics in the lens of fast-paced combat, environmental effects, barrels, checkpoints, bonus games, upgrades, and time traveling. These mechanics are used to create a unique and engaging experience for the player, allowing them to explore different levels, coll

In [8]:
def differentiate(cores):
    combine = ""
    for core in cores:
        combine += core + "\n"
    prompt = f"""
        The following {len(cores)} paragraphs are text extraction from clustering all occurences of {target_page} on Wikipedia using k-means where k is {len(cores)}:
        {combine}
        Each paragraph represents a different aspect of {target_page}, The {len(cores)} ways {target_page} is understood:
        1)
    """
    return f"1)" + inference(prompt)

differentials = []
for core in cores:
    differential = differentiate(core)
    print(differential + "\n")

1)Combo-based combat: Players are typically armed with a sword and gun, and must defeat each other with one hit with an attack with either their sword or a bullet. Bullets can be deflected by other players with precise timing.
     Quick time events: Players must react quickly to certain events in order to progress in the game.
     Platforming elements: Players must navigate through levels, jumping and dodging obstacles.
     Puzzles: Players must solve puzzles in order to progress in the game.
     World Weapons: Players can acquire special weapons through the World Weapons mechanic.
     Minigames: Players can access minigames such as fishing and target shooting.
     Multiplayer: Both competitive and cooperative multiplayer modes are available.

1)The first paragraph focuses on the use of various game mechanics such as melee and shooting, platforming, roleplaying, abstractions, trading card games, battle systems, side quests, mini-games, and additional weapons. These mechanics are 

In [27]:
print(differential)
print()
print(differential1)

None

None


In [7]:
print(inference(f"The following paragraph was generated by analyzing 10 texts:\n{core}\nThe following 10 texts were used to generate the paragraph:\n{combined_texts}\nThe following relates each sentence in the paragraph to the specific 10 texts to which it was derived:\n"))

At the core of Sublime (philosophy) is the idea of the sublime and the beauty of the natural world. (Document 5) Edmund Burke in his 1757 A Philosophical Enquiry into the Origin of Our Ideas of the Sublime and Beautiful argued that the soft gentle curves appealed to the male sexual desire, while the sublime horrors appealed to our desires for self-preservation. (Document 2) Picturesque arose as a mediator between these opposed ideals of beauty and the sublime, showing the possibilities that existed between these two rationally idealised states. (Document 2) Julia Kristeva argued that the sublime is really our effort to cover over the breakdowns (and subsequent reassertion of boundaries) associated with the abject; and literature is the privileged space for both the sublime and abject. (Document 3) Costa defined a more general, comprehensive, aesthetic and philosophical, theory of new media, which he named the technological sublime. (Document 6) Lloyd's paintings are influenced by cinem

In [21]:
num_clusters = 2
num_documents = 6
# Fit the KMeans model to your data
kmeans = KMeans(n_clusters=num_clusters)
kmeans.fit(embeddings)

# Now `kmeans.cluster_centers_` contains the centroids of the clusters
cluster_centroids = kmeans.cluster_centers_

# And `kmeans.labels_` contains the cluster number for each document
clusters = kmeans.labels_

# Get the representative document (the one closest to the centroid) for each cluster
representative_docs = []

for i in range(num_clusters):
    # Find the points in this cluster
    idx = np.where(clusters == i)[0]
    cluster_points = np.array(embeddings)[idx]
    
    # Find the 9 documents closest to the centroid
    closest_indices = cdist([cluster_centroids[i]], cluster_points).argsort()[0][:num_documents]
    representatives = [texts[idx[closest_idx]] for closest_idx in closest_indices]
    representative_docs.append(representatives)

# Now `representative_docs` contains the most representative documents for each cluster

combined_texts = []
doc_number = 1
for i, docs in enumerate(representative_docs, start=1):
    combined_text = f"Cluster {i} representative documents:\n"
    for j, doc in enumerate(docs, start=1):
        combined_text += f"Document {doc_number}: \n{doc}\n"
        doc_number += 1
    combined_texts.append(combined_text)
    
text = combined_texts[0] + "\n" + combined_texts[1]
prompt1 = f"These {num_clusters * num_documents} paragraphs were extracted from clustering all occurences of {target_page} on wikipedia. These {num_clusters * num_documents} paragraphs represent two sides of {target_page}:\n{text}\n\n{core}\n\nCluster 1:\nFocuses on {target_page}"
print(prompt1)
dual = f"Cluster 1:\nFocuses on {target_page} in the lens of {inference(prompt1)}"
print(dual)




These 12 paragraphs were extracted from clustering all occurences of Game mechanics on wikipedia. These 12 paragraphs represent two sides of Game mechanics:
Cluster 1 representative documents:
Document 1: 

Samurai Gunn is a local multiplayer game that supports 2 to 4 players utilising melee and shooting mechanics as well as platforming.
Players are armed with a sword and gun with only three bullets per life. A match typically consists of players defeating each other with one hit with an attack with either their sword or a bullet. Both swords and bullets can be deflected by other players with precise timing.[3]

Document 2: 

The WARS Trading Card Game is an out-of-print trading card game released by Decipher in October 2004 with science fiction themes, using game mechanics from the Star Wars CCG.[1] After two releases, the game was officially "placed on hiatus" in May 2005.

Document 3: 

Xyllomer's setting is medieval fantasy in theme.[2] Roleplaying is enforced,[3] with strict penal

Cluster 1:
Focuses on Game mechanics in the lens of such as melee and shooting mechanics, platforming, swords and bullets, morality system, abstractions, range of side quests, battle system, weapons, arenas, and downloadable content.

Cluster 2:
Focuses on Game mechanics such as the ability to effect the world in a believable way, storytelling techniques, morality system, compatibility of a game within the "gory shooter" game milieu, player's ability to develop their settlement, implementation of the whip, enemy AI, narrative, main character, script, in-depth tutorial, role-playing, spreading rumors, writing letters, character creation systems, real-time exploration, trading, interaction with non-player characters, combat, problem solving, ground, air, and naval combat, and permanent changes over the course of a series of sessions.


In [11]:
combined_text = combined_texts[0] + "\n" + combined_texts[1]
oneside = f"On one side {target_page} " + inference(f"{core}\nThe following is obtained by clustering all occurences of {target_page} on wikipedia\n{combined_text}\n{dual}\nOn one side of {target_page}")
otherside = f"On the other side {target_page} " + inference(f"{core}\n{combined_text}\n{dual}\n{oneside}\nOn the other side of {target_page}")
print(oneside+"\n\n"+otherside)

On one side Sublime (philosophy) is the idea of the sublime and the beauty of the natural world. Edmund Burke in his 1757 A Philosophical Enquiry into the Origin of Our Ideas of the Sublime and Beautiful argued that the soft gentle curves appealed to the male sexual desire, while the sublime horrors appealed to our desires for self-preservation. Picturesque arose as a mediator between these opposed ideals of beauty and the sublime, showing the possibilities that existed between these two rationally idealised states. Julia Kristeva argued that the sublime is really our effort to cover over the breakdowns (and subsequent reassertion of boundaries) associated with the abject; and literature is the privileged space for both the sublime and abject. Costa defined a more general, comprehensive, aesthetic and philosophical, theory of new media, which he named the technological sublime. Lloyd's paintings are influenced by cinema, in particular film noir and science fiction, and the Romantic con

In [90]:
print(oneside+"\n\n"+otherside)

On one side User-generated content is the world of video games. Games such as Minecraft, LittleBigPlanet, and Dreams offer players the ability to create user-generated content to share with others to play. These games also feature development tools for players to create in-game elements such as games and maps. The game's large community also offers a wide variety of user-generated content, such as modifications, servers, skins, texture packs, and custom maps, which add new game mechanics and possibilities.

On the other side User-generated content is the broader context of the internet and digital media. Platforms such as YouTube, science.tv, HuffPost, House of Highlights, Historypin, Web 2.0-style websites, Smart TV devices, and Essence offer a wide variety of user-generated and corporate media videos, content posted directly on the site as well as user-generated content via video blogging, audio, and photo, and interactive features such as clickable links, the option to comment on co

In [73]:
both = oneside + " " + otherside
thisprompt = f"The following 12 texts were used to generate a paragraph:\n{combined_texts}\nThe following paragraph was generated by analyzing 12 texts:\n{both}\nThe following relates each sentence in the paragraphs to the specific 12 texts to which it was derived:\n"
print(thisprompt, "\n")
print(inference(thisprompt))

The following 12 texts were used to generate a paragraph:
['Cluster 1 representative documents:\nDocument 1: \n\nIn Minecraft, players explore a blocky, procedurally generated, three-dimensional world with virtually infinite terrain and may discover and extract raw materials, craft tools and items, and build structures, earthworks, and machines. Depending on their chosen game mode, players can fight hostile mobs, as well as cooperate with or compete against other players in the same world. Game modes include a survival mode (in which players must acquire resources to build in the world and maintain health) and a creative mode (in which players have unlimited resources and access to flight). The game\'s large community also offers a wide variety of user-generated content, such as modifications, servers, skins, texture packs, and custom maps, which add new game mechanics and possibilities.\n\nDocument 2: \n\nGames can be extended with new content and software patches through either expan

Sentence 1: Games such as Minecraft, LittleBigPlanet, and Dreams offer players the ability to create and share content with others, as well as development tools for players to create in-game elements such as games and maps. - Texts 1, 6, 5
Sentence 2: Platforms such as YouTube, science.tv, HuffPost, House of Highlights, Historypin, Web 2.0-style websites, Smart TV devices, and Essence offer a wide variety of user-generated and corporate media videos, content posted directly on the site as well as user-generated content via video blogging, audio, and photo, and interactive features such as clickable links, the option to comment on content, the option to tag images, videos or links, the option of user participation to evaluate or review the pages, or even the option to create new user-generated content. - Texts 7, 8, 9, 10, 11, 12
