In [4]:
import requests
from bs4 import BeautifulSoup
import openai
import IPython.core.getipython
import time
import os
import concurrent.futures
from tqdm import tqdm
from scipy.spatial import distance
from sklearn.cluster import KMeans
import numpy as np

# Set up your OpenAI API key
openai.api_key = "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"
def get_inlink_texts(target_page):
    inlinks = get_wikipedia_inlinks(target_page)
    texts = []
    found_inlinks = []
    for inlink in tqdm(inlinks, total=len(inlinks), desc="Text Extraction", unit="page"):
        text = get_paragraph_with_link(inlink, target_page)
        if text:
            texts.append(text)
            found_inlinks.append(inlink)
    return texts, found_inlinks


def get_wikipedia_inlinks(title, lang='en', limit=5000):
    base_url = f'https://{lang}.wikipedia.org/w/api.php'
    params = {
        'action': 'query',
        'format': 'json',
        'list': 'backlinks',
        'bltitle': title,
        'bllimit': limit,
        'blnamespace': 0,  # Only retrieve links from main namespace
        'continue': ''  # Placeholder for pagination
    }
    inlinks = []
    while True:
        response = requests.get(base_url, params=params)
        data = response.json()
        if 'error' in data:
            print(f"Error: {data['error']['info']}")
            break
        inlink_pages = data['query']['backlinks']
        for page in inlink_pages:
            inlinks.append(page['title'])
        if 'continue' not in data:
            break
        params['continue'] = data['continue']['continue']
        params['blcontinue'] = data['continue']['blcontinue']
    return inlinks
def get_wikipedia_page_content(title, lang='en'):
    base_url = f'https://{lang}.wikipedia.org/w/api.php'
    params = {
        'action': 'parse',
        'page': title,
        'format': 'json',
        'prop': 'text',
        'contentmodel': 'wikitext'
    }
    response = requests.get(base_url, params=params)
    data = response.json()
    if 'error' in data:
        print(f"Error: {data['error']['info']}")
        return None
    html_content = data['parse']['text']['*']
    return html_content
def get_paragraph_with_link(page_title, link_title, lang='en'):
    content = get_wikipedia_page_content(page_title, lang)
    soup = BeautifulSoup(content, 'html.parser')
    
    #elements = soup.find_all(['p', 'li', 'dl'])
    elements = soup.find_all('p')
    
    #element_names = {'p': 'Paragraph', 'li': 'List item', 'dl': 'Description list'}
    
    link_href = "/wiki/" + link_title.replace(' ', '_')
    
    for element in elements:
        a_tags = element.find_all('a', href=True)
        if any(a['href'] == link_href for a in a_tags):
            context_info = "Unknown"
            preceding_header = element.find_previous(['h1', 'h2', 'h3', 'h4', 'h5', 'h6'])
            if preceding_header:
                context_info = preceding_header.text.replace('[edit]', '')  # Remove the '[edit]' portion
            # Handle paragraphs separately from list items and description lists
            if element.name == 'p':
                #return f"\nElement type: {element_names[element.name]}, \nContext: {context_info}, \nText: \n{element.text.strip()}"
                return f"\n{element.text.strip()}\n"
            else:
                fallback_context = None
                current_context = element
                while current_context is not None:
                    if current_context.name not in ['li', 'dl']:
                        fallback_context = current_context
                        break
                    current_context = current_context.find_parent()
                return f"\nElement type: {element_names[element.name]}, \nContext: {context_info}, \nText: \n{element.text.strip()}"
    return None

def get_analyses(target_page, found_inlinks, texts):
    analyses = []
    for found_inlink, text in tqdm(zip(found_inlinks, texts), total=len(found_inlinks), desc="Analyses", unit="page"):
        analysis = hyperlink_analysis(target_page, text, found_inlink)
        analyses.append(analysis)
    return analyses

def hyperlink_analysis(hyperlink, paragraph, page):
    prompt = f"In the context of '{paragraph}' on the Wikipedia page '{page}', the hyperlink '{hyperlink}' appears. The following factors come into consideration:\n\n1) Extent of '{hyperlink}' usage within this context.\n2) Boundaries and limitations regarding this usage.\n3) Any interplay with other concepts or events within this context.\n4) The relevance and necessity of '{hyperlink}' within this specific context.\n\n1) Extent of '{hyperlink}' usage within this context can be described as:\n\n"

    analysis = inference(prompt)

    return f"1) Extent of '{hyperlink}' usage within this context can be described as:\n\n" + analysis


def inference(prompt, retries=5, backoff_factor=0.1):
    for i in range(retries):
        try:
            # Your API call here
            response = openai.Completion.create(
                engine="text-babbage-001",
                prompt=prompt,
                max_tokens=500,
                n=1,
                stop = "",
                temperature=0.1,
            )
            return response.choices[0].text.strip()
        except (openai.error.RateLimitError, openai.error.APIError) as e:
            if i < retries - 1:  # if it's not the last retry attempt
                sleep_time = backoff_factor * (2 ** i)  # exponential backoff
                time.sleep(sleep_time)
            else:  # if it's the last retry attempt
                print(f"Failed to generate inference after {retries} attempts.\n{prompt}")
                raise

def embed(text, retries=5, backoff_factor=0.1):
    for i in range(retries):
        try:
            response = openai.Embedding.create(
                input=text,
                model="text-embedding-ada-002"
            )
            return response['data'][0]['embedding']
        except (openai.error.RateLimitError, openai.error.APIError) as e:
            if i < retries - 1:  # if it's not the last retry attempt
                sleep_time = backoff_factor * (2 ** i)  # exponential backoff
                time.sleep(sleep_time)
            else:  # if it's the last retry attempt
                print(f"Failed to generate embedding after {retries} attempts.")
                raise               


def get_sorted_cluster_partitions(embeddings, num_clusters):
    # Fit the KMeans model to your data
    kmeans = KMeans(n_clusters=num_clusters)
    kmeans.fit(embeddings)

    # A list to hold the data for each cluster
    cluster_partitions = []

    # After fitting the model...
    clusters = kmeans.labels_

    # For each unique label (which represents a cluster)...
    for cluster_num in np.unique(clusters):
        # Get the data points in this cluster
        cluster_data = embeddings[clusters == cluster_num]
        
        # Calculate the distances from the centroid for each data point
        centroid = kmeans.cluster_centers_[cluster_num]
        distances = distance.cdist([centroid], cluster_data, 'euclidean')[0]
        
        # Get the indices that would sort the distances
        sorted_indices = np.argsort(distances)
        
        # Sort the data points by their distances to the centroid
        sorted_cluster_data = cluster_data[sorted_indices]
        
        # Append the sorted data to our list
        cluster_partitions.append(sorted_cluster_data)
        
    return cluster_partitions
                

def core(num_clusters, num_documents):
    # Fit the KMeans model to your data
    kmeans = KMeans(n_clusters=num_clusters)
    kmeans.fit(embeddings)

    # Now `kmeans.cluster_centers_` contains the centroids of the clusters
    cluster_centroids = kmeans.cluster_centers_

    # And `kmeans.labels_` contains the cluster number for each document
    clusters = kmeans.labels_

    # Get the representative document (the one closest to the centroid) for each cluster
    representative_docs = []

    for i in range(num_clusters):
        # Find the points in this cluster
        idx = np.where(clusters == i)[0]
        cluster_points = np.array(embeddings)[idx]

        # Find the 9 documents closest to the centroid
        closest_indices = cdist([cluster_centroids[i]], cluster_points).argsort()[0][:num_documents]
        representatives = [found_inlinks[idx[closest_idx]] + "\n" + texts[idx[closest_idx]] for closest_idx in closest_indices]
        representative_docs.append(representatives)

    # Now `representative_docs` contains the most representative documents for each cluster
    cores = []
    combined_texts = ""
    for i, docs in enumerate(representative_docs, start=1):
        combined_text = f"Cluster {i} representative documents:\n"
        for j, doc in enumerate(docs, start=1):
            combined_text += f"Document {j}: {doc}\n"
        combined_texts += "\n" + combined_text
        #print(combined_texts)
        prompt = f"In clustering all the occurances of {target_page} in wikipedia, these {num_documents} paragraphs were extracted:\n{combined_texts}\nThe following represents a textual representation for the core of {target_page} which integrates all {num_documents} documents:\nAt the core of {target_page}"
        core = f"At the core of {target_page} {inference(prompt)}"
        cores.append(core)
        print(core + "\n")


def understand(target_page):
    inlinks = get_wikipedia_inlinks(target_page)
    found_inlinks = []

    print(f"Found {len(inlinks)} pages linking to {target_page}.")

    info = []

    def process_inlink(inlink):
        paragraph = get_paragraph_with_link(inlink, target_page)
        if paragraph:
            analysis = hyperlink_analysis(target_page, paragraph, inlink)
            return (inlink, paragraph, analysis)
        return None

    with concurrent.futures.ThreadPoolExecutor() as executor:
        results = list(tqdm(executor.map(process_inlink, inlinks), total=len(inlinks), desc="Analysis", unit="page"))

    paragraphs = []
    gpt_paragraphs = []
    for result in results:
        if result is not None:
            inlink, paragraph, analysis = result
            found_inlinks.append(inlink)
            paragraphs.append(f"\nPage: {inlink}\n{paragraph}")
            gpt_paragraphs.append(f"Analysis:\n {analysis}\n")
    texts = []
    embeddings = []
    for gptparagraph, paragraph in tqdm(zip(gpt_paragraphs, paragraphs), total=len(gpt_paragraphs), desc="Embedding", unit="page"):
        text = paragraph + "\n" + gptparagraph
        texts.append(text)
        embedding = embed(text)
        embeddings.append(embedding)
    return found_inlinks, texts, embeddings    

In [5]:
target_page = "Goalkeeper"
texts, found_inlinks = get_inlink_texts(target_page)
print(f"Paragraphs found: {len(found_inlinks)}")

analyses = get_analyses(target_page, found_inlinks, texts)

embeddings = []
for analysis in tqdm(analyses, total=len(analyses), desc="Text Extraction", unit="page"):
    embeddings.append(embed(analysis))

Text Extraction: 100%|███████████████████████████████████████████████████████████████| 36/36 [00:11<00:00,  3.11page/s]


Paragraphs found: 14


Analyses: 100%|██████████████████████████████████████████████████████████████████████| 14/14 [00:20<00:00,  1.49s/page]
Text Extraction: 100%|███████████████████████████████████████████████████████████████| 14/14 [00:04<00:00,  3.03page/s]


In [6]:
for found_inlink, text, analysis in zip(found_inlinks,texts, analyses):
    print(f"{found_inlink}\n{text}\n{analysis}\n\n")

Quidditch

Matches are played on a large oval pitch with three ring-shaped goals of different heights on each side, between two opposing teams of seven players each: three Chasers, two Beaters, the Keeper, and the Seeker. The Chasers and the Keeper respectively score with and defend the goals against the Quaffle; the two Beaters bat the Bludgers away from their teammates and towards their opponents; and the Seeker locates and catches the Golden Snitch, whose capture simultaneously wins the Seeker's team 150 points and ends the game. The team with the most points at the end wins.

1) Extent of 'Goalkeeper' usage within this context can be described as:

The ' Goalkeeper' is a player who is responsible for defending the team's goal and preventing any other team from scoring. They are usually a tall, strong player who is able to catch the ball and throw it to their teammate.

2) Within this context, the ' Goalkeeper' is limited in their usage. They are not used as much as they could be an

In [41]:
from sklearn.cluster import KMeans
from scipy.spatial.distance import cdist
import numpy as np
def core(num_clusters, num_documents):
    # Fit the KMeans model to your data
    kmeans = KMeans(n_clusters=num_clusters)
    kmeans.fit(embeddings)

    # Now `kmeans.cluster_centers_` contains the centroids of the clusters
    cluster_centroids = kmeans.cluster_centers_

    # And `kmeans.labels_` contains the cluster number for each document
    clusters = kmeans.labels_

    # Get the representative document (the one closest to the centroid) for each cluster
    representative_docs = []

    for i in range(num_clusters):
        # Find the points in this cluster
        idx = np.where(clusters == i)[0]
        cluster_points = np.array(embeddings)[idx]

        # Find the 9 documents closest to the centroid
        closest_indices = cdist([cluster_centroids[i]], cluster_points).argsort()[0][:num_documents]
        representatives = [found_inlinks[idx[closest_idx]] + "\n" + texts[idx[closest_idx]] for closest_idx in closest_indices]
        representative_docs.append(representatives)

    # Now `representative_docs` contains the most representative documents for each cluster
    cores = []
    combined_texts = ""
    for i, docs in enumerate(representative_docs, start=1):
        combined_text = f"Cluster {i} representative documents:\n"
        for j, doc in enumerate(docs, start=1):
            combined_text += f"Document {j}: {doc}\n"
        combined_texts += "\n" + combined_text
        #print(combined_texts)
        prompt = f"In clustering all the occurances of {target_page} in wikipedia, these {num_documents} paragraphs were extracted:\n{combined_texts}\nThe following represents a textual representation for the core of {target_page} which integrates all {num_documents} documents:\nAt the core of {target_page}"
        core = f"At the core of {target_page} {inference(prompt)}"
        cores.append(core)
        print(core + "\n")
    return cores    

cores = []
for core_count in range(1,6):
    print("\n")
    cores.append(core(core_count, 6))









At the core of Consumption (economics) is the production, distribution, and trade of goods and services, as well as the consumption of them. This involves the practices, discourses, and material expressions associated with the production, use, and management of scarce resources. Income is the consumption and saving opportunity gained by an entity within a specified timeframe, which is generally expressed in monetary terms. The world economy or global economy is the economy of all humans of the world, referring to the global economic system, which includes all economic activities which are conducted both within and between nations, including production, consumption, economic management, work in general, exchange of financial values and trade of goods and services. La Quotidienne (TV series) is a programme devoted to new forms of consumption, whether collaborative, supportive or economic. Government restrictions on the free market and their impact on production and consumption can make p



At the core of Consumption (economics) is the study of consumer behaviour, which investigates individual qualities such as demographics, personality lifestyles, and behavioural variables (such as usage rates, usage occasion, loyalty, brand advocacy, and willingness to provide referrals), in an attempt to understand people's wants and consumption patterns. It also investigates the influences on the consumer, from social groups such as family, friends, sports, and reference groups, to society in general (brand-influencers, opinion leaders). Consumption distribution or consumption inequality is an alternative to the income distribution or wealth distribution for judging economic inequality, comparing levels of consumption rather than income or wealth. Expenditure cascades is an economic term which describes changes in purchasing and consumption behaviour which ripple through the levels of income in response to changes in income inequality. Supply-side economics proposed that production or



At the core of Consumption (economics) is the consideration that the free choice of consumers should strongly orient the choice by manufacturers of what is produced and how, and therefore orient the economic organization of a society. This is reflected in the relationship between production and consumption, which is mirrored against the economic theory of supply and demand. Accordingly, when production decreases more than factor consumption, this results in reduced productivity, while a production increase over consumption is seen as increased productivity. A marketing channel consists of the people, organizations, and activities necessary to transfer the ownership of goods from the point of production to the point of consumption. It is the way products get to the end-user, the consumer; and is also known as a distribution channel. Domestic tariff area also plays a role in Consumption (economics) as duty shall be chargeable on goods as if these goods have been cleared for home consumpt



At the core of Consumption (economics) is the production, distribution, and consumption of goods and services. This involves the practices, discourses, and material expressions associated with the production, use, and management of scarce resources. An economic system is a mechanism (social institution) which deals with the production, distribution and consumption of goods and services in a particular society. The world economy or global economy is the economy of all humans of the world, referring to the global economic system, which includes all economic activities which are conducted both within and between nations, including production, consumption, economic management, work in general, exchange of financial values and trade of goods and services. Goods are items that are usually (but not always) tangible, such as pens, physical books, salt, apples, and hats. Services are activities provided by other people, such as architects, suppliers, contractors, technologists, teachers, doctor



At the core of Consumption (economics) is the consideration that the free choice of consumers should strongly orient the choice by manufacturers of what is produced and how, and therefore orient the economic organization of a society. This is reflected in the relationship between production and consumption, which is mirrored against the economic theory of supply and demand. When production decreases more than factor consumption, this results in reduced productivity, while a production increase over consumption is seen as increased productivity. Additionally, consumerism may refer to economic policies that emphasise consumption, and marketing channels are the people, organizations, and activities necessary to transfer the ownership of goods from the point of production to the point of consumption. Views expounded with regularity in this field include "fear culture" and the critique of media coverage in Africa and the developing world in the context of Western intervention, underdevelopm

In [47]:
def differentiate(cores):
    combine = ""
    for core in cores:
        combine += core + "\n"
    prompt = f"""
        The following {len(cores)} paragraphs are text extraction from clustering all occurences of {target_page} on Wikipedia:
        {combine}
        The paragraphs can be differentiated as {len(cores)} different ways {target_page} is understood:
        The first paragraph
    """
    return "The first paragraph " + inference(prompt)

differentials = []
for core in cores:
    differential = differentiate(core)
    print(differential + "\n")

The first paragraph 1. Consumption (economics) is the production, distribution, and trade of goods and services, as well as the consumption of them. This involves the practices, discourses, and material expressions associated with the production, use, and management of scarce resources. Income is the consumption and saving opportunity gained by an entity within a specified timeframe, which is generally expressed in monetary terms. 
    
        The second paragraph
    
        2. The world economy or global economy is the economy of all humans of the world, referring to the global economic system, which includes all economic activities which are conducted both within and between nations, including production, consumption, economic management, work in general, exchange of financial values and trade of goods and services. La Quotidienne (TV series) is a programme devoted to new forms of consumption, whether collaborative, supportive or economic. 
    
        The third paragraph
    
  

In [27]:
print(differential)
print()
print(differential1)

None

None


In [7]:
print(inference(f"The following paragraph was generated by analyzing 10 texts:\n{core}\nThe following 10 texts were used to generate the paragraph:\n{combined_texts}\nThe following relates each sentence in the paragraph to the specific 10 texts to which it was derived:\n"))

At the core of Sublime (philosophy) is the idea of the sublime and the beauty of the natural world. (Document 5) Edmund Burke in his 1757 A Philosophical Enquiry into the Origin of Our Ideas of the Sublime and Beautiful argued that the soft gentle curves appealed to the male sexual desire, while the sublime horrors appealed to our desires for self-preservation. (Document 2) Picturesque arose as a mediator between these opposed ideals of beauty and the sublime, showing the possibilities that existed between these two rationally idealised states. (Document 2) Julia Kristeva argued that the sublime is really our effort to cover over the breakdowns (and subsequent reassertion of boundaries) associated with the abject; and literature is the privileged space for both the sublime and abject. (Document 3) Costa defined a more general, comprehensive, aesthetic and philosophical, theory of new media, which he named the technological sublime. (Document 6) Lloyd's paintings are influenced by cinem

In [9]:
num_clusters = 2
num_documents = 6
# Fit the KMeans model to your data
kmeans = KMeans(n_clusters=num_clusters)
kmeans.fit(embeddings)

# Now `kmeans.cluster_centers_` contains the centroids of the clusters
cluster_centroids = kmeans.cluster_centers_

# And `kmeans.labels_` contains the cluster number for each document
clusters = kmeans.labels_

# Get the representative document (the one closest to the centroid) for each cluster
representative_docs = []

for i in range(num_clusters):
    # Find the points in this cluster
    idx = np.where(clusters == i)[0]
    cluster_points = np.array(embeddings)[idx]
    
    # Find the 9 documents closest to the centroid
    closest_indices = cdist([cluster_centroids[i]], cluster_points).argsort()[0][:num_documents]
    representatives = [texts[idx[closest_idx]] for closest_idx in closest_indices]
    representative_docs.append(representatives)

# Now `representative_docs` contains the most representative documents for each cluster

combined_texts = []
doc_number = 1
for i, docs in enumerate(representative_docs, start=1):
    combined_text = f"Cluster {i} representative documents:\n"
    for j, doc in enumerate(docs, start=1):
        combined_text += f"Document {doc_number}: \n{doc}\n"
        doc_number += 1
    combined_texts.append(combined_text)
    
text = combined_texts[0] + "\n" + combined_texts[1]
prompt1 = f"These {num_clusters * num_documents} paragraphs were extracted from clustering all occurences of {target_page} on wikipedia. These {num_clusters * num_documents} paragraphs represent two sides of {target_page}:\n{text}\n\n{core}\n\nCluster 1:\nFocuses on {target_page}"
print(prompt1)
dual = f"Cluster 1:\nFocuses on {target_page} in the lens of {inference(prompt1)}"
print(dual)




These 12 paragraphs were extracted from clustering all occurences of Sublime (philosophy) on wikipedia. These 12 paragraphs represent two sides of Sublime (philosophy):
Cluster 1 representative documents:
Document 1: 

Sherman's main body of work consists of landscapes painted with oil on canvas. Their subject matter, more specifically described as ice glaciers, ominous islands, rocky terrain and foliage, is in line with philosophical discourse on the sublime. Sherman is influenced by the writings of Edmund Burke, Immanuel Kant, and Jean-François Lyotard who discussed the sublime and the beauty of the natural world.[3]

Document 2: 

Smibert's art reflects three diverse streams of thought: Japanese minimalism, the early 19th century English School of Painting and Abstract Expressionism.[citation needed] He has been recognised as one of Australia's leading watercolourists.[4] His 1993 collaboration with Japanese couturier Yasuhiro Chiji led to a signature range of high fashion, yūzen ki

Cluster 1:
Focuses on Sublime (philosophy) in the lens of as a concept in art, literature, and aesthetics. It discusses the influence of Edmund Burke, Immanuel Kant, and Jean-François Lyotard on the concept of the sublime, as well as the influence of the Romantic conception of the sublime on art and literature. It also mentions the use of local iron ore as pigment in Australian landscapes, and the influence of aikido on art.

Cluster 2:
Focuses on Sublime (philosophy) as a concept in philosophy and religion. It discusses the concept of the sublime in relation to physical objects and counter-experience, the Gothic Revival architecture and its association with the sublime, the influence of Thomas Gray, William Gilpin, and Edmund Burke on the concept of the sublime, and the influence of the Romantic conception of the sublime on art and literature. It also mentions the influence of Shaftesbury's philosophical work on the concept of the sublime.


In [11]:
combined_text = combined_texts[0] + "\n" + combined_texts[1]
oneside = f"On one side {target_page} " + inference(f"{core}\nThe following is obtained by clustering all occurences of {target_page} on wikipedia\n{combined_text}\n{dual}\nOn one side of {target_page}")
otherside = f"On the other side {target_page} " + inference(f"{core}\n{combined_text}\n{dual}\n{oneside}\nOn the other side of {target_page}")
print(oneside+"\n\n"+otherside)

On one side Sublime (philosophy) is the idea of the sublime and the beauty of the natural world. Edmund Burke in his 1757 A Philosophical Enquiry into the Origin of Our Ideas of the Sublime and Beautiful argued that the soft gentle curves appealed to the male sexual desire, while the sublime horrors appealed to our desires for self-preservation. Picturesque arose as a mediator between these opposed ideals of beauty and the sublime, showing the possibilities that existed between these two rationally idealised states. Julia Kristeva argued that the sublime is really our effort to cover over the breakdowns (and subsequent reassertion of boundaries) associated with the abject; and literature is the privileged space for both the sublime and abject. Costa defined a more general, comprehensive, aesthetic and philosophical, theory of new media, which he named the technological sublime. Lloyd's paintings are influenced by cinema, in particular film noir and science fiction, and the Romantic con

In [90]:
print(oneside+"\n\n"+otherside)

On one side User-generated content is the world of video games. Games such as Minecraft, LittleBigPlanet, and Dreams offer players the ability to create user-generated content to share with others to play. These games also feature development tools for players to create in-game elements such as games and maps. The game's large community also offers a wide variety of user-generated content, such as modifications, servers, skins, texture packs, and custom maps, which add new game mechanics and possibilities.

On the other side User-generated content is the broader context of the internet and digital media. Platforms such as YouTube, science.tv, HuffPost, House of Highlights, Historypin, Web 2.0-style websites, Smart TV devices, and Essence offer a wide variety of user-generated and corporate media videos, content posted directly on the site as well as user-generated content via video blogging, audio, and photo, and interactive features such as clickable links, the option to comment on co

In [73]:
both = oneside + " " + otherside
thisprompt = f"The following 12 texts were used to generate a paragraph:\n{combined_texts}\nThe following paragraph was generated by analyzing 12 texts:\n{both}\nThe following relates each sentence in the paragraphs to the specific 12 texts to which it was derived:\n"
print(thisprompt, "\n")
print(inference(thisprompt))

The following 12 texts were used to generate a paragraph:
['Cluster 1 representative documents:\nDocument 1: \n\nIn Minecraft, players explore a blocky, procedurally generated, three-dimensional world with virtually infinite terrain and may discover and extract raw materials, craft tools and items, and build structures, earthworks, and machines. Depending on their chosen game mode, players can fight hostile mobs, as well as cooperate with or compete against other players in the same world. Game modes include a survival mode (in which players must acquire resources to build in the world and maintain health) and a creative mode (in which players have unlimited resources and access to flight). The game\'s large community also offers a wide variety of user-generated content, such as modifications, servers, skins, texture packs, and custom maps, which add new game mechanics and possibilities.\n\nDocument 2: \n\nGames can be extended with new content and software patches through either expan

Sentence 1: Games such as Minecraft, LittleBigPlanet, and Dreams offer players the ability to create and share content with others, as well as development tools for players to create in-game elements such as games and maps. - Texts 1, 6, 5
Sentence 2: Platforms such as YouTube, science.tv, HuffPost, House of Highlights, Historypin, Web 2.0-style websites, Smart TV devices, and Essence offer a wide variety of user-generated and corporate media videos, content posted directly on the site as well as user-generated content via video blogging, audio, and photo, and interactive features such as clickable links, the option to comment on content, the option to tag images, videos or links, the option of user participation to evaluate or review the pages, or even the option to create new user-generated content. - Texts 7, 8, 9, 10, 11, 12
