In [2]:
import requests
import json
import asyncio
from openai import OpenAI
import pprint
import difflib
from IPython.display import display, HTML
import mwclient  # for downloading example Wikipedia articles
import mwparserfromhell  # for splitting Wikipedia articles into sections
import pandas as pd  # for DataFrames to store article sections and embeddings
import re  # for cutting <ref> links out of Wikipedia articles
import tiktoken  # for counting tokens
import time
from openai import RateLimitError
from scipy import spatial
import ast  # for converting embeddings saved as strings back to arrays

api_key = "..."  # Replace with your actual environment variable name


GPT_MODEL = "gpt-3.5-turbo-1106"


In [3]:
'''
As I've been trying to teach myself more about computer science, I thought it would be interesting to train GPT to have more specialized knowledge
on Computer Science using a 'Search-Ask' methodology, specifically, an embedding-based search. To accomplish this, I set out to Collect, Chunk, Embed
and Store embeddings (using online free content from Wikipedia) to feed the database GPT will draw from to help it answer questions (i.e., Retrieval 
Augmented Generation, or RAG).
'''

#############
#  COLLECT  #
#############

CATEGORY_TITLE = "Category:Computer science"
WIKI_SITE = "en.wikipedia.org"


def titles_from_category(
    category: mwclient.listing.Category, max_depth: int
) -> set[str]:
    """Return a set of page titles in a given Wiki category and its subcategories."""
    titles = set()
    for cm in category.members():
        if type(cm) == mwclient.page.Page:
            # ^type() used instead of isinstance() to catch match w/ no inheritance
            titles.add(cm.name)
        elif isinstance(cm, mwclient.listing.Category) and max_depth > 0:
            deeper_titles = titles_from_category(cm, max_depth=max_depth - 1)
            titles.update(deeper_titles)
    return titles


site = mwclient.Site(WIKI_SITE)
category_page = site.pages[CATEGORY_TITLE]
titles = titles_from_category(category_page, max_depth=1)
# ^note: max_depth=1 means we go one level deep in the category tree
print(f"Found {len(titles)} article titles in {CATEGORY_TITLE}.")


Found 1741 article titles in Category:Computer science.


In [4]:
# define functions to split Wikipedia pages into sections

SECTIONS_TO_IGNORE = [
    "See also",
    "References",
    "External links",
    "Further reading",
    "Footnotes",
    "Bibliography",
    "Sources",
    "Citations",
    "Literature",
    "Footnotes",
    "Notes and references",
    "Photo gallery",
    "Works cited",
    "Photos",
    "Gallery",
    "Notes",
    "References and sources",
    "References and notes",
]


def all_subsections_from_section(
    section: mwparserfromhell.wikicode.Wikicode,
    parent_titles: list[str],
    sections_to_ignore: set[str],
) -> list[tuple[list[str], str]]:
    """
    From a Wikipedia section, return a flattened list of all nested subsections.
    Each subsection is a tuple, where:
        - the first element is a list of parent subtitles, starting with the page title
        - the second element is the text of the subsection (but not any children)
    """
    headings = [str(h) for h in section.filter_headings()]
    title = headings[0]
    if title.strip("=" + " ") in sections_to_ignore:
        # ^wiki headings are wrapped like "== Heading =="
        return []
    titles = parent_titles + [title]
    full_text = str(section)
    section_text = full_text.split(title)[1]
    
    ''' 
    This is the base case of our recursive function.
    '''
    if len(headings) == 1:
        return [(titles, section_text)]
    else:
        first_subtitle = headings[1]
        section_text = section_text.split(first_subtitle)[0]
        results = [(titles, section_text)]
        for subsection in section.get_sections(levels=[len(titles) + 1]):
            results.extend(all_subsections_from_section(subsection, titles, sections_to_ignore))
        return results

    '''    
    And this is the recursive case of our function because its calling itself 'all_subsections_from_section' to complete its task. Notice, we use 
    results.extend() in this case vs. results.append(). This is because we want to unpack and add the iterable to our running list, rather than simply
    add the entire iterable. E.g., if we want to add [(3,c), (4,d)] to our initial list of two tuples of [(1,a), (2,b)] -> .append() would create 
    [(1,a), (2,b), [(3,c), (4,d)]], which is technically now three elements: two tuples, and one list containing two tuples. .expand() would instead
    create one list of four tuples[(1,a), (2,b), (3,c), (4,d)]. 
    '''


def all_subsections_from_title(
    title: str,
    sections_to_ignore: set[str] = SECTIONS_TO_IGNORE,
    site_name: str = WIKI_SITE,
) -> list[tuple[list[str], str]]:
    """From a Wikipedia page title, return a flattened list of all nested subsections.
    Each subsection is a tuple, where:
        - the first element is a list of parent subtitles, starting with the page title
        - the second element is the text of the subsection (but not any children)
    """
    site = mwclient.Site(site_name)
    page = site.pages[title]
    text = page.text()
    parsed_text = mwparserfromhell.parse(text)
    headings = [str(h) for h in parsed_text.filter_headings()]
    if headings:
        summary_text = str(parsed_text).split(headings[0])[0]
    else:
        summary_text = str(parsed_text)
    results = [([title], summary_text)]
    for subsection in parsed_text.get_sections(levels=[2]):
        results.extend(all_subsections_from_section(subsection, [title], sections_to_ignore))
    return results


In [8]:
# split pages into sections
# may take ~1 minute per 100 articles
wikipedia_sections = []
for title in titles:
    wikipedia_sections.extend(all_subsections_from_title(title))
print(f"Found {len(wikipedia_sections)} sections in {len(titles)} pages.")


Found 8708 sections in 1743 pages.


In [14]:
# clean text
def clean_section(section: tuple[list[str], str]) -> tuple[list[str], str]:
    """
    Return a cleaned up section with:
        - <ref>xyz</ref> patterns removed
        - leading/trailing whitespace removed
    """
    titles, text = section
    text = re.sub(r"<ref.*?</ref>", "", text)
    text = text.strip()
    return (titles, text)


wikipedia_sections = [clean_section(ws) for ws in wikipedia_sections]

# filter out short/blank sections
def keep_section(section: tuple[list[str], str]) -> bool:
    """Return True if the section should be kept, False otherwise."""
    titles, text = section
    if len(text) < 16:
        return False
    else:
        return True


original_num_sections = len(wikipedia_sections)
wikipedia_sections = [ws for ws in wikipedia_sections if keep_section(ws)]
print(f"Filtered out {original_num_sections-len(wikipedia_sections)} sections, leaving {len(wikipedia_sections)} sections.")


Filtered out 355 sections, leaving 8353 sections.


In [15]:
for ws in wikipedia_sections[:5]:
    print(ws[0])
    display(ws[1][:77] + "...")
    print()

['Eyeball network']


'{{Multiple issues|{{more citations needed|date=February 2017}}{{notability|1=...'


['Digital Humanities conference']


'{{Infobox Academic Conference\n | logo =\n | history = 1989-\n | discipline = [[...'


['Digital Humanities conference', '==History==']


'The first joint conference was held in 1989, at the [[University of Toronto]]...'


['Digital Humanities conference', '== Conferences ==']


'{| class="wikitable"\n|-\n!Year\n!Location\n!Links\n!Observations\n|-\n|1990\n|[[Univ...'


['Structural information theory']


"{{COI|date=December 2015}}\n'''Structural information theory''' ('''SIT''') is..."




In [27]:
GPT_MODEL = "gpt-3.5-turbo"  # only matters insofar as it selects which tokenizer to use


def num_tokens(text: str, model: str = GPT_MODEL) -> int:
    """Return the number of tokens in a string."""
    encoding = tiktoken.encoding_for_model(model)
    return len(encoding.encode(text))


def halved_by_delimiter(string: str, delimiter: str = "\n") -> list[str, str]:
    """Split a string in two, on a delimiter, trying to balance tokens on each side."""
    chunks = string.split(delimiter)
    if len(chunks) == 1:
        return [string, ""]  # no delimiter found
    elif len(chunks) == 2:
        return chunks  # no need to search for halfway point
    else:
        total_tokens = num_tokens(string)
        halfway = total_tokens // 2
        best_diff = halfway
        for i, chunk in enumerate(chunks):
            left = delimiter.join(chunks[: i + 1])
            left_tokens = num_tokens(left)
            diff = abs(halfway - left_tokens)
            if diff >= best_diff:
                break
            else:
                best_diff = diff
        left = delimiter.join(chunks[:i])
        right = delimiter.join(chunks[i:])
        return [left, right]


def truncated_string(
    string: str,
    model: str,
    max_tokens: int,
    print_warning: bool = True,
) -> str:
    """Truncate a string to a maximum number of tokens."""
    encoding = tiktoken.encoding_for_model(model)
    encoded_string = encoding.encode(string)
    truncated_string = encoding.decode(encoded_string[:max_tokens])
    if print_warning and len(encoded_string) > max_tokens:
        print(f"Warning: Truncated string from {len(encoded_string)} tokens to {max_tokens} tokens.")
    return truncated_string


def split_strings_from_subsection(
    subsection: tuple[list[str], str],
    max_tokens: int = 1000,
    model: str = GPT_MODEL,
    max_recursion: int = 5,
) -> list[str]:
    """
    Split a subsection into a list of subsections, each with no more than max_tokens.
    Each subsection is a tuple of parent titles [H1, H2, ...] and text (str).
    """
    titles, text = subsection
    string = "\n\n".join(titles + [text])
    num_tokens_in_string = num_tokens(string)
    # if length is fine, return string
    if num_tokens_in_string <= max_tokens:
        return [string]
    # if recursion hasn't found a split after X iterations, just truncate
    elif max_recursion == 0:
        return [truncated_string(string, model=model, max_tokens=max_tokens)]
    # otherwise, split in half and recurse
    else:
        titles, text = subsection
        for delimiter in ["\n\n", "\n", ". "]:
            left, right = halved_by_delimiter(text, delimiter=delimiter)
            if left == "" or right == "":
                # if either half is empty, retry with a more fine-grained delimiter
                continue
            else:
                # recurse on each half
                results = []
                for half in [left, right]:
                    half_subsection = (titles, half)
                    half_strings = split_strings_from_subsection(
                        half_subsection,
                        max_tokens=max_tokens,
                        model=model,
                        max_recursion=max_recursion - 1,
                    )
                    results.extend(half_strings)
                return results
    # otherwise no split was found, so just truncate (should be very rare)
    return [truncated_string(string, model=model, max_tokens=max_tokens)]


In [28]:
###########
#  CHUNK  #
###########

MAX_TOKENS = 1600
wikipedia_strings = []
for section in wikipedia_sections:
    wikipedia_strings.extend(split_strings_from_subsection(section, max_tokens=MAX_TOKENS))

print(f"{len(wikipedia_sections)} Wikipedia sections split into {len(wikipedia_strings)} strings.")


8353 Wikipedia sections split into 8538 strings.


In [21]:
# print example data
print(wikipedia_strings[0])


Eyeball network

{{Multiple issues|{{more citations needed|date=February 2017}}{{notability|1=Neologisms|date=February 2017}}
}}

'''Eyeball network''' is a [[slang]] term used by [[Computer network|network engineers]] and architects that refers to an access network whose primary users use the [[Computer network|network]] to “look at things” (browse the Internet, read email, etc.) and consume content, as opposed to a network that may be used primarily to generate its own data, or “content networks/providers”.

The term “eyeball network” is often overheard in conversations and seen in articles that discuss peering relationships between other networks, as well as [[net neutrality]] issues.

An example of an eyeball network would be any given [[Internet service provider|ISP]] that provides internet connectivity to end-users – The ISP may [[Peering|peer]] with [[Google]] (which is a content provider) where the end users consume content serviced/provided by Google, in this case the ISP is j

In [40]:
###########
#  EMBED  #
###########


# calculate embeddings
EMBEDDING_MODEL = "text-embedding-ada-002"
BATCH_SIZE = 1000
MAX_BATCH_START = 4999 * BATCH_SIZE  # Stopping condition
client = OpenAI(api_key=api_key)

embeddings = []
for batch_start in range(0, len(wikipedia_strings), BATCH_SIZE):
    if batch_start >= MAX_BATCH_START:
        print(f"Reached batch {batch_start}. Stopping.")
        break  # Break out of the loop when reaching the threshold

    batch_end = batch_start + BATCH_SIZE
    batch = wikipedia_strings[batch_start:batch_end]
    print(f"Batch {batch_start} to {batch_end-1}")
    
    try:
        response = client.embeddings.create(model=EMBEDDING_MODEL, input=batch)
        if response and hasattr(response, 'data'):
            batch_embeddings = [item.embedding for item in response.data]
            embeddings.extend(batch_embeddings)
    except RateLimitError as e:
        print("Rate limit reached. Stopping.")
        break  # Stop the script if a rate limit error occurs

df = pd.DataFrame({"text": wikipedia_strings[:len(embeddings)], "embedding": embeddings})


Batch 0 to 999
Batch 1000 to 1999
Batch 2000 to 2999
Batch 3000 to 3999
Batch 4000 to 4999
Rate limit reached. Stopping.


In [41]:
df.head(10)

Unnamed: 0,text,embedding
0,Eyeball network\n\n{{Multiple issues|{{more ci...,"[-0.00909820944070816, 0.005384792108088732, 0..."
1,Digital Humanities conference\n\n{{Infobox Aca...,"[-0.023694409057497978, -0.004818083252757788,..."
2,Digital Humanities conference\n\n==History==\n...,"[-0.02261469140648842, -0.009407395496964455, ..."
3,Digital Humanities conference\n\n== Conference...,"[-0.01651986874639988, -0.00649561220780015, 0..."
4,Digital Humanities conference\n\n== Conference...,"[-0.014532341621816158, 0.0007241961429826915,..."
5,Structural information theory\n\n{{COI|date=De...,"[-0.016226185485720634, 0.025272049009799957, ..."
6,Structural information theory\n\n== The simpli...,"[-0.016453608870506287, 0.04184497892856598, 0..."
7,Structural information theory\n\n== Simplicity...,"[0.004965457133948803, 0.01153535209596157, 0...."
8,Structural information theory\n\n== Modeling p...,"[-0.01424167025834322, 0.01915259100496769, 0...."
9,Structural information theory\n\n== Visual reg...,"[-0.009177683852612972, 0.013527488335967064, ..."


In [54]:
'''
This line of code will always produce 1536 regardless of the length of text, and its how GPT assesses and assigns semantic meaning to a piece of text. 
The 1536 are the different dimensions GPT uses to assess style, tone, meaning -> semantic purpose. So in effect, what we've done is broken down many,
many Wikipedia pages related to Comp Sci and can now assign this text to a chat completion model to see how reliably it uses the information to 
answer queries.
'''

print(len(df.loc[3, 'embedding']))

1536


In [42]:
###########
#  STORE  #
###########


# save document chunks and embeddings

SAVE_PATH = "C:/Users/jmfra/Desktop/Coding/GPT/comp_sci_embeds.csv"

df.to_csv(SAVE_PATH, index=False)

In [60]:
'''
This is now the portion that determines the distance between the query and the embedded text dimensions to approxmiate the 'relevance' of which text 
the LLM should draw from. Here, I used Kranzberg's laws of technology, which, interestingly enough, were not contained within the Wikipedia articles
I scraped (perhaps because I stopped the code at 5000 chunks since I kept getting a rate limit error) so in this example, we can see which 5 articles 
the model believes to be the most relevant to Kranzberg's laws of technology; either way, its interesting to see how GPT handles such a query 
when it has no 'knowledge' of the event we're asking it to pull. 
'''


# search function
def strings_ranked_by_relatedness(
    query: str,
    df: pd.DataFrame,
    relatedness_fn=lambda x, y: 1 - spatial.distance.cosine(x, y),
    top_n: int = 100
) -> tuple[list[str], list[float]]:
    """Returns a list of strings and relatednesses, sorted from most related to least."""
    query_embedding_response = client.embeddings.create(
        model=EMBEDDING_MODEL,
        input=query,
    )
    query_embedding = query_embedding_response.data[0].embedding
    strings_and_relatednesses = [
        (row["text"], relatedness_fn(query_embedding, row["embedding"]))
        for i, row in df.iterrows()
    ]
    strings_and_relatednesses.sort(key=lambda x: x[1], reverse=True)
    strings, relatednesses = zip(*strings_and_relatednesses)
    return strings[:top_n], relatednesses[:top_n]


In [64]:
# examples
strings, relatednesses = strings_ranked_by_relatedness("Kranzberg's laws of technology", df, top_n=5)
for string, relatedness in zip(strings, relatednesses):
    print(f"{relatedness=:.3f}")
    display(string)

relatedness=0.793


'Karl Küpfmüller\n\n==Studies in communication theory==\n\nAbout 1928, he did the same analysis that [[Harry Nyquist]] did, to show that not more than 2B independent pulses per second could be put through a channel of bandwidth B.  He did this by quantifying the time-bandwidth product \'\'k\'\' of various communication signal types, and showing that \'\'k\'\' could never be less than 1/2.  From his 1931 paper (rough translation from Swedish):<ref>Karl Küpfmüller, "Utjämningsförlopp inom Telegraf- och Telefontekniken", ("Transients in telegraph and telephone engineering"), \'\'[[Teknisk Tidskrift]]\'\', no. 9 pp.153-160 and 10 pp.178-182, 1931. (Swedish) [http://runeberg.org/tektid/1931e/0157.html] [http://runeberg.org/tektid/1931e/0182.html]\n</ref>\n\n:"The time law allows comparison of the capacity of each transfer method with various known methods. On the other hand it indicates the limits that the development of technology must stay within. One interesting question for example is w

relatedness=0.793


'Bandwidth (computing)\n\n==Edholm\'s law==\n\n{{Main|Edholm\'s law}}\n\n[[Edholm\'s law]], proposed by and named after Phil Edholm in 2004, holds that the bandwidth of [[telecommunication network]]s double every 18 months, which has proven to be true since the 1970s. The trend is evident in the cases of [[Internet]],<ref name="Cherry"/> [[cellular network|cellular]] (mobile), [[Wireless LAN|wireless]] [[Local area network|LAN]] and [[Personal area network|wireless personal area networks]].<ref name=":1" />\n\nThe [[MOSFET]] (metal–oxide–semiconductor field-effect transistor) is the most important factor enabling the rapid increase in bandwidth. The MOSFET (MOS transistor) was invented by [[Mohamed M. Atalla]] and [[Dawon Kahng]] at [[Bell Labs]] in 1959, and went on to become the basic building block of modern [[telecommunications]] technology. Continuous [[MOSFET scaling]], along with various advances in MOS technology, has enabled both [[Moore\'s law]] ([[transistor count]]s in [[in

relatedness=0.791


'Tyranny of numbers\n\n==History==\n\n[[File:Computer Museum of America (32).jpg|thumb|right|The [[Cray-1]] contained 50 miles of wiring.]]\nThe first known recorded use of the term in this context was made by the Vice President of [[Bell Labs]] in an article celebrating the 10th anniversary of the invention of the [[transistor]], for the "Proceedings of the IRE" (Institute of Radio Engineers), June 1958 [https://books.google.com/books?id=3BLGWnmQJ9IC&pg=PA21]. Referring to the problems many designers were having, he wrote:\n\n{{Quotation|For some time now, electronic man has known how \'in principle\' to extend greatly his visual, tactile, and mental abilities through the digital transmission and processing of all kinds of information. However, all these functions suffer from what has been called \'the tyranny of numbers.\' Such systems, because of their complex digital nature, require hundreds, thousands, and sometimes tens of thousands of electron devices.|Jack Morton|[http://everyt

relatedness=0.790


'Large-scale Complex IT Systems\n\n== Key publications ==\n\n=== Socio-technical systems engineering ===\n\n*{{cite journal | last1 = Baxter | first1 = G. | last2 = Sommerville | first2 = I. | year = 2010 | title = Socio-Technical Systems: From Design Methods to Systems Engineering | doi = 10.1016/j.intcom.2010.07.003 | journal = Interacting with Computers | volume =  23| pages =  4–17| doi-access = free }}\n*I. Sommerville (editor). [http://archive.cs.st-andrews.ac.uk/STSE-Handbook/ The Socio-technical Systems Engineering Handbook.] (2011). University of St Andrews.'

relatedness=0.789


"Large-scale Complex IT Systems\n\n== Key publications ==\n\n=== Predictable software systems ===\n\n*R. Calinescu, L. Grunske, M. Kwiatkowska, R. Mirandola, G. Tamburrelli (2011). [http://www.computer.org/portal/web/csdl/doi/10.1109/TSE.2010.92 Dynamic QoS Management and Optimisation in Service-Based Systems.] In: IEEE Transactions on Software Engineering.\n*L. Feng, M. Kwiatkowska and D. Parker. (2011) [http://qav.cs.ox.ac.uk/papers/fase11.pdf Automated Learning of Probabilistic Assumptions for Compositional Reasoning.] Proc. 14th International Conference on Fundamental Approaches to Software Engineering (FASE'11), volume 6603 of LNCS, pages 2–17, Springer.\n*M. Kwiatkowska. (2007) [http://qav.cs.ox.ac.uk/papers/esec-fse07.pdf Quantitative Verification: Models, Techniques and Tools.] Proc. 6th joint meeting of the European Software Engineering Conference and the ACM SIGSOFT Symposium on the Foundations of Software Engineering (ESEC/FSE), pages 449-458, ACM Press.\n*M. Kwiatkowska, G.

In [76]:
'''
In this final bit, we 1) create a token budget for the model to process, 2) use that token budget to ensure we're only modestly pulling articles and 
producing text; 3) asking ChatGPT to take our embedded text and formulate a response to whichever query we pose it. 
'''


def num_tokens(text: str, model: str = GPT_MODEL) -> int:
    """Return the number of tokens in a string."""
    encoding = tiktoken.encoding_for_model(model)
    return len(encoding.encode(text))


def query_message(
    query: str,
    df: pd.DataFrame,
    model: str,
    token_budget: int
) -> str:
    """Return a message for GPT, with relevant source texts pulled from a dataframe."""
    strings, relatednesses = strings_ranked_by_relatedness(query, df)
    introduction = 'Use the below articles on Computer Science to answer the subsequent question. If the answer cannot be found in the articles, write "I could not find an answer."'
    question = f"\n\nQuestion: {query}"
    message = introduction
    for string in strings:
        next_article = f'\n\nWikipedia article section:\n"""\n{string}\n"""'
        if (
            num_tokens(message + next_article + question, model=model)
            > token_budget
        ):
            break
        else:
            message += next_article
    return message + question


def ask(
    query: str,
    df: pd.DataFrame = df,
    model: str = GPT_MODEL,
    token_budget: int = 4096 - 500,
    print_message: bool = False,
) -> str:
    """Answers a query using GPT and a dataframe of relevant texts and embeddings."""
    message = query_message(query, df, model=model, token_budget=token_budget)
    if print_message:
        print(message)
    messages = [
        {"role": "system", "content": "You answer questions about Computer Science."},
        {"role": "user", "content": message},
    ]
    response = client.chat.completions.create(
        model=model,
        messages=messages,
        temperature=0.7
    )
    response_message = response.choices[0].message.content
    return response_message



In [72]:
ask('What is computer science?')

'Computer science is the study of the theoretical foundations of information and computation and their implementation and application in computer systems. It encompasses a wide range of topics, from theoretical studies of algorithms and the limits of computation to the practical issues of implementing computing systems in hardware and software. The field includes areas such as theoretical computer science, applied computer science, software engineering, and the study of fundamental computer algorithms.'

In [73]:
ask('Why is it important to study computer science?')

'Studying computer science is important for several reasons. Firstly, it prepares students for careers in the technology industry, which is experiencing rapid growth and high demand for skilled professionals. Additionally, computer science education promotes computational thinking skills, which are valuable in various fields such as business, healthcare, and education. Furthermore, computer science education fosters problem-solving abilities and critical thinking, making students more effective in addressing real-world challenges. Lastly, the study of computer science is essential in understanding the nature of computation and automation, which is increasingly integrated into all aspects of society.'

In [75]:
ask('Should students still learn about computer science if machines abstract away coding?')

'Yes, students should still learn about computer science even if machines abstract away coding. Computer science education encompasses a wide range of topics, from basic programming skills to advanced algorithm design and data analysis. It is essential for preparing students for careers in the technology industry and other fields that require computational skills. Additionally, computer science education promotes computational thinking skills, which are valuable in many fields, including business, healthcare, and education. As technology becomes increasingly integrated into all aspects of society, the demand for skilled computer scientists is growing, making computer science education crucial for students in the 21st century workforce.'

In [79]:
ask('What is the most versatile coding language?', print_message =True)

Use the below articles on Computer Science to answer the subsequent question. If the answer cannot be found in the articles, write "I could not find an answer."

Wikipedia article section:
"""
Information and computer science

==Areas of information and computer science==

===Programming theory===

The process of taking a given algorithm and encoding it into a language that can be understood and executed by a computer. There are many different types of programming languages and various different types of computers, however, they all have the same goal: to turn algorithms into machine code.

Popular programming languages used within the academic study of CIS include, but are not limited to: Java, Python, C#, C++, Perl, Ruby, Pascal, Swift, Visual Basic.
"""

Wikipedia article section:
"""
Computer science

==Programming paradigms==

{{main|Programming paradigm}}

Programming languages can be used to accomplish different tasks in different ways. Common programming paradigms include:
* [[

'The most versatile coding language would be Lua. Lua is commonly described as a "multi-paradigm" language, providing a small set of general features that can be extended to fit different problem types. It supports a range of programming paradigms and features, such as first-class functions, metaprogramming, dynamic typing, and a small number of atomic data structures. Additionally, Lua can be used as a scripting language, embedded in other applications, and compiled into other languages. Therefore, Lua\'s versatility makes it well-suited for various types of programming tasks and applications.'