# Extract the links, the titles and the paragraphs of the articles from the main article and sort them by similarity to the main article

In [28]:
# Import the necessary libraries
from bs4 import BeautifulSoup
import requests
import numpy as np
from openai import OpenAI
import openai
import os
from nltk.corpus import stopwords
import nltk
from sklearn.feature_extraction.text import CountVectorizer
import json

## Web_scrapping

In [3]:
# Get the main article
requete = requests.get('https://en.wikipedia.org/wiki/machine_learning')
page = BeautifulSoup(requete.text, 'html.parser')
wiki_main = {}
wiki_main['link'] = 'https://en.wikipedia.org/wiki/machine_learning'
wiki_main['title'] = page.find('h1').text
wiki_main['paragraph'] = page.find('p').text
wiki_main

{'link': 'https://en.wikipedia.org/wiki/machine_learning',
 'title': 'Machine learning',
 'paragraph': 'Machine learning (ML) is a field of study in artificial intelligence concerned with the development and study of statistical algorithms that can effectively generalize and thus perform tasks without explicit instructions.[1] Recently, generative artificial neural networks have been able to surpass many previous approaches in performance.[2][3] Machine learning approaches have been applied to large language models, computer vision, speech recognition, email filtering, agriculture and medicine, where it is too costly to develop algorithms to perform the needed tasks.[4][5]\n'}

In [4]:
# Extract the links of the articles
links = page.find_all('a')
http_links = [f"{link.get('href')}" for link in links if link.get('href') and link.get('href').startswith('/wiki')]  
wiki_list = []
wiki_dict_sans_doublon = []

In [5]:
# Create a list of dictionaries containing the links, titles, and paragraphs of the articles
for link in http_links:
    wiki_dict = {}
    requete = requests.get("https://en.wikipedia.org" + link)
    page = BeautifulSoup(requete.text, 'html.parser')
    h_1 = page.find('h1')
    p_1 = page.find('p')
    if p_1 is not None and p_1.text not in wiki_dict_sans_doublon: # We filter the duplicates
        wiki_dict_sans_doublon.append(p_1.text)
        wiki_dict["link"] = link
        wiki_dict["title"] = h_1.text
        wiki_dict["paragraph"] = p_1.text
        wiki_list.append(wiki_dict)
print(wiki_list[:5])

[{'link': '/wiki/Main_Page', 'title': 'Main Page', 'paragraph': 'December 4: Navy Day in India\n'}, {'link': '/wiki/Wikipedia:Contents', 'title': 'Wikipedia:Contents', 'paragraph': '\n'}, {'link': '/wiki/Portal:Current_events', 'title': 'Portal:Current events', 'paragraph': 'Edit instructions\n'}, {'link': '/wiki/Help:Contents', 'title': 'Help:Contents', 'paragraph': '\n\n'}, {'link': '/wiki/Special:RecentChanges', 'title': 'Recent changes', 'paragraph': 'This is a list of recent changes to Wikipedia.\n'}]


## Words Embedding

In [6]:
# Create an OpenAI client
client = OpenAI()
openai.api_key = os.getenv("OPENAI_API_KEY")

In [7]:
# Create an embedding of the paragraph and title for the wiki_main article
response = client.embeddings.create(input=wiki_main['title'] + wiki_main['paragraph'],
                                    model="text-embedding-ada-002")
                                   
wiki_main["embeddings"] = response.data[0].embedding

In [8]:
def jaccard_similarity(A, B):
    #Find intersection of two sets
    nominator = A.intersection(B)

    #Find union of two sets
    denominator = A.union(B)

    #Take the ratio of sizes
    similarity = len(nominator)/len(denominator)
    
    return similarity

In [9]:
# Create an embedding of the paragraph and title for each article in wiki_list
for wiki in wiki_list:
    response = client.embeddings.create(input=wiki['title'] + wiki['paragraph'],
                                        model="text-embedding-ada-002")
            
    wiki["embeddings"] = response.data[0].embedding
    # Calculate the similarity between the main article and each article in wiki_list using the dot product of their embeddings
    wiki['similarity_embedding_dot_product'] = np.dot(wiki_main['embeddings'], wiki['embeddings'])
    # Calculate the similarity between the main article and each article in wiki_list using the cosine similarity of their embeddings
    wiki['similarity_embedding_cosine_similarity'] = np.dot(wiki_main['embeddings'], wiki['embeddings']) / (np.linalg.norm(wiki_main['embeddings']) * np.linalg.norm(wiki['embeddings']))
    # Calculate the similarity between the main article and each article in wiki_list using the jaccard similarity of their embeddings
    wiki['similarity_embedding_jaccard_similarity'] = jaccard_similarity(set(wiki_main['embeddings']), set(wiki['embeddings']))

In [10]:
# Sort the wiki_list by similarity using the dot product of their embeddings
wiki_list.sort(key=lambda x: x['similarity_embedding_dot_product'], reverse=True)

# Print the top 3 articles by similarity using the dot product of their embeddings
for wiki in wiki_list[:3]:
    print(wiki['similarity_embedding_dot_product'])
    print("https://en.wikipedia.org" + wiki['link'])
    print(wiki['title'])
    print(wiki['paragraph'])

0.9999546493132483
https://en.wikipedia.org/wiki/Machine_learning
Machine learning
Machine learning (ML) is a field of study in artificial intelligence concerned with the development and study of statistical algorithms that can effectively generalize and thus perform tasks without explicit instructions.[1] Recently, generative artificial neural networks have been able to surpass many previous approaches in performance.[2][3] Machine learning approaches have been applied to large language models, computer vision, speech recognition, email filtering, agriculture and medicine, where it is too costly to develop algorithms to perform the needed tasks.[4][5]

0.9274254193423537
https://en.wikipedia.org/wiki/Category:Machine_learning
Category:Machine learning
Machine learning is a branch of statistics and computer science which studies algorithms and architectures that learn from observed facts.

0.9115778859782548
https://en.wikipedia.org/wiki/Automated_machine_learning
Automated machine lea

In [11]:
# Sort the wiki_list by similarity using the dot product of their embeddings
wiki_list.sort(key=lambda x: x['similarity_embedding_dot_product'], reverse=True)

# Print the bottom 3 articles by similarity using the dot product of their embeddings
for wiki in wiki_list[-3:]:
    print(wiki['similarity_embedding_dot_product'])
    print("https://en.wikipedia.org" + wiki['link'])
    print(wiki['title'])
    print(wiki['paragraph'])

0.6772509779919647
https://en.wikipedia.org/wiki/Main_Page
Main Page
December 4: Navy Day in India

0.6764058080086992
https://en.wikipedia.org/wiki/File:Regressions_sine_demo.svg
File:Regressions sine demo.svg
Original file ‎(SVG file, nominally 900 × 450 pixels, file size: 582 KB)

0.6596464515320439
https://en.wikipedia.org/wiki/File:Symbol_portal_class.svg
File:Symbol portal class.svg
Original file ‎(SVG file, nominally 180 × 185 pixels, file size: 12 KB)



In [12]:
# Sort the wiki_list by similarity using the cosine similarity of their embeddings
wiki_list.sort(key=lambda x: x['similarity_embedding_cosine_similarity'], reverse=True)

# Print the top 3 articles by similarity using the cosine similarity of their embeddings
for wiki in wiki_list[:3]:
    print(wiki['similarity_embedding_cosine_similarity'])
    print("https://en.wikipedia.org" + wiki['link'])
    print(wiki['title'])
    print(wiki['paragraph'])

0.999954589806071
https://en.wikipedia.org/wiki/Machine_learning
Machine learning
Machine learning (ML) is a field of study in artificial intelligence concerned with the development and study of statistical algorithms that can effectively generalize and thus perform tasks without explicit instructions.[1] Recently, generative artificial neural networks have been able to surpass many previous approaches in performance.[2][3] Machine learning approaches have been applied to large language models, computer vision, speech recognition, email filtering, agriculture and medicine, where it is too costly to develop algorithms to perform the needed tasks.[4][5]

0.9274253257534554
https://en.wikipedia.org/wiki/Category:Machine_learning
Category:Machine learning
Machine learning is a branch of statistics and computer science which studies algorithms and architectures that learn from observed facts.

0.9115778982802036
https://en.wikipedia.org/wiki/Automated_machine_learning
Automated machine lear

In [13]:
# Sort the wiki_list by similarity using the cosine similarity of their embeddings
wiki_list.sort(key=lambda x: x['similarity_embedding_cosine_similarity'], reverse=True)

# Print the bottom 3 articles by similarity using the cosine similarity of their embeddings
for wiki in wiki_list[-3:]:
    print(wiki['similarity_embedding_cosine_similarity'])
    print("https://en.wikipedia.org" + wiki['link'])
    print(wiki['title'])
    print(wiki['paragraph'])

0.6772509442047826
https://en.wikipedia.org/wiki/Main_Page
Main Page
December 4: Navy Day in India

0.6764058327781174
https://en.wikipedia.org/wiki/File:Regressions_sine_demo.svg
File:Regressions sine demo.svg
Original file ‎(SVG file, nominally 900 × 450 pixels, file size: 582 KB)

0.6596464297870863
https://en.wikipedia.org/wiki/File:Symbol_portal_class.svg
File:Symbol portal class.svg
Original file ‎(SVG file, nominally 180 × 185 pixels, file size: 12 KB)



In [14]:
# Sort the wiki_list by similarity using the jaccard similarity of their embeddings
wiki_list.sort(key=lambda x: x['similarity_embedding_jaccard_similarity'], reverse=True)

# Print the top 3 articles by similarity using the jaccard similarity of their embeddings
for wiki in wiki_list[:3]:
    print(wiki['similarity_embedding_jaccard_similarity'])
    print("https://en.wikipedia.org" + wiki['link'])
    print(wiki['title'])
    print(wiki['paragraph'])

0.0007079646017699115
https://en.wikipedia.org/wiki/Cheminformatics
Cheminformatics
Cheminformatics (also known as chemoinformatics) refers to the use of physical chemistry theory with computer and information science techniques—so called "in silico" techniques—in application to a range of descriptive and prescriptive problems in the field of chemistry, including in its applications to biology and related molecular fields. Such in silico techniques are used, for example, by pharmaceutical companies and in academic settings to aid and inform the process of drug discovery, for instance in the design of well-defined combinatorial libraries of synthetic compounds, or to assist in structure-based drug design. The methods can also be used in chemical and allied industries, and such fields as environmental science and pharmacology, where chemical processes are involved or studied.[1]

0.0007057163020465773
https://en.wikipedia.org/wiki/Ontology_learning
Ontology learning
Ontology learning (on

In [15]:
# Sort the wiki_list by similarity using the jaccard similarity of their embeddings
wiki_list.sort(key=lambda x: x['similarity_embedding_jaccard_similarity'], reverse=True)

# Print the bottom 3 articles by similarity using the jaccard similarity of their embeddings
for wiki in wiki_list[-3:]:
    print(wiki['similarity_embedding_jaccard_similarity'])
    print("https://en.wikipedia.org" + wiki['link'])
    print(wiki['title'])
    print(wiki['paragraph'])

0.0
https://en.wikipedia.org/wiki/Main_Page
Main Page
December 4: Navy Day in India

0.0
https://en.wikipedia.org/wiki/File:Regressions_sine_demo.svg
File:Regressions sine demo.svg
Original file ‎(SVG file, nominally 900 × 450 pixels, file size: 582 KB)

0.0
https://en.wikipedia.org/wiki/File:Symbol_portal_class.svg
File:Symbol portal class.svg
Original file ‎(SVG file, nominally 180 × 185 pixels, file size: 12 KB)



## Text Vectorization

In [16]:
# Load the stopwords
nltk.download('stopwords')
stop_en = stopwords.words('english')
stop_words_ext = list(stop_en)
vectorizer = CountVectorizer(stop_words=stop_words_ext, token_pattern=r"(?u)\b[a-zA-Z][a-zA-Z_-]+\b")

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\User\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [17]:
# Create the corpus by concatenating the title and the paragraph of each article
corpus = []
for wiki in wiki_list:
    corpus.append(wiki['title'])
    corpus.append(wiki['paragraph'])
print(corpus[:5])

['Cheminformatics', 'Cheminformatics (also known as chemoinformatics) refers to the use of physical chemistry theory with computer and information science techniques—so called "in silico" techniques—in application to a range of descriptive and prescriptive problems in the field of chemistry, including in its applications to biology and related molecular fields. Such in silico techniques are used, for example, by pharmaceutical companies and in academic settings to aid and inform the process of drug discovery, for instance in the design of well-defined combinatorial libraries of synthetic compounds, or to assist in structure-based drug design. The methods can also be used in chemical and allied industries, and such fields as environmental science and pharmacology, where chemical processes are involved or studied.[1]\n', 'Ontology learning', "Ontology learning (ontology extraction, ontology generation, or ontology acquisition) is the automatic or semi-automatic creation of ontologies, in

In [18]:
# Fit the vectorizer to the corpus
vectorizer.fit_transform(corpus)

<1148x5285 sparse matrix of type '<class 'numpy.int64'>'
	with 19729 stored elements in Compressed Sparse Row format>

In [19]:
# Create the vector for the main article
wiki_main['vector'] = vectorizer.transform([wiki_main['title'] + wiki_main['paragraph']]).toarray()[0]

In [20]:
# Calculate the similarity between the main article and each article in wiki_list using the dot product of their vectors
for wiki in wiki_list:
    wiki['vector'] = vectorizer.transform([wiki['title'] + wiki['paragraph']]).toarray()[0]
    # Calculate the similarity between the main article and each article in wiki_list using the dot product of their vectors
    wiki['similarity_vector_dot_product'] = np.dot(wiki_main['vector'], wiki['vector'])
    # Calculate the similarity between the main article and each article in wiki_list using the cosine similarity of their vectors
    wiki['similarity_vector_cosine_similarity'] = np.dot(wiki_main['vector'], wiki['vector']) / (np.linalg.norm(wiki_main['vector']) * np.linalg.norm(wiki['vector']))
    # Calculate the similarity between the main article and each article in wiki_list using the jaccard similarity of their vectors
    wiki['similarity_vector_jaccard_similarity'] = jaccard_similarity(set(wiki_main['vector']), set(wiki['vector']))

In [21]:
# Sort the wiki_list by similarity to the main article using the dot product of their vectors
wiki_list.sort(key=lambda x: x['similarity_vector_dot_product'], reverse=True)

# Print the top 3 articles by similarity to the main article using the dot product of their vectors
for wiki in wiki_list[:3]:
    print(wiki['similarity_vector_dot_product'])
    print("https://en.wikipedia.org" + wiki['link'])
    print(wiki['title'])
    print(wiki['paragraph'])

68
https://en.wikipedia.org/wiki/Machine_learning
Machine learning
Machine learning (ML) is a field of study in artificial intelligence concerned with the development and study of statistical algorithms that can effectively generalize and thus perform tasks without explicit instructions.[1] Recently, generative artificial neural networks have been able to surpass many previous approaches in performance.[2][3] Machine learning approaches have been applied to large language models, computer vision, speech recognition, email filtering, agriculture and medicine, where it is too costly to develop algorithms to perform the needed tasks.[4][5]

31
https://en.wikipedia.org/wiki/Online_machine_learning
Online machine learning
In computer science, online machine learning is a method of machine learning in which data becomes available in a sequential order and is used to update the best predictor for future data at each step, as opposed to batch learning techniques which generate the best predict

In [22]:
# Sort the wiki_list by similarity to the main article using the dot product of their vectors
wiki_list.sort(key=lambda x: x['similarity_vector_dot_product'], reverse=True)

# Print the bottom 3 articles by similarity to the main article using the dot product of their vectors
for wiki in wiki_list[-3:]:
    print(wiki['similarity_vector_dot_product'])
    print("https://en.wikipedia.org" + wiki['link'])
    print(wiki['title'])
    print(wiki['paragraph'])

0
https://en.wikipedia.org/wiki/Main_Page
Main Page
December 4: Navy Day in India

0
https://en.wikipedia.org/wiki/File:Regressions_sine_demo.svg
File:Regressions sine demo.svg
Original file ‎(SVG file, nominally 900 × 450 pixels, file size: 582 KB)

0
https://en.wikipedia.org/wiki/File:Symbol_portal_class.svg
File:Symbol portal class.svg
Original file ‎(SVG file, nominally 180 × 185 pixels, file size: 12 KB)



In [23]:
# Sort the wiki_list by similarity to the main article using the cosine similarity of their vectors
wiki_list.sort(key=lambda x: x['similarity_vector_cosine_similarity'], reverse=True)

# Print the top 3 articles by similarity to the main article using the cosine similarity of their vectors
for wiki in wiki_list[:3]:
    print(wiki['similarity_vector_cosine_similarity'])
    print("https://en.wikipedia.org" + wiki['link'])
    print(wiki['title'])
    print(wiki['paragraph'])

1.0
https://en.wikipedia.org/wiki/Machine_learning
Machine learning
Machine learning (ML) is a field of study in artificial intelligence concerned with the development and study of statistical algorithms that can effectively generalize and thus perform tasks without explicit instructions.[1] Recently, generative artificial neural networks have been able to surpass many previous approaches in performance.[2][3] Machine learning approaches have been applied to large language models, computer vision, speech recognition, email filtering, agriculture and medicine, where it is too costly to develop algorithms to perform the needed tasks.[4][5]

0.3279680246763151
https://en.wikipedia.org/wiki/Computational_learning_theory
Computational learning theory
In computer science, computational learning theory (or just learning theory) is a subfield of artificial intelligence devoted to studying the design and analysis of machine learning algorithms.[1]

0.3218393429334682
https://en.wikipedia.org/wi

In [24]:
# Sort the wiki_list by similarity to the main article using the cosine similarity of their vectors
wiki_list.sort(key=lambda x: x['similarity_vector_cosine_similarity'], reverse=True)

# Print the bottom 3 articles by similarity to the main article using the cosine similarity of their vectors
for wiki in wiki_list[-3:]:
    print(wiki['similarity_vector_cosine_similarity'])
    print("https://en.wikipedia.org" + wiki['link'])
    print(wiki['title'])
    print(wiki['paragraph'])

0.0
https://en.wikipedia.org/wiki/Main_Page
Main Page
December 4: Navy Day in India

0.0
https://en.wikipedia.org/wiki/File:Regressions_sine_demo.svg
File:Regressions sine demo.svg
Original file ‎(SVG file, nominally 900 × 450 pixels, file size: 582 KB)

0.0
https://en.wikipedia.org/wiki/File:Symbol_portal_class.svg
File:Symbol portal class.svg
Original file ‎(SVG file, nominally 180 × 185 pixels, file size: 12 KB)



In [25]:
# Sort the wiki_list by similarity to the main article using the jaccard similarity of their vectors
wiki_list.sort(key=lambda x: x['similarity_vector_jaccard_similarity'], reverse=True)

# Print the top 3 articles by similarity to the main article using the jaccard similarity of their vectors
for wiki in wiki_list[:3]:
    print(wiki['similarity_vector_jaccard_similarity'])
    print("https://en.wikipedia.org" + wiki['link'])
    print(wiki['title'])
    print(wiki['paragraph'])

1.0
https://en.wikipedia.org/wiki/Machine_learning
Machine learning
Machine learning (ML) is a field of study in artificial intelligence concerned with the development and study of statistical algorithms that can effectively generalize and thus perform tasks without explicit instructions.[1] Recently, generative artificial neural networks have been able to surpass many previous approaches in performance.[2][3] Machine learning approaches have been applied to large language models, computer vision, speech recognition, email filtering, agriculture and medicine, where it is too costly to develop algorithms to perform the needed tasks.[4][5]

1.0
https://en.wikipedia.org/wiki/Neural_Designer
Neural Designer
Neural Designer is a software tool for machine learning based on neural networks, a main area of artificial intelligence research, and contains a graphical user interface which simplifies data entry and interpretation of results.

1.0
https://en.wikipedia.org/wiki/Machine_Learning_(jour

In [26]:
# Sort the wiki_list by similarity to the main article using the jaccard similarity of their vectors
wiki_list.sort(key=lambda x: x['similarity_vector_jaccard_similarity'], reverse=True)

# Print the bottom 3 articles by similarity to the main article using the jaccard similarity of their vectors
for wiki in wiki_list[-3:]:
    print(wiki['similarity_vector_jaccard_similarity'])
    print("https://en.wikipedia.org" + wiki['link'])
    print(wiki['title'])
    print(wiki['paragraph'])

0.375
https://en.wikipedia.org/wiki/White-box_testing
White-box testing
White-box testing (also known as clear box testing, glass box testing, transparent box testing, and structural testing) is a method of software testing that tests internal structures or workings of an application, as opposed to its functionality (i.e. black-box testing). In white-box testing, an internal perspective of the system is used to design test cases. The tester chooses inputs to exercise paths through the code and determine the expected outputs. This is analogous to testing nodes in a circuit, e.g. in-circuit testing (ICT).
White-box testing can be applied at the unit, integration and system levels of the software testing process. Although traditional testers tended to think of white-box testing as being done at the unit level, it is used for integration and system testing more frequently today. It can test paths within a unit, paths between units during integration, and between subsystems during a system–

## Text vectorization + Word embeddings

In [41]:
# # Create an embedding of the text vector for the main article
# response = client.embeddings.create(input=wiki_main['vector'].tolist(),
#                                     model="text-embedding-ada-002")
                                   
wiki_main["embeddings_vector"] = np.stack([wiki_main['embeddings'], wiki_main['vector']]) 

ValueError: all input arrays must have the same shape

In [None]:
# # Create an embedding of the paragraph and title for each article in wiki_list
# for wiki in wiki_list:
#     response = client.embeddings.create(input=wiki['vector'].tolist(),
#                                         model="text-embedding-ada-002")
                                    
wiki["embeddings_vector"] =  wiki['embeddings'] + wiki['vector'] 
# Calculate the similarity between the main article and each article in wiki_list using the dot product of their embeddings
wiki['similarity_embedding_vector__dot_product'] = np.dot(wiki_main['embeddings_vector'], wiki['embeddings_vector'])
# Calculate the similarity between the main article and each article in wiki_list using the cosine similarity of their embeddings
wiki['similarity_embedding_vector__cosine_similarity'] = np.dot(wiki_main['embeddings_vector'], wiki['embeddings_vector']) / (np.linalg.norm(wiki_main['embeddings_vector']) * np.linalg.norm(wiki['embeddings_vector']))
# Calculate the similarity between the main article and each article in wiki_list using the jaccard similarity of their embeddings
wiki['similarity_embedding_vector_jaccard_similarity'] = jaccard_similarity(set(wiki_main['embeddings_vector']), set(wiki['embeddings_vector']))

In [None]:
# Sort the wiki_list by similarity using the dot product of their embeddings vectors
wiki_list.sort(key=lambda x: x['similarity_embedding_vector__dot_product'], reverse=True)

# Print the top 3 articles by similarity using the dot product of their embeddings vectors
for wiki in wiki_list[:3]:
    print(wiki['similarity_embedding_vector__dot_product'])
    print("https://en.wikipedia.org" + wiki['link'])
    print(wiki['title'])
    print(wiki['paragraph'])

0.9999999384978688
https://en.wikipedia.org/wiki/Machine_learning
Machine learning
Machine learning (ML) is a field of study in artificial intelligence concerned with the development and study of statistical algorithms that can effectively generalize and thus perform tasks without explicit instructions.[1] Recently, generative artificial neural networks have been able to surpass many previous approaches in performance.[2][3] Machine learning approaches have been applied to large language models, computer vision, speech recognition, email filtering, agriculture and medicine, where it is too costly to develop algorithms to perform the needed tasks.[4][5]

0.9997666410099177
https://en.wikipedia.org/wiki/Mean_shift
Mean shift
Mean shift is a non-parametric feature-space mathematical analysis technique for locating the maxima of a density function, a so-called mode-seeking algorithm.[1] Application domains include cluster analysis in computer vision and image processing.[2]

0.999698961537

In [None]:
# Sort the wiki_list by similarity using the dot product of their embeddings vectors
wiki_list.sort(key=lambda x: x['similarity_embedding_vector__dot_product'], reverse=True)

# Print the bottom 3 articles by similarity using the dot product of their embeddings vectors
for wiki in wiki_list[-3:]:
    print(wiki['similarity_embedding_vector__dot_product'])
    print("https://en.wikipedia.org" + wiki['link'])
    print(wiki['title'])
    print(wiki['paragraph'])

0.904951025012385
https://en.wikipedia.org/wiki/Echo_state_network
Echo state network
An echo state network (ESN)[1][2] is a type of reservoir computer that uses a recurrent neural network with a sparsely connected hidden layer (with typically 1% connectivity). The connectivity and weights of hidden neurons are fixed and randomly assigned. The weights of output neurons can be learned so that the network can produce or reproduce specific temporal patterns. The main interest of this network is that although its behavior is non-linear, the only weights that are modified during training are for the synapses that connect the hidden neurons to output neurons. Thus, the error function is quadratic with respect to the parameter vector and can be differentiated easily to a linear system.

0.8890255243586205
https://en.wikipedia.org/wiki/Vision_transformer
Vision transformer
A vision transformer (ViT) is a transformer designed for computer vision. Transformers were introduced in 2017,[1] and hav

In [None]:
# Sort the wiki_list by similarity using the cosine similarity of their embeddings vectors
wiki_list.sort(key=lambda x: x['similarity_embedding_vector__cosine_similarity'], reverse=True)

# Print the top 3 articles by similarity using the cosine similarity of their embeddings vectors
for wiki in wiki_list[:3]:
    print(wiki['similarity_embedding_vector__cosine_similarity'])
    print("https://en.wikipedia.org" + wiki['link'])
    print(wiki['title'])
    print(wiki['paragraph'])

0.9999997766233459
https://en.wikipedia.org/wiki/Machine_learning
Machine learning
Machine learning (ML) is a field of study in artificial intelligence concerned with the development and study of statistical algorithms that can effectively generalize and thus perform tasks without explicit instructions.[1] Recently, generative artificial neural networks have been able to surpass many previous approaches in performance.[2][3] Machine learning approaches have been applied to large language models, computer vision, speech recognition, email filtering, agriculture and medicine, where it is too costly to develop algorithms to perform the needed tasks.[4][5]

0.9997665618787897
https://en.wikipedia.org/wiki/Mean_shift
Mean shift
Mean shift is a non-parametric feature-space mathematical analysis technique for locating the maxima of a density function, a so-called mode-seeking algorithm.[1] Application domains include cluster analysis in computer vision and image processing.[2]

0.999698875240

In [None]:
# Sort the wiki_list by similarity using the cosine similarity of their embeddings vectors
wiki_list.sort(key=lambda x: x['similarity_embedding_vector__cosine_similarity'], reverse=True)

# Print the bottom 3 articles by similarity using the cosine similarity of their embeddings vectors
for wiki in wiki_list[-3:]:
    print(wiki['similarity_embedding_vector__cosine_similarity'])
    print("https://en.wikipedia.org" + wiki['link'])
    print(wiki['title'])
    print(wiki['paragraph'])

0.9049509810174128
https://en.wikipedia.org/wiki/Echo_state_network
Echo state network
An echo state network (ESN)[1][2] is a type of reservoir computer that uses a recurrent neural network with a sparsely connected hidden layer (with typically 1% connectivity). The connectivity and weights of hidden neurons are fixed and randomly assigned. The weights of output neurons can be learned so that the network can produce or reproduce specific temporal patterns. The main interest of this network is that although its behavior is non-linear, the only weights that are modified during training are for the synapses that connect the hidden neurons to output neurons. Thus, the error function is quadratic with respect to the parameter vector and can be differentiated easily to a linear system.

0.8890254786238526
https://en.wikipedia.org/wiki/Vision_transformer
Vision transformer
A vision transformer (ViT) is a transformer designed for computer vision. Transformers were introduced in 2017,[1] and ha

In [None]:
# Sort the wiki_list by similarity using the jaccard similarity of their embeddings vectors
wiki_list.sort(key=lambda x: x['similarity_embedding_vector_jaccard_similarity'], reverse=True)

# Print the top 3 articles by similarity using the jaccard similarity of their embeddings vectors
for wiki in wiki_list[:3]:
    print(wiki['similarity_embedding_vector_jaccard_similarity'])
    print("https://en.wikipedia.org" + wiki['link'])
    print(wiki['title'])
    print(wiki['paragraph'])

0.002127659574468085
https://en.wikipedia.org/wiki/Rule-based_machine_learning
Rule-based machine learning
Rule-based machine learning (RBML) is a term in computer science intended to encompass any machine learning method that identifies, learns, or evolves 'rules' to store, manipulate or apply.[1][2][3] The defining characteristic of a rule-based machine learner is the identification and utilization of a set of relational rules that collectively represent the knowledge captured by the system. This is in contrast to other machine learners that commonly identify a singular model that can be universally applied to any instance in order to make a prediction.[clarification needed][citation needed]

0.0010634526763559022
https://en.wikipedia.org/wiki/Theoretical_computer_science
Theoretical computer science
Theoretical computer science (TCS) is a subset of general computer science and mathematics that focuses on mathematical aspects of computer science such as the theory of computation, for

In [None]:
# Sort the wiki_list by similarity using the jaccard similarity of their embeddings vectors
wiki_list.sort(key=lambda x: x['similarity_embedding_vector_jaccard_similarity'], reverse=True)

# Print the bottom 3 articles by similarity using the jaccard similarity of their embeddings vectors
for wiki in wiki_list[-3:]:
    print(wiki['similarity_embedding_vector_jaccard_similarity'])
    print("https://en.wikipedia.org" + wiki['link'])
    print(wiki['title'])
    print(wiki['paragraph'])

0.0
https://en.wikipedia.org/wiki/Echo_state_network
Echo state network
An echo state network (ESN)[1][2] is a type of reservoir computer that uses a recurrent neural network with a sparsely connected hidden layer (with typically 1% connectivity). The connectivity and weights of hidden neurons are fixed and randomly assigned. The weights of output neurons can be learned so that the network can produce or reproduce specific temporal patterns. The main interest of this network is that although its behavior is non-linear, the only weights that are modified during training are for the synapses that connect the hidden neurons to output neurons. Thus, the error function is quadratic with respect to the parameter vector and can be differentiated easily to a linear system.

0.0
https://en.wikipedia.org/wiki/Vision_transformer
Vision transformer
A vision transformer (ViT) is a transformer designed for computer vision. Transformers were introduced in 2017,[1] and have found widespread use in nat