In [1]:
import json
from decimal import ROUND_DOWN, Decimal, InvalidOperation

import weaviate

In [2]:
# Client for the Weaviate instance reachable at localhost, port 8080.
client = weaviate.Client(url="http://localhost:8080")

In [10]:
def _format_certainty(value):
    """Render a certainty score as fixed-point text truncated to three decimals.

    Slicing ``str(value)`` misreports floats whose text form uses scientific
    notation (``1.234e-05`` would be shown as ``1.234``) and produces ragged
    widths (``0.9`` next to ``0.923``), so the value is converted through
    ``Decimal`` and truncated (not rounded) instead.

    Args:
        value: The certainty as found in the query result; normally a float.

    Returns:
        A string with exactly three decimal places, or the first five
        characters of ``str(value)`` when the value cannot be converted
        (for example ``None``).
    """
    try:
        truncated = Decimal(str(value)).quantize(Decimal("0.001"), rounding=ROUND_DOWN)
    except InvalidOperation:
        # Non-numeric or unrepresentable values: fall back to the plain text form.
        return str(value)[:5]
    return str(truncated)


def PrintResults(queryResults):
    """Print a compact summary of a "Paper" query result instead of raw JSON.

    All certainties are printed first, one per line, followed by the arXiv
    PDF links in the same order.

    Args:
        queryResults: Mapping whose ``["data"]["Get"]["Paper"]`` entry is an
            iterable of papers; each paper must provide ``"pdfId"`` and
            ``["_additional"]["certainty"]``.

    Raises:
        KeyError: If any of the keys listed above is missing.
    """
    papers = queryResults["data"]["Get"]["Paper"]

    certainties = []
    links = []
    for paper in papers:
        certainties.append(_format_certainty(paper["_additional"]["certainty"]))

        pdfId = paper["pdfId"]
        # Ids containing "/" are used verbatim; for the others "-" is turned
        # into "." (presumably the ids were stored with "-" standing in for
        # "." -- confirm against the import script).
        if "/" not in pdfId:
            pdfId = pdfId.replace("-", ".")
        links.append("https://arxiv.org/pdf/" + pdfId + ".pdf")

    for certainty in certainties:
        print(certainty)
    for link in links:
        print(link)

In [11]:
# Test case one: a description of Latent Dirichlet Allocation / topic models.

inputText = (
    "In natural language processing, Latent Dirichlet Allocation (LDA) is a generative statistical model that explains a set of observations through unobserved groups, and each group explains why some parts of the data are similar. "
    "The LDA is an example of a topic model. "
    "In this, observations (e.g., words) are collected into documents, and each word's presence is attributable to one of the document's topics. "
    "Each document will contain a small number of topics."
)

# The whole passage is passed as a single near-text concept.
testArgument = {"concepts": [inputText]}

# Near-text query against the "Paper" class, limited to 10 results.
data = (
    client.query
    .get("Paper", ["pdfId title abstract _additional {certainty}"])
    .with_near_text(testArgument)
    .with_limit(10)
    .do()
)

PrintResults(data)

0.923
0.902
0.899
0.897
0.895
0.883
0.880
0.878
0.877
0.873
https://arxiv.org/pdf/1507.06593.pdf
https://arxiv.org/pdf/2112.03101.pdf
https://arxiv.org/pdf/1711.04305.pdf
https://arxiv.org/pdf/1401.6169.pdf
https://arxiv.org/pdf/1804.04749.pdf
https://arxiv.org/pdf/2207.14687.pdf
https://arxiv.org/pdf/2104.07969.pdf
https://arxiv.org/pdf/2110.08591.pdf
https://arxiv.org/pdf/2102.04449.pdf
https://arxiv.org/pdf/1511.03546.pdf


In [12]:
# Test case two: a description of word embeddings.

inputText = (
    "In natural language processing (NLP), a word embedding is a representation of a word. "
    "The embedding is used in text analysis. "
    "Typically, the representation is a real-valued vector that encodes the meaning of the word in such a way that words that are closer in the vector space are expected to be similar in meaning."
)

# The whole passage is passed as a single near-text concept.
testArgument = {"concepts": [inputText]}

# Near-text query against the "Paper" class, limited to 10 results.
data = (
    client.query
    .get("Paper", ["pdfId title abstract _additional {certainty}"])
    .with_near_text(testArgument)
    .with_limit(10)
    .do()
)

PrintResults(data)

0.899
0.894
0.886
0.885
0.884
0.884
0.884
0.881
0.879
0.878
https://arxiv.org/pdf/2301.00709.pdf
https://arxiv.org/pdf/2208.08386.pdf
https://arxiv.org/pdf/1811.11002.pdf
https://arxiv.org/pdf/1809.02765.pdf
https://arxiv.org/pdf/1901.07176.pdf
https://arxiv.org/pdf/2007.07287.pdf
https://arxiv.org/pdf/1711.00331.pdf
https://arxiv.org/pdf/1911.00845.pdf
https://arxiv.org/pdf/2209.10583.pdf
https://arxiv.org/pdf/1911.04975.pdf


In [13]:
# Test case three: a description of concurrency in computer science.

inputText = (
    "In computer science, concurrency is the ability of different parts or units of a program, algorithm, or problem to be executed out-of-order or in partial order, without affecting the outcome. "
    "This allows for parallel execution of the concurrent units, which can significantly improve overall speed of the execution in multi-processor and multi-core systems. "
    "In more technical terms, concurrency refers to the decomposability of a program, algorithm, or problem into order-independent or partially-ordered components or units of computation."
)

# The whole passage is passed as a single near-text concept.
testArgument = {"concepts": [inputText]}

# Near-text query against the "Paper" class, limited to 10 results.
data = (
    client.query
    .get("Paper", ["pdfId title abstract _additional {certainty}"])
    .with_near_text(testArgument)
    .with_limit(10)
    .do()
)

PrintResults(data)

0.890
0.869
0.857
0.853
0.849
0.848
0.839
0.835
0.829
0.828
https://arxiv.org/pdf/1406.0184.pdf
https://arxiv.org/pdf/1203.4751.pdf
https://arxiv.org/pdf/1406.3485.pdf
https://arxiv.org/pdf/1710.07588.pdf
https://arxiv.org/pdf/0810.1316.pdf
https://arxiv.org/pdf/1511.01779.pdf
https://arxiv.org/pdf/1803.10067.pdf
https://arxiv.org/pdf/1705.02851.pdf
https://arxiv.org/pdf/1909.11644.pdf
https://arxiv.org/pdf/1812.06011.pdf


In [14]:
# Test case four: a description of the linked-list data structure.

inputText = (
    "In computer science, a linked list is a linear collection of data elements whose order is not given by their physical placement in memory. "
    "Instead, each element points to the next. "
    "It is a data structure consisting of a collection of nodes which together represent a sequence. "
    "In its most basic form, each node contains: data, and a reference (in other words, a link) to the next node in the sequence."
)

# The whole passage is passed as a single near-text concept.
testArgument = {"concepts": [inputText]}

# Near-text query against the "Paper" class, limited to 10 results.
data = (
    client.query
    .get("Paper", ["pdfId title abstract _additional {certainty}"])
    .with_near_text(testArgument)
    .with_limit(10)
    .do()
)

PrintResults(data)

0.797
0.761
0.761
0.760
0.760
0.759
0.759
0.757
0.756
0.755
https://arxiv.org/pdf/0908.3089.pdf
https://arxiv.org/pdf/1305.6757.pdf
https://arxiv.org/pdf/2207.12942.pdf
https://arxiv.org/pdf/0905.2214.pdf
https://arxiv.org/pdf/1403.0764.pdf
https://arxiv.org/pdf/cs/0308041.pdf
https://arxiv.org/pdf/1009.0929.pdf
https://arxiv.org/pdf/1504.00785.pdf
https://arxiv.org/pdf/1004.1001.pdf
https://arxiv.org/pdf/1710.08748.pdf


In [None]:
# Test case five: a description of network theory.

inputText = (
    "In mathematics, computer science and network science, network theory is a part of graph theory. "
    "It defines networks as graphs where the nodes or edges possess attributes. "
    "Network theory analyses these networks over the symmetric relations or asymmetric relations between their (discrete) components."
)

# The whole passage is passed as a single near-text concept.
testArgument = {"concepts": [inputText]}

# Near-text query against the "Paper" class, limited to 10 results.
data = (
    client.query
    .get("Paper", ["pdfId title abstract _additional {certainty}"])
    .with_near_text(testArgument)
    .with_limit(10)
    .do()
)

PrintResults(data)