In [3]:
import cohere
from cohere.classify import Example
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from annoy import AnnoyIndex
import warnings
from copy import deepcopy
warnings.filterwarnings('ignore')
pd.set_option('display.max_colwidth', None)


In [4]:
apiKey = ''
co = cohere.Client(apiKey)


semantic check

In [175]:
# Get the embeddings
def semantic_check(database, question_no, query):
    exp_answers = database[database.q_category==question_no][['answer1','answer2','answer3','answer4','answer5']].values.flatten().tolist()
    embeds = co.embed(texts=exp_answers,
                      model='large',
                      truncate='LEFT').embeddings
                      
    # Create the search index, pass the size of embedding
    search_index = AnnoyIndex(len(embeds[0]), 'angular')
    # Add all the vectors to the search index
    for i in range(len(embeds)):
        search_index.add_item(i, embeds[i])
    search_index.build(5)  # 10 trees
    search_index.save(f'temp3.ann')

    # process user input
    user_input_embeds = co.embed(texts=[query],
                                 model='large',
                                 truncate='LEFT').embeddings
    similar_item_ids = search_index.get_nns_by_vector(user_input_embeds[0], 10,
                                                      include_distances=True)
    # 1 is the question category
    # mean_distance = np.asarray(similar_item_ids[1]).mean()
    set_subset_value = set_subset(query, exp_answers[similar_item_ids[0][0]])
    min_distance = min(similar_item_ids[1])
    print(set_subset_value)
    return min_distance

Grammar Check

> generative check

In [6]:
def gen_grammar_check(query, trials = 3):
    similarity_values= []
    for i in range(trials):
        query = query.lower()
        response = co.generate(
            model='xlarge',
            prompt=f'This is a spell check generator that checks for grammar and corrects it. This also capitalizes the first letter of the sentence.\n\nSample: I would like a peice of pie.\nCorrect: I would like a piece of the pie.\n\nSample: my coworker said he used a financial planner to help choose his stocks so he wouldn\'t loose money.\nCorrect: My coworker said he used a financial planner to help him choose his stocks so he wouldn\'t lose money.\n\nSample: I ordered pizza, I also ordered garlic knots.\nCorrect: I ordered pizza; I also ordered garlic knots.\n\nSample: i bought winning lottery ticket the corner store\nCorrect: I bought my winning lottery ticket at the corner store.\n\nSample: try to reread your work to ensure you haven\'t left out any small words\nCorrect: Try to reread your work to ensure you haven\'t left out any small words.\n\nSample: I went to the movies with my sister. We will see the new comedy about dancing dogs.\nCorrect: I went to the movies with my sister. We saw the new comedy about dancing dogs.\n\nSample: the boy took their turn on the field.\nCorrect: The boy took his turn on the field.\n--\nSample: I could of won the race if I trained more.\nCorrect: I could have won the race if I had trained more.\n--\nSample: I went to the office, than i started my meeting.\nCorrect: I went to the office, then I started my meeting.\n--\nSample: {query}\nCorrect:',
            max_tokens=100,
            temperature=1.2,
            k=0,
            p=0.75,
            frequency_penalty=0,
            presence_penalty=0,
            stop_sequences=["--"],
            return_likelihoods='NONE')

        output = response.generations[0].text.rstrip("(\n)--")
        output = output.lstrip(" ")
        output = output.lower()

        embeds = co.embed(texts=[query, output],
                          model='large',
                          truncate='LEFT').embeddings

        sim_value = cosine_similarity(
            X=[embeds[0]], Y=[embeds[1]], dense_output=True)
        similarity_values.append(sim_value)
    similarity_score = max(similarity_values)
    print("gen:", similarity_values)
    return 1 if similarity_score > 0.997 else 0


> classification check

In [7]:
def class_grammar_check(query):
    response = co.classify(
        model='cdb39157-6b82-4cb4-92c5-9e6037623d79-ft',
        inputs=[f"{query}"])
    return(float(response.classifications[0].prediction))


Toxic behaviour check

> default model for toxic check

In [8]:
examples = [
    Example("you are hot trash", "Toxic"),
    Example("go to hell", "Toxic"),
    Example("get rekt moron", "Toxic"),
    Example("get a brain and use it", "Toxic"),
    Example("say what you mean, you jerk.", "Toxic"),
    Example("Are you really this stupid", "Toxic"),
    Example("I will honestly kill you", "Toxic"),
    Example("yo how are you", "Benign"),
    Example("I'm curious, how did that happen", "Benign"),
    Example("Try that again", "Benign"),
    Example("Hello everyone, excited to be here", "Benign"),
    Example("I think I saw it first", "Benign"),
    Example("That is an interesting point", "Benign"),
    Example("I love this", "Benign"),
    Example("We should try that sometime", "Benign"),
    Example("You should go for it", "Benign"),
    Example("people are not good.", "Benign")
]


def class_toxic_check(query):
    sentences = query.lower().rstrip('. ').split('.')
    for i in sentences:
        response = co.classify(
            model='large',
            inputs=[f"{i}"],
            examples=examples)
        if response.classifications[0].prediction == 'Toxic':
            return 1.0
    return 0.0


> custom model for toxic check

In [171]:
def custom_toxic_check(query):
    sentences = query.lower().rstrip('. ').split('.')
    for i in sentences:
        response = co.classify(
            model='8cec2377-0f7f-4557-81a4-7abc7dea3828-ft',
            inputs=[f"{i}"])
        if float(response.classifications[0].prediction) == 1.0:
            return 1.0
    return 0.0

Text Duplication Check

In [10]:
def Jaccard_Similarity(doc1, doc2): 
    
    if(doc1 == '' and doc2 == ''):
        return 0.0

    # List the unique words in a document
    words_doc1 = set(doc1.lower().split()) 
    words_doc2 = set(doc2.lower().split())
    
    # Find the intersection of words list of doc1 & doc2
    intersection = words_doc1.intersection(words_doc2)

    # Find the union of words list of doc1 & doc2
    union = words_doc1.union(words_doc2)
        
    # Calculate Jaccard similarity score 
    # using length of intersection set divided by length of union set
    return float(len(intersection)) / len(union)

def duplication_check(query):
    sentences = query.lower().rstrip('. ').split('.')
    similarities = []
    for i in range(len(sentences)):
        rest = deepcopy(sentences)
        rest.pop(i)
        rest = "".join(rest)
        score = Jaccard_Similarity(sentences[i], rest)
        similarities.append(score)
    duplication_ratio = sum(similarities)/(len(sentences)*0.08)

    if duplication_ratio>2.0:
        dup_score = 2
    elif duplication_ratio>1.0:
        dup_score = 1
    else:
        dup_score = 0
    return dup_score

In [172]:
from nltk.stem import porter
import numpy as np
from gensim.parsing.preprocessing import remove_stopwords
sn = porter.PorterStemmer()
# s = "My named is Aryan"
# arr = np.asarray(s.split())
# arr2 = np.vectorize(sn.stem)(arr)
# set(arr2)

def set_subset(doc1, doc2): 
    sn = porter.PorterStemmer()
    if(doc1.strip() == '' or doc2.strip() == ''):
        return 0.0

    doc1 = remove_stopwords(doc1)
    doc2 = remove_stopwords(doc2)
    # List the unique words in a document
    words_doc1 = set(np.vectorize(sn.stem)(np.asarray(doc1.lower().split())))
    words_doc2 = set(np.vectorize(sn.stem)(np.asarray(doc2.lower().split())))
    
    # Find the intersection of words list of doc1 & doc2
    intersection = words_doc1.intersection(words_doc2)

    # Find the union of words list of doc1 & doc2
    union = words_doc1.union(words_doc2)
        
    # Calculate Jaccard similarity score 
    # using length of intersection set divided by length of union set
    return float(len(intersection)) / len(union)


In [78]:
req_answer = "Pattern of interconnection of nodes is called topology. Bus topology: All the nodes are connected to single channel called bus. It has terminals to discard the packets.  Message will be broadcasted in this channel. Used in LAN i.e Ethernet cable connection. Ring Topology: nodes are connected to each other. Token will be used to send and receive the packets. It will be arranged in the form of ring. Now a days we are not using this topology. Star Topology: It is commonly used topology.  All the nodes are connected to central hub. Control will be more in this topology.  More secured topology. Most efficient topology. Down of any one node will not be interrupt the communication. Tree Topology: Combination of Bus and star topology. Complex topology. Mesh Topology: Used in WAN. More complex structure."
obs_answer1_5 = "There are different types of Topologies: Star topology: Most used topology is the STAR Topology. It has a central hub/server connected to all the other computer networks. Used in big companies. Bus topology: Here in this Topology there is a central line of connection between the different networking devices. Tree topology: It is a combination of star and bus topology. Ring topology: All the devices connected in a Ring structure. The information flows through only one direction. Also very popular but has its own disadvantage. Mesh topology: Combination of every type of topology is known as a Mesh Topology. It is used in big network communication."
obs_answer1_2 = "Topology is a pictorial representation of arrangement of hosts or working stations based on the requirement. There are 5 types of topology: Star topology: Here the hosts are connected to a main node/working station. The network connection is handled by the main node. It is arranged in the form of a star. Bus topology: Here the work station are connected through a single network. Ring topology: Here the work stations are arranged in the form of a ring and there is main node in this topology. This is not preferred these days. Mesh topology: It is the most complex topology is used over large area."
obs_answer1_0 = "The different types of topology are:- Star topology Bus topology Tree topology Star topology: In this if the main server is damaged then the system stop working but if one system damaged the rest will world. Bus topology: In this the data from each system checking. If not mailable moves to the next system. Tree topology: It is the combination of both star and bus topology."

In [99]:
remove_stopwords(req_answer)

'Pattern interconnection nodes called topology. Bus topology: All nodes connected single channel called bus. It terminals discard packets. Message broadcasted channel. Used LAN i.e Ethernet cable connection. Ring Topology: nodes connected other. Token send receive packets. It arranged form ring. Now days topology. Star Topology: It commonly topology. All nodes connected central hub. Control topology. More secured topology. Most efficient topology. Down node interrupt communication. Tree Topology: Combination Bus star topology. Complex topology. Mesh Topology: Used WAN. More complex structure.'

In [100]:
set_subset(req_answer, obs_answer1_5)

0.21333333333333335

In [101]:
set_subset(req_answer, obs_answer1_2)

0.16455696202531644

In [102]:
set_subset(req_answer, obs_answer1_0)

0.09722222222222222

#Testing

In [11]:
query_correct_answer = "The surface runoff frequently just disappears into sinkholes and swallow holes, where it flows as underground streams until emerging further downstream through a cave opening. Long, narrow to wide trenches known as valley sinks or uvalas emerge when sink holes and dolines connect together due to slumping of materials along their margins or due to roof collapse of caves."

query_incorrect_answer = "During the day the land heats up faster and become warmer than the sea. Therfre, over the land the air rises giving rise to a low pressure area, whereas the sea is relatively cool and the pressure over sea it relatively high. Thus, pressure gradient from sea to land is created and the wind blows from the sea to the land which is known sea breeze. In the night the reversal of condition takes place. The land loses heat faster and is cooler than the sea. The pressure gradient is from the land to the sea. This breeze is known land breeze. Dumbass! During the day the land heats up faster and become warmer than the sea."

In [12]:
database=pd.read_csv("Book1.csv", delimiter=',')
database.head(1)

Unnamed: 0,q_category,question,answer1,answer2,answer3,answer4,answer5
0,1,Explain the evolution of valley sinks or uvalas.,"Quite often the surface run-off simply goes down swallow and sink holes and flow as underground streams and reemerge at a distance downstream through a cave opening. When sink holes and dolines join together because of slumping of materials along their margins or due to roof collapse of caves, long, narrow to wide trenches called valley sinks or uvalas form.","Generally, the surface run-off simply goes down swallow and sink holes and flow as underground streams and re-emerge at a distance downstream through a cave opening. When sink holes and dolines join together because of slumping of materials along their margins or due to roof collapse of caves, long, narrow to wide trenches called valley sinks or Uvalas form.","A sink hole is on opening more or less circular at the top and funnel-shaped towards the bottom with sizes varying in area from a few sq.m to a hectare and with depth from a less than half a metre to thirty metres or more. Quite often the surface run-off simply goes down swallow and sink holes and flow as underground streams and re-emerge at a distance downstream through a cave opening. A doline is a collapsed sink. When sinkholes and dolines join together because of slumping of materials along their margins or due to roof collapse of caves, long narrow to wide trenches called valley sinks or uvalas form.","Normally, the run-off goes down swallow and sink holes and drifts as underground streams and re-emerges at a distance downstream through a cave starting. When sink holes and dolines are merged due to slumping of substances alongside their margins or because of roof collapse of caves, trenches known as valley sinks or Uvalas are formed.","Valley sinks or uvulas are the same things. Valley sinks are erosional landforms of Karst topography. The following way valley sinks are evolved: In dolomite, limestone, or karst zones, the rocks are permeable and are made up of highly fused and broken rocks. In karst areas, small to medium-sized shallow pits are formed due to seepage of surface water, and these shallow pits are called sinkholes. Surface water from these sinkholes drains into the underground. The bottom of these sinkholes forms underground caves and sinkholes sometimes collapse and are commonly called collapse sinks or dolines. Typically, surface runoff runs down through sinkholes and flows underground, and at some distance through cave openings again to the surface. valley sinks or uvulas are developed when sinkholes and doline join together due to the collapse of cave ceilings."


In [13]:
def answer_analysis(database, question_no, query):
    semantic_check_score = semantic_check(database=database, question_no=question_no,query=query)
    gen_grammar_check_score = gen_grammar_check(query)
    class_grammar_check_score = class_grammar_check(query)
    class_toxic_check_score = class_toxic_check(query)
    custom_toxic_check_score = custom_toxic_check(query)
    duplication_check_score = duplication_check(query)
    print(f"semantic check score (less than 1 for correct answer): {semantic_check_score}")
    print(f"generative grammar check score (1 means correct): {gen_grammar_check_score}")
    print(f"class-based grammar check score (1 means correct): {class_grammar_check_score}")
    print(f"default model toxic check score (1 means toxic!): {class_toxic_check_score}")
    print(f"custom model toxic check score (1 means toxic!): {custom_toxic_check_score}")
    print(f"duplication check score (0=>no, 1=>detected, 2=>extreme)): {duplication_check_score}")


In [14]:
answer_analysis(database,1,query_correct_answer)

gen: [array([[1.]]), array([[1.]]), array([[0.85283154]])]


CohereError: finetune cdb39157-6b82-4cb4-92c5-9e6037623d79-ft is paused, but will be deploying now. please try again in a few minutes.

In [15]:
answer_analysis(database,1,query_incorrect_answer)

gen: [array([[0.97896633]]), array([[0.97822709]]), array([[0.97573551]])]


CohereError: finetune cdb39157-6b82-4cb4-92c5-9e6037623d79-ft is deploying, please try again in a few minutes.

In [110]:
query = "Quite often the surface run-off simply goes down swallow and sink holes and flow as underground streams and reemerge at a distance downstream through a cave opening. When sink holes and dolines join together because of slumping of materials along their margins or due to roof collapse of caves, long, narrow to wide trenches called valley sinks or uvalas form."
semantic_check(database, 1, query) # complete answer (exact)

1.0


0.002149369800463319

In [111]:
query = "The surface runoff frequently just disappears into sinkholes and swallow holes, where it flows as underground streams until emerging further downstream through a cave opening. Long, narrow to wide trenches known as valley sinks or uvalas emerge when sink holes and dolines connect together due to slumping of materials along their margins or due to roof collapse of caves."
semantic_check(database, 1, query) # complete answer but paraphrased

0.5116279069767442


0.275849312543869

In [112]:
query = "Surface run-off frequently just flows down swallow and sink holes as subterranean streams, reemerging downstream through a cave mouth. Valley sinks or uvalas arise when sink holes and dolines connect together owing to slumping of materials along their edges or ceiling collapse of caves."
semantic_check(database, 1, query) # complete answer but extremely paraphrased

0.3488372093023256


0.332807719707489

In [113]:
query = "Surface run-off frequently just flows down swallow and sink holes as subterranean streams, reemerging downstream through a cave mouth. Valley sinks or uvalas arise when sink holes and dolines connect together owing to slumping of materials along their edges or ceiling collapse of caves. Surface run-off frequently just flows down swallow and sink holes as subterranean streams, reemerging downstream through a cave mouth."
semantic_check(database, 1, query) # complete answer but extremely paraphrased and dup

0.3181818181818182


0.37579426169395447

In [114]:
query = "Surface run-off frequently just flows down swallow and sink holes as subterranean streams, reemerging downstream through a cave mouth."
semantic_check(database, 1, query) # complete answer but incomplete but extremely paraphrased 

0.24324324324324326


0.6971410512924194

In [115]:
query = "Surface run-off frequently just flows down swallow and sink holes as subterranean streams, reemerging downstream through a cave mouth. Surface run-off frequently just flows down swallow and sink holes as subterranean streams, reemerging downstream through a cave mouth."
semantic_check(database, 1, query) # complete answer but incomplete but extremely paraphrased but dup

0.21052631578947367


0.6022235751152039

In [116]:
query = "Surface run-off frequently just flows down swallow and sink holes as subterranean streams."
semantic_check(database, 1, query) # very incomplete but extremely paraphrased

0.16666666666666666


0.8316517472267151

In [117]:
query = "Surface run-off frequently just flows down swallow and sink holes as subterranean streams. Surface run-off frequently just flows down swallow and sink holes as subterranean streams. Surface run-off frequently just flows down swallow and sink holes as subterranean streams."
semantic_check(database, 1, query) # very incomplete but extremely paraphrased but dup

0.16666666666666666


0.7320248484611511

In [118]:
query = "When sink holes and dolines join together because of slumping of materials along their margins or due to roof collapse of caves, long, narrow to wide trenches called valley sinks or uvalas form."
semantic_check(database, 1, query) # complete answer but incomplete (exact) 

0.3333333333333333


0.4655098617076874

In [119]:
query = "The surface runoff frequently just disappears into sinkholes and swallow holes, where it flows as underground streams until emerging further downstream through a cave opening."
semantic_check(database, 1, query) # complete answer but incomplete (but paraphrased)

0.2


0.7054688334465027

In [120]:
query = "Quite often the surface run-off simply goes down swallow and sink holes and flow as underground streams and reemerge at a distance downstream through a cave opening. When sink holes and dolines join together because of slumping of materials along their margins or due to roof collapse of caves, long, narrow to wide trenches called valley sinks or uvalas form. Quite often the surface run-off simply goes down swallow and sink holes and flow as underground streams and reemerge at a distance downstream through a cave opening."
semantic_check(database, 1, query) # complete answer (exact) and duplicated

1.0


0.252985417842865

In [121]:
query = "The surface runoff frequently just disappears into sinkholes and swallow holes, where it flows as underground streams until emerging further downstream through a cave opening. Long, narrow to wide trenches known as valley sinks or uvalas emerge when sink holes and dolines connect together due to slumping of materials along their margins or due to roof collapse of caves. The surface runoff frequently just disappears into sinkholes and swallow holes, where it flows as underground streams until emerging further downstream through a cave opening."
semantic_check(database, 1, query) # complete answer but paraphrased and duplicated

0.5116279069767442


0.3165651261806488

In [122]:
query = "When sink holes and dolines join together because of slumping of materials along their margins or due to roof collapse of caves, long, narrow to wide trenches called valley sinks or uvalas form. When sink holes and dolines join together because of slumping of materials along their margins or due to roof collapse of caves, long, narrow to wide trenches called valley sinks or uvalas form."
semantic_check(database, 1, query) # complete answer but incomplete (exact) and duplicated

0.3333333333333333


0.5324751138687134

In [123]:
query = "The surface runoff frequently just disappears into sinkholes and swallow holes, where it flows as underground streams until emerging further downstream through a cave opening. The surface runoff frequently just disappears into sinkholes and swallow holes, where it flows as underground streams until emerging further downstream through a cave opening."
semantic_check(database, 1, query) # complete answer but incomplete (but paraphrased) and duplicated

0.2


0.6661074161529541

In [124]:
query = "When sink holes and dolines join together because of slumping of materials along their margins or due to roof collapse of caves."
semantic_check(database, 1, query) # very incomplete (exact)

0.29411764705882354


0.7567181587219238

In [125]:
query = "The surface runoff frequently just disappears into sinkholes and swallow holes."
semantic_check(database, 1, query) # very incomplete (but paraphrased)

0.05128205128205128


0.8588343858718872

In [126]:
query = "When sink holes and dolines join together because of slumping of materials along their margins or due to roof collapse of caves. When sink holes and dolines join together because of slumping of materials along their margins or due to roof collapse of caves. When sink holes and dolines join together because of slumping of materials along their margins or due to roof collapse of caves."
semantic_check(database, 1, query) # very incomplete (exact) and extremely duplicated

0.2


0.7632430791854858

In [127]:
query = "The surface runoff frequently just disappears into sinkholes and swallow holes. The surface runoff frequently just disappears into sinkholes and swallow holes. The surface runoff frequently just disappears into sinkholes and swallow holes."
semantic_check(database, 1, query) # very incomplete (but paraphrased) and extremely duplicated

0.05128205128205128


0.8007097840309143

In [128]:
query = "Quite often the surface run-off simply goes down swallow and sink holes and flow as underground streams and reemerge at a distance downstream through a cave opening. During the day the land heats up faster and become warmer than the sea. Therfre, over the land the air rises giving rise to a low pressure area, whereas the sea is relatively cool and the pressure over sea it relatively high."
semantic_check(database, 1, query) # complete answer (exact) + incorrect

0.2641509433962264


0.8410668969154358

In [129]:
query = "Quite often the surface run-off simply goes down swallow and sink holes and flow as underground streams and reemerge at a distance downstream through a cave opening. During the day the land heats up faster and become warmer than the sea. Therfre, over the land the air rises giving rise to a low pressure area, whereas the sea is relatively cool and the pressure over sea it relatively high. Quite often the surface run-off simply goes down swallow and sink holes and flow as underground streams and reemerge at a distance downstream through a cave opening. During the day the land heats up faster and become warmer than the sea. "
semantic_check(database, 1, query) # complete answer (exact) + incorrect (correct dup)

0.2641509433962264


0.7823556065559387

In [130]:
query = "Quite often the surface run-off simply goes down swallow and sink holes and flow as underground streams and reemerge at a distance downstream through a cave opening. During the day the land heats up faster and become warmer than the sea. Therfre, over the land the air rises giving rise to a low pressure area, whereas the sea is relatively cool and the pressure over sea it relatively high. During the day the land heats up faster and become warmer than the sea. Therfre, over the land the air rises giving rise to a low pressure area, whereas the sea is relatively cool and the pressure over sea it relatively high."
semantic_check(database, 1, query) # complete answer (exact) + incorrect (incorrect dup)

0.2641509433962264


0.9282629489898682

In [131]:
query = "Quite often the surface run-off simply goes down swallow and sink holes and flow as underground streams and reemerge at a distance downstream through a cave opening. Quite often the surface run-off simply goes down swallow and sink holes and flow as underground streams and reemerge at a distance downstream through a cave opening. Quite often the surface run-off simply goes down swallow and sink holes and flow as underground streams and reemerge at a distance downstream through a cave opening. During the day the land heats up faster and become warmer than the sea. Therfre, over the land the air rises giving rise to a low pressure area, whereas the sea is relatively cool and the pressure over sea it relatively high. Quite often the surface run-off simply goes down swallow and sink holes and flow as underground streams and reemerge at a distance downstream through a cave opening."
semantic_check(database, 1, query) # complete answer (exact) + incorrect

0.2641509433962264


0.6884654760360718

In [180]:
query = "The surface runoff frequently just disappears into sinkholes and swallow holes, where it flows as underground streams until emerging further downstream through a cave opening. During the day the land heats up faster and become warmer than the sea. Therfre, over the land the air rises giving rise to a low pressure area, whereas the sea is relatively cool and the pressure over sea it relatively high."
semantic_check(database, 1, query) # complete answer but paraphrased + incorrect

0.13793103448275862


0.8982481956481934

In [179]:
query = "The surface runoff frequently just disappears into sinkholes and swallow holes, where it flows as underground streams until emerging further downstream through a cave opening. During the day the land heats up faster and become warmer than the sea. Therfre, over the land the air rises giving rise to a low pressure area, whereas the sea is relatively cool and the pressure over sea it relatively high. The surface runoff frequently just disappears into sinkholes and swallow holes, where it flows as underground streams until emerging further downstream through a cave opening."
semantic_check(database, 1, query) # complete answer but paraphrased + incorrect + dup correct

0.13793103448275862


0.752900242805481

In [134]:
query = "The surface runoff frequently just disappears into sinkholes and swallow holes, where it flows as underground streams until emerging further downstream through a cave opening. During the day the land heats up faster and become warmer than the sea. The surface runoff frequently just disappears into sinkholes and swallow holes, where it flows as underground streams until emerging further downstream through a cave opening. During the day the land heats up faster and become warmer than the sea. Therfre, over the land the air rises giving rise to a low pressure area, whereas the sea is relatively cool and the pressure over sea it relatively high. The surface runoff frequently just disappears into sinkholes and swallow holes, where it flows as underground streams until emerging further downstream through a cave opening."
semantic_check(database, 1, query) # complete answer but paraphrased + incorrect + dup correct extreeme

0.13793103448275862


0.7369017601013184

In [135]:
query = "The surface runoff frequently just disappears into sinkholes and swallow holes, where it flows as underground streams until emerging further downstream through a cave opening. During the day the land heats up faster and become warmer than the sea. Therfre, over the land the air rises giving rise to a low pressure area, whereas the sea is relatively cool and the pressure over sea it relatively high. During the day the land heats up faster and become warmer than the sea. Therfre, over the land the air rises giving rise to a low pressure area, whereas the sea is relatively cool and the pressure over sea it relatively high."
semantic_check(database, 1, query) # complete answer but paraphrased + incorrect + dup incorrect

0.13793103448275862


0.9982625842094421

In [136]:
query = "The surface runoff frequently just disappears into sinkholes and swallow holes, where it flows as underground streams until emerging further downstream through a cave opening. During the day the land heats up faster and become warmer than the sea. Therfre, over the land the air rises giving rise to a low pressure area, whereas the sea is relatively cool and the pressure over sea it relatively high. The surface runoff frequently just disappears into sinkholes and swallow holes, where it flows as underground streams until emerging further downstream through a cave opening. During the day the land heats up faster and become warmer than the sea. Therfre, over the land the air rises giving rise to a low pressure area, whereas the sea is relatively cool and the pressure over sea it relatively high."
semantic_check(database, 1, query) # complete answer but paraphrased + incorrect + dup both

0.13793103448275862


0.7950761914253235

In [137]:
query = "Surface run-off frequently just flows down swallow and sink holes as subterranean streams, reemerging downstream through a cave mouth. During the day the land heats up faster and become warmer than the sea. Therfre, over the land the air rises giving rise to a low pressure area, whereas the sea is relatively cool and the pressure over sea it relatively high."
semantic_check(database, 1, query) # complete answer but extremely paraphrased + incorrect

0.14285714285714285


0.830892026424408

In [44]:
query = "Surface run-off frequently just flows down swallow and sink holes as subterranean streams, reemerging downstream through a cave mouth. During the day the land heats up faster and become warmer than the sea. Therfre, over the land the air rises giving rise to a low pressure area, whereas the sea is relatively cool and the pressure over sea it relatively high. Surface run-off frequently just flows down swallow and sink holes as subterranean streams, reemerging downstream through a cave mouth."
semantic_check(database, 1, query) # complete answer but extremely paraphrased + incorrect + correct dup

0.7125484943389893

In [45]:
query = "Surface run-off frequently just flows down swallow and sink holes as subterranean streams, reemerging downstream through a cave mouth. Surface run-off frequently just flows down swallow and sink holes as subterranean streams, reemerging downstream through a cave mouth. During the day the land heats up faster and become warmer than the sea. Therfre, over the land the air rises giving rise to a low pressure area, whereas the sea is relatively cool and the pressure over sea it relatively high. Surface run-off frequently just flows down swallow and sink holes as subterranean streams, reemerging downstream through a cave mouth."
semantic_check(database, 1, query) # complete answer but extremely paraphrased + incorrect + correct dup extreme

0.6847242712974548

In [46]:
query = "Surface run-off frequently just flows down swallow and sink holes as subterranean streams, reemerging downstream through a cave mouth. During the day the land heats up faster and become warmer than the sea. Therfre, over the land the air rises giving rise to a low pressure area, whereas the sea is relatively cool and the pressure over sea it relatively high. During the day the land heats up faster and become warmer than the sea. Therfre, over the land the air rises giving rise to a low pressure area, whereas the sea is relatively cool and the pressure over sea it relatively high."
semantic_check(database, 1, query) # complete answer but extremely paraphrased + incorrect + incorrect dup

0.8826870918273926

In [47]:
query = "Surface run-off frequently just flows down swallow and sink holes as subterranean streams, reemerging downstream through a cave mouth. During the day the land heats up faster and become warmer than the sea. Therfre, over the land the air rises giving rise to a low pressure area, whereas the sea is relatively cool and the pressure over sea it relatively high. Surface run-off frequently just flows down swallow and sink holes as subterranean streams, reemerging downstream through a cave mouth. During the day the land heats up faster and become warmer than the sea. Therfre, over the land the air rises giving rise to a low pressure area, whereas the sea is relatively cool and the pressure over sea it relatively high."
semantic_check(database, 1, query) # complete answer but extremely paraphrased + incorrect

0.7562952637672424

In [138]:
query = "During the day the land heats up faster and become warmer than the sea."
semantic_check(database, 1, query) # incorrect answer (small)

0.0


1.3018137216567993

In [139]:
query = "During the day the land heats up faster and become warmer than the sea. During the day the land heats up faster and become warmer than the sea."
semantic_check(database, 1, query) # incorrect answer (small) dup

0.0


1.2998847961425781

In [140]:
query = "During the day the land heats up faster and become warmer than the sea. During the day the land heats up faster and become warmer than the sea. During the day the land heats up faster and become warmer than the sea. During the day the land heats up faster and become warmer than the sea."
semantic_check(database, 1, query) # incorrect answer (small) ext dup

0.0


1.3155983686447144

In [141]:
query = "During the day the land heats up faster and become warmer than the sea. Therfre, over the land the air rises giving rise to a low pressure area, whereas the sea is relatively cool and the pressure over sea it relatively high. Thus, pressure gradient from sea to land is created and the wind blows from the sea to the land which is known sea breeze. In the night the reversal of condition takes place. The land loses heat faster and is cooler than the sea. The pressure gradient is from the land to the sea. This breeze is known land breeze. Dumbass! During the day the land heats up faster and become warmer than the sea."
semantic_check(database, 1, query) # incorrect answer (large)

0.02247191011235955


1.3219001293182373

In [161]:
query = "During the day the land heats up faster and become warmer than the sea. Therfre, over the land the air rises giving rise to a low pressure area, whereas the sea is relatively cool and the pressure over sea it relatively high. Thus, pressure gradient from sea to land is created and the wind blows from the sea to the land which is known sea breeze. In the night the reversal of condition takes place. The land loses heat faster and is cooler than the sea. The pressure gradient is from the land to the sea. This breeze is known land breeze. Dumbass! During the day the land heats up faster and become warmer than the sea. During the day the land heats up faster and become warmer than the sea. Therfre, over the land the air rises giving rise to a low pressure area, whereas the sea is relatively cool and the pressure over sea it relatively high. In the night the reversal of condition takes place. The land loses heat faster and is cooler than the sea."
semantic_check(database, 1, query) # incorrect answer (large) + dup

OSError: Unable to open: Invalid argument (22)

In [176]:
query = "During the day the land heats up faster and become warmer than the sea. Therfre, over the land the air rises giving rise to a low pressure area, whereas the sea is relatively cool and the pressure over sea it relatively high. Thus, pressure gradient from sea to land is created and the wind blows from the sea to the land which is known sea breeze. In the night the reversal of condition takes place. The land loses heat faster and is cooler than the sea. The pressure gradient is from the land to the sea. This breeze is known land breeze. Dumbass! During the day the land heats up faster and become warmer than the sea. During the day the land heats up faster and become warmer than the sea. Therfre, over the land the air rises giving rise to a low pressure area, whereas the sea is relatively cool and the pressure over sea it relatively high. In the night the reversal of condition takes place. The land loses heat faster and is cooler than the sea. During the day the land heats up faster and become warmer than the sea. Therfre, over the land the air rises giving rise to a low pressure area, whereas the sea is relatively cool and the pressure over sea it relatively high. Thus, pressure gradient from sea to land is created and the wind blows from the sea to the land which is known sea breeze. In the night the reversal of condition takes place. The land loses heat faster and is cooler than the sea. The pressure gradient is from the land to the sea. This breeze is known land breeze. Dumbass! During the day the land heats up faster and become warmer than the sea. During the day the land heats up faster and become warmer than the sea. Therfre, over the land the air rises giving rise to a low pressure area, whereas the sea is relatively cool and the pressure over sea it relatively high. In the night the reversal of condition takes place. The land loses heat faster and is cooler than the sea."
semantic_check(database, 1, query) # incorrect answer (large) + dup + ext dup

0.02247191011235955


1.3185865879058838

In [177]:
query = " "
semantic_check(database, 1, query) # boundary case: space

0.0


1.405097246170044

In [178]:
query = ""
semantic_check(database, 1, query) # boundary case: empty string

0.0


1.405097246170044