In [1]:
import random
import string

import jellyfish
import pandas as pd
import spacy

# Introducing Helper Functions

In [2]:
def dice_coefficient(a, b):
    """Return the Dice coefficient 2*nt / (na + nb) of two strings.

    The comparison is case-insensitive and works on the sets of distinct
    characters of the two strings (not on bigrams): ``na`` and ``nb`` are the
    numbers of distinct characters of ``a`` and ``b``, ``nt`` is the number of
    distinct characters they have in common.

    Parameters
    ----------
    a, b : str
        The strings to compare.

    Returns
    -------
    float
        A value between 0.0 and 1.0. When both strings are empty there is
        nothing to compare and 0.0 is returned (instead of dividing by zero).
    """
    a_chars = set(a.lower())
    b_chars = set(b.lower())
    total = len(a_chars) + len(b_chars)
    if total == 0:
        # Both inputs are empty: avoid ZeroDivisionError.
        return 0.0
    overlap = len(a_chars & b_chars)
    return overlap * 2.0 / total

In [3]:
def clear_special_characters(s1, s2):
    """Strip ASCII punctuation from both strings and return them as a pair.

    A string that contains at least one character of ``string.punctuation`` is
    lower-cased and has all of those characters removed. A string without any
    punctuation is returned exactly as passed in (it is NOT lower-cased).
    """
    drop_punctuation = str.maketrans('', '', string.punctuation)

    def _sanitize(text):
        for symbol in string.punctuation:
            if symbol in text:
                return text.lower().translate(drop_punctuation)
        return text

    return _sanitize(s1), _sanitize(s2)

In [13]:
def stop_words_handling(term):
    """Remove stop words from a whitespace-separated term.

    The stop words are "for", "and", "of", "in", "via" and "be" (matched
    case-sensitively). If the first token of the term is itself a stop word,
    that word is kept everywhere in the term, because it is taken out of the
    stop-word set before filtering.

    Parameters
    ----------
    term : str
        The term to filter.

    Returns
    -------
    str
        The remaining tokens joined by single spaces; "" if ``term`` contains
        no tokens at all (empty or whitespace-only input).
    """
    tokens = term.split()
    if not tokens:
        # Nothing to filter; also avoids indexing tokens[0] on an empty list.
        return ""

    stop_words = {"for", "and", "of", "in", "via", "be"}
    # A leading stop word is part of the term proper and must survive.
    stop_words.discard(tokens[0])

    return " ".join(token for token in tokens if token not in stop_words)

In [14]:
def clean_string(s):
    """Return ``s`` lower-cased with every ``string.punctuation`` character removed."""
    # Translating a string that has no punctuation leaves it untouched,
    # so no separate "contains punctuation" check is required.
    return s.lower().translate(str.maketrans('', '', string.punctuation))

In [15]:
def clean_string_pair_and_reduce_expansion(abb, term):
    """Clean an (abbreviation, expansion) pair and reduce the expansion to its initials.

    Both strings are lower-cased and passed through ``clear_special_characters``.
    The expansion is then filtered by ``stop_words_handling`` and replaced by
    the concatenated first characters of its remaining tokens.

    Returns
    -------
    tuple of (str, str)
        The cleaned abbreviation and the initial letters of the cleaned,
        stop-word-free expansion.
    """
    cleaned_abbreviation, cleaned_term = clear_special_characters(abb.lower(), term.lower())
    remaining_tokens = stop_words_handling(cleaned_term).split()
    reduced_expansion = ''.join(token[0] for token in remaining_tokens)
    return cleaned_abbreviation, reduced_expansion

# Characteristics of abbreviation data

In [6]:
# Load the abbreviation database and list every row as "index: abbreviation| expansion".
data = pd.read_csv('abbr_db.CSV', sep=';', names=['abbr', 'long_forms'], encoding='utf8')
abbr = list(data['abbr'].values)
expansions = list(data['long_forms'].values)
for i, (abb, expansion) in enumerate(zip(abbr, expansions)):
    print("".join([str(i), ": ", abb, "| ", expansion]))

0: AA| Anti-alias
1: AAC| Advanced Audio Coding
2: AAM| autmoatic acoustic management
3: AAS| Auto Area Segmentation
4: AAS| as a service
5: ABC| Atanasoff-Berry Computer
6: ABIOS| Advanced BIOS
7: ABP| AdBlock Plus
8: AC| alternating current
9: ACCT| account
10: ACE| access control entry
11: Ack| acknowledgment
12: ACL| access control list
13: ACM| Association for Computing Machinery
14: ACPI| Advanced Configuration and Power Interface
15: ACR| annual compliance report
16: ACR| actual cell rate
17: ACR| attenuation crosstalk ratio
18: ACR| absolute cell reference
19: ACS| access control system
20: AD| Active Directory
21: ADB| Apple Desktop Bus
22: ADB| Android Debug Bridge
23: ADC| analog-to-digital
24: ADO| ActiveX Data Object
25: ADPCM| adaptive delta pulse code modulation
26: ADSI| analog display service interface
27: ADSI| Active Directory Service Interface
28: ADSL| asymmetric digital subscriber line
29: ADSM| ADSTAR Distributed Storage Management
30: AE| automatic exposure
31: 

In [16]:
# Average similarity between the raw pairs (a, t), i.e. before any pre-processing
similarity_measures = [jellyfish.levenshtein_distance, jellyfish.jaro_winkler_similarity, dice_coefficient]
for sim in similarity_measures:
    tmp_sim = 0
    for abb, e in zip(abbr, expansions):
        if sim == jellyfish.levenshtein_distance:
            # Levenshtein is an edit distance: normalise it by the longer string and
            # invert it so that it is comparable with the two similarity scores.
            tmp_sim = tmp_sim + (1 - sim(abb, e) / max(len(abb), len(e)))
        else:
            tmp_sim = tmp_sim + sim(abb, e)
    print(tmp_sim/len(abbr))

0.09244587264953216
0.3096125301007985
0.41868180511878067


In [24]:
# Average similarity between the cleaned pairs (a^{c}, t^{c})
similarity_measures = [jellyfish.levenshtein_distance, jellyfish.jaro_winkler_similarity, dice_coefficient]
for sim in similarity_measures:
    is_edit_distance = sim == jellyfish.levenshtein_distance
    tmp_sim = 0
    for abb, expansion in zip(abbr, expansions):
        a_clean = clean_string(abb)
        t_clean = clean_string(expansion)
        score = sim(a_clean, t_clean)
        if is_edit_distance:
            # turn the edit distance into a similarity relative to the longer string
            score = 1 - score / max(len(a_clean), len(t_clean))
        tmp_sim += score
    print(tmp_sim / len(abbr))

0.18185380910770227
0.6364646881747009
0.4223538221671407


In [18]:
# Average similarity between the raw abbreviation a and the initial letters â of the raw expansion
similarity_measures = [jellyfish.levenshtein_distance, jellyfish.jaro_winkler_similarity, dice_coefficient]
for sim in similarity_measures:
    is_edit_distance = sim == jellyfish.levenshtein_distance
    tmp_sim = 0
    for abb, exp in zip(abbr, expansions):
        pot_abb = ''.join(token[0] for token in exp.split())
        score = sim(abb, pot_abb)
        if is_edit_distance:
            # turn the edit distance into a similarity relative to the longer string
            score = 1 - score / max(len(abb), len(pot_abb))
        tmp_sim += score
    print(tmp_sim / len(abbr))

0.3613525520945073
0.42247094680421177
0.8611033252550622


In [19]:
# Average similarity after pre-processing, between the cleaned abbreviation a^{c}
# and the initial letters â^{c} of the cleaned, stop-word-free expansion
similarity_measures = [jellyfish.levenshtein_distance, jellyfish.jaro_winkler_similarity, dice_coefficient]
for sim in similarity_measures:
    is_edit_distance = sim == jellyfish.levenshtein_distance
    tmp_sim = 0
    for abb, expansion in zip(abbr, expansions):
        abb_cleaned, exp_cleaned = clean_string_pair_and_reduce_expansion(abb, expansion)
        score = sim(abb_cleaned, exp_cleaned)
        if is_edit_distance:
            # turn the edit distance into a similarity relative to the longer string
            score = 1 - score / max(len(abb_cleaned), len(exp_cleaned))
        tmp_sim += score
    print(tmp_sim / len(abbr))

0.7961787056286678
0.8958528772830068
0.864156008542907


In [9]:
# Average length of the abbreviations after pre-processing.
# Only the cleaned abbreviation is needed, so a dummy expansion is passed along.
cleaned_lengths = [
    len(clean_string_pair_and_reduce_expansion(abb, "Platzhalter")[0])
    for abb in abbr
]
print(sum(cleaned_lengths) / len(abbr))

3.5498320268756998


In [10]:
# Construction and cardinality of S: every abbreviation paired with every
# expansion that is listed under a different abbreviation.
S = {
    (short_form, long_form)
    for short_form in abbr
    for other_short_form, long_form in zip(abbr, expansions)
    if short_form != other_short_form
}
print(len(S))

2710125


# ILLOD with its Methods

In [160]:
def check_initial_letters(a, t):
    """Tell whether ``a`` consists of the initial letters of the tokens of ``t``.

    Parameters
    ----------
    a : str
        The abbreviation.
    t : str
        The term; it is split on whitespace into tokens.

    Returns
    -------
    bool
        True if the concatenated first characters of the tokens equal ``a``
        either as they are or after upper-casing them; False otherwise.
    """
    initials = ''.join(token[0] for token in t.split())
    # Always return a real bool (the comparison is False, not None, on mismatch).
    return initials == a or initials.upper() == a

In [161]:
def check_length_consistency(a, t):
    """Return True when ``t`` has no more whitespace-separated tokens than ``a`` has characters."""
    token_count = len(t.split())
    return token_count <= len(a)

In [177]:
def check_order(a, t):
    """Check whether the characters of ``a`` occur in ``t`` in the same order.

    The comparison ignores case and walks both strings from their ends: each
    abbreviation character, taken from last to first, is matched with the
    rightmost occurrence in the term that lies before the previous match.

    Returns
    -------
    tuple of (bool, list of int)
        ``(True, positions)`` when every character could be matched, where
        ``positions`` holds the index in ``t`` of each matched character in
        abbreviation order; ``(False, [])`` otherwise.
    """
    needle = a.lower()[::-1]
    haystack = t.lower()[::-1]
    term_length = len(t)

    cursor = 0          # first index of the reversed term that is still unused
    positions = []
    for letter in needle:
        hit = haystack.find(letter, cursor)
        if hit == -1:
            # one unmatched character means the full abbreviation cannot be matched
            return False, []
        cursor = hit + 1
        # convert the index in the reversed term back to an index in t
        positions.append(term_length - cursor)

    positions.reverse()
    return True, positions

In [163]:
def check_distribution_of_matching_characters(pos_of_chars_list, t):
    """Check how the matched abbreviation characters are spread over the words of ``t``.

    Parameters
    ----------
    pos_of_chars_list : list of int
        Indices into ``t`` of the characters that were matched to the
        abbreviation (``illod`` passes the position list from ``check_order``).
    t : str
        The term whose words are inspected.

    Returns
    -------
    bool
        True if ``t`` yields at most one word, or if no word after the first
        one ends up with exactly one entry that is different from 0 (see the
        comments below for what the entries mean); False otherwise.
    """
    # Build, for every word of t, the list of character indices it occupies.
    # Words are delimited by single " " characters here.
    term_intervals = []
    len_of_term = len(t)
    i = 0
    while i < len_of_term:
        sublist = []
        j = i
        while j < len_of_term and t[j] != " ":
            sublist.append(j)
            j = j+ 1
        # skip the space that terminated the word
        i = j+1
        term_intervals.append(sublist)

    # NOTE(review): term_intervals and splitted_term are indexed in parallel below,
    # which assumes the words of t are separated by exactly one space - confirm for new callers.
    splitted_term = t.split()

    # For every word collect one entry per matched position that falls into it:
    #   0      -> the matched character equals the first character of the word
    #   k > 0  -> otherwise, the offset of the matched position inside the word
    #   [-1]   -> placeholder when no matched position falls into the word
    containment_list = []
    for i, interval in enumerate(term_intervals):
        contanment_sublist = []
        for pos in pos_of_chars_list:
            if (pos in interval) and (splitted_term[i][0] == t[pos]):
                contanment_sublist.append(0)
            elif pos in interval:
                contanment_sublist.append(interval.index(pos))
        if len(contanment_sublist) == 0:
            contanment_sublist.append(-1)
        containment_list.append(contanment_sublist)

    result_of_distribution_check = False
    if len(containment_list) <= 1:
        # zero or one word: nothing to distribute
        result_of_distribution_check = True
    elif len (containment_list) >= 2:
        # Every word after the first is rejected when its only entry is not 0,
        # i.e. it has no match at all or a single match that is not its initial character.
        non_zero_count = 0
        for sublist in containment_list[1:]:
            if len(sublist) == 1 and 0 not in sublist:
                non_zero_count += 1
        if non_zero_count == 0:
            result_of_distribution_check = True

    return result_of_distribution_check

In [164]:
def illod(abbv, term, threshold=None):
    """Decide whether ``abbv`` is an abbreviation of ``term``.

    The pair is rejected right away unless both strings are non-empty and
    start with the same letter (case-insensitively). After that the checks
    run in this order, and the first one that succeeds accepts the pair:

    (a) the initial letters of the tokens of ``term`` spell ``abbv``;
    (b) the cleaned abbreviation equals the initial letters of the cleaned,
        stop-word-free term;
    (c) the length, order and distribution checks all pass on the strings
        that were stripped of special characters and stop words.

    Parameters
    ----------
    abbv : str
        Candidate abbreviation.
    term : str
        Candidate expansion.
    threshold : any, optional
        Not used. It is accepted so that ``illod`` can be called as
        ``algo(abbv, term, threshold)`` like the threshold-based classifiers.

    Returns
    -------
    bool
        True if the pair is accepted, False otherwise.
    """
    # An empty string has no first letter to compare and cannot form a pair.
    if not abbv or not term:
        return False
    if abbv[0].lower() != term[0].lower():
        return False

    # Step (a): initial letters of the tokens in term match the abbreviation.
    if check_initial_letters(abbv, term):
        return True

    # Step (b): compare after removing special characters and stop words.
    a_, t_ = clean_string_pair_and_reduce_expansion(abbv, term)
    if a_ == t_:
        return True

    sanitized_abbv, sanitized_term = clear_special_characters(abbv, term)
    sanitized_term_without_stopswords = stop_words_handling(sanitized_term)

    # Step (c): lengths, order and distribution of the matching characters.
    length_consistency = check_length_consistency(sanitized_abbv, sanitized_term_without_stopswords)
    order, pos_of_chars_list = check_order(sanitized_abbv, sanitized_term_without_stopswords)
    distribution = check_distribution_of_matching_characters(pos_of_chars_list, sanitized_term_without_stopswords)

    return bool(length_consistency and order and distribution)

# Classifiers based on syntactic similarity

In [165]:
def dice_coefficient_on_reduction_of_expansion(a, term, threshold):
    """Accept the pair when the Dice coefficient of the cleaned abbreviation and
    the reduced expansion is at least ``threshold``."""
    cleaned_abbreviation, reduced_expansion = clean_string_pair_and_reduce_expansion(a, term)
    return dice_coefficient(cleaned_abbreviation, reduced_expansion) >= threshold

In [166]:
def levensthein_distance_on_reduction_of_expansion(a, term, threshold):
    """Accept the pair when the Levenshtein distance between the cleaned abbreviation
    and the reduced expansion is at most ``threshold``."""
    cleaned_abbreviation, reduced_expansion = clean_string_pair_and_reduce_expansion(a, term)
    return jellyfish.levenshtein_distance(cleaned_abbreviation, reduced_expansion) <= threshold

In [167]:
def jaro_winkler_similarity_on_reduction_of_expansion(a, term, threshold):
    """Accept the pair when the Jaro-Winkler similarity of the cleaned abbreviation
    and the reduced expansion is at least ``threshold``."""
    cleaned_abbreviation, reduced_expansion = clean_string_pair_and_reduce_expansion(a, term)
    return jellyfish.jaro_winkler_similarity(cleaned_abbreviation, reduced_expansion) >= threshold

In [168]:
def simple_jaro_winkler_similarity(a, term, threshold):
    """Accept the pair when the Jaro-Winkler similarity of the unprocessed strings
    is at least ``threshold``."""
    score = jellyfish.jaro_winkler_similarity(a, term)
    return score >= threshold

In [169]:
def simple_dice_coefficient(a, term, threshold):
    """Accept the pair when the Dice coefficient of the unprocessed strings
    is at least ``threshold``."""
    score = dice_coefficient(a, term)
    return score >= threshold

# Base Algorithm

In [170]:
def base_algo(abbv, term, threshold=None):
    """Baseline classifier: accept the pair when ``check_order`` succeeds.

    ``threshold`` is not used; it only keeps the call signature in line with
    the other classifiers.
    """
    return check_order(abbv, term)[0]

# Data and Evaluation

In [4]:
# (Re)load the abbreviation database used by the evaluation below.
data = pd.read_csv(
    'abbr_db.CSV',
    sep=';',
    names=['abbr', 'long_forms'],
    encoding='utf8',
)
abbr, expansions = (list(data[column].values) for column in ('abbr', 'long_forms'))

In [172]:
def find_and_count_false_negatives(algo, threshold):
    """Count the known pairs from ``abbr``/``expansions`` that ``algo`` rejects.

    ``algo`` is called as ``algo(abbreviation, expansion, threshold)`` for each
    listed pair.

    Returns
    -------
    tuple of (int, str)
        The number of rejected pairs and a human-readable summary.
    """
    FN = sum(
        1
        for short_form, long_form in zip(abbr, expansions)
        if not algo(short_form, long_form, threshold)
    )
    summary = str(FN) + " FALSE NEGATIVES. Pairs that could not be detected out of " + str(len(abbr)) + " given pairs"
    return FN, summary

In [173]:
def find_and_count_false_positives(algo, threshold, alpha):
    """Count how many randomly drawn non-matching pairs ``algo`` accepts.

    Pairs are drawn by picking two random rows and combining the abbreviation
    of the first with the expansion of the second; draws whose two
    abbreviations are equal are discarded. Drawing stops once the test set
    holds more than ``alpha * len(abbr)`` pairs.

    Returns
    -------
    tuple of (int, str)
        The number of accepted pairs and a human-readable summary.
    """
    last_index = len(abbr) - 1
    required = alpha * len(abbr)

    test_set = []
    while len(test_set) <= required:
        rd1 = random.randint(0, last_index)
        rd2 = random.randint(0, last_index)
        if abbr[rd1] == abbr[rd2]:
            continue
        test_set.append((abbr[rd1], expansions[rd2]))

    FP = sum(1 for short_form, long_form in test_set if algo(short_form, long_form, threshold))
    summary = str(FP) + " FALSE POSITIVE detections out of " +  str(len(test_set)) + " created false examples"
    return FP, summary

In [174]:
# For each alpha, sweep the threshold from 0.00 to 1.00 and report
# [threshold, precision, recall, f1] of the threshold with the highest F1.
for alpha in [8, 16, 24, 48, 72]:
    print("alpha: " + str(alpha))
    algorithms = [dice_coefficient_on_reduction_of_expansion, jaro_winkler_similarity_on_reduction_of_expansion]
    for algo in algorithms:
        max_f1 = 0
        best_values = []
        for th in range(0,101):
            th_ = th/100
            FN = find_and_count_false_negatives(algo, th_)[0]
            FP = find_and_count_false_positives(algo, th_, alpha)[0]
            TP = len(abbr) - FN
            # A threshold that accepts nothing gives empty denominators;
            # score such a ratio as 0.0 instead of raising ZeroDivisionError.
            precision = TP/(TP + FP) if TP + FP else 0.0
            recall = TP/(TP + FN) if TP + FN else 0.0
            f1 = (2*precision*recall)/(precision+recall) if precision + recall else 0.0
            if f1 > max_f1:
                best_values = [th_, precision, recall, f1]
                max_f1 = f1
        print(best_values)
    print("#########################################################")

alpha: 8
[0.67, 0.9251837007348029, 0.7754759238521837, 0.8437404812671337]
[0.75, 0.9021932424422051, 0.8521836506159015, 0.8764756694500432]
#########################################################
alpha: 16
[0.71, 0.8491722869405273, 0.7754759238521837, 0.8106526192566579]
[0.79, 0.9346049046321526, 0.7681970884658454, 0.8432698217578366]
#########################################################
alpha: 24
[0.76, 0.8085867620751341, 0.7592385218365062, 0.7831360092405428]
[0.79, 0.9014454664914586, 0.7681970884658454, 0.8295042321644497]
#########################################################
alpha: 48
[0.84, 0.8721017202692596, 0.6528555431131019, 0.7467178994556517]
[0.83, 0.8560460652591171, 0.7491601343784995, 0.7990444908928038]
#########################################################
alpha: 72
[0.82, 0.805939226519337, 0.6534154535274356, 0.7217068645640075]
[0.84, 0.9012527634487841, 0.6847704367301232, 0.7782373528475979]
##################################################

In [175]:
# For each alpha, try the Levenshtein thresholds 0..3 and report
# [threshold, precision, recall, f1] of the threshold with the highest F1.
for alpha in [8, 16, 24, 48, 72]:
    print("alpha: " + str(alpha))
    max_f1 = 0
    best_values = []
    for th_ in range(0,4):
        FN = find_and_count_false_negatives(levensthein_distance_on_reduction_of_expansion, th_)[0]
        FP = find_and_count_false_positives(levensthein_distance_on_reduction_of_expansion, th_, alpha)[0]
        TP = len(abbr) - FN
        # Score a ratio with an empty denominator as 0.0 instead of raising ZeroDivisionError.
        precision = TP/(TP + FP) if TP + FP else 0.0
        recall = TP/(TP + FN) if TP + FN else 0.0
        f1 = (2*precision*recall)/(precision+recall) if precision + recall else 0.0
        if f1 > max_f1:
            best_values = [th_, precision, recall, f1]
            max_f1 = f1
    print(best_values)
print("#########################################################")

alpha: 8
[1, 0.9177295918367347, 0.8057110862262038, 0.8580799045915325]
alpha: 16
[1, 0.8251146788990825, 0.8057110862262038, 0.8152974504249291]
alpha: 24
[1, 0.7638004246284501, 0.8057110862262038, 0.7841961852861035]
alpha: 48
[0, 0.9900891972249752, 0.5593505039193729, 0.7148479427549195]
alpha: 72
[0, 0.9794117647058823, 0.5593505039193729, 0.7120456165359944]
#########################################################


In [178]:
# For each alpha, report [precision, recall, f1] of ILLOD and of the base algorithm.
for alpha in [8, 16, 24, 48, 72]:
    print("alpha: " + str(alpha))
    algorithms = [illod, base_algo]
    for algo in algorithms:
        th_ = 0  # neither algorithm uses a threshold; passed only to fit the common call signature
        FN = find_and_count_false_negatives(algo, th_)[0]
        FP = find_and_count_false_positives(algo, th_, alpha)[0]
        TP = len(abbr) - FN
        # Keep the true counts: replacing a zero count by 1 would distort the
        # metrics (e.g. FP == 0 must give precision 1.0). Only a ratio whose
        # denominator is empty is undefined, and it is scored as 0.0.
        precision = TP/(TP + FP) if TP + FP else 0.0
        recall = TP/(TP + FN) if TP + FN else 0.0
        f1 = (2*precision*recall)/(precision+recall) if precision + recall else 0.0
        evaluation_values = [precision, recall, f1]
        print(evaluation_values)

alpha: 8
[0.9810975609756097, 0.9008958566629339, 0.9392877991827202]
[0.5913194444444444, 0.9535274356103024, 0.7299614230604372]
alpha: 16
[0.9722054380664653, 0.9008958566629339, 0.9351932577739029]
[0.4117504835589942, 0.9535274356103024, 0.5751435325903411]
alpha: 24
[0.9617453676031081, 0.9008958566629339, 0.9303266840127203]
[0.3227212431305666, 0.9535274356103024, 0.4822313464533484]
alpha: 48
[0.9305957200694043, 0.9008958566629339, 0.9155049786628735]
[0.19057743957027753, 0.9535274356103024, 0.3176646148106696]
alpha: 72
[0.8998881431767338, 0.9008958566629339, 0.9003917179630666]
[0.13878249531415532, 0.9535274356103024, 0.2422992103578288]


In [179]:
# For each alpha, sweep the threshold from 0.00 to 1.00 for the measures applied
# to the unprocessed strings and report [threshold, precision, recall, f1] of the
# threshold with the highest F1.
for alpha in [8, 16, 24, 48, 72]:
    print("alpha: " + str(alpha))
    algorithms = [simple_dice_coefficient, simple_jaro_winkler_similarity]
    for algo in algorithms:
        max_f1 = 0
        best_values = []
        for th in range(0,101):
            th_ = th/100
            FN = find_and_count_false_negatives(algo, th_)[0]
            FP = find_and_count_false_positives(algo, th_, alpha)[0]
            TP = len(abbr) - FN
            # Keep the true counts: replacing a zero count by 1 would distort the
            # metrics (e.g. FN == 0 must give recall 1.0). Only a ratio whose
            # denominator is empty is undefined, and it is scored as 0.0.
            precision = TP/(TP + FP) if TP + FP else 0.0
            recall = TP/(TP + FN) if TP + FN else 0.0
            f1 = (2*precision*recall)/(precision+recall) if precision + recall else 0.0
            if f1 > max_f1:
                best_values = [th_, precision, recall, f1]
                max_f1 = f1
        print(best_values)
    print("#########################################################")

alpha: 8
[0.36, 0.32583957812933667, 0.6573348264277715, 0.43570235665244017]
[0.52, 0.6700091157702825, 0.41153415453527437, 0.509885535900104]
#########################################################
alpha: 16
[0.37, 0.19894086496028243, 0.6310190369540873, 0.302509730237552]
[0.52, 0.49528301886792453, 0.41153415453527437, 0.44954128440366975]
#########################################################
alpha: 24
[0.38, 0.14744370054777844, 0.5425531914893617, 0.23187365398420673]
[0.55, 0.5366269165247018, 0.3527435610302352, 0.4256756756756757]
#########################################################
alpha: 48
[0.41, 0.08602778102019376, 0.41265397536394177, 0.14237419105573262]
[0.57, 0.40355677154582764, 0.3303471444568869, 0.36330049261083747]
#########################################################
alpha: 72
[0.61, 0.14925373134328357, 0.08958566629339305, 0.1119664100769769]
[0.58, 0.36629526462395545, 0.2945128779395297, 0.32650527622594666]
#################################

# Characteristics of requirements data

In [None]:
# Load the spaCy pipeline "en_core_web_sm"; it is used below to extract
# noun chunks from the requirement texts.
nlp = spacy.load("en_core_web_sm")

In [None]:
# Load the tab-separated requirements data and expose each column as a plain list.
pure_data = pd.read_csv(
    'pure_data.CSV',
    sep='\t',
    names=["dataset", "id", "req_texts"],
    encoding='utf8',
)
dataset, ids, reqs = (list(pure_data[column].values) for column in ("dataset", "id", "req_texts"))

In [None]:
def upper_ratio(w):
    """Return the fraction of characters of ``w`` that are upper-case.

    Parameters
    ----------
    w : str
        The word to inspect.

    Returns
    -------
    float
        Number of upper-case characters divided by the length of ``w``;
        0.0 for an empty string (instead of dividing by zero).
    """
    if not w:
        return 0.0
    return sum(1 for c in w if c.isupper()) / len(w)

In [None]:
# A word counts as an abbreviation candidate when it is short and
# contains a sufficient share of upper-case characters.
MAX_ABBREVIATION_LENGTH = 8
MIN_UPPER_RATIO = 0.33

abbv_set = set()
ordinary_terms = set()
for req in reqs:
    doc = nlp(req)
    for chunk in doc.noun_chunks:
        term = chunk.text
        for word in term.split():
            if len(word) <= MAX_ABBREVIATION_LENGTH and upper_ratio(word) >= MIN_UPPER_RATIO:
                abbv_set.add(word)
            else:
                # NOTE(review): the whole noun chunk is stored as an ordinary term as soon
                # as one of its words is not an abbreviation candidate, even when another
                # word of the same chunk is one - confirm that this is intended.
                ordinary_terms.add(term)
print(len(ordinary_terms))
print(len(abbv_set))
# The ratio is undefined when no abbreviation candidate was found.
print(len(ordinary_terms)/len(abbv_set) if abbv_set else float("nan"))

In [None]:
# Show the collected abbreviation candidates for manual inspection.
print(abbv_set)

In [None]:
def strict_base_algo(SF, LF):
    """Search ``LF`` from right to left for the characters of the short form ``SF``.

    The alphanumeric characters of ``SF`` are matched case-insensitively, from
    the last one to the first one, against ``LF`` while moving leftwards. The
    first character of ``SF`` must additionally match at a position that is
    not preceded by an alphanumeric character (i.e. at the start of a word).
    NOTE(review): this looks like the best-long-form search of Schwartz and
    Hearst - confirm against the intended reference.

    Parameters
    ----------
    SF : str
        Short form (abbreviation).
    LF : str
        Long form candidate.

    Returns
    -------
    str or bool
        The part of ``LF`` that starts at the word containing the match of the
        first short-form character, or False if some character of ``SF`` could
        not be matched.
    """
    lIndex = len(LF) - 1
    # Stop value -1 so that index 0, the first character of SF, is matched as well.
    for sIndex in range(len(SF) - 1, -1, -1):
        currChar = SF[sIndex].lower()
        if not currChar.isalnum():
            continue
        # Move left until the character matches; for the first short-form
        # character also keep moving while the match is not at a word start.
        while (lIndex >= 0 and LF[lIndex].lower() != currChar) or (
            sIndex == 0 and lIndex > 0 and LF[lIndex - 1].isalnum()
        ):
            lIndex -= 1
        if lIndex < 0:
            return False
        lIndex -= 1

    # Last space at or before lIndex. Searching with explicit bounds keeps
    # lIndex == -1 meaning "nothing to the left" rather than "all but the last character".
    start = LF.rfind(" ", 0, lIndex + 1) + 1
    return LF[start:]

In [None]:
# some testing of order matching

In [60]:
def check_order_old_version(a, t):
    """Earlier variant of the order check, kept for comparison.

    All abbreviation characters except the first are matched from right to
    left against the term (case-insensitively). The first abbreviation
    character is only compared with the first character of the term.

    Returns
    -------
    tuple of (bool, list of int, str)
        Whether every character was matched, the matched positions in ``t``
        (empty on failure), and the reversed string of matched characters.
    """
    needle = a.lower()[::-1]
    haystack = t.lower()[::-1]
    term_length = len(t)
    final_index = len(needle) - 1

    cursor = 0
    positions = []
    matched = ""
    for j, letter in enumerate(needle):
        if j == final_index:
            # first abbreviation character: must equal the first character of the term
            if letter == haystack[-1]:
                matched += letter
                positions.append(0)
            continue
        hit = haystack.find(letter, cursor)
        if hit != -1:
            matched += letter
            cursor = hit + 1
            positions.append(term_length - cursor)

    if matched != needle:
        return False, [], matched
    return True, positions[::-1], matched

In [98]:
def check_order_new_version(a, t):
    """Intermediate variant of the order check that prints debugging output.

    All abbreviation characters except the first are matched from right to
    left against the term (case-insensitively); the list of positions is
    printed after every match. The first abbreviation character is accepted
    without comparison, but only if something was matched before and exactly
    one character of the term is left; its index in the abbreviation is
    printed beforehand.

    NOTE(review): a later cell defines another function with the same name;
    whichever of the two cells ran last determines what the name refers to.

    Returns
    -------
    tuple of (bool, list of int, str)
        Whether every character was matched, the matched positions in ``t``
        (empty on failure), and the reversed string of matched characters.
    """
    needle = a.lower()[::-1]
    haystack = t.lower()[::-1]
    term_length = len(t)
    final_index = len(needle) - 1

    cursor = 0
    positions = []
    matched = ""
    for j, letter in enumerate(needle):
        if j == final_index:
            print(j)
            if positions and cursor == len(haystack) - 1:
                matched += letter
                positions.append(0)
            continue
        hit = haystack.find(letter, cursor)
        if hit == -1:
            continue
        matched += letter
        cursor = hit + 1
        positions.append(term_length - cursor)
        print(positions)

    if matched != needle:
        return False, [], matched
    return True, positions[::-1], matched

In [13]:
def check_order_new_version(a, t):
    """Order check that also reports which characters were matched.

    Each abbreviation character, taken from last to first, is matched
    case-insensitively with the rightmost occurrence in the term that lies
    before the previous match. Characters that cannot be matched are skipped.

    Returns
    -------
    tuple of (bool, list of int, str)
        Whether every character was matched, the matched positions in ``t``
        in abbreviation order (empty on failure), and the reversed string of
        the characters that were matched.
    """
    needle = a.lower()[::-1]
    haystack = t.lower()[::-1]
    term_length = len(t)

    cursor = 0
    positions = []
    matched = ""
    for letter in needle:
        hit = haystack.find(letter, cursor)
        if hit == -1:
            # keep going: the partial match is part of the return value
            continue
        matched += letter
        cursor = hit + 1
        positions.append(term_length - cursor)

    if matched != needle:
        return False, [], matched
    return True, positions[::-1], matched

In [6]:
# Old version on a two-word term.
# NOTE(review): the recorded output is a NameError, so this cell was presumably
# run before the cell that defines check_order_old_version.
print(check_order_old_version("AA", "Anti ant"))

NameError: name 'check_order_old_version' is not defined

In [7]:
# Old version on a one-word term that contains only one "a".
# NOTE(review): the recorded output is a NameError, so this cell was presumably
# run before the cell that defines check_order_old_version.
print(check_order_old_version("AA", "Anti"))

NameError: name 'check_order_old_version' is not defined

In [8]:
# New version on a two-word term.
# NOTE(review): the recorded output contains "j = "/"i = " traces, which the
# definition above only has as commented-out prints - the output looks stale.
print(check_order_new_version("AA", "Anti ant"))

j = 0
i = 0
i = 1
i = 2
j = 1
i = 0
i = 1
i = 2
i = 3
i = 4
(True, [0, 5], 'aa')


In [9]:
# New version on a one-word term that contains only one "a".
# NOTE(review): the recorded output contains "j = "/"i = " traces, which the
# definition above only has as commented-out prints - the output looks stale.
print(check_order_new_version("AA", "Anti"))

j = 0
i = 0
i = 1
i = 2
i = 3
j = 1
(False, [], 'a')


In [17]:
# List every known pair that the order check rejects, with a running counter.
count = 0
for abb, expansion in zip(abbr, expansions):
    if check_order_new_version(abb, expansion)[0]:
        continue
    count += 1
    print(abb + "; " + expansion)
    print("new_version: FALSE!")
    print(count)
    print("##############################################")

ADC; analog-to-digital
new_version: FALSE!
1
##############################################
ASN.1; abstract syntax number one
new_version: FALSE!
2
##############################################
B2B; business-to-business
new_version: FALSE!
3
##############################################
B2B; business-to-consumer
new_version: FALSE!
4
##############################################
Bi-di; bidirectional
new_version: FALSE!
5
##############################################
CDA; compact audio disc
new_version: FALSE!
6
##############################################
CD-i; Compact Disc Interactive
new_version: FALSE!
7
##############################################
CD-R; Compact Disc Recordable
new_version: FALSE!
8
##############################################
CD-ROM; Compact Disc Read-Only Memory
new_version: FALSE!
9
##############################################
CD-RW; Compact Disc Re-Writable
new_version: FALSE!
10
##############################################
CD-WO; Compact Disc Writ