In [243]:
import spacy
import de_core_news_sm
nlp = de_core_news_sm.load()
from spacy.matcher import Matcher
from charsplit import Splitter
import string
splitter = Splitter()
import pandas as pd
import jellyfish
import random

In [244]:
def clear_special_characters(s1, s2):
    invalidcharacters = set(string.punctuation)
    if any(char in invalidcharacters for char in s1):
        s1_ = s1.lower().translate(str.maketrans('', '', string.punctuation))
    else:
        s1_ = s1
    if any(char in invalidcharacters for char in s2):
        s2_ = s2.lower().translate(str.maketrans('', '', string.punctuation))
    else:
        s2_ = s2
    return s1_, s2_

In [245]:
def stop_words_handling(term):
    splitted_term = term.split()
    stop_words = set(["for", "and", "of", "in", "&", "via", "be"])
    
    if splitted_term[0] in stop_words:
        stop_words = stop_words - set([splitted_term[0]])
                
    for sw in stop_words:
        while sw in splitted_term:
            splitted_term.remove(sw)
    sanitized_term = " ".join([w for w in splitted_term]) 
        
    return sanitized_term

In [246]:
def check_initial_letters(a, t):
    initial_letters_of_tokens_of_t = ''.join([c[0] for c in t.split()])
    if initial_letters_of_tokens_of_t == a or initial_letters_of_tokens_of_t.upper() == a:
        return True

In [247]:
def check_length_consistency(a, t):
    length_consistency = False
    if len(t.split()) <= len(a):
        length_consistency = True
    return length_consistency

In [248]:
def check_order(a, t):
    abbv_reversed = a.lower()[::-1]
    term_reversed = t.lower()[::-1]
    len_of_term = len(t)
    
    pos_memory = 0
    pos_memory_list = []
    order_matching_string_rev = ""
    
    for j, char_from_abbv in enumerate(abbv_reversed):
        if j == len(abbv_reversed) - 1:
            if char_from_abbv == term_reversed[-1]:
                order_matching_string_rev = order_matching_string_rev + char_from_abbv
                pos_memory_list.append(0)
        else:
            for i, char_from_term in enumerate(term_reversed[pos_memory:]):
                if char_from_abbv == char_from_term:
                    order_matching_string_rev = order_matching_string_rev + char_from_abbv
                    pos_memory = pos_memory + i + 1
                    pos_memory_list.append(len_of_term - pos_memory)
                    break
                
    if order_matching_string_rev == abbv_reversed:
        return True, pos_memory_list[::-1]
    else:
        return False, []

In [249]:
check_order("XMPP", "xtensibxle Messaging and Presence Protocol")

(True, [0, 11, 25, 34])

In [250]:
def check_distribution_of_matching_characters(pos_of_chars_list, t):
    term_intervals = []
    len_of_term = len(t)
    i = 0
    while i < len_of_term:
        sublist = []
        j = i
        while j < len_of_term and t[j] != " ":
            sublist.append(j)
            j = j+ 1
        i = j+1
        term_intervals.append(sublist)
        
    splitted_term = t.split()      
    
    containment_list = []
    for i, interval in enumerate(term_intervals):
        contanment_sublist = []
        for pos in pos_of_chars_list:
            if (pos in interval) and (splitted_term[i][0] == t[pos]):
                contanment_sublist.append(0)
            elif pos in interval:
                contanment_sublist.append(interval.index(pos))
        if len(contanment_sublist) == 0:
            contanment_sublist.append(-1)
        containment_list.append(contanment_sublist)
    
    result_of_distribution_check = False
    if len(containment_list) <= 1:
        result_of_distribution_check = True
    elif len (containment_list) >= 2:
        non_zero_count = 0
        for sublist in containment_list[1:]:
            if len(sublist) == 1 and 0 not in sublist:
                non_zero_count += 1
        if non_zero_count == 0:
            result_of_distribution_check = True
    
    return result_of_distribution_check

In [251]:
def base_algo(abbv, term):
    valid_order, pos_of_chars_list = check_order(abbv, term)
    return valid_order

In [252]:
def match_rating_comparison(a, term):
    if jellyfish.match_rating_comparison(a, term):
        return 1
    elif not jellyfish.match_rating_comparison(a, term):
        return 0

In [253]:
def check_wether_abbv_is_proper_short_form_of_term(abbv, term):
    if (abbv[0].lower() == term[0].lower()):
        
        
        ###################################### Step (a) ##########################################
        # check wether initial letters of tokens in t match with the letters in abbreviation
        if check_initial_letters(abbv, term):
            return True
        
        
        
        ###################################### Step (b) ########################################
        # clean abbreviation and term from special characters and stopwords
        sanitized_abbv, sanitized_term = clear_special_characters(abbv, term) 
        sanitized_term_without_stopswords = stop_words_handling(sanitized_term)
        sanitized_term_without_stopswords_splitted  = sanitized_term_wo_stopswords.split()
        
        # check wether initial letters of tokens in t' match with the letters in a'
        if check_initial_letters(sanitized_abbv, sanitized_term_without_stopswords):
            return True
        
        
        ###################################### Step (c) ##########################################
        # Sequential call of the methods that check and compare lengths, order and distribution of characters
        length_consistency = check_length_consistency(sanitized_abbv, sanitized_term_without_stopswords)
        order, pos_of_chars_list = check_order(sanitized_abbv, sanitized_term_without_stopswords)
        distribution = check_distribution_of_matching_characters(pos_of_chars_list, sanitized_term_without_stopswords)


        if length_consistency and order and distribution:
            return True
        else:
            return False

        ################################## in case nothing matches #################################
    else:
        return False

In [254]:
data = pd.read_csv('abbr_db.CSV', names=['abbr', 'long_forms'], sep=';', encoding='utf8')
abbr = list(data['abbr'].values)
long_forms = list(data['long_forms'].values)

In [255]:
def calc_fn(algo):
    FN = 0
    for i, abb in enumerate(abbr):
        if not algo(abb, long_forms[i]):
            print("\"" + abb + "\", " + "\"" + long_forms[i] + "\"")
            print("##################################")
            FN += 1
    return str(FN) + " FALSE NEGATIVES. Pairs that could not be detected out of " + str(len(abbr)) + " given pairs"

In [256]:
def calc_fp(algo):
    count_of_false_examples = 0
    FP = 0
    for i, abb in enumerate(abbr):
        len_ = len(abbr)
        for j in range (i, len_):
            if abb != abbr[j]:
                count_of_false_examples += 1
                if algo(abb, long_forms[j]):
                    print("\"" + abb + "\", " + "\"" + long_forms[j] + "\"")
                    FP +=1
    return str(FP) + " FALSE POSITIVE detections out of " +  str(count_of_false_examples) + " created false examples"

In [257]:
a = calc_fp(base_algo)
print(a)

"AA", "Advanced Audio Coding"
"AA", "autmoatic acoustic management"
"AA", "Auto Area Segmentation"
"AA", "as a service"
"AA", "Atanasoff-Berry Computer"
"AA", "abnormabl end"
"AA", "Advanced BIOS"
"AA", "AdBlock Plus"
"AA", "anonymous coward"
"AA", "Asheron's Call"
"AA", "alternating current"
"AA", "account"
"AA", "access control entry"
"AA", "acknowledgment"
"AA", "access control list"
"AA", "Association for Computing Machinery"
"AA", "Advanced Configuration and Power Interface"
"AA", "annual compliance report"
"AA", "actual cell rate"
"AA", "attenuation crosstalk ratio"
"AA", "absolute cell reference"
"AA", "access control system"
"AA", "Active Directory"
"AA", "Apple Desktop Bus"
"AA", "Android Debug Bridge"
"AA", "analog-to-digital"
"AA", "ActiveX Data Object"
"AA", "adaptive delta pulse code modulation"
"AA", "analog display service interface"
"AA", "Active Directory Service Interface"
"AA", "asymmetric digital subscriber line"
"AA", "ADSTAR Distributed Storage Management"
"AA", "

"ACPI", "annual compliance report"
"ACPI", "Advanced Encryption Standard"
"ACPI", "accelerated graphics port"
"ACPI", "aspect-oriented programming"
"ACPI", "application programming interface"
"ACPI", "Advanced Programmable Interrupt Controller"
"ACPI", "Automatic Private Internet Protocol Addressing"
"ACPI", "advanced persistent threat"
"ACPI", "advanced packaging tool"
"ACPI", "advanced processing unit"
"ACPI", "accelerated processing unit"
"ACPI", "authorized service provider"
"ACPI", "application service provider"
"ACPI", "advanced SCSI programming interface"
"ACPI", "automatic speech recognition"
"ACPI", "AT Attachment Packet Interface"
"ACPI", "acceptable use policy"
"ACR", "access control system"
"ACR", "Active Directory"
"ACR", "analog display service interface"
"ACR", "Active Directory Service Interface"
"ACR", "asymmetric digital subscriber line"
"ACR", "automatic exposure"
"ACR", "Advanced Encryption Standard"
"ACR", "accelerated graphics port"
"ACR", "Advanced Host Controlle

"AES", "American Standard Code for Information Interchange"
"AES", "American Society of Mechanical Engineers"
"AES", "autonomous sensory meridian response"
"AES", "answer set programming"
"AES", "authorized service provider"
"AES", "Active Server Pages"
"AES", "advanced SCSI programming interface"
"AES", "automated system recovery "
"AES", "advanced transfer cache"
"AES", "Adaptive Transform Acoustic Coding"
"AES", "acceptable use policy"
"AES", "automatic voice response"
"AES", "Amazon Web Services"
"AES", "Adobe Flash"
"AF", "Advanced Host Controller Interface"
"AF", "artificial intelligence"
"AF", "Artificial Intelligent Markup Language"
"AF", "Automatic Number Identification"
"AF", "application programming interface"
"AF", "Android package file"
"AF", "American Standard Code for Information Interchange"
"AF", "American Society of Mechanical Engineers"
"AF", "advanced SCSI programming interface"
"AF", "AT Attachment Packet Interface"
"AF", "advanced transfer cache"
"AF", "above the 

"AOSP", "as soon as possible"
"AOSP", "autonomous sensory meridian response"
"AOSP", "authorized service provider"
"AOSP", "application service provider"
"AOSP", "automatic speech recognition"
"AOSP", "automatic voice response"
"API", "Advanced Programmable Interrupt Controller"
"API", "Automatic Private Internet Protocol Addressing"
"API", "Android package file"
"API", "application"
"API", "advanced persistent threat"
"API", "advanced packaging tool"
"API", "advanced processing unit"
"API", "accelerated processing unit"
"API", "Application System/400"
"API", "as soon as possible"
"API", "answer set programming"
"API", "authorized service provider"
"API", "application service provider"
"API", "advanced SCSI programming interface"
"API", "automatic speech recognition"
"API", "AT Attachment Packet Interface"
"API", "Adaptive Transform Acoustic Coding"
"API", "acceptable use policy"
"APIC", "Automatic Private Internet Protocol Addressing"
"APIC", "application"
"APIC", "Application System/

"AVI", "automatic voice response"
"AVI", "automatic voice recognition"
"AVI", "Amazon Web Services"
"BAE", "Beginners All-purpose Symbolic Instruction Code"
"BAE", "bulletin board system"
"BAE", "binary-coded decimal"
"BAE", "best current practices"
"BAE", "Basic Combined Programming Language"
"BAE", "backup domain controller"
"BAE", "bidirectional override"
"BAE", "beyond economical repair"
"BAE", "bit error rate"
"BAE", "bit error rate test"
"BAE", "bit error rate tester"
"BAE", "Battlefield"
"BAE", "battleground"
"BAE", "bad game"
"BAE", "Border Gateway Multicast Protocol"
"BAE", "Border Gateway Protocol"
"BAE", "Berkeley Internet Name Domain"
"BAE", "basic input/output system"
"BAE", "because it's time network"
"BAE", "bold, italic, underline"
"BAE", "Bayonet Neill-Concelman"
"BAE", "back orifice"
"BAE", "Bastard Operator From Hell"
"BAE", "Berkeley Open Infrastructure for Network Computing"
"BAE", "beginning of message"
"BAE", "bill of materials"
"BAE", "BIOS pattern block"
"BAE",

"BERT", "Border Gateway Multicast Protocol"
"BERT", "Border Gateway Protocol"
"BERT", "Browser Helper Object"
"BERT", "Berkeley Internet Name Domain"
"BERT", "Bastard Operator From Hell"
"BERT", "Berkeley Open Infrastructure for Network Computing"
"BERT", "Bridge Protocol Data Unit"
"BERT", "bridge router"
"BERT", "Blue Screen of Death"
"BERT", "basic service set"
"BERT", "Behavior Tech Computers"
"BERT", "been there done that"
"BF", "back orifice"
"BF", "Bastard Operator From Hell"
"BF", "Berkeley Open Infrastructure for Network Computing"
"BF", "beginning of medium"
"BF", "beginning of message"
"BF", "bill of materials"
"BF", "biphase shift keying"
"BF", "basic rate interface"
"BF", "Blue Screen of Death"
"BF", "Black Screen of Death"
"BG", "ball grid array"
"BG", "Border Gateway Multicast Protocol"
"BG", "Border Gateway Protocol"
"BG", "biography"
"BG", "binary digit"
"BG", "Berkeley Open Infrastructure for Network Computing"
"BG", "beginning of medium"
"BG", "beginning of message"


"CC", "charge-coupled device"
"CC", "Cisco Certified Design Expert"
"CC", "cold cathode fluorescent lighting"
"CC", "Cisco Certified Internetworking Expert"
"CC", "Comit Consultatif International Tlphonique et Tlgraphique"
"CC", "Cisco Certified Network Associate"
"CC", "Cisco Certified Network Professional"
"CC", "common command set"
"CC", "call-control signaling "
"CC", "closed circuit television"
"CC", "counterclockwise"
"CC", "compact disc"
"CC", "change directory"
"CC", "compact audio disc"
"CC", "Communication Decency Act"
"CC", "compact disc digital audio"
"CC", "compact disc database"
"CC", "Copper Distributed Data Interface"
"CC", "Common Desktop Environment"
"CC", "Compact Disc file system"
"CC", "Compact Disc Interactive"
"CC", "Code Division Multiple Access"
"CC", "content distribution network"
"CC", "content delivery network"
"CC", "Cache Discovery Protocol"
"CC", "Certificate in Data Processing"
"CC", "Columbia Data Products"
"CC", "content delivery platform"
"CC", "conte

"CCTV", "Coordinated Universal Time"
"CCW", "content distribution network"
"CCW", "content delivery network"
"CCW", "Compact Disc Re-Writable"
"CCW", "Compact Disc Write Once"
"CCW", "Consumer Electronics Show"
"CCW", "common gateway interface"
"CCW", "connectionless network service"
"CCW", "cyan magenta yellow black"
"CCW", "Certified Network Computer Technician"
"CCW", "Communication and Network Riser"
"CCW", "Certified Network Systems Technician"
"CCW", "comprehensive Perl archive network"
"CCW", "Corporation for Research and Educational Networking"
"CCW", "Computer Science Network"
"CCW", "Campus Wide Information System"
"CD", "compact audio disc"
"CD", "Communication Decency Act"
"CD", "compact disc digital audio"
"CD", "compact disc database"
"CD", "Copper Distributed Data Interface"
"CD", "Common Desktop Environment"
"CD", "Compact Disc file system"
"CD", "Compact Disc Interactive"
"CD", "Code Division Multiple Access"
"CD", "content distribution network"
"CD", "content delivery

"CDSL", "Challenge-Handshake Authentication Protocol"
"CDSL", "certified information systems security professional"
"CDSL", "ceramic leadless lead carrier"
"CDSL", "Children's Online Privacy Protection Act"
"CDSL", "Cascading Style Sheets"
"CDSL", "cascading style sheets"
"CDSL", "Coordinated Universal Time"
"CDSL", "Coordinated Universal Time"
"CE", "cellular telephone"
"CE", "Community Enterprise Operating System"
"CE", "chief executive officer"
"CE", "Conseil Europeen pour la Recherche Nuclaire"
"CE", "certified"
"CE", "Computer Emergency Response Team"
"CE", "certification"
"CE", "certified"
"CE", "certificate"
"CE", "Consumer Electronics Show"
"CE", "cubic feet per minute"
"CE", "Color Graphics Adapter"
"CE", "computer-generated imagery"
"CE", "common gateway interface"
"CE", "College Humor"
"CE", "Computer Hope"
"CE", "Challenge-Handshake Authentication Protocol"
"CE", "character"
"CE", "change directory"
"CE", "cylinder-head-sector"
"CE", "classless inter-domain routing"
"CE", "

"CHS", "Cache On A STick"
"CHS", "Computer Technology Industry Association"
"CHS", "Children's Online Privacy Protection Act"
"CHS", "comprehensive Perl archive network"
"CHS", "characters per inch"
"CHS", "cost per thousand"
"CHS", "characters per second"
"CHS", "cost per targeted thousand"
"CHS", "Cascading Style Sheets"
"CHS", "chirp spread spectrum"
"CHS", "cascading style sheets"
"CHS", "Channel Service Unit/Data Service Unit"
"CHS", "crash to desktop"
"CHS", "character user interface"
"CIDR", "committed information rate"
"CIDR", "certified information systems security professional"
"CIDR", "ceramic leadless lead carrier"
"CIDR", "complementary metal-oxide semiconductor"
"CIDR", "Certified Network Computer Technician"
"CIDR", "Certified Novell Engineer"
"CIDR", "Communication and Network Riser"
"CIDR", "Certified Network Systems Technician"
"CIDR", "Compression / Decompression"
"CIDR", "COmpression / DECompression"
"CIDR", "Computer Technology Industry Association"
"CIDR", "Childr

"COAST", "Coordinated Universal Time"
"COB", "Common Business Oriented Language"
"COB", "Component Object Model"
"COB", "Computer Dealers' Exhibition"
"COB", "Combined Programming Language"
"COB", "cathode ray tube"
"COB", "Content Scramble System"
"COB", "Common Vulnerabilities and Exposures"
"COB", "Common Business Oriented Language"
"COB", "Component Object Model"
"COB", "Computer Dealers' Exhibition"
"COB", "Combined Programming Language"
"COB", "cathode ray tube"
"COB", "Content Scramble System"
"COB", "Common Vulnerabilities and Exposures"
"COBOL", "Component Object Model"
"COBOL", "Combined Programming Language"
"Code", "Compression / Decompression"
"Code", "COmpression / DECompression"
"Code", "Component Object Model"
"Code", "Computer Dealers' Exhibition"
"Code", "confidence"
"Code", "Combined Programming Language"
"Code", "Corporation for Research and Educational Networking"
"Code", "cathode ray tube"
"Code", "code segment"
"Code", "carrier sense multiple access/collision det

"CPU", "Corporation for Research and Educational Networking"
"CPU", "computer science"
"CPU", "Computer Science Network"
"CPU", "chirp spread spectrum"
"CPU", "Computer Service Technician"
"CPU", "comma-separated values"
"CPU", "Capture the Flag"
"CPU", "Common Vulnerabilities and Exposures"
"CPU", "Campus Wide Information System"
"CR", "cyclic redundancy check"
"CR", "Corporation for Research and Educational Networking"
"CR", "continuity RIMM"
"CR", "Certificate Revocation List"
"CR", "carriage return/line feed"
"CR", "customer relationship management"
"CR", "cathode ray tube"
"CR", "cryptography"
"CR", "customer service"
"CR", "computer science"
"CR", "counter-strike"
"CR", "carrier sense multiple access/collision detection"
"CR", "Computer Science Network"
"CR", "chirp spread spectrum"
"CR", "Content Scramble System"
"CR", "Computer Service Technician"
"CR", "Channel Service Unit/Data Service Unit"
"CR", "comma-separated values"
"CR", "client-to-client protocol"
"CR", "crash to desk

"CU", "command-line user interface"
"CU", "character user interface"
"CU", "Common Unix Printing System"
"CU", "Current Mailbox"
"CU", "Coordinated Universal Time"
"CU", "Common Vulnerabilities and Exposures"
"CU", "Concurrent Version System"
"CU", "Campus Wide Information System"
"CU", "Coordinated Universal Time"
"CUI", "Common Unix Printing System"
"CUI", "Current Mailbox"
"CUI", "Coordinated Universal Time"
"CUI", "Common Vulnerabilities and Exposures"
"CUI", "Concurrent Version System"
"CUI", "Campus Wide Information System"
"CUI", "Coordinated Universal Time"
"CUI", "Common Unix Printing System"
"CUI", "Current Mailbox"
"CUI", "Coordinated Universal Time"
"CUI", "Common Vulnerabilities and Exposures"
"CUI", "Concurrent Version System"
"CUI", "Campus Wide Information System"
"CUI", "Coordinated Universal Time"
"CUPS", "Common Vulnerabilities and Exposures"
"CUT", "Common Vulnerabilities and Exposures"
"CUT", "Concurrent Version System"
"CUT", "Campus Wide Information System"
"CUT"

"DBA", "Distributed Management Task Force"
"DBA", "Double-precision floating-point"
"DBA", "Debian package manager"
"DBA", "Darik's Boot and Nuke"
"DBA", "database management system"
"DBA", "distributed database management system"
"DBA", "Distributed Denial of Service"
"DBA", "double data rate"
"DBA", "double data rate two"
"DBA", "double data rate three"
"DBA", "double data rate four"
"DBA", "double data rate synchronous dynamic random access memory"
"DBA", "dynamic-link library"
"DBA", "Distributed Management Task Force"
"DBA", "Double-precision floating-point"
"DBA", "Debian package manager"
"DBAN", "database management system"
"DBAN", "distributed database management system"
"DBAN", "double data rate synchronous dynamic random access memory"
"DBAN", "Distributed Management Task Force"
"DBAN", "Double-precision floating-point"
"DBAN", "Debian package manager"
"DBMS", "distributed database management system"
"DBMS", "double data rate synchronous dynamic random access memory"
"DBMS", 

"DDS", "Data Encryption Standard"
"DDS", "Dynamic Host Configuration Protocol"
"DDS", "direct Internet message encapsulation"
"DDS", "digital video express"
"DDS", "disc jockey"
"DDS", "drive letter access"
"DDS", "Digital Light Processing"
"DDS", "discrete logarithm problem"
"DDS", "data level parallelism"
"DDS", "data loss prevention"
"DDS", "direct message"
"DDS", "direct memory access"
"DDS", "Direct Memory Access Control"
"DDS", "destination memory address register"
"DDS", "Desktop Management Interface"
"DDS", "Distributed Management Task Force"
"DDS", "Domain Name System"
"DDS", "Do Communications Over Mobile"
"DDS", "Department of Defense"
"DDS", "denial of service"
"DDS", "disk operating system"
"DDS", "Double-precision floating-point"
"DDS", "DisplayPort"
"DDS", "dots per inch"
"DDS", "DOS Protected Mode Interface"
"DDS", "display power management system"
"DDS", "damage per second"
"DDS", "dynamic random access memory"
"DDS", "digital rights management"
"DDS", "data request"
"

"DM", "data transformation services"
"DM", "digital thermal sensor"
"DM", "discontinuous transmission"
"DM", "Dynamic Video Memory Technology"
"DM", "direct memory access"
"DM", "Direct Memory Access Control"
"DM", "destination memory address register"
"DM", "Digital Millennium Copyright Act"
"DM", "Desktop Management Interface"
"DM", "data manipulation language"
"DM", "Distributed Management Task Force"
"DM", "demilitarized zone"
"DM", "Domain Name System"
"DM", "Do Communications Over Mobile"
"DM", "Department of Defense"
"DM", "disk operating system"
"DM", "damage over time"
"DM", "Debian package manager"
"DM", "DOS Protected Mode Interface"
"DM", "display power management system"
"DM", "damage per second"
"DM", "dynamic random access memory"
"DM", "digital rights management"
"DM", "Damn Small Linux"
"DM", "Dynamic Source Routing"
"DM", "Dell System Restore"
"DM", "Digital Simultaneous Voice and Data"
"DM", "data terminating equipment"
"DM", "data terminal equipment"
"DM", "Data Ter

"DSR", "Digital Signature Standard"
"DSR", "Digital Satellite Service"
"DSR", "data transformation services"
"DSR", "digital thermal sensor"
"DSR", "discontinuous transmission"
"DSR", "digital visual interface"
"DSR", "Digital Signature Standard"
"DSR", "Digital Satellite Service"
"DSR", "data transformation services"
"DSR", "digital thermal sensor"
"DSR", "discontinuous transmission"
"DSR", "digital visual interface"
"DSS", "Digital Simultaneous Voice and Data"
"DSS", "data transformation services"
"DSS", "digital thermal sensor"
"DSS", "discontinuous transmission"
"DSS", "digital versatile disc"
"DSS", "DirectX diagnostics"
"DSS", "Digital Simultaneous Voice and Data"
"DSS", "data transformation services"
"DSS", "digital thermal sensor"
"DSS", "discontinuous transmission"
"DSS", "digital versatile disc"
"DSS", "DirectX diagnostics"
"DTE", "Data Terminal Ready"
"DTE", "data transformation services"
"DTE", "Digital Theater Sound"
"DTE", "digital thermal sensor"
"DTE", "device under tes

"EDI", "end of topic"
"EDI", "End-of-Transmission"
"EDI", "electronic paper display"
"EDI", "encapsulated PostScript"
"EDI", "Extended System Configuration Data"
"EDI", "electrostatic discharge"
"EDI", "Enhanced Small Disk Interface"
"EDI", "Enhanced Serial Interface"
"EDI", "estimated time of arrival"
"EDI", "End-User License Agreement"
"EDI", "enhanced versatile disc"
"EDI", "Evolution-Data Optimized"
"EDI", "enhanced virus protection"
"EDI", "Extended File Allocation Table"
"EDI", "eVade o' Matic Module"
"EDI", "extended density format"
"EDI", "Extended Graphics Array"
"EDI", "extended memory specification"
"EDIINT", "Enhanced Interior Gateway Routing Protocol"
"EDIINT", "Extended System Configuration Data"
"EDIINT", "Enhanced Small Disk Interface"
"EDIINT", "Enhanced Serial Interface"
"EDIINT", "Extended File Allocation Table"
"EDO", "electronic data processing"
"EDO", "Electronic Delay Storage Automatic Calculator"
"EDO", "Electronic Discrete Variable Automatic Computer"
"EDO", "e

"EPD", "Erasable Programmable Read-Only Memory"
"EPD", "encapsulated PostScript"
"EPD", "e-mail server provider"
"EPD", "Evolution-Data Optimized"
"EPIC", "electronic publication"
"EPIC", "Extended Graphics Array"
"EPIC", "extended memory specification"
"EPP", "encapsulated PostScript"
"EPP", "Extensible Hypertext markup Language"
"EPP", "Extensible Messaging and Presence Protocol"
"EPS", "Extended Graphics Array"
"EPS", "Extensible Messaging and Presence Protocol"
"ESA", "Extended System Configuration Data"
"ESA", "electrostatic discharge"
"ESA", "Enhanced Small Disk Interface"
"ESA", "Enhanced Serial Interface"
"ESA", "Entertainment Software Rating Board"
"ESA", "estimated time of arrival"
"ESA", "Electronic Technicians Association International"
"ESA", "End-User License Agreement"
"ESA", "enhanced versatile disc"
"ESA", "extended density format"
"ESA", "Extended Graphics Array"
"ESA", "Extensible Hypertext markup Language"
"ESA", "Extensible Markup Language"
"ESA", "Extensible Messa

"FOSS", "front-side bus"
"FOSS", "Federal Trade Commission"
"FP", "field-programmable gate array"
"FP", "fast page mode"
"FP", "field programmable read-only memory"
"FP", "frames per second"
"FP", "floating-point unit"
"FP", "field-replacable unit"
"FP", "free-space optical communication"
"FP", "File Sharing Protocol"
"FP", "film supertwist nematic"
"FP", "file system type"
"FP", "File Transfer Protocol"
"FP", "field-programmable gate array"
"FP", "fast page mode"
"FP", "field programmable read-only memory"
"FP", "frames per second"
"FP", "floating-point unit"
"FP", "field-replacable unit"
"FP", "free-space optical communication"
"FP", "File Sharing Protocol"
"FP", "film supertwist nematic"
"FP", "file system type"
"FP", "File Transfer Protocol"
"FPGA", "field programmable read-only memory"
"FPM", "field programmable read-only memory"
"FPM", "free-space optical communication"
"FPM", "film supertwist nematic"
"FPS", "film supertwist nematic"
"FPU", "field-replacable unit"
"FPU", "free-s

"H", "high availability cluster multiprocessing"
"H", "hardware abstraction layer"
"H", "home area network"
"H", "host bus adapter"
"H", "human computer interface"
"H", "hardware compatibility list"
"H", "high-definition"
"H", "high-density"
"H", "hard drive"
"H", "hard disk controller"
"H", "High-bandwidth Digital Content Protection"
"H", "hard disk drive"
"H", "High Definition Digital Versatile Disc"
"H", "High-level Data Link Control"
"H", "High-Definition Multimedia Interface"
"H", "handheld device markup language"
"H", "high-density read-only memory"
"H", "high-bit-rate digital subscriber line"
"H", "high-definition TV"
"H", "High Efficiency Image File format"
"H", "hypertext editing system"
"H", "high efficiency video coding"
"H", "hybrid fiber coax"
"H", "hierarchical file system"
"H", "Hercules graphics adapter"
"H", "human interface device"
"H", "hacker's view"
"H", "high resolution"
"H", "human intelligence task"
"H", "high-level language"
"H", "high-memory area"
"H", "hit me

"HR", "Hypertext reference"
"HR", "Hue, Saturation, and Brightness"
"HR", "Hue, Saturation, and Lightness"
"HR", "High-Speed Serial Interface"
"HR", "Horizontal Synchronization"
"HR", "Hyper-Threading"
"HR", "HyperTransport"
"HR", "Hypertext Markup Language"
"HR", "home theater personal computer"
"HR", "Hypertext Transfer Protocol"
"HR", "Have you heard"
"HR", "hardware"
"HR", "Hertz"
"HR", "Hypertext Preprocessor"
"HREF", "High-Speed Serial Interface"
"HREF", "Hypertext Transfer Protocol"
"HSF", "High-Speed Serial Interface"
"HSF", "Hypertext Transfer Protocol"
"HSL", "High-Speed Serial Interface"
"HSL", "home theater personal computer"
"HSL", "Hypertext Transfer Protocol"
"HSL", "heads up display"
"HT", "Hypertext Markup Language"
"HT", "home theater personal computer"
"HT", "Hypertext Transfer Protocol"
"HT", "Hertz"
"HT", "Hypertext Preprocessor"
"HT", "Hypertext Markup Language"
"HT", "home theater personal computer"
"HT", "Hypertext Transfer Protocol"
"HT", "Hertz"
"HT", "Hyperte

"IDS", "Internet Movie Database"
"IDS", "International Committee for Information Technology Standards"
"IDS", "Internet Obfuscated C Code Contest"
"IDS", "illustrated parts breakdown"
"IDS", "interpupillary distance"
"IDS", "input, processing, output, and storage"
"IDS", "Infrared Data Association"
"IDS", "Industry Standard Architecture"
"IDS", "Information System Audit and Control Association"
"IDS", "indexed sequential access method"
"IDS", "Intra-Site Automatic Tunnel Addressing Protocol"
"IDS", "Integrated Services Digital Network"
"IDS", "integrated software for imagers and spectrometers"
"IDS", "Image and Scanner Interface Specification"
"IDS", "independent software vendor"
"IDSL", "indexed sequential access method"
"IDSL", "Intra-Site Automatic Tunnel Addressing Protocol"
"IDSL", "Integrated Services Digital Network"
"IE", "International Electrotechnical Commision"
"IE", "Institute of Electrical and Electronics Engineers"
"IE", "Image-Enabled Netware"
"IE", "Internet Engineering

"INTR", "Integrated Services Digital Network"
"INTR", "integrated software for imagers and spectrometers"
"INTR", "infinitely scalable intelligent storage"
"INTR", "Image and Scanner Interface Specification"
"INTR", "International Organization for Standardization"
"INTR", "Internet service provider"
"INTR", "independent software vendor"
"INTR", "International Typeface Corporation"
"INTR", "Information Technology Infrastructure Library"
"INTR", "International Telecommunication Union"
"I/O", "input/output controller"
"I/O", "input/output operations per second"
"I/O", "Input/Output Supervisor"
"IOC", "Internet Obfuscated C Code Contest"
"IOC", "input/output operations per second"
"IOC", "Internet Protocol"
"IOC", "ingress protection"
"IOC", "instructions per cycle"
"IOC", "interprocess communication"
"IOC", "Internet Protocol next generation"
"IOC", "input, processing, output, and storage"
"IOC", "Ingress Protection Rating"
"IOC", "Internet Protocol security"
"IOC", "Information Processin

"ISBN", "infinitely scalable intelligent storage"
"ISDN", "International Organization for Standardization"
"ISO", "Internet service provider"
"ISO", "independent software vendor"
"IT", "International Typeface Corporation"
"IT", "Information Technology Infrastructure Library"
"IT", "International Telecommunication Union"
"IT", "Information Technology eXtended"
"ITC", "Information Technology Infrastructure Library"
"ITC", "International Telecommunication Union"
"ITC", "Information Technology eXtended"
"ITIL", "International Telecommunication Union"
"ITIL", "Information Technology eXtended"
"JAR", "Java Platform, Enterprise Edition"
"JAR", "Java Intermediate Language"
"JAR", "Java Native Interface"
"JAR", "Joint Photographic Experts Group"
"JAR", "Java Runtime Environment"
"JAR", "JavaScript Object Notation"
"JAR", "JavaServer Pages"
"JAR", "Joint Test Action Group"
"JAR", "Java virtual machine"
"JAR", "Java servlet"
"Java EE", "Java Database Connectivity"
"Java EE", "Java Development Kit

"Lzone", "Landing Zone"
"m", "meta key"
"m", "media access control"
"m", "Macintosh"
"m", "macroinstruction"
"m", "Metropolitan Area Ethernet"
"m", "milliamp hour"
"m", "manual"
"m", "Message Application Programming Interface"
"m", "Mail Abuse Prevention Systems"
"m", "memory address register"
"m", "matrix laboratory"
"m", "Matrix code"
"m", "media access unit"
"m", "maximize"
"m", "maximum"
"m", "motherboard"
"m", "megabit"
"m", "megabyte"
"m", "Malwarebytes"
"m", "Managed Beans"
"m", "microcomputer-based labs"
"m", "megabits per second"
"m", "megabytes per second"
"m", "master boot record"
"m", "megacycle"
"m", "Micro Channel Architecture"
"m", "multicolor graphics array"
"m", "Microsoft Certified IT Professional"
"m", "Microsoft Certified Systems Administrator"
"m", "Microsoft Certified System Engineer"
"m", "Monochrome Display Adapter"
"m", "Multidimensional Database Management System"
"m", "modification detection code"
"m", "main distribution frame"
"m", "multiple-document interfa

"MIDI", "mobile identification number"
"MIDI", "MultiMediaCard"
"MIDI", "Multimedia Message Service"
"MIDI", "MultiMedia eXtension"
"MIDI", "magneto-optical diskette"
"MIDI", "Mozilla Foundation"
"MIDI", "metal-oxide-semiconductor"
"MIDI", "metal-oxide semiconductor field-effect transistor"
"MIDI", "Metal Oxide Varistor"
"MIDI", "Multimedia Personal Computer"
"MIDI", "Most Significant Digit"
"MIDI", "Microsoft Diagnostics"
"MIDI", "Microsoft Disk Operating System"
"MIDI", "Microsoft Visual Basic Scripting Edition"
"MIME", "multiple-input, multiple-output"
"MIME", "mobile identification number"
"MIME", "management information system"
"MIME", "man-in-the-middle attack"
"MIME", "MultiMediaCard"
"MIME", "Microsoft Management Console"
"MIME", "massively multiplayer online role playing game"
"MIME", "Multimedia Message Service"
"MIME", "MultiMedia eXtension"
"MIME", "Microcom Network Protocol"
"MIME", "metal-oxide semiconductor field-effect transistor"
"MIME", "Motion Picture Association of 

"MUT", "mobile virtual network operator"
"MUT", "multiple virtual storage"
"MUT", "Microsoft Visual Basic Scripting Edition"
"MVNO", "Microsoft Visual Basic Scripting Edition"
"MVP", "Microsoft Visual Basic Scripting Edition"
"MVS", "Microsoft Visual Basic Scripting Edition"
"NAK", "nonmaskable interrupt"
"NAK", "National Science Foundation Network"
"NAK", "National Standards Systems Network"
"NAS", "Network Address Translation"
"NAS", "National Center for Supercomputing Applications"
"NAS", "National Cybersecurity Center"
"NAS", "Network Direct Attached Storage"
"NAS", "network driver interface specification"
"NAS", "network data representation"
"NAS", "NetBIOS Enhanced User Interface"
"NAS", "Network Basic Input/Output System"
"NAS", "National Information Infrastructure"
"NAS", "National Institute Machine Radiating On Downs"
"NAS", "natural language processing"
"NAS", "nonmaskable interrupt"
"NAS", "network management system"
"NAS", "Network News Transfer Protocol"
"NAS", "network op

"NTLDR", "non-volatile random-access memory"
"NTP", "Next Unit of Computing"
"NUC", "Network Auto Magic"
"Num", "Network Auto Magic"
"OA", "optical character recognition"
"OA", "operationally critical threat, asset, and vulnerability evaluation"
"OA", "Open Database Connectivity"
"OA", "Open Data-Link Interface"
"OA", "OverDrive Processor Replacement"
"OA", "original equipment manufacturer"
"OA", "Open Host Controller Inferface"
"OA", "online analytical processing"
"OA", "Object Linking and Embedding"
"OA", "organic light-emitting diode"
"OA", "one laptop per child"
"OA", "online transaction processing"
"OA", "optical mark reading"
"OA", "object-oriented programming"
"OA", "operator"
"OA", "original poster"
"OA", "operation"
"OA", "Open Audio Library"
"OA", "Open Graphics Library"
"OA", "operations research"
"OA", "OR operator"
"OA", "operating system"
"OA", "object-based storage device"
"OA", "on-screen display"
"OA", "Open Software Foundation"
"OA", "Occupational Safety and Health Ad

"PC", "Portable Network Graphics"
"PC", "plain old documentation"
"PC", "Post Office Protocol"
"PC", "packet over SONET"
"PC", "Portable operating system interface for Unix"
"PC", "pay per click"
"PC", "Plastic Pin Grid Array"
"PC", "pixels per inch"
"PC", "Point-to-Point Protocol"
"PC", "Point-to-Point Protocol over Ethernet"
"PC", "Privacy Policy Statement"
"PC", "Point-to-Point Tunnel Protocol"
"PC", "Planning Tool for Resource Integration,  and Management"
"PC", "Programming in Logic"
"PC", "print screen"
"PC", "Preliminary Scholastic Assessment Test"
"PC", "Photoshop Document"
"PC", "programmable system on a chip"
"PC", "Problem Steps Recorder"
"PC", "Pacific Standard Time"
"PC", "public switched telephone network"
"PC", "Public Test Realm"
"PC", "physical unit"
"PC", "pickup group"
"PC", "personal video recorder"
"PC", "progressive web application"
"PC", "print working directory"
"PC", "preboot execution environment"
"PC", "PCI extensions for instrumentation"
"PCB", "Packet Ensem

"PM", "pseudomachine"
"PM", "polarization maintaining fiber"
"PM", "Pantone Matching System"
"PM", "part number"
"PM", "plain old documentation"
"PM", "Portable operating system interface for Unix"
"PM", "plain old telephone system"
"PM", "pages per minute"
"PM", "Privacy Policy Statement"
"PM", "Parameter RAM"
"PM", "print formatted"
"PM", "Planning Tool for Resource Integration,  and Management"
"PM", "Private Message"
"PM", "Programming in Logic"
"PM", "programmable read-only memory"
"PM", "Personal System/2"
"PM", "Preliminary Scholastic Assessment Test"
"PM", "Photoshop Document"
"PM", "programmable system on a chip"
"PM", "Problem Steps Recorder"
"PM", "Pacific Standard Time"
"PM", "Public Test Realm"
"PM", "pseudoterminal slave"
"PM", "pseudoterminal"
"PM", "Player vs. Environment"
"PM", "preboot execution environment"
"PM", "PCI extensions for instrumentation"
"PM", "pseudomachine"
"PM", "polarization maintaining fiber"
"PM", "Pantone Matching System"
"PM", "part number"
"PM", 

"PSU", "pseudoterminal slave"
"PSU", "pseudoterminal"
"PSU", "physical unit"
"PSU", "PlayerUnknown's Battlegrounds"
"PSU", "PCI extensions for instrumentation"
"PTR", "pseudoterminal slave"
"PTR", "pseudoterminal"
"PTR", "PlayerUnknown's Battlegrounds"
"PTR", "print working directory"
"PTR", "preboot execution environment"
"PTR", "PCI extensions for instrumentation"
"PTS", "PlayerUnknown's Battlegrounds"
"PTS", "PCI extensions for instrumentation"
"PTY", "print working directory"
"PU", "PlayerUnknown's Battlegrounds"
"PU", "pickup group"
"PU", "preboot execution environment"
"PU", "PCI extensions for instrumentation"
"PVE", "Player vs. Player"
"PVE", "personal video recorder"
"PVE", "progressive web application"
"PVE", "Personal Web Server"
"PVE", "preboot execution environment"
"PVP", "progressive web application"
"PVR", "Personal Web Server"
"PVR", "preboot execution environment"
"PW", "progressive web application"
"PW", "password"
"PW", "print working directory"
"PW", "Personal Web 

"SaaS", "Standalone Server"
"SaaS", "Serial Attached SCSI"
"SaaS", "Security Administrator Tool for Analyzing Networks"
"SaaS", "Serial Communications Controller"
"SaaS", "Search for Extraterrestrial Intelligence"
"SaaS", "Self-Monitoring Analysis and Reporting Technology"
"SaaS", "system management bus"
"SaaS", "System Management Bus"
"SaaS", "system management server"
"SaaS", "storage management subsystem"
"SaaS", "Simple Mail Transfer Protocol"
"SaaS", "Scalar Processor Architecture"
"SaaS", "stateful packet inspection"
"SaaS", "spam over instant message"
"SaaS", "simultaneous peripheral operatings online"
"SaaS", "Standard Power Supply"
"SaaS", "static random access memory"
"SACD", "Serial Attached SCSI"
"SACD", "Secure Digital High Capacity card"
"SACD", "single edge contact cartridge"
"SACD", "Security-Enhanced Linux"
"SACD", "sealed lead-acid"
"SACD", "SmartMedia card"
"SACD", "Serial Presence Detection"
"SACD", "static random access memory"
"SACD", "storage service provider"
"S

"Sig", "symmetric multiprocessing"
"Sig", "Simple Network Management Protocol"
"Sig", "synchronous optical networking"
"Sig", "spelling"
"Sig", "Sony and Phillips Digital Interconnect Format"
"Sig", "spam over instant message"
"Sig", "systems programming language"
"Sig", "simultaneous peripheral operatings online"
"Sig", "Streaming SIMD Extensions"
"Sig", "small-scale integration"
"Sig", "Secure Socket Tunneling Protocol"
"Sig", "Spanning Tree Protocol"
"Sig", "synchronous transport signal"
"Sig", "Super Video Graphics Array"
"SIGINT", "single instruction, multiple data"
"SIGINT", "Self-Monitoring Analysis and Reporting Technology"
"SIGINT", "Sony and Phillips Digital Interconnect Format"
"SIGINT", "single instruction, multiple data"
"SIGINT", "Self-Monitoring Analysis and Reporting Technology"
"SIGINT", "Sony and Phillips Digital Interconnect Format"
"Sim", "Subscriber Identity Module card"
"Sim", "single instruction, multiple data"
"Sim", "single inline memory module"
"Sim", "service

"TA", "tape operating system"
"TA", "tracks per inch"
"TA", "Trusted Platform Module"
"TA", "Twisted-Pair Physical Medium Dependant"
"TA", "technical report"
"TA", "transfer resistance"
"TA", "Tripple Data Encryption Algorithm"
"TA", "Telecommunications Relay Service"
"TA", "terminate-and-stay-resident"
"TA", "transistor-transistor logic"
"TA", "trasmit data"
"Ta", "Terminal Access Controller Access Control System"
"Ta", "Telephony Application Programming Interface"
"Ta", "tape archive"
"Ta", "total cost of ownership"
"Ta", "Trusted Computer Platform Alliance"
"Ta", "Transmission Control Protocol/Internet Protocol"
"Ta", "table data"
"Ta", "telecommunications device for the deaf"
"Ta", "team deathmatch"
"Ta", "thermal design power"
"Ta", "telecommunications line"
"Ta", "The Exiled Ream of Arborea"
"Ta", "text enhancement technology"
"Ta", "Trivial File Transfer Protocol"
"Ta", "table head"
"Ta", "total harmonic distortion"
"Ta", "Titanium"
"Ta", "Textas Instruments"
"Ta", "Tagged Image

"W", "Western Digital"
"W", "Window Driver Foundation"
"W", "web application"
"W", "Wired Equivalent Privacy"
"W", "Wired for Management"
"W", "Windows File Protection"
"W", "Windows Genuine Advantage"
"W", "Worldwide Interoperability for Microwave Access"
"W", "Windows Pre-install Environment"
"W", "Windows socket"
"W", "wireless Internet service provider"
"W", "Windows metafile format"
"W", "Windows Management Instrumentation"
"W", "Website META Language"
"W", "Wireless Markup Language"
"W", "Wake-on-LAN"
"W", "word of the day"
"W", "World of Warcraft"
"W", "Windows on Windows"
"W", "WordPress"
"W", "word processor"
"W", "write pre-comp"
"W", "word processing competency"
"W", "Wide Quad Extended Graphics Array"
"W", "Windows RAM"
"W", "Widescreen Super eXtended Graphics Array"
"W", "what the hell"
"W", "World Wide Web"
"W", "what you see is what you get"
"W", "what you see is what you print"
"WAIS", "Web-based Enterprise Management"
"WAIS", "Wideband Code Division Multiple Access"
"W

In [258]:
b = calc_fn(base_algo)
print(b)

"ADC", "analog-to-digital"
##################################
"ASN.1", "abstract syntax number one"
##################################
"B2B", "business-to-business"
##################################
"B2B", "business-to-consumer"
##################################
"BDF", "Glyph Bitmap Distribution Format"
##################################
"Bi-di", "bidirectional"
##################################
"CDA", "compact audio disc"
##################################
"CD-i", "Compact Disc Interactive"
##################################
"CD-R", "Compact Disc Recordable"
##################################
"CD-ROM", "Compact Disc Read-Only Memory"
##################################
"CD-RW", "Compact Disc Re-Writable"
##################################
"CD-WO", "Compact Disc Write Once"
##################################
"CLCC", "ceramic leadless lead carrier"
##################################
"Code", "source code"
##################################
"Coil", "electromagnetic coil"
###############

In [259]:
a = calc_fp(check_wether_abbv_is_proper_short_form_of_term)
print(a)

"AA", "account"
"AA", "acknowledgment"
"AA", "analog-to-digital"
"AA", "Autofocus"
"AA", "All-in-One"
"AA", "alphabet"
"AA", "ampere"
"AA", "application"
"AA", "argument"
"AA", "automobile"
"AA", "autonomous"
"AA", "automatic"
"AA", "avatar"
"AA", "antivirus"
"AAC", "Atanasoff-Berry Computer"
"AAC", "anonymous coward"
"AAC", "Asheron's Call"
"AAC", "alternating current"
"AAC", "account"
"AAC", "acknowledgment"
"AAC", "Autofocus"
"AAC", "application"
"AAC", "automatic"
"AAM", "acknowledgment"
"AAM", "ante meridiem"
"AAM", "amplitude modulation"
"AAM", "ampere"
"AAM", "aspect-oriented programming"
"AAM", "argument"
"AAM", "automobile"
"AAM", "autonomous"
"AAM", "automatic"
"AAS", "Autofocus"
"AAS", "autonomous system"
"AAS", "Application System/400"
"AAS", "autonomous"
"AAS", "antivirus"
"AAS", "Adobe Flash"
"AAS", "Autofocus"
"AAS", "autonomous system"
"AAS", "Application System/400"
"AAS", "autonomous"
"AAS", "antivirus"
"AAS", "Adobe Flash"
"AC", "account"
"AC", "acknowledgment"
"AC",

"COBOL", "Component Object Model"
"Code", "Compression / Decompression"
"Code", "COmpression / DECompression"
"Code", "Computer Dealers' Exhibition"
"Code", "confidence"
"Codec", "confidence"
"Codec", "confidence"
"Coil", "Combined Programming Language"
"Coil", "Certificate Revocation List"
"Con", "counter-strike"
"Con", "counter-strike"
"Con", "counter-strike"
"Core", "customer service"
"Core", "counter-strike"
"Core", "click-through rate"
"CPA", "cryptography"
"CPA", "cryptography"
"CPAN", "Combined Programming Language"
"CPAN", "cost per thousand"
"CPC", "client-to-client protocol"
"CPGA", "Combined Programming Language"
"CPGA", "cryptography"
"CPI", "cross-site scripting"
"CPL", "client-to-client protocol"
"CPS", "computer science"
"CR", "continuity RIMM"
"CR", "cryptography"
"CR", "counter-strike"
"CR", "click-through rate"
"CRC", "customer service"
"CRC", "client-to-client protocol"
"CREN", "customer relationship management"
"CREN", "computer science"
"C-RIMM", "customer relation

"EFS", "end-of-message"
"EFS", "End-of-Transmission"
"EGA", "Extended Graphics Array"
"EIA", "Extended ISA"
"EIA", "electroluminescent display"
"EIA", "electronic publication"
"EIA", "electrostatic discharge"
"EIDE", "electrostatic discharge"
"EISA", "electroluminescent display"
"EISA", "electrostatic discharge"
"EISA", "End-User License Agreement"
"EISA", "electroluminescent display"
"EISA", "electrostatic discharge"
"EISA", "End-User License Agreement"
"ELD", "electrostatic discharge"
"ELF", "electromotive force"
"EM", "electronic mail"
"EM", "enhanced metafile"
"EM", "expanded-memory manager"
"EM", "end-of-message"
"EM", "End-of-Transmission"
"E-mail", "enhanced metafile"
"EMI", "End-of-Transmission"
"EMI", "Evolution-Data Optimized"
"EMS", "end-of-message"
"EMS", "End-of-Transmission"
"EMS", "extended memory specification"
"EMS", "end-of-message"
"EMS", "End-of-Transmission"
"EMS", "extended memory specification"
"EOF", "End-of-Line"
"EOF", "end-of-message"
"EOF", "end-of-page"
"EO

"mb", "mebibit"
"mb", "mebibyte"
"mb", "mobile blog"
"Mb", "megabyte"
"Mb", "Malwarebytes"
"Mb", "Managed Beans"
"Mb", "mebibit"
"Mb", "mebibyte"
"Mb", "mobile blog"
"MB", "Malwarebytes"
"MB", "Managed Beans"
"MB", "mebibit"
"MB", "mebibyte"
"MB", "mobile blog"
"MBAM", "Multibank DRAM"
"MBean", "mobile identification number"
"MBL", "mobile blog"
"Mbps", "megabytes per second"
"MBR", "model number"
"MC", "MultiMediaCard"
"MC", "metal-oxide-semiconductor"
"MC", "Microsoft"
"MC", "millisecond"
"MC", "microarchitecture"
"MCA", "multi-factor authentication"
"MCA", "MultiMediaCard"
"MCA", "microarchitecture"
"MCSE", "magneto-optical diskette"
"MCSE", "Microsoft Cluster Server"
"MDA", "Multibank DRAM"
"MDA", "man-in-the-middle attack"
"MDA", "MultiMediaCard"
"MDA", "modulator/demodulator"
"MDA", "Mozilla Foundation"
"MDA", "Microsoft Diagnostics"
"MDC", "make directory"
"MDC", "MultiMediaCard"
"MDC", "metal-oxide-semiconductor"
"MDC", "Microsoft Diagnostics"
"MDI", "make directory"
"MDI", "Mu

"PP", "Pagerank"
"PP", "printer"
"PP", "PowerShell"
"PP", "PlayStation Portable"
"PP", "pseudoterminal"
"PP", "password"
"PP", "password"
"PP", "pixel"
"PPT", "printer"
"PPT", "PlayStation Portable"
"PPT", "pseudoterminal"
"PPTP", "PlayStation Portable"
"PR", "Parameter RAM"
"PR", "printer"
"PR", "PowerShell"
"PR", "pseudoterminal"
"PR", "password"
"PR", "password"
"PR", "Parameter RAM"
"PR", "printer"
"PR", "PowerShell"
"PR", "pseudoterminal"
"PR", "password"
"PR", "password"
"PRAM", "Private Message"
"PRAM", "programmable read-only memory"
"PRN", "print screen"
"PRN", "pseudoterminal"
"PRN", "PlayerUnknown's Battlegrounds"
"Proglog", "Programming in Logic"
"PS", "Personal System/2"
"PS", "Personal Store"
"PS", "pseudoterminal slave"
"PS", "pseudoterminal"
"PS", "password"
"PS", "password"
"PS", "Personal System/2"
"PS", "Personal Store"
"PS", "pseudoterminal slave"
"PS", "pseudoterminal"
"PS", "password"
"PS", "password"
"PS", "Personal System/2"
"PS", "Personal Store"
"PS", "pseudot

"SOA", "software"
"SOA", "system operator"
"SOA", "Solitaire"
"SOA", "super operator"
"SOA", "Suspend-to-RAM"
"SOA", "software"
"SOA", "system operator"
"SoC", "source code"
"SOI", "Solitaire"
"SOI", "synchronize"
"SOI", "synchronize"
"SONET", "Spam over Internet Telephony"
"SOP", "system operator"
"SP", "specification"
"SP", "Suspend-to-RAM"
"SP", "subscript"
"SP", "superscript"
"SPARC", "system packet interface"
"SPARC", "serial peripheral interface"
"SPD", "Suspend-to-RAM"
"Spec", "system packet interface"
"Spec", "serial peripheral interface"
"Spec", "supertwisted nematic"
"Spec", "superscript"
"SPI", "superscript"
"SPI", "superscript"
"SPI", "superscript"
"Spit", "superscript"
"Spool", "Synchronous Transport Module"
"SPP", "superscript"
"SPS", "superscript"
"SRAM", "Synchronous Transport Module"
"SRAM", "Suspend-to-RAM"
"SS", "Suspend-to-RAM"
"SS", "subscript"
"SS", "superscript"
"SS", "software"
"SS", "synchronize"
"SS", "synchronize"
"SS", "synthesizer"
"SSD", "Suspend-to-RAM"
"

In [260]:
b = calc_fn(check_wether_abbv_is_proper_short_form_of_term)
print(b)

"ADC", "analog-to-digital"
##################################
"AIX", "advanced interactive executive"
##################################
"APK", "Android package file"
##################################
"ARCnet", "Attached Resource Computer network"
##################################
"ARPANET", "Advanced Research Projects Agency Network"
##################################
"ARQ", "automatic repeat request"
##################################
"ASN.1", "abstract syntax number one"
##################################
"ATX", "Advanced Technology eXtended"
##################################
"B2B", "business-to-business"
##################################
"B2B", "business-to-consumer"
##################################
"Basic", "Beginners All-purpose Symbolic Instruction Code"
##################################
"BDF", "Glyph Bitmap Distribution Format"
##################################
"Br", "Break key"
##################################
"BTX", "Balanced Technology eXtended"
###################

In [261]:
a = calc_fp(match_rating_comparison)
print(a)

"AAC", "account"
"AAC", "icon"
"AAC", "receive"
"AAM", "ampere"
"AAM", "maximum"
"AAM", "minimum"
"AAM", "Titanium"
"AAS", "Celsius"
"AAS", "release"
"AAS", "user"
"AAS", "versus"
"AAS", "Celsius"
"AAS", "release"
"AAS", "user"
"AAS", "versus"
"ABC", "automobile"
"ABC", "automatic"
"ABC", "kibibit"
"ABC", "mebibit"
"ABC", "Tebi"
"Abend", "Advanced BIOS"
"Abend", "account"
"Abend", "absolute cell reference"
"Abend", "ampere"
"Abend", "as soon as possible"
"Abend", "autonomous system number"
"Abend", "above the fold"
"Abend", "Advanced Technology eXtended"
"Abend", "autonomous"
"Abend", "avatar"
"Abend", "broadband"
"Abend", "class ID"
"Abend", "source code"
"Abend", "download"
"Abend", "download"
"Abend", "Gigabits per second"
"Abend", "kibibyte"
"Abend", "line feed"
"Abend", "memorandum"
"Abend", "mebibyte"
"Abend", "NOT NULL"
"Abend", "nanosecond"
"Abend", "Record"
"Abend", "source code"
"Abend", "table data"
"Abend", "table head"
"Abend", "upload"
"Abend", "upload"
"Abend", "XP mode"

"ARPANET", "service-level agreement"
"ARPANET", "super-large scale integration"
"ARPANET", "Serial Presence Detection"
"ARPANET", "Source Service Access Point"
"ARPANET", "standard input"
"ARPANET", "terminal adapter"
"ARPANET", "terminate-and-stay-resident"
"ARPANET", "user agent"
"ARPANET", "user-defined function"
"ARPANET", "user-defined font"
"ARPANET", "User State Migration Tool"
"ARPANET", "unique selling point"
"ARPANET", "Microsoft Visual Basic Scripting Edition"
"ARPANET", "Version"
"ARPANET", "very-large-scale integration"
"ARPANET", "Virtual Machine Monitor"
"ARPANET", "wireless access point"
"ARPANET", "web application"
"ARPANET", "Wired for Management"
"ARQ", "Core 2"
"ARQ", "versus"
"AS", "Celsius"
"AS", "release"
"AS", "user"
"AS", "versus"
"ASAP", "Google Plus"
"ASAP", "page up"
"ASAP", "user"
"ASCII", "automatic"
"ASCII", "nanosecond"
"ASCII", "user"
"ASME", "automobile"
"ASME", "automatic"
"ASME", "loss of frame"
"ASME", "maximum"
"ASME", "minimum"
"ASME", "Titanium"


"BITNET", "Standard Parallel Port"
"BITNET", "standard input"
"BITNET", "toolkit"
"BITNET", "Turing test"
"BNC", "Boolean"
"BNC", "bitcoin"
"BNC", "function"
"BNC", "manual"
"BNC", "minimum"
"BNC", "Titanium"
"BOFH", "Boolean"
"BOINC", "Boolean"
"BOINC", "bitcoin"
"BOINC", "function"
"BOINC", "manual"
"BOINC", "minimum"
"BOINC", "Titanium"
"BOM", "Boolean"
"BOM", "maximum"
"BOM", "minimum"
"BOM", "Titanium"
"BOM", "Boolean"
"BOM", "maximum"
"BOM", "minimum"
"BOM", "Titanium"
"BOM", "Boolean"
"BOM", "maximum"
"BOM", "minimum"
"BOM", "Titanium"
"BOM", "Boolean"
"BOM", "maximum"
"BOM", "minimum"
"BOM", "Titanium"
"Bool", "Celsius"
"Bool", "level"
"Bool", "milli"
"Bool", "manual"
"Bool", "modulo"
"Bool", "pixel"
"Bool", "release"
"BPB", "opinion"
"BPB", "Tebi"
"BPDU", "core dump"
"BPDU", "opinion"
"BPDU", "Super Video"
"BPDU", "upload"
"BPDU", "upload"
"BPDU", "XP mode"
"BPL", "level"
"BPL", "milli"
"BPL", "manual"
"BPL", "modulo"
"BPL", "opinion"
"BPL", "pixel"
"BPL", "spelling"
"BPL", "t

"CentOS", "intrusion detection system"
"CentOS", "Internet Engineering Notes"
"CentOS", "International Federation for Information Processing"
"CentOS", "Internet Movie Database"
"CentOS", "International Committee for Information Technology Standards"
"CentOS", "integer"
"CentOS", "Internetworking operating system"
"CentOS", "intrusion prevention system"
"CentOS", "Internet Protocol version 4"
"CentOS", "Internet Protocol version 6"
"CentOS", "integrated software for imagers and spectrometers"
"CentOS", "one thousand"
"CentOS", "line feed"
"CentOS", "media access unit"
"CentOS", "maximum transmission unit"
"CentOS", "nanometer"
"CentOS", "picture"
"CentOS", "Pentium 2"
"CentOS", "Pantone Matching System"
"CentOS", "packet over SONET"
"CentOS", "printer"
"CentOS", "Request for Comments"
"CentOS", "Reseaux IP Europeens notes"
"CentOS", "single-density diskette"
"CentOS", "single-sided diskette"
"CentOS", "sound effects"
"CentOS", "signature"
"CentOS", "Solitaire"
"CentOS", "Tantalum"
"Cen

"CSMA/CD", "Ultimate Boot CD"
"CSMA/CD", "User Datagram Protocol"
"CSMA/CD", "universal product code"
"CSMA/CD", "Unix-to-Unix encode"
"CSMA/CD", "valve-regulated lead-acid"
"CSMA/CD", "Visual Studio"
"CSNET", "Channel Service Unit/Data Service Unit"
"CSNET", "client-to-client protocol"
"CSNET", "Clear to Send"
"CSNET", "change teletype"
"CSNET", "control unit"
"CSNET", "Drive Active Slave Present"
"CSNET", "distributed computing environment"
"CSNET", "Destination Service Access Point"
"CSNET", "drive secondary present"
"CSNET", "evaluate"
"CSNET", "favorite"
"CSNET", "function"
"CSNET", "height"
"CSNET", "kilobit"
"CSNET", "kilobit"
"CSNET", "kibibyte"
"CSNET", "logic unit"
"CSNET", "media access unit"
"CSNET", "megabit"
"CSNET", "mebibyte"
"CSNET", "maximum transmission unit"
"CSNET", "National Center for Supercomputing Applications"
"CSNET", "NOT NULL"
"CSNET", "no operatation"
"CSNET", "nanosecond"
"CSNET", "operation"
"CSNET", "open shortest path first"
"CSNET", "petabit"
"CSNET",

"DMA", "Titanium"
"DMAR", "dead on arrival"
"DMAR", "feature"
"DMAR", "nanometer"
"DMAR", "number"
"DMAR", "remark"
"DMAR", "user"
"DMI", "maximum"
"DMI", "minimum"
"DMI", "Titanium"
"DML", "dead on arrival"
"DML", "Google Mail"
"DML", "level"
"DML", "milli"
"DML", "manual"
"DML", "modulo"
"DML", "pixel"
"DML", "simulation"
"DML", "voicemail"
"DMTF", "Domain Name System"
"DMTF", "damage over time"
"DMTF", "form feed"
"DMTF", "Hertz"
"DMTF", "nanometer"
"DMTF", "number"
"DMTF", "parameter"
"DMTF", "picture"
"DMTF", "Pentium 2"
"DMTF", "remark"
"DMTF", "simulation"
"DMTF", "Solitaire"
"DMTF", "width"
"DMZ", "maximize"
"DNS", "dead on arrival"
"DNS", "In Any Case"
"DNS", "manual"
"DNS", "minimum"
"DNS", "release"
"DNS", "Titanium"
"DNS", "Version"
"DNS", "versus"
"DoCoMo", "icon"
"DoCoMo", "maximum"
"DoCoMo", "minimum"
"DoCoMo", "receive"
"DoCoMo", "Titanium"
"DoCoMo", "voicemail"
"DoS", "release"
"DoS", "user"
"DoS", "versus"
"DOS", "release"
"DOS", "user"
"DOS", "versus"
"DoT", "feature

"ESD", "password"
"ESD", "user"
"ESDI", "nanosecond"
"ESDI", "pseudocode"
"ESDI", "password"
"ESDI", "password"
"ESDI", "user"
"ESI", "release"
"ESI", "user"
"ESI", "versus"
"ESP", "Google Plus"
"ESP", "page up"
"ESP", "user"
"ESRB", "evaluate"
"ESRB", "favorite"
"ESRB", "GeForce"
"ESRB", "loss of frame"
"ESRB", "password"
"ESRB", "password"
"ESRB", "Record"
"ESRB", "remark"
"ESRB", "set user ID"
"ESRB", "user"
"ESRB", "Extended Graphics Array"
"ETA", "evaluate"
"ETA", "feature"
"ETA", "kibibit"
"ETA", "mebibit"
"ETA-I", "feature"
"ETA-I", "Not-Or"
"ETX", "feature"
"EULA", "level"
"EULA", "milli"
"EULA", "manual"
"EULA", "modulo"
"EULA", "pixel"
"EULA", "release"
"Eval", "level"
"Eval", "milli"
"Eval", "manual"
"Eval", "modulo"
"Eval", "pixel"
"EVD", "hard drive"
"EVD", "level"
"EVD", "Super Video"
"EV-DO", "favorite"
"EV-DO", "line feed"
"EV-DO", "Not-Or"
"EV-DO", "Record"
"EV-DO", "received data"
"EV-DO", "source code"
"EV-DO", "upload"
"EV-DO", "upload"
"EV-DO", "XP mode"
"EV-DO", "

"GPRS", "report distribution management system"
"GPRS", "Record"
"GPRS", "remark"
"GPRS", "Super Video Graphics Array"
"GPRS", "Super Video"
"GPRS", "Super Extended Graphics Array"
"GPRS", "Super XGA Plus"
"GPRS", "top of file"
"GPRS", "tape operating system"
"GPRS", "uppercase"
"GPRS", "upload"
"GPRS", "upload"
"GPRS", "user"
"GPRS", "versus"
"GPRS", "WordPress"
"GPRS", "XP mode"
"GPS", "opinion"
"GPS", "release"
"GPS", "uppercase"
"GPS", "versus"
"GPT", "kibibit"
"GPT", "mebibit"
"GPT", "opinion"
"GPT", "teletype"
"GPU", "GNU zip"
"GPU", "opinion"
"GPU", "page up"
"Gram", "loss of frame"
"Gram", "maximum"
"Gram", "memorandum"
"Gram", "minimum"
"Gram", "parameter"
"Gram", "Titanium"
"Gram", "versus"
"Gravatar", "GRand Unified Bootloader"
"Gravatar", "Greenwich Mean Time"
"Gravatar", "globally unique identifier"
"Gravatar", "Hercules graphics adapter"
"Gravatar", "Heal over Time"
"Gravatar", "hardware"
"Gravatar", "Hertz"
"Gravatar", "Layered Service Provider"
"Gravatar", "memory addre

"INCITS", "Windows File Protection"
"Inetd", "information"
"Inetd", "index node"
"Inetd", "integer"
"Inetd", "interrupt"
"Inetd", "Internet Obfuscated C Code Contest"
"Inetd", "input/output operations per second"
"Inetd", "Internetworking operating system"
"Inetd", "Internet Protocol"
"Inetd", "ingress protection"
"Inetd", "intellectual property"
"Inetd", "interprocess communication"
"Inetd", "interpupillary distance"
"Inetd", "initial program load"
"Inetd", "Internet Protocol next generation"
"Inetd", "intrusion prevention system"
"Inetd", "Internet Protocol security"
"Inetd", "Internet Protocol television"
"Inetd", "Internet Protocol version 4"
"Inetd", "Internet Protocol version 6"
"Inetd", "Internetwork Packet Exchange/Sequenced Packet Exchange"
"Inetd", "infrared"
"Inetd", "Internet Relay Chat"
"Inetd", "Infrared Data Association"
"Inetd", "interrecord gap"
"Inetd", "Intensity-Red-Green-Blue"
"Inetd", "interrupt request"
"Inetd", "Industry Standard Architecture"
"Inetd", "Informat

"LFAP", "level"
"LFAP", "page up"
"LFD", "level"
"LFG", "level"
"LFM", "loss of frame"
"LFM", "level"
"LFM", "maximum"
"LFM", "minimum"
"LFM", "Titanium"
"LFN", "level"
"LFN", "opinion"
"LG", "level"
"LG", "page up"
"LGA", "level"
"LGA", "page up"
"LGPL", "Logical Link Control"
"LGPL", "Loss of signal"
"LGPL", "line printer terminal"
"LGPL", "logic unit"
"LGPL", "level"
"LGPL", "megabit"
"LGPL", "megaflop"
"LGPL", "not equal"
"LGPL", "not gonna lie"
"LGPL", "NOT NULL"
"LGPL", "page up"
"LGPL", "top of file"
"LGPL", "voicemail"
"LH", "level"
"LIFO", "level"
"Li-ion", "level"
"Li-ion", "opinion"
"Li-polymer", "list server"
"Li-polymer", "loss of frame"
"Li-polymer", "line printer daemon"
"Li-polymer", "line printer terminal"
"Li-polymer", "Low Profile eXtension"
"Li-polymer", "least significant character"
"Li-polymer", "Layered Service Provider"
"Li-polymer", "non-impact printer"
"Li-polymer", "non-printing character"
"Li-polymer", "non-playable character"
"Li-polymer", "non-return-to-ze

"MTBF", "not equal"
"MTBF", "NOT NULL"
"MTBF", "Not-Or"
"MTBF", "number"
"MTBF", "out of office"
"MTBF", "petabit"
"MTBF", "petabyte"
"MTBF", "retweet"
"MVNO", "opinion"
"MVP", "page up"
"MVS", "release"
"MVS", "versus"
"MX record", "network interface card"
"MX record", "nickel-metal hydride"
"MX record", "OverDrive Processor"
"MX record", "overpowered"
"MX record", "on-screen keyboard"
"MX record", "one-time password"
"MX record", "overtype mode"
"MX record", "printed circuit board"
"MX record", "Microsoft Plus!"
"MX record", "print screen"
"MX record", "Resource Description Framework"
"MX record", "Recording Industry Association of America"
"MX record", "Super Audio CD"
"MX record", "storage area network"
"MX record", "Scroll Lock key"
"MX record", "Secure Digital High Capacity card"
"MX record", "safe for work"
"MX record", "Subscriber Identity Module card"
"MX record", "SmartMedia card"
"MX record", "Super Video CD"
"MX record", "Super Video Graphics Array"
"MX record", "Super Exte

"P-machine", "Small Scale Experimental Machine"
"P-machine", "tape archive"
"P-machine", "user-defined function"
"P-machine", "Unique Device Identification"
"P-machine", "universal Turing machine"
"P-machine", "Valve Anti-Cheat"
"P-machine", "Video Electronics Standard Association"
"P-machine", "virtual machine"
"P-machine", "web application"
"P-machine", "Windows File Protection"
"P-machine", "extended memory specification"
"PMF", "pixel"
"PMS", "pixel"
"PMS", "release"
"PMS", "versus"
"PN", "pixel"
"PN", "Titanium"
"PNG", "Pagerank"
"PNG", "pixel"
"PNG", "spelling"
"PNG", "Titanium"
"PnP", "Pagerank"
"PnP", "pixel"
"PnP", "Titanium"
"POD", "pixel"
"PoS", "pixel"
"PoS", "release"
"PoS", "user"
"PoS", "versus"
"POS", "pixel"
"POS", "release"
"POS", "user"
"POS", "versus"
"POSIX", "password"
"POSIX", "password"
"POSIX", "pixel"
"POSIX", "user"
"POST", "password"
"POST", "password"
"POST", "pixel"
"POST", "user"
"POTS", "printer"
"POTS", "pixel"
"POTS", "release"
"POTS", "set user ID"
"P

"SERP", "source code"
"SERP", "set user ID"
"SERP", "versus"
"Servlet", "single instruction, multiple data"
"Servlet", "super input/output"
"Servlet", "service-level agreement"
"Servlet", "secure login"
"Servlet", "super-large scale integration"
"Servlet", "source memory address register"
"Servlet", "server message block"
"Servlet", "storage management subsystem"
"Servlet", "Surface-mount Technology"
"Servlet", "service-oriented architecture"
"Servlet", "start of authority"
"Servlet", "silicon on insulator"
"Servlet", "super operator"
"Servlet", "source code"
"Servlet", "Serial Presence Detection"
"Servlet", "Structured Query Language"
"Servlet", "Source Service Access Point"
"Servlet", "service set identifier"
"Servlet", "Secure Sockets Layer"
"Servlet", "storage service provider"
"Servlet", "supertwisted nematic"
"Servlet", "superscript"
"Servlet", "Super XGA Plus"
"Servlet", "Universal Disk Format"
"Servlet", "unordered list"
"Servlet", "User State Migration Tool"
"Servlet", "Univer

In [262]:
b = calc_fn(match_rating_comparison)
print(b)

"AA", "Anti-alias"
##################################
"AAC", "Advanced Audio Coding"
##################################
"AAM", "autmoatic acoustic management"
##################################
"AAS", "Auto Area Segmentation"
##################################
"AAS", "as a service"
##################################
"ABC", "Atanasoff-Berry Computer"
##################################
"ABIOS", "Advanced BIOS"
##################################
"ABP", "AdBlock Plus"
##################################
"AC", "anonymous coward"
##################################
"AC", "Asheron's Call"
##################################
"AC", "alternating current"
##################################
"ACE", "access control entry"
##################################
"Ack", "acknowledgment"
##################################
"ACL", "access control list"
##################################
"ACM", "Association for Computing Machinery"
##################################
"ACPI", "Advanced Configuration and Power Inter

##################################
"mAH", "milliamp hour"
##################################
"MAPI", "Message Application Programming Interface"
##################################
"MAPS", "Mail Abuse Prevention Systems"
##################################
"MAR", "memory address register"
##################################
"Matlab", "matrix laboratory"
##################################
"MAU", "media access unit"
##################################
"mb", "motherboard"
##################################
"Mb", "megabit"
##################################
"MB", "megabyte"
##################################
"MBAM", "Malwarebytes"
##################################
"MBean", "Managed Beans"
##################################
"MBL", "microcomputer-based labs"
##################################
"Mbps", "megabits per second"
##################################
"MBps", "megabytes per second"
##################################
"MBR", "master boot record"
##################################
"MC", "mega

In [None]:
######################### Experiments with syntactic and semantic similarity  ###################################################

In [229]:
def dice_coefficient(a, b):
    """dice coefficient 2nt/(na + nb)."""
    a_bigrams = set(a.lower())
    b_bigrams = set(b.lower())
    overlap = len(a_bigrams & b_bigrams)
    return overlap * 2.0 / (len(a_bigrams) + len(b_bigrams))

In [233]:
from random import randint
random_pairs = []
for i in range (0, 10):
    rd = randint(0, len(abbr))
    abbv = abbr[rd]
    term = long_forms[rd]
    random_pairs.append([abbv, term])

for pair in random_pairs:
    print("++++++++++++++++++++++++++++++++++++++++++")
    print("\"" + pair[0] + "\", " + "\"" + pair[1] + "\"")
    for j, measure in enumerate([jellyfish.levenshtein_distance, jellyfish.damerau_levenshtein_distance, jellyfish.hamming_distance, jellyfish.jaro_similarity, jellyfish.jaro_winkler_similarity, jellyfish.match_rating_comparison, dice_coefficient]):
        print(measure(pair[0], pair[1]))
    

++++++++++++++++++++++++++++++++++++++++++
"LED monitor", "light-emitting diode"
17
17
20
0.4348484848484849
0.4348484848484849
True
0.8181818181818182
++++++++++++++++++++++++++++++++++++++++++
"Int", "integer"
5
5
5
0.6507936507936508
0.6507936507936508
True
0.6666666666666666
++++++++++++++++++++++++++++++++++++++++++
"PS/2", "Personal System/2"
13
13
16
0.4362745098039216
0.4362745098039216
True
0.4444444444444444
++++++++++++++++++++++++++++++++++++++++++
"IANA", "Internet Assigned Numbers Authority"
31
31
34
0.611904761904762
0.611904761904762
None
0.3157894736842105
++++++++++++++++++++++++++++++++++++++++++
"SMM", "System Management Mode"
19
19
21
0.5858585858585859
0.5858585858585859
None
0.3076923076923077
++++++++++++++++++++++++++++++++++++++++++
"U/L", "upload"
6
6
6
0.0
0.0
True
0.4444444444444444
++++++++++++++++++++++++++++++++++++++++++
"IAP", "Internet access provider"
23
23
23
0.4583333333333333
0.4583333333333333
None
0.375
++++++++++++++++++++++++++++++++++++++++++

In [None]:
########################### comparison on abbreviation level #######################################################

In [238]:
for k, abb in enumerate(abbr):
    abb_lower = abb.lower()
    term_lower = long_forms[k].lower()
    sanitized_abbv, sanitized_term = clear_special_characters(abb_lower, term_lower) 
    sanitized_term_without_stopswords = stop_words_handling(sanitized_term)
    initial_letters_of_tokens_of_sanitized_term_without_stopswords = ''.join([c[0] for c in sanitized_term_without_stopswords.split()])
    tmp_sum = 0
    for measure in [jellyfish.levenshtein_distance, jellyfish.damerau_levenshtein_distance, jellyfish.hamming_distance, jellyfish.jaro_similarity, jellyfish.jaro_winkler_similarity, match_rating_comparison, dice_coefficient]:
        tmp_sum = tmp_sum + measure(initial_letters_of_tokens_of_sanitized_term_without_stopswords, sanitized_abbv)
    print(tmp_sum/len(abbr))
        

0.0036192544335866814
0.002171552660152009
0.002171552660152009
0.002171552660152009
0.002171552660152009
0.0038122813367113036
0.006879168605552971
0.00660772452303397
0.0038122813367113036
0.002171552660152009
0.002171552660152009
0.002171552660152009
0.005971769815418024
0.002171552660152009
0.00437326577391724
0.002171552660152009
0.002171552660152009
0.002171552660152009
0.002171552660152009
0.002171552660152009
0.002171552660152009
0.002171552660152009
0.002171552660152009
0.002171552660152009
0.002171552660152009
0.002171552660152009
0.00437326577391724
0.002171552660152009
0.002171552660152009
0.002171552660152009
0.002171552660152009
0.002171552660152009
0.002171552660152009
0.002171552660152009
0.002171552660152009
0.008805129000568741
0.002171552660152009
0.0034382917119073473
0.002171552660152009
0.002171552660152009
0.002171552660152009
0.002171552660152009
0.002171552660152009
0.002171552660152009
0.002171552660152009
0.004916153938955242
0.0033779708046809023
0.004252623

0.004916153938955242
0.004916153938955242
0.0053384002895403545
0.002171552660152009
0.0038122813367113036
0.0038122813367113036
0.002171552660152009
0.003632180342278062
0.01651931130758492
0.00437326577391724
0.002171552660152009
0.002171552660152009
0.002171552660152009
0.002171552660152009
0.002171552660152009
0.012353628998580139
0.002171552660152009
0.005066956207021353
0.0034382917119073473
0.0038122813367113036
0.016537208719619143
0.002171552660152009
0.002171552660152009
0.002171552660152009
0.0038122813367113036
0.009009681505609844
0.016442930938044942
0.002171552660152009
0.002171552660152009
0.002171552660152009
0.002171552660152009
0.002171552660152009
0.004717956672354067
0.002171552660152009
0.002171552660152009
0.006659428157799494
0.002171552660152009
0.002171552660152009
0.002171552660152009
0.002171552660152009
0.002171552660152009
0.002171552660152009
0.0034382917119073473
0.0034382917119073473
0.0038122813367113036
0.002171552660152009
0.002171552660152009
0.0021

In [242]:
 for measure in [jellyfish.levenshtein_distance, jellyfish.damerau_levenshtein_distance, jellyfish.hamming_distance, jellyfish.jaro_similarity, jellyfish.jaro_winkler_similarity, match_rating_comparison, dice_coefficient, check_wether_abbv_is_proper_short_form_of_term]:
    tmp_sum = 0
    for k, abb in enumerate(abbr):
        abb_lower = abb.lower()
        term_lower = long_forms[k].lower()
        sanitized_abbv, sanitized_term = clear_special_characters(abb_lower, term_lower) 
        sanitized_term_without_stopswords = stop_words_handling(sanitized_term)
        initial_letters_of_tokens_of_sanitized_term_without_stopswords = ''.join([c[0] for c in sanitized_term_without_stopswords.split()])
        tmp_sum = tmp_sum + measure(initial_letters_of_tokens_of_sanitized_term_without_stopswords, sanitized_abbv)
    print(tmp_sum/len(abbr))

0.8517915309446255
0.8501628664495114
1.1346362649294246
0.895096960284256
0.8961961651972516
0.9142236699239956
0.8650353883660081
0.9272529858849077
