## Annotation ingestion notebook

Given a pair of strings containing a well-formatted list of base companies and a list
of annotated peers, respectively, this notebook parses them into a machine-readable dictionary that can be used for evaluation.

In [1]:
from collections import defaultdict
from elasticsearch import Elasticsearch, NotFoundError
from elasticsearch_dsl import Search
import numpy as np
import ssl
from elasticsearch.connection import create_ssl_context
from elasticsearch_dsl import Q
from graphene import ObjectType, String, Int, ID, Float, List, Schema
import urllib3

# Suppress urllib3's InsecureRequestWarning up front; certificate checks are
# switched off further down whenever SSL is enabled.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# --- Elasticsearch connection settings -------------------------------------
HOST = "host.docker.internal"
PORT = "9200"
SSL = False
INDEX = "company"

# No SSL context by default; one is only built when SSL is turned on.
ssl_context = None
if SSL:
    # Encrypted transport, but neither the host name nor the certificate chain
    # is validated.
    ssl_context = create_ssl_context()
    ssl_context.check_hostname = False
    ssl_context.verify_mode = ssl.CERT_NONE

# NOTE(review): `indices` is forwarded to the client as an extra keyword
# argument; it does not look like a documented connection option -- confirm
# that it has any effect.
client = Elasticsearch(
    hosts=[dict(host=HOST, port=PORT)],
    indices=[INDEX],
    scheme="https" if SSL else "http",
    ssl_context=ssl_context,
)

In [2]:
# Currency code associated with each (lower-cased) region label.
# NOTE(review): presumably looked up with the region column of the annotation
# rows to decide how a revenue figure is parsed -- verify against the caller.
region_to_currency = dict(
    [
        ("europe", "EUR"),
        ("asia pacific", "MYR"),
    ]
)

In [3]:
import os

import requests

# Request headers for the data-services API.
# The API key is taken from the ONAI_API_KEY environment variable when it is
# set; otherwise the original literal is sent unchanged, so existing runs of
# the notebook behave as before.
headers = {
  "X-API-Key": os.environ.get("ONAI_API_KEY", "ONAI_API_KEY"),
  "X-Stack-Host": "Development",
  "X-Request-Id": "-1",
  "X-Stack-User": "Anonymous User"
}

# GraphQL query asking for the MYR->EUR and USD->EUR rates with the spot date
# 2019-11-30; only the source currency code and the rate are selected.
get_conversion_rates_query = '''
query{
  currencyConversionRates(keys:
    [
      {sourceCurrency: "MYR",
        targetCurrency:"EUR",
        spotRate: "2019-11-30"
      },
      {sourceCurrency: "USD",
        targetCurrency:"EUR",
        spotRate: "2019-11-30"
      },
    ]
  )
  {
    sourceCurrency{
      code
    }
    rate
  }
}
'''

# A timeout keeps the cell from hanging forever on an unreachable service, and
# raise_for_status() turns HTTP-level failures into an immediate, readable error.
rates_response = requests.post(
    'https://data-services.onai.cloud/api/',
    json={'query': get_conversion_rates_query},
    headers=headers,
    timeout=30,
)
rates_response.raise_for_status()

# GraphQL servers can answer 200 with an "errors" entry and no usable data;
# report that explicitly instead of failing on the subscript below.
rates_payload = rates_response.json()
if rates_payload.get("errors"):
    raise RuntimeError(
        "currencyConversionRates query failed: {}".format(rates_payload["errors"])
    )

res = rates_payload['data']['currencyConversionRates']

# Source currency code -> rate towards EUR, e.g. conversion_rates_to_eur["MYR"].
conversion_rates_to_eur = {
    el["sourceCurrency"]["code"]: el["rate"]
    for el in res
}

In [4]:
# Multiplier for the single-letter magnitude suffix that ends every amount.
_MAGNITUDES = {"b": 1e9, "m": 1e6, "k": 1e3}


def _parse_amount(token):
    """Turn a suffixed amount such as ``"2.08m"`` or ``"570K"`` into a float."""
    cleaned = token.strip().lower()
    return _MAGNITUDES[cleaned[-1]] * float(cleaned[:-1])


def _parse_range(val):
    """Parse a ``"<low>-<high>"`` string into a ``(low, high)`` pair of floats."""
    low, high = [_parse_amount(part) for part in val.split("-")]
    return low, high


def convert_number(val, currency, rates=None):
    """Convert a textual revenue figure into a ``{"min", "max"}`` dict.

    Args:
        val: The raw figure. For ``"EUR"`` and ``"USD"`` this is a
            ``"<low>-<high>"`` range whose parts end in ``k``/``m``/``b``
            (any case), e.g. ``"2.08m-208m"``. For ``"MYR"`` it is a single
            amount such as ``"MYR 1905mn"``; the ``MYR`` prefix, the
            magnitude letter and the trailing ``n`` are accepted in any case.
        currency: ``"EUR"``, ``"MYR"`` or ``"USD"``.
        rates: Optional mapping from currency code to the multiplier applied
            to MYR/USD amounts. Defaults to the notebook-level
            ``conversion_rates_to_eur``.

    Returns:
        ``{"min": float, "max": float}``. EUR ranges are returned as parsed;
        USD ranges are multiplied by the USD rate; a MYR amount is multiplied
        by the MYR rate and widened to ``amount / 10`` .. ``amount * 10``.
        ``None`` for any other currency.

    Raises:
        KeyError: unknown magnitude suffix, or the currency is missing from
            the rates mapping.
        ValueError: the numeric part is not a float, or a range does not
            split into exactly two parts.
        IndexError: an amount token is empty.
    """
    if currency == "EUR":
        low, high = _parse_range(val)
        return {"min": low, "max": high}

    if currency == "MYR":
        # lstrip takes a character set, which is safe on the left because the
        # amount starts with a digit. The previous two-sided strip("MYR") also
        # ate an upper-case magnitude suffix ("MYR 66M" -> " 66"), and mixed
        # case such as "MYR263Mn" failed the suffix lookup.
        token = val.strip().lower().lstrip("myr").strip().rstrip("n")
        rate = (conversion_rates_to_eur if rates is None else rates)["MYR"]
        amount = _parse_amount(token) * rate
        return {"min": amount / 10, "max": amount * 10}

    if currency == "USD":
        low, high = _parse_range(val)
        rate = (conversion_rates_to_eur if rates is None else rates)["USD"]
        return {"min": low * rate, "max": high * rate}

    # Unknown currencies yield None, exactly as the implicit fall-through did.
    return None

In [5]:
base_companies_str = '''
1	Gebrüder Kofler GmbH ML Test	Gebrüder Kofler GmbH (henceforth also referred as ‘Kofler’ or ‘Borrower’ in this report) was established in 1965 as a supplier of fresh fruits and vegetables to Tyrol’s catering industry by the second generation of Kofler family. It is currently managed by the third generation of Kofler family and the business is headquartered in Landeck (Austria). It supplies fresh and frozen food products to hotel and catering industry (also known as gastronomy sector) in the Tyrol state of Austria.	Credit Paper	Europe	2.08m-208m 	Food and Beverage Distribution		
2	HAZET Bauunternehmung GmbH ML Test	HAZET Bauunternehmung GmbH provides construction and restructuring services. The company was founded in 1997 and is based in Vienna, Austria. As of July 15, 2013, HAZET Bauunternehmung GmbH operates as a subsidiary of Bauservice-Fuhs Gesellschaft M.B.H.\	Credit Paper	Europe	9.75m-975m	Construction of Residential and Non- Residential Buildings		
3	"Holmes Place Wien GmbH ML Test"	Headquartered in Vienna (Austria), Holmes Place Wien GmbH (“Holmes Place” or “Company”) is part of Holmes Place Group which runs international chain of health clubs and operates 80 clubs in eight countries spread across Europe and Israel. The group has patronage of ~290,000 members. Holmes Place Group is owned by Fisher & Kirsh families. ▪ Holmes Place which started with one club in December 2000, currently operates four luxury health clubs in Vienna, Austria. Clubs are located in Wipplingerstrasse, Hütteldorfer Straße, Wehlistraße & Wagramer Str. Holmes Place clubs in Vienna offer various facilities including sports, pool, fitness, spa, Yoga, aerobics, wellness services and personal training.	Credit Paper	Europe	1.46m-146m	Fitness Centres		
4	Innofreight Speditions GmbH ML Test	Formed in 2002, Innofreight Group (“IF Group” or “the Group”) is an Austria-based integrated rail logistics systems provider. It manufactures, sells and leases wagons, containers, pallets, and unloading systems. It develops innovative wagons, containers, and unloading systems in cooperation with customers for industries like steel, energy, timber, building material, agriculture, and fluids. It moves a million container loads a year using 12,000 containers, 1,200 InnoWaggons, 150 block trains, 58 forklifts, and 6 unloading stations. The Group is a technology leader as reflected in its innovative products, such as ultra-light, multipurpose container waggon, the InnoWaggon. With its ITECCO1 program, it proposes to achieve leadership in Steel industry material transport and unloading systems.	Credit Paper	Europe	7.83m-783m	Transport / Rail Logistics		
5	"Interseroh Austria GmbH ML Test"	Interseroh Austria GmbH (“ISA” or “the Company”), incorporated in 1987 and based in Austria, operates waste collection and recycling system (translating to ‘Sammel und Verwertungssysteme in German’ or SVS) for packaging, electrical and electronic equipment and batteries throughout Austria on behalf of its customers. In addition, it also offers consulting solutions in waste management and undertakes waste bin product sales. ISA also acts as the holding company for the Groups non-German operations. ISA is part of the Interseroh group (Parent) which has a strong presence in Germany. Interseroh group is held by Germany based Alba group and China based Techcent group which are large players in waste management.	Credit Paper	Europe	2.39m-239m	Waste Management		
6	ISTAC Promotion GmbH	ISTAC Service GmbH (ISG), established in October 2017, is engaged in providing office supplies to banks and financial institutions. ISG is wholly owned by ISTAC Promotion GmbH (IPG). IPG undertakes orders for customized promotional products and gifts. ISG and IPG are together referred to as ISTAC Group (‘Group’ or ‘ISTAC’) and are based out of Pasching, Austria. The Group is a full-service promotional product and office supplies provider offering wide range of products with services from design to order to storage to worldwide delivery. ▪ The Group has a showroom in Pasching and an online web-shop to showcase the products on offer. It also offers its clients customized web-shops showing promotional products to employees, customers and sales partners.	Credit Paper	Europe	1m-51m	Promotional Products and Office Supplies		
7	Molin Industrie Inbetriebnahme & Montage GesmbH & Co KG ML Test	Molin-Industrie-Inbetriebnahme-Montage-Gesellschaft mbH & Co KG (“Molin”, “Company”) was established in 1983 in Linz, Austria. Ranked among the top 10 HVAC service providers in Austria, it is involved in the business of planning, installation, start-up and servicing & maintenance for building services and industrial plant engineering. Its client list includes market leaders from various sectors like BMW, Shell, BP, Opel, Spar, Aldi etc. ▪ Heard-quartered in Wels, Molin has branch offices in Vienna, Spillern and Hartberg in Austria. Majority of revenue comes from Austria (70%+) while the rest comes from overseas, primarily Germany. It had operations in other countries in Central and Eastern European, however, foreign subsidiaries have been closed in 2017 and 2018 in order to streamline the business. Further closure of subsidiaries in Germany and Romania is planned by 2019. ▪ Molin underwent major restructuring drive during 2017 and 2018. In addition to shutting down of foreign subsidiaries, certain business division and investment from one of the shareholders was merged into Molin.	Credit Paper	Europe	5.29m-529m	Heating, Ventilation and Air Conditioning		
8	Alpin Gastronomie GmbH ML Test	Incorporated in 2010, Alpin Gastronomie GmbH (“AlpinGast” or “the Company” or “the Borrower”) operates 6 restaurants of L’ Osteria under franchise model, owned and managed by Ms. Maria Klara Heinritzi, daughter of Mr. Micheal Heinritzi. Ms. Heinritzi also owns 2 other companies i.e. Alpin Restaurant GmbH (AlpinRest) and F.u.B. Alpin GmbH (FUB), which also operate restaurants of L’Osteria. As on Dec 2018, the 3 companies together (hereinafter referred to as “the group”) operate 13 restaurants throughout Austria. With all the restaurants on lease, the companies operate with asset-light model and differ only by location assignment. In FY19, the group has started the 14th restaurant in Austria. She also holds 50% stake in JV Alpin PMK Limited, UK which operates one L’Osteria restaurant in UK.	Credit Paper	Europe	1.64m-164m 	Restaurant		
9	Akatronik GmbH ML Test	Not found	Not found	Not found	Not found	Not found		
10	Backwelt Pilz GmbH	Backwelt Pilz GmbH (“Backwelt”) is a Lower-Austria based industrial producer of pre-baked and deep-frozen pastry and bread products. Its products enjoy premium pricing owing to high-quality, backed by flexible and efficient technology-based machinery. Founded in 2002 by Johannes Pilz senior and his son DI Johannes Pilz junior (Jr), Backwelt was one of the first in Austria to offer deep-frozen, pre-baked (80% baked) bread. While Pilz Jr is a trained food biologist and act as Managing Director with 75.5% stake, his father had a small, third generation bakery in Gmünd, its history dating to 1904.	Credit Paper	Europe	1.73m-173m 	Food and Beverages		
11	BHK Bau ML Test	Founded in 2002, BM BHK Bau GmbH (“BMBHK” or “the Company” or “the Borrower”) is a subcontractor in the construction services industry. Company undertakes activities ranging from construction of buildings, Civil Engineering, design and construction of Green and Leisure facilities, Infrastructure services and Traffic Routes.It also provides labour and supervisory personnel for various construction activities. Since fag end of FY17, the company is also taking up self-direct projects for construction of residential projects.	Credit Paper	Europe	5.68m-568m 	Construction Services		
12	Biomay AG ML Test	Biomay Produktions- und Handels AG develops pharmaceutical products and technology for allergies. It develops bio-compatible method of building up allergen tolerance. Its system involves covalently bonding the allergen extracts to carbohydrate beads such as polyarylamide, vinyl polymer, dextran or agarose beads. Biomay Produktions- und Handels AG is based in Vienna, Austria.	Credit Paper	Europe	1.40m-140m 	Pharmaceutical R&D and Contract Manufacturing		
13	Compact-Electric GmbH ML Test	Compact-Electric GmbH (CE) is a Vienna based electrical equipment (EE) and electronics manufacturing company with a vintage of more than 50 years. CE is run by Ms. Ulrike Kellner Haslauer (UH), Managing Director (MD) and majority shareholder (76%). UH has been associated with CE for the last 29 years. CE’s products have applications in Heating Ventilation and Air Conditioning industry, commercial construction, Infrastructure, smaller power plants, automation solutions and various other industries.	Credit Paper	Europe	1m-790m 	Electrical Equipment		
14	Faber GmbH ML Test	Faber Gmbh imports and distributes scooters and motorcycles. The company was founded in 1948 and is based in Vienna, Austria.	Credit Paper	Europe	3.63m-363m 	Automotive two-wheeler distributor		
15	Mona Naturprodukte GmbH	Founded in 2003, Mona Naturprodukte GmbH (“Mona” or “the Company” or “the Borrower”) is a subsidiary of Hain Celestial Group. It sells and distributes soy, oats and other plant-based drinks and dairy alternative products - known as plant-based dairy alternatives (PBDA) including organic products - under the ‘Joya’, ‘Dream’, ‘Happy’ brands as well as to many private label brands. The Company also distributes Hain Celestial grocery brands, including ‘Tilda Basmati rice’, ‘Terra premium vegetable chips’ and ‘Celestial seasoning tea’. The products are exported to over 30 countries in Europe while Austria and Germany are the major markets. The company reported revenues of €87.6Mn for FY18 and an EBITDA margin of 5.6%. The Company has reported a net cash position in FY18. However, adjusted for contingent liabilities and operating leases, adjusted net leverage increased sharply to 4.5x. With capacity expansion, the company is expected to generate sufficient cash flows to service its proposed borrowings.	Credit Paper	Europe	8.7m-877m 	Food and Beverage		
16	Rienhoff GmbH	Founded in 1982 by Udo Rienhoff (present CEO) and based in Vienna, Austria, Rienhoff GmbH (“Rienhoff” or “the Company” or “the Borrower”) is primarily into car renting business. For over 35 years, Rienhoff is operating as master-franchisee of one of the world’s largest car rental Company “Hertz” in Austria with addition in Slovenia in 2013. The vehicle inventory ranges from small cars to luxury sedans. Rienhoff started rental of classic /vintage cars under “Hertz Classics” brand and low-cost vehicle rental “Dollar / Trifty” brands both in year 2014. Rienhoff has wide network of 19 airport and off-airport locations in Austria and 1 in Slovenia.	Credit Paper	Europe	6.9m-669m 	Car Rental		
17	SAEXINGER Gesmbh	"Headquartered in Vienna-Leising (Austria), Saexinger GesmbH (“Saexinger”) is a family-owned dangerous goods logistics company. Saexinger GesmbH and Böntner Holding GmbH together would be referred to as ’Group’ or ’Böntner Group’ or ’Consolidated Group’.  The history of the company traces back to 1817, however, in its present form it was established in 1976 after the erstwhile company was taken over by Böntner Family. The company offers a range of services including storage of all dangerous goods classes (except explosive and radioactive substances), custom packaging, labelling, transport and related logistics solutions, and storage and transport of temperature-sensitive goods. Saexinger serves a wide range of end-industries including chemicals, detergents, pesticides, paints and varnishes, engine fuels, cosmetics, batteries, pharmaceutical products, among others."	Credit Paper	Europe	570K-57m	Logistics of Dangerous Goods		
18	Sanatorium Liebhartstal Formanek GmbH	Sanatorium Liebhartstal Formanek GmbH (“SanLieb”, “Company”) is a medical care facility based out of Vienna. ▪ SanLieb has 70 rooms with a capacity of 119 beds, covering a total area of 9,000m2, of which 6,000m2 comprises of two buildings while rest 3,000m2 is vacant parcel of land available for future expansion. The entire facility is located at Kollburggasse 6-10, Starkenburggasse 44, A-1160 Vienna	Credit Paper	Europe	440K-44m	Healthcare Hospital		
19	SPS Beteiligungs und Management GmbH	Vienna-based SPS Beteiligungs und Management GmbH (“SPS B&M” or “the Company” or “the Borrower”) is a leading international technology company, that designs, develops, manufactures and markets speech processing devices and solutions. It offers entire speech-to-text solutions, including hardware, software, and services - all from a single source, for various industries and application areas. It was the pioneer of professional dictation products and speech recognition technologies. Its niche product developments include cloud-based dictation system, mobile devices, USB dictation microphones and the dictation recorder app for smartphones. It largely caters to medical (40%) and legal sectors (35%) and rest is distributed among banks, insurance companies, civil engineers and police.	Credit Paper	Europe	5.4m-544m	Technology based speech Solutions		
20	WERNA Beteiligungs GmbH	Formed in August 2019, WERNA Beteiligungs GmbH (WERNA Beteiligungs) is an investment holding company. Its ultimate parent, WERNA Pte. Ltd. (WERNA) is a Singapore based, management consultant and investment management company founded in 2012. WERNA has real-estate presence in Austria with two other subsidiaries, (a) WERNA Linzerstrasse47 GmbH, which is in liquidation, post completion of development and resale of a property in Vienna, and (b) WERNA Real Estate GmbH, which operates serviced apartments in Vienna, via couple of owned and a rented place.	Credit Paper	Europe	1m-130m	Investment Holding in Real Estate		
21	Wiener Privatklinik Betriebs GmbH & Co KG	Wiener Privatklinik Betriebs GmbH & Co KG (‘WPK’) operates a 145-bed private hospital in Vienna, with vintage of more than 140 years. It offers multi-speciality services across Oncology, General Surgery, Sports and Trauma Surgery, Cardiology (Internal Medicine), Orthopaedics, Physiotherapy and Medical recovery services. WPK is held 100% by Wiener Privatklinik Holding AG (‘WPK Holding’, ultimate parent entity of the Group). WPK Holding and its subsidiaries (together ‘WPK Group’) render primary & secondary hospital care as well as retail & whole sale of drugs across Austria, Czech Republic and Romania. Dr. Walter EBM is the CEO, Managing Director and majority stake holder (c. 40%) in WPK Group. All the doctors who head various critical departments also have stake in WPK Group. In the recent past, Dr. Walter has been increasing his stake in the Group through share purchase from existing shareholders.	Credit Paper	Europe	5.63m-563m	Healthcare Hospital		
22	American Bank Note Company	Engaged in manufacture of plastic cards and provides related services. Primary products include EMV and mag stripe financial cards (debit and credit cards), identification and secure access cards (drivers' licenses, national ID cards for governments, hotel and facility entry cards), commercial plastic cards (gift, loyalty and membership), among others.	Credit Paper	United States	7.8m-780m 	Laminated Plastics Manufacturing		
23	Avedis ZildJian Co	Largest cymbals and drumstick maker in the world. The Company sells cymbals, drumsticks, percussion mallets and other drum accessories under the Zildjian, Vic Firth (acquired in 2010) and Balter Mallet brands (acquired in 2018).	Credit Paper	Europe, Africa / Middle East, United States and Canada, Latin America and Caribbean, Asia / Pacific	6.7m-670m 	Musical Instruments Manufacturing		
24	Cheer Pack North America, LLC	Engaged in manufacturing spout pouches which is flexible packaging product like drum liners, pails, lids which is having diversified industry application namely food and beverage industry such as dairy food, baby food, pet food, sauces, energy drinks, alcohol etc and non food segment namely cosmetics, household cleaners, personal care etc.	Credit Paper	United States	12.5m-1.25b	Packaging Products		
25	DN Tanks, Inc	Manufacturer of circular pre-stressed concrete tanks for liquid storage including potable water, wastewater, storm water, thermal energy, biofuels, etc. DNT’s primary customers are municipalities, local water authorities and other public agencies located primarily in the US. It also caters to the requirements of educational institutions, hospitals, airports, power plants, schools and government/military.	Credit Paper	United States, Canada, Europe, Asia-Pacific, Africa / Middle East	15.8m-1.58b	Other Concrete Product Manufacturing		
26	Esler Companies LLC	Founded in 2004 and headquartered in Smithfield, Rhode Island, Esler Companies, LLC (“Esler” or the “Company”) provides replacement of windows and doors and is the largest Renewal by Andersen (“RbA”) dealer in the US. RbA is the window replacement subsidiary of Andersen Corporation. RbA’s business includes manufacturing, installation and associated services of remodeled windows, primarily for home consumption. The company leverages dealership model wherein it uses services of franchises and specialist dealers like Esler to offer its window replacement services.	Credit Paper	United States	34.9m-3.49b	Residential Remodelers		
27	F.B. Packing Company Inc	Incorporated in 1990 by Frank Bertolino Sr as a means of diversification, is the sole remaining entity of the group that also included Frank Bertolino Beef Company Inc. (“FBB”). FBB was incorporated in 1967 and was a wholesaler of certified Angus beef products . However, it was shut down in 2012 due to accounting irregularities perpetrated by Frank Bertolino Jr. FBP is currently managed by Leo Bertolino, Frank Sr.’s eldest son. The company sources, processes and distributes beef, lamb, veal and other complementary products in the New England region of the US. FBP’s primary end customers include supermarkets, departmental stores, restaurant chains, and also other small meat wholesalers. It currently operates from New Boston Food Market (“Foodmart”; a food market complex in South Boston) and plans to relocate to a new larger facility in Peabody, MA next year.	Credit Paper	United States	12.5m-1.25b	Meat and Meat Product Merchant Wholesalers 		
28	Feeney Brothers Excavation LLC	Headquartered in Dorchester, MA, Feeney Brothers Excavation LLC (“FBE”) is a utility services provider engaged in constructing city gas distribution systems including excavation and laying of gas pipelines, underground electric civil work, civil work for telecom industry, installation/ maintenance and repair of steam lines. It operates principally in eastern Massachusetts and also throughout the New England region. It was founded in 1988 by Brendan and Greg Feeney (“Feeney brothers”) as ‘Feeney Brothers Excavation Corporation’. In 2012, the Feeney brothers sold 60% stake in the Company to CAI Private Equity (CAI) for $44Mn and was converted into a Limited Liability Company (LLC).	Credit Paper	United States	31.5m-3.15b	Oil and Gas Pipeline and Related Structures Construction 		
29	Rand Whitney Container LLc	Incorporated in 1938, is engaged primarily in manufacturing and distribution of corrugated packing products for medium scale corporates in the US. The Company’s products find application across sectors including e-commerce, food & beverages (F&B), consumer electronics, original equipment manufacturing (OEMs), Industrial goods, and aerospace and military. Apart from organic expansion, RWC has strengthened its business through strategic acquisitions and joint ventures. RWC has manufacturing facilities at New Hampshire (NH); Connecticut (CT); and Massachusetts (MA) and operates primarily in the New England region. The Company is a part of the Kraft Group (the Group) which has business interests in sports & entertainment, real estate & development and private equity investments along with packaging solutions.	Credit Paper	United States	16.5m-1.65b	Corrugated and Solid Fiber Box Manufacturing		
30	Symmons Industries Inc	Symmons Industries, Inc. (“Symmons” or the “Company”) was incorporated in 1939 by Paul C. Symmons as a plumbing valve manufacturer, which later diversified into kitchen and bath products. Its current product portfolio comprises trademarked pressure balancing valve system, single and two-handle faucets, water temperature control valves, and other plumbing products primarily serving the residential and commercial (particularly Healthcare and Hospitality) industries. Based out of Braintree, MA, the company caters to customers in the US, Canada, Mexico, Puerto Rico, the Caribbean, the UK and internationally. Currently managed by Tim O’ Keefe (Paul’s grandson and CEO), the company is privately owned by 3rd generation family owners Paula O’Keefe, Tim O’Keefe, and Ian O’Keefe.	Credit Paper	United States	9m-900m	Plumbing Fixture Fitting and Trim Manufacturing		
31	Leong Hup (Malaysia)	Leong Hup (Malaysia) Sdn. Bhd. (“LHM” or “the company” or “the borrower”) is one of the largest fully integrated producers of poultry, eggs and livestock feeds in Malaysia. The company is a wholly-owned subsidiary of Leong Hup International (LHI), which is a leading poultry producer in ASEAN countries and is controlled by the Lau Family. Parent entity LHI was established in 1978 in Malaysia and was subsequently expanded to Indonesia, Vietnam, Philippines, Singapore, and more recently into Cambodia. LHM operates the Malaysian business of the larger group and is a strategically important subsidiary (~30% of group revenues). 	Not found	Asia Pacific	MYR 1905mn	Integrated Poultry Farming		
32	Mine Logistics Sdn Bhd	Founded in 2009 by Mr. Dereck Ng and Mr. Hong Yeow, Mine Logistics Sdn. Bhd. (‘Minelog’ or ‘the Company’ or ‘the Borrower’) is a Malaysia-based integrated logistics solution provider. The Company offers one-stop logistics solutions like sea freight, air freight, haulage, forwarding, warehousing, break bulk, cargo handling and marine insurance. Majority (~90%) of Company’s revenue is generated from their freight segment catering mostly to Timber industry. In addition, the Company also provide cutting, stacking, bundling, painting, stencil MTIB and fumigation services as value-added services. With a fleet of 25 prime movers, 5 side loaders and 95 trailers, Minelog offers an extensive infrastructure in land transportation, through 100+ employees. Mine Warehouse Sdn. Bhd., an entity owned by one of the directors, owns two warehouses located in North Port (60k sq. ft) and Telok Gong (25k sq. ft) catering largely to the timber industry in peninsular region. The Company is a member of WCA (world’s largest and influential network of independent freight forwarders), the Selangor Freight Forwarders & Logistics Association (SFFLA) and the Federation of Malaysian Freight Forwarding (FMFF). In 2016, the Company was granted pioneer status for Integrated Logistics Services (ILS) by the Ministry of International Trade and Industry. The Company has obtained IILS (International Integrated Logistics Services) status with 6,225 agent offices in 793 cities across 187 countries. The company bagged various awards including SME 100 Fast Moving Companies Award 2013, Asia Pacific Excellence Service Award 2014, Golden Eagle Award Excellent Eagles 2014, Best Brands in Logistics – Freight Forwarding 2015-16 by SMES BestBrands Laureate. In 2016, Mr. Dereck Ng ventured into an online business-to-business (B2B) platform service (Minebizs.com) for Malaysia-based SME exporters connecting them with ASEAN importers. 
Also, Minelog is planning to expand their geographical presence by entering into Chinese market in collaboration with a Chinese partner (active in the supply chain management) to provide haulage and other logistics services. Further, in an interview1 from November 2016, Mr. Dereck Ng had expressed interest in venturing into business-to-consumer (B2C) E-Commerce Logistics space providing parcel delivery services throughout the ASEAN region by 2018. Progress on this is unknown. Minelog aims for listing in 2020. 	Not found	Asia Pacific	MYR 66mn	Logistics		
33	PKT Logistics	Founded in 1974 by Datuk Tio Sook Keo (Tio Senior), PKT Logistics Group Sdn Bhd (PKTG) is a Malaysia-based logistics solution provider and auto parts retailer. ~58% of the Group revenue comes from sale of automotive parts, handled by wholly owned subsidiary, Orisis Freight Logistics (M) Sdn Bhd (Orisis). PKTG’s operations further include end-to-end design, implementation, and operation of logistics solutions in freight forwarding, customs brokering, contract logistics, haulage, warehousing, and distribution management through Orisis as well as another subsidiary PKT Logistics (M) Sdn Bhd (PKTM). PKTM accounts for 31% of FY18 revenues, 74% assets, and 99% debt at PKTG and Orisis contributes 69% revenue with almost nil debt. Tio Senior was joined by Datuk Michael Tio (Tio Junior) in 1996. Post Tio Junior coming onboard the focus of the Company was shifted to automotive logistics in Malaysia since 2001. Apart from auto logistics, PKTG caters to Fast Moving Consumer Goods (FMCG), Food & Beverage (F&B), and renewable energy sectors. Majority of PKTG’s operations are in Malaysia. It also has some presence in Thailand, Vietnam, Indonesia, Cambodia, Laos, Taiwan, India and South Korea. By 2020, PKTG had aimed to achieve (a) 60:40 contribution between automotive and non-automotive segments (vs. >80% auto contribution in 2016), (b) 70:30 contribution by domestic and international revenue (Malaysia contributed >70% as of Sep 2016), (c) Warehouse capacity of 5Mn sq.ft. in Asia (1.2Mn sq.ft. at FY18), and (d) revenue of MYR1Bn (MYR0.7Mn in FY18). However, given the performance till date, it is unlikely that the Company will hit these targets by next year. 	 Not found	Asia Pacific	MYR 693mn	Logistics		
34	Syarikat Logistik Petikemas SDN BHD	Syarikat Logistik Petikemas Sdn. Bhd. (‘Petikemas’ or ‘the Group’) is an integrated logistics Company, that offers warehousing, transportation, freight forwarding, customs clearance, and ancillary services. Its operations are largely based in Malaysia, with some presence in Thailand through a JV (49% stake) with ThaiReefer Group. It operates ~200 ISO tanks and over 1000 trucks/haulers, including cold-stored/ refrigerated containers, through 1100 employees. The Group has seven warehouses (six of which are rented) located in Selangor, Pahang and Johor, with a total area of 1.6Mn sq.ft., and additional two warehouses of 1.3Mn sq.ft capacity are under construction—expected to begin operations by February 2021. Petikemas largely caters to Chemicals and FMCG industries. Incorporated in 1995, Petikemas commenced its business only in 1999 as a small forwarding Company and got into aggressive expansion in 2003 to become a full-fledged logistic service provider. In 2015, Malaysian Investment Development Authority (MIDA) approved the Group’s expansion projects including specialized warehousing services in Port Klang Free Zone (PKFZ) and Westports, and addition of 266 more commercial vehicles to its fleet. PKFZ operations entail Investment Tax Allowance. That apart, Petikemas was the first Malaysian Company to adopt automated bagging system technology to store resin polymer used in petrochemical and polymer industries. In FY19 ending January 31, 2019, Petikemas posted a revenue of MYR263Mn. 	Not found	Asia Pacific	MYR 263mn	Logistics		
35	Teo Seng Cap Bhd	Teo Seng Capital Berhad (Teo Seng or TSCB or the Borrower) is an integrated egg producer and one of the largest layer farmers in Malaysia. With revenues of ~MYR490Mn, it is an investment holding company and primary activities of its subsidiaries include egg production, manufacturing of paper egg trays, production of animal feeds, production of organic fertilizer using chicken manure and trading and distribution of animal health products. In 2008, the company was listed on the Bursa Securities Malaysia and has a current market cap of MYR339Mn as at 01 Oct 2019. TSCB currently operates 25 farms (categorised into brooding, pullet and layer farms) in Johor, Malaysia. Out of the 16 layer farms it owns, eight are accredited and granted export license by the Agri-Food and Veterinary Authority of Singapore (AVA). It has adopted the “all-in-all-out” and “closed-house” rearing system to reduce the risk of disease infection amongst its chickens. As of FY18, it has a daily egg production of 3.8Mn eggs, of which ~40% is exported overseas, mainly Singapore and Hong Kong. The Group plans to expand its production to 5Mn eggs per day by FY22. 	Not found	Asia Pacific	MYR 429mn	Integrated Poultry/ Farming		
'''

In [6]:
annotated_peers_str = '''
1	Gebrüder Woerle Ges.m.b.H	Positive
1	Gebrüder Haider Bauunternehmung GmbH	Negative 
1	Cobral Sarl	Negative 
1	Culinor NV	Negative 
1	Gastina GmbH	Negative 
1	Peckham & Rye Ltd.	Negative 
1	Bonfait B.V.	Positive
1	Total Gas & Power Limited	Negative 
1	Reynolds Catering Supplies Limited	Positive
1	Dominioni Punto & Pasta Srl	Negative 
2	HERZOG BAU GesmbH	Positive
2	Kelly Gesellschaft m.b.H	Negative 
2	TNT Express (Austria) Gesellschaft M.B.H.	Negative 
2	IBM Osterreich Internationale Bueromaschinen Gesellschaft m.b.H	Negative 
2	Straka Bau GmbH	Positive
2	Madaus Gesellschaft m.b.H.	Negative 
2	Verkehrsverbund Ost-Region (VOR) Gesellschaft M.B.H.	Negative 
2	Bank Austria Leasing Ikarus Immobilien Leasing Gesellschaft m.b.h.	Negative 
2	Unicredit Kfz Leasing Gmbh	Negative 
3	ELIXIA Austria GmbH	Positive
3	Holmes Place Health Clubs Ltd.	Positive
3	Holmes Place Health Clubs GmbH	Positive
3	Vienna Airport Business Park Immobilienbesitzgesellschaft M.B.H.	Negative 
3	Unilever BCS Austria GmbH	Negative 
3	Reckitt Benckiser Austria GmbH	Negative 
3	Brenntag Austria Holding Gmbh	Negative 
3	Iglo Austria Holding GmbH	Negative 
3	Novelis Deutschland Gmbh (Austria)	Negative 
3	MFC Holding Austria GmbH	Negative 
4	HLA Rosshafen Terminal GmbH	Negative 
4	Lormafer S.A.	Negative 
4	Pegas Container, s.r.o.	Negative 
4	Titagarh Wagons AFR S.A.	Negative 
4	CHS Container Handel GmbH	Negative 
4	Cocomat Holdings Limited	Negative 
4	Torgovyi Dom Soyuz Spets Sbyt	Neutral
4	ILAB Container AB	Neutral
4	On Rail Gesellschaft für Eisenbahnausrüstung und Zubehör mbH	Negative 
4	UCON AG Containersysteme KG	Neutral
5	ITERSEROH Jade-Stahl GmbH	Positive
5	INTERSEROH Scrap and Metals Trading GmbH	Neutral
5	INTERSEROH MAB Ost GmbH	Positive
5	Interseroh NRW GmbH	Positive
5	ALBA Servicios Verdes, S.L.	Neutral
5	INTERSEROH Hansa Finance GmbH	Negative 
5	ALBA plc & Co. KGaA	Positive
5	Hidroplasto Srl	Negative 
5	Northgate Information Solutions Limited	Negative 
5	ATON-HT S.A.	Neutral
6	ISG Pearce Limited	Negative 
6	Formula A/S	Positive
6	Norwood Promotional Products Europe SLU	Neutral
6	Sappi Finland Operations Oy	Negative 
6	Ocay Sverige I AB	Positive
6	Buffetti Group SpA	Positive
6	Whitegrove Group Limited	Positive
6	Lyreco Finland Oy	Positive
6	Dag Aasboe Travel As	Negative 
6	Media Partners Group B.V.	Negative 
7	SIGMA PLUS d.o.o.	Negative 
7	YIT Austria GmbH	Positive
7	Ortner Ges.M.B.H.	Neutral
7	Proenergy Contracting Gmbh	Positive
7	Twinputki Oy	Negative 
7	Luzian Bouvier Haustechnik & Fliesen Gmbh	Positive
7	KGT Gebäudetechnik GmbH	Neutral
7	ART-RASVJETA d.o.o.	Negative 
7	Kristl, Seibt & Co. Gesellschaft M.B.H.	Neutral
7	Daikin Airconditioning Central Europe HandelsGmbH	Positive
8	Alpin Spedition GmbH	Negative 
8	Saturn Electro-Handelsges.M.B.H. (Graz)	Negative 
8	Wienerwald Restaurants GmbH	Positive
8	Rosenberger Restaurant GmbH	Positive
8	Modelleisenbahn GmbH	Negative 
8	Diedamskopf Alpin Tourismus GmbH & CO KG	Neutral
8	Office Tower-IZD GmbH	Negative 
8	Modelleisenbahn Holding GmbH	Negative 
8	IGT Austria GmbH	Negative 
8	MAHLE Holding Austria GmbH	Negative 
10	Fournoi Attikis S.A.	Neutral
10	Pastisart Sa	Neutral
10	Stauffenberg GmbH & Co. KG	Neutral
10	Atlantic Pan S.L.	Neutral
10	Erlenbacher Backwaren GmbH	Neutral
10	Panaderias Nuevas De Santander Sl	Neutral
10	Minit Slovakia, s.r.o.	Neutral
10	Panamar Panaderos S.L.	Neutral
10	Lantmannen Unibake Sweden AB	Neutral
10	Grain D'Or Gel-Grain D'Or Frais SAS	Neutral
11	Jäger Bau GmbH	Positive
11	NORD-BAU GmbH & Co. KG	Neutral
11	Adelso Entreprenad & Transport AB	Neutral
11	Dreßler Bau GmbH	Neutral
11	Universale Bau Gmbh	Positive
11	Porr Bau GmbH	Neutral
11	ASFINAG Bau Management GmbH	Neutral
11	DURST-BAU GmbH	Negative 
11	Universale-Bau AG	Neutral
11	MAUSS BAU GmbH & Co. KG	Neutral
12	Biomay Produktions- und Handels AG	Positive
12	TPH Teppich-Produktions- und Handels-GmbH	Negative 
12	Colloseum Handels- Und Beteiligung Gmbh & Co. KG	Negative 
12	Aibler Fleisch- und Wurstwaren Produktions AG	Negative 
12	HIESTAND & Suhr Handels- und Logistik GmbH	Negative 
12	Ktk Getreidelager Und Handels Ag - Ktk Elevator And Trading	Negative 
12	Raiffeisen Uckermark Handels- und Dienstleistungs GmbH & Co. KG	Negative 
12	Nordpfeil GmbH	Negative 
12	Emporia Telecom Produktions- und Vertriebs- GmbH & Co. KG.	Negative 
12	TAG Immobilien AG	Negative 
13	Suomen Kiinnikekeskus Oy	Negative 
13	ITS, a.s.	Negative 
14	Faber Gmbh	Neutral
14	Nipponia S.A.	Negative 
14	Kolarik & Leeb GmbH	Negative 
14	Toyota Austria GmbH	Positive
14	Pharm-Allergan GmbH (Austria)	Negative 
14	MFC Holding Austria GmbH	Negative 
14	Novelis Deutschland Gmbh (Austria)	Negative 
14	Brenntag Austria Holding Gmbh	Negative 
14	Iglo Austria Holding GmbH	Negative 
14	Samsung Electronics Austria GmbH	Negative 
15	Mona Oberwart Produktions Gmbh	Neutral
15	Hain Celestial Ireland Limited	Neutral
15	Hain Celestial UK Limited	Neutral
15	Hain Frozen Foods UK Limited	Neutral
15	HC Holding BVBA	Negative 
15	Mona Naturprodukte GmbH	Neutral
15	Histon Sweet Spreads Limited	Neutral
15	Haldane Foods Ltd.	Neutral
15	Daniels Chilled Foods Limited	Neutral
15	Danival SAS	Neutral
16	Hertz UK Ltd	Positive
16	City Group Inter Rent PLC	Positive
16	Leaseway Vehicle Rental Limited	Neutral
16	Transports Grimaud 86	Positive
16	Easy Car Italia Srl	Neutral
16	Bil & Utstyrsutleie As	Neutral
16	Satal Exploitation	Positive
16	Easy Lease (Uk) Limited	Positive
16	Services Plus	Positive
16	Xlcr Vehicle Management Ltd	Positive
17	xpress Ltd.	Negative 
17	Transgarant	Negative 
17	Tempus Link Eood	Positive
17	Skanol Norge As	Positive
17	Tecnisample Sl	Positive
17	Finnforest Österreich GesmbH	Negative 
17	Unique Forwarding Ltd	Neutral
17	Havi Logistics Srl	Negative 
17	Sanova Pharma GesmbH	Negative 
17	Simtex International Limited	Positive
18	Nemocnice na Plesi, s.r.o.	Positive
18	United Parcel Service Speditionsgesellschaft m.b.H.	Negative 
18	United Parcel Service CSTC Ireland Limited	Negative 
18	Karlsruher-Sanatorium-AG	Negative 
18	Ejendomsselskabet Bygning 119 A/s	Negative 
18	Geopost Espana Parcel SA	Negative 
18	United Parcel Service Finland Oy	Negative 
18	Hermes Parcelnet Limited	Negative 
18	Forgiatura A. Vienna di Antonio Vienna s.a.s.	Negative 
18	P2g.Com Worldwide Limited	Negative 
19	Grundig Business Systems GmbH	Positive
19	Speech Recognition Company Limited	Positive
19	Speech Processing Solutions GmbH	Positive
19	SPS Beteiligungs und Management GmbH	Positive
19	Speech Processing Solutions GmbH	Positive
19	Nuance Communications Austria Gmbh	Positive
19	Vissokogovoriteli Ad	Negative 
19	SIGMATEK GmbH & Co KG	Negative 
19	Centron Slovakia, s.r.o.	Negative 
19	Nuance Communications Ireland Limited	Negative 
20	LTB Beteiligungs GmbH	Negative 
20	Shoe & Shirt Beteiligungs GmbH	Negative 
20	RAS Beteiligungs GmbH	Positive
20	Franz Haas Vermögens- Und Beteiligungs Aktiengesellschaft	Negative 
20	RSE Grundbesitz und Beteiligungs-GmbH	Positive
20	Schoeller Immobilien und Beteiligungs GmbH	Negative 
20	New Frontier Investment AG	Positive
20	PORR Construction Holding GmbH	Positive
20	IRE Beteiligungs GmbH	Negative 
20	DMS Beteiligungs GmbH	Positive
21	Privatklinik Dr. Robert Schindlbeck Gmbh & Co. Kg	Positive
21	The Hospital Group Healthcare Ltd	Positive
21	St. Franziskus Hospital GmbH	Positive
21	Teres Drammen	Positive
21	Spire Healthcare Group plc	Positive
21	Centro Clinico Diagnostico G.B. Morgagni s.r.l.	Positive
21	Hospital Da Luz, S.a.	Positive
21	Clinique des 2 Caps	Positive
21	Fuensanta S.L.	Positive
21	Omasairaala Oy	Positive
22	Able Card, LLC	Neutral
22	Perfect Plastic Printing Corporation	Positive
22	Signature Cards, LP.	Neutral
22	Meristem Packaging Company, Llc	Negative 
22	Versatile Card Technology, Inc.	Positive
22	Uni-Poly, Inc.	Negative 
22	Neos Merchant Solutions, Inc.	Neutral
22	Panel Processing Of Oregon Inc.	Negative 
22	Plastifab, Inc.	Negative 
22	Custom Profiles, Inc.	Negative 
23	Blaby Road Realisations Ltd	Neutral
23	United Musical Instruments USA, Inc.	Positive
23	Rythmes & Sons Sarl	Negative 
23	Schulmerich Bells, LLC	Neutral
23	Yamaha Corporation	Neutral
23	Malmark Inc.	Neutral
23	Matth. Hohner GmbH	Positive
23	Yamaha Music Europe GmbH	Neutral
23	Steinway Musical Instruments Inc.	Positive
23	Gear4music (Holdings) plc	Neutral
24	H.S.Crocker Company. Inc.	Positive
24	Specialty Quality Packaging	Positive
24	Ropak Corporation	Positive
24	New England Machinery, Inc.	Negative 
24	Bema Incorporated	Negative 
24	Key Packaging Company, Inc.	Neutral
24	Massman Automation Designs, LLC	Negative 
24	Paragon Manufacturing, Inc.	Neutral
24	Nercon Eng. & Mfg., Inc.	Negative 
24	Admiral Packaging, Inc.	Neutral
25	Forterra, Inc.	Neutral
25	Stanton Bonna Concrete Ltd.	Positive
25	Trader Construction Co.	Negative 
25	Prus Construction Company	Negative 
25	Sterling Construction Company, Inc.	Negative 
25	Petticoat-Schmitt Civil Contractors, Inc.	Negative 
25	Montana Construction Corporation, Inc.	Negative 
25	Donegal Construction Corporation	Negative 
25	Dura-Stress, Inc.	Neutral
25	Jeeran Holding Company K.S.C.P.	Negative 
26	Andersen Corporation	Neutral
26	Strong Tower Construction, LLC	Neutral
26	Bell Architectural Windows, Inc.	Neutral
26	Vista Window Company, LLC	Neutral
26	Chris Andersen Roofing, A Tecta America Company, LLC	Neutral
26	Window World of Baton Rouge, LLC	Positive
26	Ernest Glass Co. Inc.	Neutral
26	Brothers Two Windows & Screens, Inc.	Negative 
26	Atria Building Products, Inc.	Negative 
26	Henderson Glass, Inc.	Positive
27	Frank Brunckhorst Co., LLC	Positive
27	SR Investment Inc.	Negative 
27	Best Provision Co., Inc.	Positive
27	National Beef Packing Company, LLC	Neutral
27	Dutch Prime Foods, Inc.	Positive
27	Lieds-Frank Residential Landscapes	Negative 
27	Fairbank Reconstruction Corporation	Positive
27	FPL Food LLC	Positive
27	Howard Beef Processors, Inc.	Positive
27	Foodcomm International, Inc.	Positive
28	Targa Pipeline Mid-Continent LLC	Positive
28	Manhattan Pipeline, LLC	Neutral
28	Enerquest Oil And Gas LLC	Negative 
28	NEG Oil & Gas LLC	Negative 
28	Zephyr Gas Services, LLC	Neutral
28	Home-Stake Oil & Gas Company	Negative 
28	Zaza Energy, LLC	Negative 
28	Silver Creek Oil & Gas, LLC	Negative 
28	Hudson Valley Enviromental, Inc.	Neutral
28	Meco IV, LLC	Negative 
29	The Schwarz Group, LLC	Positive
29	Alpak Display Group	Neutral
29	PaperWorks Industries, Inc.	Positive
29	AGE Industries, Ltd.	Neutral
29	Carolina Container Company	Neutral
29	A&H Mfg. Co., LLC	Neutral
29	Gulf Packaging Inc.	Neutral
29	Corrugated Container Corporation	Neutral
29	Central Florida Box Corporation	Neutral
29	Robert Mann Packaging, Inc.	Positive
30	Marenco Marco Srl	Neutral
30	Bival Spa#	Positive
30	Officine Meccaniche Vezzani Elisabetta Srl#	Positive
30	Ceodeux Sa#	Positive
30	Caleffi Spa#	Positive
30	Grohe Water Technology AG & Co. KG	Neutral
30	Gentek Holdings, LLC*	Negative 
30	Tim Nordeste Telecomunicacoes S.A.	Negative 
30	The Chicago Faucet Company	Neutral
30	Speakman Company, Inc.	Positive
31	TPC Plus Berhad	Positive
31	Chen Guan Air-Conditioning & Engineering Sdn Bhd	Negative 
31	Huat Lai Resources Berhad	Positive
31	Beijing Huadu Yukou Poultry Co., Ltd.	Positive
31	Leong Hup Holdings Bhd	Neutral
31	CCK Consolidated Holdings Berhad	Positive
31	Inch & Metric Marketing Sdn Bhd	Negative 
31	Srinivasa Hatcheries Limited	Neutral
31	CE Technology Berhad	Negative 
31	Venky's (India) Limited	Neutral
32	Nimseeseng Transport 1988 Co Ltd	Positive
32	Chorcrane Co Ltd	Negative 
32	Amarit & Associates Logistics Co Ltd	Neutral
32	China Resources Ng Fung Limited	Negative 
32	Mr. John Co., Ltd.	Negative 
32	Damco New Zealand Limited	Positive
32	Intouch Intergroup Co Ltd	Positive
32	B.T. Service Co Ltd	Positive
32	C C S Co Ltd	Negative 
32	Aeo Logistics Sdn. Bhd.	Positive
33	Integrated Logistics Solutions Sdn. Bhd.	Negative 
33	Nexsol (Malaysia) Sdn. Bhd.	Negative 
33	Pelangi Sdn Bhd	Negative 
33	TC Electronics Sdn. Bhd.	Negative 
33	Oaksvilla Sdn Bhd	Negative 
33	TT Resources (Shanghai) Sdn Bhd	Negative 
33	WCT Land Sdn Bhd	Negative 
33	Tetangga Akrab Pelita (Pantu) Sdn. Bhd.	Negative 
33	Harta Makmur Sdn. Bhd.	Negative 
33	Advantis Network & Systems, Sdn Bhd.	Negative 
34	Syarikat Sabaco Sdn. Bhd.	Negative 
34	Cwt Commodities (Malaysia) Sdn. Bhd.	Neutral
34	Syarikat Bekalan Air Selangor Sdn. Bhd.	Negative 
34	Shenzhen Yunexpress Logistics Co., Ltd.	Positive
34	Multi-Trans Sdn Bhd	Neutral
34	Loxson International Logistics Co., Ltd.	Positive
34	Sattva Cfs And Logistics Private Limited	Neutral
34	Taewoong Global Logistics Sdn. Bhd.	Neutral
34	Nova Logistics Sdn. Bhd.	Neutral
34	Sal Agencies Sdn. Bhd.	Neutral
35	Consolidated Farms Bhd	Negative 
35	Teo Seng Capital Berhad	Negative 
35	Kewpie Egg Corporation	Positive
35	Anhui Hezheng Agriculture and Animal Husbandry Co., LTD	Neutral
35	Leong Huat Poultry Sdn. Bhd.	Neutral
35	Suzhou Ovodan Foods Co., Ltd	Positive
35	Leong Hup International Berhad	Neutral
35	Ovobel Foods Limited	Neutral
35	ProTen Limited	Positive
35	TPC Plus Berhad	Positive
'''

In [7]:
# Column headers of the base-company sheet, normalised into dictionary keys
# (lower-cased, spaces replaced by underscores). The trailing tabs of the
# pasted header row are stripped before splitting.
fields = [
    header.lower().replace(" ", "_")
    for header in (
        "Company Name\tBusiness Description\tSource of Business Description"
        "\tRegion\tRevenue Range\tSector / or  SIC Codes\t\t"
    ).strip("\t").split("\t")
]

# One entry per base company:
#   {"annotations": {}, "base_company": {<field>: <value>, ...}}
# The "annotations" dict starts empty here.
annotated_companies = []
for line in base_companies_str.split("\n")[1:]:  # element 0 is skipped
    line_dict = {"annotations": {}}
    base_company = {}
    # The first column of every row is dropped, so index i lines up with `fields`.
    for i, el in enumerate(line.strip("\t").split("\t")[1:]):
        if el.lower() == "not found":
            # Missing values are simply left out of the dictionary.
            continue
        if fields[i] == "revenue_range":
            # The region may itself have been "not found" (and therefore skipped);
            # fall back to the same USD default used for unknown regions instead of
            # raising a KeyError.
            region = base_company.get("region", "")
            currency = region_to_currency.get(region.lower(), "USD")
            base_company["currency"] = currency
            base_company[fields[i]] = convert_number(el, currency)
            # Remove only the literal "MYR" code. str.strip("MYR") treats its
            # argument as a *set of characters* and would also eat a trailing
            # "M", "Y" or "R" (e.g. the "M" of "10M").
            pretty_range = el
            if pretty_range.startswith("MYR"):
                pretty_range = pretty_range[len("MYR"):]
            if pretty_range.endswith("MYR"):
                pretty_range = pretty_range[:-len("MYR")]
            base_company["revenue_range_pretty"] = f"{currency} {pretty_range}"
            continue
        base_company[fields[i]] = el
    line_dict["base_company"] = base_company
    annotated_companies.append(line_dict)

In [8]:
# Display the normalised field names derived from the header row.
fields

['company_name',
 'business_description',
 'source_of_business_description',
 'region',
 'revenue_range',
 'sector_/_or__sic_codes']

In [9]:
annotated_lines = annotated_peers_str.split("\n")

# Element 0 is skipped (presumably the header row - verify against the string).
# Blank lines, such as the empty element produced by a trailing newline, are
# ignored explicitly rather than by slicing off the last element, so a final
# data row is not silently lost when the string has no trailing newline.
for line in annotated_lines[1:]:
    if not line.strip():
        continue
    # Each row is "<id>\t<peer company name>\t<label>"; strip() removes the
    # trailing whitespace that follows some labels.
    idd, company_name, label = line.strip().split("\t")
    idd = int(idd) - 1  # ids in the sheet are 1-based, list positions are 0-based
    annotated_companies[idd]["annotations"][company_name] = {"label": label}

In [10]:
def search_query(base_name, size=10):
    """Search the company index by name and return the matching documents.

    The request combines three ``multi_match`` clauses over the ``name`` and
    ``name.cleaned`` fields inside a ``bool``/``should`` query:

    * fuzziness "2", no boost,
    * fuzziness "1", boost 2,
    * no fuzziness, boost 4.

    Args:
        base_name: Company name to look up.
        size: Maximum number of hits to request.

    Returns:
        A list with the ``_source`` dictionary of every returned hit, in the
        order given by the search response.
    """

    def name_clause(fuzziness=None, boost=None):
        # Build one multi_match clause; fuzzy clauses also pin the first character.
        params = {"query": base_name}
        if fuzziness is not None:
            params["fuzziness"] = fuzziness
            params["prefix_length"] = 1
        params["fields"] = ["name", "name.cleaned"]
        params["minimum_should_match"] = "1"
        params["type"] = "most_fields"
        if boost is not None:
            params["boost"] = boost
        return {"multi_match": params}

    request_body = {
        "_source": True,
        "from": 0,
        "size": size,
        "query": {
            "bool": {
                "should": [
                    name_clause(fuzziness="2"),
                    name_clause(fuzziness="1", boost=2),
                    name_clause(boost=4),
                ]
            }
        },
    }

    response = client.search(index=INDEX, body=request_body)
    documents = []
    for hit in response["hits"]["hits"]:
        documents.append(hit["_source"])
    return documents

In [11]:
# Integer codes sharing their names with the annotation labels
# (presumably used to score labels numerically - confirm against the consumers).
Positive = 1
Neutral = 0
Negative = -1

In [13]:
# S3 location under which the annotated peers pickle is written by the save cell.
# NOTE(review): the value already ends with "/", and the save cell joins it with
# another "/", so the resulting object key contains "//" - confirm this is intended.
model_path = "s3://oaknorth-ml-dev-eu-west-1/andrei/peers/"

In [12]:
# Try the search helper on a single company name and list the names of the returned hits.
[hit["name"] for hit in search_query("Electronic4you GmbH")]

['electronic4you GmbH',
 'Electronic4you Gmbh',
 'RKM GmbH Personaldienstleistungen',
 'BANKPOWER GmbH Personaldienstleistungen',
 'Teamkompetent Gmbh Personaldienstleistungen',
 'teamkompetent GmbH Personaldienstleistungen',
 'Conexa Gmbh Präzisionsarmaturen',
 'Genopersonalconsult Gmbh',
 'Donaldson GmbH',
 'Konzentration GmbH']

In [None]:
# Resolve every annotated peer name to the entity id of its best search hit.
for annotated_set in annotated_companies:
    for peer_name, metadata in annotated_set["annotations"].items():
        # Query the index once and reuse the hits for both the name check and
        # the id lookup (two identical searches per name were issued before).
        hits = search_query(peer_name)
        if not hits:
            # Nothing came back at all: report it and record the miss explicitly
            # instead of failing with an IndexError on the empty result list.
            print(f"Failed to find {peer_name}, no results returned.")
            metadata["entity_id"] = None
            print("---------")
            continue
        top_hit = hits[0]
        found_name = top_hit["name"]
        if found_name != peer_name:
            # The top hit is still used below; this message only flags the mismatch.
            print(f"Failed to find {peer_name}, found {found_name} instead.")
        metadata["entity_id"] = top_hit["entity_id"]
        print("---------")

---------
---------
---------
---------
---------
---------
---------
---------
---------
---------
---------
---------
---------
---------
---------
---------
---------
---------
---------
---------
---------
---------
---------
---------
---------
---------
---------
---------
---------
Failed to find HLA Rosshafen Terminal GmbH, found HHLA Rosshafen Terminal GmbH instead.
---------
---------
Failed to find Pegas Container, s.r.o., found Pegas Container S.R.O. instead.
---------
---------
---------
---------
---------
---------
---------
---------
Failed to find ITERSEROH Jade-Stahl GmbH, found INTERSEROH Jade-Stahl GmbH instead.
---------
---------
---------
---------
---------
---------
---------
---------
---------
---------
---------
---------
---------
---------
---------
---------
---------
---------
---------
---------
---------
---------
Failed to find Ortner Ges.M.B.H., found Ortner Ges.m.b.H. instead.
---------
---------
---------
---------


In [15]:
import smart_open
import pickle

# Serialise the annotated_companies list with pickle and write it under model_path.
# NOTE(review): model_path already ends with "/", so this f-string yields "//" in the
# resulting location - confirm that readers of the file expect that exact key.
with smart_open.open(f"{model_path}/annotated_peers_batch1.pkl", "wb") as f:
    pickle.dump(annotated_companies, f)