## Suggestion Generation notebook

Given a set of base companies with optional annotated peers, this notebook generates new peers
and compares to the previous results.

In [1]:
from collections import defaultdict, OrderedDict
from elasticsearch import Elasticsearch, NotFoundError
from elasticsearch_dsl import Search
import numpy as np
import ssl
from elasticsearch.connection import create_ssl_context
from elasticsearch_dsl import Q
from graphene import ObjectType, String, Int, ID, Float, List, Schema
import urllib3

import smart_open
import pickle
import os

from onaiml.peers.ranker import Ranker
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

import boto3
from pyspark.sql import SparkSession

import pyspark.sql.functions as F
import pyspark.sql.types as T
from onaiml.industry.detector import IndustryClassDetector

In [2]:
fields = [
    "EBITDA",
    "EBIT",
    "EBITDA_MARG",
    "TOTAL_REVENUE",
    "TOTAL_CURRENT_ASSETS",
    "TOTAL_DEBT_EQUITY",
    "ST_BORROWINGS",
    "LONG_TERM_DEBT",
    "CURRENT_PORTION_DEBT",
    "TOTAL_EQUITY",
    "OPER_INC"
]

start_year = 2008
end_year = 2019

year_range = [f"{year}" for year in range(start_year, end_year+1)]
needed_years = year_range[-4:-1]

In [3]:
import requests

headers = {
  "X-API-Key": "ONAI_API_KEY",
  "X-Stack-Host": "Development",
  "X-Request-Id": "-1",
  "X-Stack-User": "Anonymous User"
}
    
get_financial_fields_query = f'''
query($companyId: EntityId, $params: TimeSeriesParameterisation) {{
  company(id: $companyId) {{
    name {{
      latestDataPoint {{
        value
      }}
    }}

    fields(
      mnemonics: [{" ".join([f'"{el}"' for el in fields])}]
      params: $params
    ) {{
      ...latestKeyedValues
    }}
  }}
}}

fragment latestKeyedValues on TimeSeries {{
  dataItem {{
    mnemonic
  }}
  __typename
  ... on BooleanTimeSeries {{
    latestDataPoint {{
      boolValue: value
    }}
  }}
  ... on StringTimeSeries {{
    latestDataPoint {{
      stringValue: value
    }}
  }}
  ... on IDTimeSeries {{
    latestDataPoint {{
      idValue: value
    }}
  }}
  ... on MonetaryAmountTimeSeries {{
    dataPoints {{
      eventDate
      monetaryAmount {{
        currency {{
          code
        }}
        value: value
      }}
    }}
  }}
  ... on IntegerTimeSeries {{
    dataPoints {{
      eventDate
      intValue: value
    }}
  }}
  ... on FloatTimeSeries {{
    dataPoints {{
      eventDate
      floatValue: value
    }}
  }}
  ... on StringArrayTimeSeries {{
    latestDataPoint {{
      stringValues: value
    }}
  }}
}}
'''.replace("{{", "{").replace("}}", "}")

def create_company(result, mlt_field, target_currency):
    time_series_params = {
        "period": {
          "range": {
            "start": "2008-12-31",
            "end": "2019-12-31"
          }
        },
        "periodType": "ANNUAL",
        "currency": {
          "targetCurrency": target_currency
        }
    }

    src = result["_source"]
    entity_id = src["entity_id"]
    request = requests.post('https://data-services.onai.cloud/api/', 
                            json={'query': get_financial_fields_query,
                                  "variables": {
                                      "companyId": entity_id,
                                      "params": time_series_params
                                  }
                                 }, 
                            headers=headers).json()["data"]["company"]
    ret = dict(
            id=src["entity_id"],
            name=src["name"],
            sector=src.get("primary_sic_node_desc"),
            num_employees=src.get("number_employees"),
            country=src.get("country_of_incorporation"),
            score=src.get(result["_score"]),
            region=src.get("region"),
            description=src.get(mlt_field),
            company_type=src.get("company_type_name"),
            predicted_industries=src.get("predicted_industries")
        )
    
    fye = None
    for field_dict in request["fields"]:
        fye = render_field(ret, field_dict)
        
    ret["financial_year_end"] = "-".join(fye.split("-")[-2:])
    
#     for year in needed_years:
#         if year not in ret["EBITDA"]:
#             return None
    
    return ret

def render_latest_monetary_times(field):
    ret = {}
    if "dataPoints" not in field:
        return {}
    for data_point in field["dataPoints"]:
        date = data_point["eventDate"].split("-")[0]
        value = data_point["monetaryAmount"]["value"]
        currency = data_point["monetaryAmount"]["currency"]["code"]
        
        ret[date] = f"{currency} {value/1e6}m"

    return ret

def render_latest_float_times(field):
    ret = {}
    if "dataPoints" not in field:
        return {}
    for data_point in field["dataPoints"]:
        date = data_point["eventDate"].split("-")[0]
        value = data_point["floatValue"]

        ret[date] = str(value)
    return ret

def render_field(company_dict, field):
    if field['__typename'] == 'MonetaryAmountTimeSeries':
        company_dict[field["dataItem"]["mnemonic"]] = render_latest_monetary_times(field)
        if "dataPoints" not in field or len(field['dataPoints']) == 0:
            return "N/A"
        return field["dataPoints"][0]["eventDate"]
    if field['__typename'] == 'FloatTimeSeries':
        company_dict[field["dataItem"]["mnemonic"]] = render_latest_float_times(field)
        if "dataPoints" not in field or len(field['dataPoints']) == 0:
            return "N/A"
        return field["dataPoints"][0]["eventDate"]

In [16]:
res = obtain_result(annotated_companies[1]["base_company"])

In [4]:
sts_client = boto3.client('sts')
assumed_role_object=sts_client.assume_role(
        RoleArn="arn:aws:iam::823139504911:role/MLDevAdmin",
        RoleSessionName="SparkMLDevAdmin"
    )
credentials=assumed_role_object['Credentials']

aws_access_key_id = credentials['AccessKeyId']
aws_secret_access_key = credentials['SecretAccessKey']
aws_session_token = credentials['SessionToken']

spark = SparkSession.builder \
        .master(os.environ.get("SPARK_MASTER", "local[*]")) \
        .appName("ES") \
        .config("spark.sql.catalogImplementation", "hive") \
        .config("spark.hadoop.fs.AbstractFileSystem.s3.impl", "org.apache.hadoop.fs.s3a.S3A") \
        .config("spark.hadoop.fs.AbstractFileSystem.s3a.impl", "org.apache.hadoop.fs.s3a.S3A") \
        .config("spark.hadoop.fs.s3a.aws.credentials.provider", "org.apache.hadoop.fs.s3a.TemporaryAWSCredentialsProvider") \
        .config("spark.hadoop.fs.s3.aws.credentials.provider", "org.apache.hadoop.fs.s3a.TemporaryAWSCredentialsProvider") \
        .config("spark.hadoop.fs.s3.impl", "org.apache.hadoop.fs.s3a.S3AFileSystem") \
        .config("spark.hadoop.fs.s3a.impl", "org.apache.hadoop.fs.s3a.S3AFileSystem") \
        .config("spark.executor.extraLibraryPath", "/usr/lib64/libsnappy.so") \
        .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer") \
        .config("spark.kryoserializer.buffer.max", "2047m") \
        .config("spark.driver.memory", "20g") \
        .config("spark.worker.cleanup.enabled", "true") \
        .config("spark.worker.cleanup.interval", "60") \
        .config("spark.hadoop.fs.s3.access.key", aws_access_key_id) \
        .config("spark.hadoop.fs.s3.secret.key", aws_secret_access_key) \
        .config("spark.hadoop.fs.s3.session.token", aws_session_token) \
        .config("spark.hadoop.fs.s3a.access.key", aws_access_key_id) \
        .config("spark.hadoop.fs.s3a.secret.key", aws_secret_access_key) \
        .config("spark.hadoop.fs.s3a.session.token", aws_session_token) \
        .enableHiveSupport() \
        .getOrCreate()

In [5]:
HOST = "host.docker.internal"
PORT = "9200"
SSL = False
INDEX = "peer_company"

if SSL:
    ssl_context = create_ssl_context()
    ssl_context.check_hostname = False
    ssl_context.verify_mode = ssl.CERT_NONE
else:
    ssl_context = None

client = Elasticsearch(
    hosts=[{'host': HOST, 'port': PORT}],
    indices=[INDEX],
    scheme="https" if SSL else "http",
    ssl_context=ssl_context)

ranker = Ranker()

In [6]:
sic_to_text = {
    row.primary_sic_node: row.primary_sic_node_desc
    for row in
    spark.read.load("s3://ai-data-lake-dev-eu-west-1/business/company_data_denormalized")
    .select("primary_sic_node", "primary_sic_node_desc")
    .distinct()
    .collect()
}

In [7]:
industry_detector = IndustryClassDetector("baseline")

In [8]:
def predict_industries(desc, topn=3):
    topn_pred = [industry_detector(el, "sic", topn) for el in desc]
    return [[sic_to_text.get(name, name)
         for name, proba in el if proba > 0.1]
        for el in topn_pred
    ]

In [9]:
predict_industries(["HAZET Bauunternehmung GmbH provides construction and restructuring services. The company was founded in 1997 and is based in Vienna, Austria. As of July 15, 2013, HAZET Bauunternehmung GmbH operates as a subsidiary of Bauservice-Fuhs Gesellschaft M.B.H."])

[['Holding companies']]

In [17]:
model_path = "s3://oaknorth-ml-dev-eu-west-1/andrei/peers"
annotated_companies = {}
with smart_open.open(f"{model_path}/annotated_peers_batch1.pkl", "rb") as f:
    annotated_companies = pickle.load(f)

In [18]:
predict_industries(["ELIXIA Austria GmbH operates health clubs. The company was founded in 1999 and is based in Vienna, Austria. As of May 27, 2009, ELIXIA Austria GmbH operates as a subsidiary of Holmes Place Health & Fitness Centres GmbH."])

[['Physical fitness facilities']]

In [19]:
annotated_companies[-2]

{'annotations': {'Consolidated Farms Bhd': {'label': 'Negative',
   'entity_id': '4f6da807-ce59-5a45-9ca2-7c97c04d512b'},
  'Teo Seng Capital Berhad': {'label': 'Negative',
   'entity_id': '3f9ebc28-9aca-5764-bf44-f607becfc713'},
  'Kewpie Egg Corporation': {'label': 'Positive',
   'entity_id': '93e27aa7-0960-5fd9-b217-5d5c9493869d'},
  'Anhui Hezheng Agriculture and Animal Husbandry Co., LTD': {'label': 'Neutral',
   'entity_id': 'b6bf5713-5e9a-5439-8c5d-66d808c7e812'},
  'Leong Huat Poultry Sdn. Bhd.': {'label': 'Neutral',
   'entity_id': '9349b4d3-42a0-5411-aebb-f474b8b7a054'},
  'Suzhou Ovodan Foods Co., Ltd': {'label': 'Positive',
   'entity_id': '4093cc72-85bc-54ed-ba37-6d13529caffe'},
  'Leong Hup International Berhad': {'label': 'Neutral',
   'entity_id': '09bafdae-278d-50c6-8fb3-46bdf8fcb3e0'},
  'Ovobel Foods Limited': {'label': 'Neutral',
   'entity_id': '01603810-6c08-595a-b2a2-a3d2bdadc247'},
  'ProTen Limited': {'label': 'Positive',
   'entity_id': 'de43d4d6-4bbd-5b7d-bb3

In [28]:
def obtain_result(company,
                  min_doc_freq=1, 
                  min_term_freq=0, 
                  max_query_terms=15,
                  mlt_field="business_description"):
    
    if "business_description" not in company:
        return []
    
    synthetic_doc = {}
    synthetic_doc["business_description"] = company["business_description"]
    
    synthetic_doc["predicted_industries"] = predict_industries(
        [" ".join([company["business_description"], company["sector_/_or__sic_codes"]])]
    )[0]

    query = {"query":{'bool': {'must': []}}}
    
    company_currency = company.get("currency")
    company_revenue = company.get("revenue")
    company_region = company.get("region")

    fields_based_filter: dict = defaultdict(list)

    if company_revenue is not None:
        min_revenue = company_revenue["min"]
        max_revenue = company_revenue["max"]
        fields_based_filter['must'].extend(
            [
                {'range': {'total_revenue': {'gt': min_revenue, 'lt': max_revenue}}},
            ]
        )

    if company_region is not None:
        fields_based_filter['must'].append({"match": {"region": company_region}})
    for el in synthetic_doc["predicted_industries"]:
        fields_based_filter['should'].append({
            "match": {
                "predicted_industries": el
            }
        })

    query["query"]["bool"] = dict(fields_based_filter)

    mlt = {
        'more_like_this': {
            "like": [{"doc": synthetic_doc}],
            "fields": [mlt_field, 'predicted_industries'],
            "min_doc_freq": min_doc_freq,
            "min_term_freq": min_term_freq,
            "max_query_terms": max_query_terms,
            "include": True,
        }
    }

    query_boolean_should = query["query"]["bool"].get("should", [])
    query_boolean_should.append(mlt)
    query["query"]["bool"]["should"] = query_boolean_should
    query["size"] = 20
    
    print(query)

    json_result = client.search(index=INDEX, body=query)
    
    results = json_result["hits"]["hits"]
    
    ret = []
    
    for result in results:
        company = create_company(result, mlt_field, company_currency)
        if company is not None:
            ret.append(company)
        if len(ret) >= 20:
            break
            
    return ret

def find_peer_in_result(results, peer):
    for i, company in enumerate(results):
        if peer == company["id"]:
            return i
    return -1


def compute_counts_from_ranks(ranks):
    counts = [0] * 6
    for rank in ranks:
        if 0 < rank <= 10:
            counts[0] += 1
        elif 10 < rank <= 20:
            counts[1] += 1
        elif 20 < rank <= 50:
            counts[2] += 1
        elif 50 < rank <= 100:
            counts[3] += 1
        elif 100 < rank:
            counts[4] += 1
        else:
            counts[5] += 1
    return counts

In [30]:
annotated_companies[0]["base_company"]

{'company_name': 'Gebrüder Kofler GmbH ML Test',
 'business_description': 'Gebrüder Kofler GmbH (henceforth also referred as ‘Kofler’ or ‘Borrower’ in this report) was established in 1965 as a supplier of fresh fruits and vegetables to Tyrol’s catering industry by the second generation of Kofler family. It is currently managed by the third generation of Kofler family and the business is headquartered in Landeck (Austria). It supplies fresh and frozen food products to hotel and catering industry (also known as gastronomy sector) in the Tyrol state of Austria.',
 'source_of_business_description': 'Credit Paper',
 'region': 'Europe',
 'currency': 'EUR',
 'revenue_range': {'min': 2080000.0, 'max': 208000000.0},
 'revenue_range_pretty': 'EUR 2.08m-208m ',
 'sector_/_or__sic_codes': 'Food and Beverage Distribution'}

In [26]:
# obtain_result("1b3872ba-8913-517f-a253-7e62a525cf55")
# obtain_result_old("1b3872ba-8913-517f-a253-7e62a525cf55")

In [27]:
top10_ratio = []
top20_ratio = []

# for min_doc_freq in [0,1,2,3]:
#     for min_term_freq in [0,1,2]:
#         for max_query_terms in [5, 7, 10, 12, 15]:

ranks_default = []
ranks_model = []
ranks_model_soted = []

doub_avg_top10 = []
doub_avg_top20 = []

missing_case = {}

negative_count = 0

suggested_peers = OrderedDict()
for annotated_company in annotated_companies:
    if len(annotated_company["base_company"]) == 0:
        continue
    query_id = annotated_company["base_company"]["company_name"]
    tmp_ranks_default = []
    tmp_ranks_model = []
    tmp_ranks_sorted = []

    missing_case[query_id] = []
    # results = obtain_result(query_id, min_doc_freq, min_term_freq, max_query_terms)
    results = obtain_result(annotated_company["base_company"], 1, 0, 10)
    
    if len(results) == 0:
        print(f"Failed to find target company {query_id}")

    suggested_peers[query_id] = results
    
    for name,peer in annotated_company["annotations"].items():
        if peer["label"] != "Positive":
            continue
        ranks_default.append(find_peer_in_result(results, peer["entity_id"]))

        tmp_ranks_default.append(find_peer_in_result(results, peer["entity_id"]))

        if find_peer_in_result(results, peer) == -1:
            missing_case[query_id].append(peer)

    print(query_id)
    print(tmp_ranks_default)
    print("------")

print(compute_counts_from_ranks(ranks_default))

top10_ratio.append(np.mean(doub_avg_top10))
top20_ratio.append(np.mean(doub_avg_top20))

print(top10_ratio)
print(top20_ratio)

{'query': {'bool': {'must': [{'match': {'region': 'Europe'}}], 'should': [{'match': {'predicted_industries': 'Fresh fruits and vegetables'}}, {'more_like_this': {'like': [{'doc': {'business_description': 'Gebrüder Kofler GmbH (henceforth also referred as ‘Kofler’ or ‘Borrower’ in this report) was established in 1965 as a supplier of fresh fruits and vegetables to Tyrol’s catering industry by the second generation of Kofler family. It is currently managed by the third generation of Kofler family and the business is headquartered in Landeck (Austria). It supplies fresh and frozen food products to hotel and catering industry (also known as gastronomy sector) in the Tyrol state of Austria.', 'predicted_industries': ['Fresh fruits and vegetables']}}], 'fields': ['business_description', 'predicted_industries'], 'min_doc_freq': 1, 'min_term_freq': 0, 'max_query_terms': 10, 'include': True}}]}}, 'size': 20}
Gebrüder Kofler GmbH ML Test
[-1, -1, -1]
------
{'query': {'bool': {'must': [{'match':

Molin Industrie Inbetriebnahme & Montage GesmbH & Co KG ML Test
[-1, -1, -1, -1]
------
{'query': {'bool': {'must': [{'match': {'region': 'Europe'}}], 'should': [{'match': {'predicted_industries': 'Eating places'}}, {'more_like_this': {'like': [{'doc': {'business_description': 'Incorporated in 2010, Alpin Gastronomie GmbH (“AlpinGast” or “the Company” or “the Borrower”) operates 6 restaurants of L’ Osteria under franchise model, owned and managed by Ms. Maria Klara Heinritzi, daughter of Mr. Micheal Heinritzi. Ms. Heinritzi also owns 2 other companies i.e. Alpin Restaurant GmbH (AlpinRest) and F.u.B. Alpin GmbH (FUB), which also operate restaurants of L’Osteria. As on Dec 2018, the 3 companies together (hereinafter referred to as “the group”) operate 13 restaurants throughout Austria. With all the restaurants on lease, the companies operate with asset-light model and differ only by location assignment. In FY19, the group has started the 14th restaurant in Austria. She also holds 50% st

Rienhoff GmbH
[0, -1, -1, -1, -1, -1, -1]
------
{'query': {'bool': {'must': [{'match': {'region': 'Europe'}}], 'should': [{'match': {'predicted_industries': 'Trucking, except local'}}, {'match': {'predicted_industries': 'General warehousing and storage'}}, {'match': {'predicted_industries': 'Chemicals and allied products'}}, {'more_like_this': {'like': [{'doc': {'business_description': '"Headquartered in Vienna-Leising (Austria), Saexinger GesmbH (“Saexinger”) is a family-owned dangerous goods logistics company. Saexinger GesmbH and Böntner Holding GmbH together would be referred to as ’Group’ or ’Böntner Group’ or ’Consolidated Group’.  The history of the company traces back to 1817, however, in its present form it was established in 1976 after the erstwhile company was taken over by Böntner Family. The company offers a range of services including storage of all dangerous goods classes (except explosive and radioactive substances), custom packaging, labelling, transport and related l

Avedis ZildJian Co
[3, -1, 11]
------
{'query': {'bool': {'must': [{'match': {'region': 'United States'}}], 'should': [{'match': {'predicted_industries': 'Plastics products'}}, {'match': {'predicted_industries': 'Unsupported plastics film and sheet'}}, {'more_like_this': {'like': [{'doc': {'business_description': 'Engaged in manufacturing spout pouches which is flexible packaging product like drum liners, pails, lids which is having diversified industry application namely food and beverage industry such as dairy food, baby food, pet food, sauces, energy drinks, alcohol etc and non food segment namely cosmetics, household cleaners, personal care etc.', 'predicted_industries': ['Plastics products', 'Unsupported plastics film and sheet']}}], 'fields': ['business_description', 'predicted_industries'], 'min_doc_freq': 1, 'min_term_freq': 0, 'max_query_terms': 10, 'include': True}}]}}, 'size': 20}
Cheer Pack North America, LLC
[3, -1, -1]
------
{'query': {'bool': {'must': [{'match': {'regio

ConnectionError: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))

In [24]:
suggested_peers["Biomay AG ML Test"]

[{'id': '805efd6a-649b-542d-a214-4de26dc32603',
  'name': 'Biomay Produktions- und Handels AG',
  'sector': 'Noncommercial research organizations',
  'num_employees': None,
  'country': 'Austria',
  'score': None,
  'region': 'Europe',
  'description': 'Biomay Produktions- und Handels AG develops pharmaceutical products and technology for allergies. It develops bio-compatible method of building up allergen tolerance. Its system involves covalently bonding the allergen extracts to carbohydrate beads such as polyarylamide, vinyl polymer, dextran or agarose beads. Biomay Produktions- und Handels AG is based in Vienna, Austria.',
  'company_type': 'Private Company',
  'predicted_industries': ['Pharmaceutical preparations',
   'Biological products except diagnostic',
   'Noncommercial research organizations'],
  'EBITDA': {},
  'EBIT': {},
  'EBITDA_MARG': {},
  'TOTAL_REVENUE': {'2014': 'EUR 1.962582m',
   '2015': 'EUR 5.993629m',
   '2016': 'EUR 7.036155m',
   '2017': 'EUR 14.626798m',
  

In [43]:
import xlwt
from xlwt import Workbook 

In [44]:
def write_company(sheet, row, start_column, company, company_id):
    sheet.write(row, start_column, company_id)
    sheet.write(row, start_column+1, base_company["company_name"])
    sheet.write(row, start_column+2, base_company["business_description"][:32767])
    sheet.write(row, start_column+3, base_company["region"])
    sheet.write(row, start_column+4, base_company['sector_/_or__sic_codes'])
    sheet.write(row, start_column+5, base_company["revenue_range_pretty"])

In [45]:
def write_peer(sheet, row, peer, base_borrower_id):
    sheet.write(row, 0, base_borrower_id)
    sheet.write(row, 1, peer["name"])
    sheet.write(row, 2, peer["description"][:32767])
    sheet.write(row, 3, peer["region"])
    sheet.write(row, 4, peer["sector"])
    sheet.write(row, 5, peer["financial_year_end"])
    
def write_peer_financials(sheet, row, peer, field, base_borrower_id):
    sheet.write(row, 0, base_borrower_id)
    sheet.write(row, 1, peer["name"])
    financials = peer.get(field, {})
    
    col = 2
    for year in year_range:
        sheet.write(row, col, financials.get(year, "N/A"))
        col += 1

In [46]:
wb = Workbook(encoding='utf-8')
peer_sheet = wb.add_sheet('Suggested Peers')
peer_sheet.write(0,0, "Base Borrower ID")
peer_sheet.write(0,1, "Base Borrower Name")
peer_sheet.write(0,2, "Base Borrower Description")
peer_sheet.write(0,3, "Base Borrower Region")
peer_sheet.write(0,4, "Base Borrower Sector")
peer_sheet.write(0,5, "Base Borrower Revenue Range")

row = 1
for i,annotated_company in enumerate(annotated_companies):
    base_company = annotated_company["base_company"]
    if "business_description" not in base_company:
        continue
    write_company(peer_sheet, row, 0, base_company, i+1)
    row += 1

row += 1
peer_sheet.write(row, 0, "Peers")
row += 1
peer_sheet.write(row, 0, "Base Borrower ID")
peer_sheet.write(row, 1, "Peer Company Name")
peer_sheet.write(row, 2, "Peer Description")
peer_sheet.write(row, 3, "Peer Region")
peer_sheet.write(row, 4, "Peer Sector")
peer_sheet.write(row, 5, "Peer Financial Year End (MM-DD)")
row += 1

for i,(_,peers) in enumerate(suggested_peers.items()):
    for peer in peers:
        write_peer(peer_sheet, row, peer, i+1)
        row+=1
    row += 1


financials_sheet = wb.add_sheet("Peer Financials")
row = 0
for field in fields:
    financials_sheet.write(row, 0, 
                           "TOTAL_ASSETS" if field == "TOTAL_CURRENT_ASSETS" else field)
    row += 1
    financials_sheet.write(row, 0, "Base Borrower ID")
    financials_sheet.write(row, 1, "Peer Name")
    for i,year in enumerate(year_range):
        financials_sheet.write(row, i+2, year)  
    row += 1
    for i,(_,peers) in enumerate(suggested_peers.items()):
        for peer in peers:
            write_peer_financials(financials_sheet, row, peer, field, i+1)
            row+=1
    row += 1

In [47]:
wb.save("PeerSuggestions_Batch2.xls")

In [4]:
def company_to_dict(c):
    return {
        "id": c.id,
        "name": c.name,
        "country": c.country,
        "sector": c.sector,
        "revenue": c.revenue,
        "ebitda": c.ebitda,
        "num_employees": c.num_employees,
        "score": c.score,
        "region": c.region,
        "description": c.description,
        "company_type": c.company_type,
    }

In [5]:
import smart_open
import pickle

In [10]:
with smart_open.open("s3://onai-ml-dev-eu-west-1/company2vec/model/es_mlt/suggested_peers.pkl", "wb") as f:
    pickle.dump({k:[company_to_dict(el) for el in v] for k,v in suggested_peers.items()}, f)