In [1]:
import json
import numpy as np

In [2]:
with open("countries.json", "r") as file:
    countries = json.load(file)

In [6]:
with open('products.json', 'r') as file:
    products = json.load(file)

# Generate Fast Embeddings for Countries
We'll use the `sentence-transformers` library with the `all-MiniLM-L6-v2` model to quickly generate embeddings for each country name.

In [4]:
# Install sentence-transformers if not already installed
!pip install -q sentence-transformers

In [8]:
from sentence_transformers import SentenceTransformer
import numpy as np

model = SentenceTransformer('all-MiniLM-L6-v2')

country_names = [c['country_name'] for c in countries]
embeddings = model.encode(country_names, show_progress_bar=True)

# Save embeddings to a file
np.save('country_embeddings.npy', embeddings)
with open('country_names.json', 'w') as f:
    json.dump(country_names, f)

print(f"Saved {len(embeddings)} country embeddings.")

# do the same for products
product_names = [p['description'] for p in products]
product_embeddings = model.encode(product_names, show_progress_bar=True)
np.save('product_embeddings.npy', product_embeddings)
with open('product_names.json', 'w') as f:
    json.dump(product_names, f)

  return forward_call(*args, **kwargs)
Batches: 100%|██████████| 8/8 [00:00<00:00, 101.64it/s]


Saved 238 country embeddings.


Batches: 100%|██████████| 169/169 [00:07<00:00, 22.48it/s]


In [7]:
products

[{'code': 10121, 'description': 'Horses: live, pure-bred breeding animals'},
 {'code': 10129,
  'description': 'Horses: live, other than pure-bred breeding animals'},
 {'code': 10130, 'description': 'Asses: live'},
 {'code': 10190, 'description': 'Mules and hinnies: live'},
 {'code': 10221, 'description': 'Cattle: live, pure-bred breeding animals'},
 {'code': 10229,
  'description': 'Cattle: live, other than pure-bred breeding animals'},
 {'code': 10231, 'description': 'Buffalo: live, pure-bred breeding animals'},
 {'code': 10239,
  'description': 'Buffalo: live, other than pure-bred breeding animals'},
 {'code': 10290,
  'description': 'Bovine animals: live, other than cattle and buffalo'},
 {'code': 10310, 'description': 'Swine: live, pure-bred breeding animals'},
 {'code': 10391,
  'description': 'Swine: live, other than pure-bred breeding animals, weighing less than 50kg'},
 {'code': 10392,
  'description': 'Swine: live, other than pure-bred breeding animals, weighing 50kg or more'

In [None]:
# Save country embeddings with metadata and embedding
country_dump = []
for country, embedding in zip(countries, embeddings):
    country_dump.append({
        "code": country.get("code"),
        "country_name": country.get("country_name"),
        "country_iso2": country.get("country_iso2"),
        "country_iso3": country.get("country_iso3"),
        "embedding": embedding.tolist()
    })
with open("country_embeddings_dump.json", "w") as f:
    json.dump(country_dump, f)
print(f"Saved {len(country_dump)} country embeddings with metadata.")

In [None]:
# Save product embeddings with metadata and embedding
product_dump = []
for product, embedding in zip(products, product_embeddings):
    product_dump.append({
        "code": product.get("code"),
        "description": product.get("description"),
        "embedding": embedding.tolist()
    })
with open("product_embeddings_dump.json", "w") as f:
    json.dump(product_dump, f)
print(f"Saved {len(product_dump)} product embeddings with metadata.")