# Generate embeddings of the cleaned dataset

Import necessary libraries and load the data
> Remember to have an OpenAI API key, stored as `OPENAI_KEY` in your environment or in a local `.env` file. You can get one at https://platform.openai.com/account/api-keys

In [3]:
# pyright: reportGeneralTypeIssues=false

import pandas as pd 
import numpy as np
from sentence_transformers import SentenceTransformer
import faiss
import json
from openai import OpenAI
from dotenv import load_dotenv
import os
from tqdm import tqdm
import math
import time

# Load variables from a local .env file (python-dotenv) into the environment.
load_dotenv()

# API key handed to the OpenAI client below; None when OPENAI_KEY is not set.
OPENAI_KEY = os.environ.get("OPENAI_KEY")

Load the datasets of the **green skills** provided by `ESCO`

In [4]:
# Read the cleaned ESCO green-skills table and preview its first rows.
df_green_skills = pd.read_csv(
    filepath_or_buffer="../data/taxonomies/esco_green_skills_cleaned.csv"
)
df_green_skills.head(n=5)

Unnamed: 0,green_skill,alt_label,description
0,train staff to reduce food waste,teach students food waste reduction practices,Establish new trainings and staff development ...
1,train staff to reduce food waste,inform staff on food waste reduction practices,Establish new trainings and staff development ...
2,train staff to reduce food waste,educate workers on food recycling methods,Establish new trainings and staff development ...
3,train staff to reduce food waste,educate staff on food waste reduction,Establish new trainings and staff development ...
4,develop energy saving concepts,create concepts for energy saving,Use current research results and collaborate w...


Concatenate the `green_skill` (main name), `alt_label` (alternative name) and `description` columns to create a single text representation for each skill.
This text will be used to generate richer embeddings.

In [33]:
# Preview the text that will be embedded: "<skill>. <alternative label>. <description>"
preview_rows = df_green_skills.head(10)
for _, row in preview_rows.iterrows():
    parts = (row["green_skill"], row["alt_label"], row["description"])
    label = ". ".join(parts)
    print(label)
    # train staff to reduce food waste. teach students food waste reduction practices. Establish new trainings and staff development provisions to support staff knowledge in food waste prevention and food recycling practices. Ensure that staff understands methods of and tools for food recycling, e.g., separating waste.

train staff to reduce food waste. teach students food waste reduction practices. Establish new trainings and staff development provisions to support staff knowledge in food waste prevention and food recycling practices. Ensure that staff understands methods of and tools for food recycling, e.g., separating waste.
train staff to reduce food waste. inform staff on food waste reduction practices. Establish new trainings and staff development provisions to support staff knowledge in food waste prevention and food recycling practices. Ensure that staff understands methods of and tools for food recycling, e.g., separating waste.
train staff to reduce food waste. educate workers on food recycling methods. Establish new trainings and staff development provisions to support staff knowledge in food waste prevention and food recycling practices. Ensure that staff understands methods of and tools for food recycling, e.g., separating waste.
train staff to reduce food waste. educate staff on food wa

## Embeddings of the green skills
Here we generate the embeddings of the green skills dataset using the `text-embedding-3-large` model from **OpenAI**. The embeddings are stored in a **FAISS** index for efficient similarity search, using **cosine similarity** as the metric. The index computes inner products, which equal cosine similarity because OpenAI embeddings are normalized to unit length.

In [None]:
client = OpenAI(api_key = OPENAI_KEY)

max_attempts = 3         # API calls tried per row before giving up
retry_wait_seconds = 5   # pause between attempts to avoid hitting rate limits

embeddings = []
failed_rows = []  # index labels of rows that ended up with a placeholder vector

for row_id, row in tqdm(df_green_skills.iterrows(), total=df_green_skills.shape[0], desc="Generating embeddings", unit="embedding"):
    # Text to embed: "<skill>. <alternative label>. <description>"
    text = row["green_skill"] + ". " + row["alt_label"] + ". " + row["description"]

    embedding = None
    for attempt in range(1, max_attempts + 1):
        try:
            response = client.embeddings.create(
                input = text,
                model = "text-embedding-3-large"
            )
            embedding = response.data[0].embedding
            break
        except Exception as e:
            # tqdm.write reports the problem without breaking the progress bar.
            tqdm.write(f"Error generating embedding for row {row_id} (attempt {attempt}/{max_attempts}): {e}")
            if attempt < max_attempts:
                time.sleep(retry_wait_seconds)  # Wait before retrying

    if embedding is None:
        # Keep one vector per dataframe row so FAISS ids stay aligned with the
        # rows; the zero vector (3072 = size used for this model) is a placeholder.
        failed_rows.append(row_id)
        embedding = [0.0]*3072

    embeddings.append(embedding)

if failed_rows:
    tqdm.write(f"{len(failed_rows)} rows could not be embedded and hold zero vectors: {failed_rows}")

embeddings[:2]

Generating embeddings:   0%|          | 0/2539 [00:00<?, ?embedding/s]

Generating embeddings: 100%|██████████| 2539/2539 [12:54<00:00,  3.28embedding/s]  


[[-0.04066907614469528,
  0.018801070749759674,
  -0.015321778133511543,
  0.008852869272232056,
  -0.020282993093132973,
  -0.023414356634020805,
  0.049070924520492554,
  -0.013556358404457569,
  -0.016752153635025024,
  0.017074311152100563,
  -0.0025949731934815645,
  -0.027164261788129807,
  -0.014471284113824368,
  0.017357809469103813,
  0.016765039414167404,
  -0.040308259427547455,
  0.012705864384770393,
  0.01338883675634861,
  0.04711221158504486,
  -0.028272481635212898,
  0.045565858483314514,
  -0.05623569339513779,
  -0.017100082710385323,
  -0.009336104616522789,
  -0.008376076817512512,
  0.021700482815504074,
  -0.016790812835097313,
  0.0038626601453870535,
  -0.046416353434324265,
  0.022215932607650757,
  0.027293125167489052,
  0.039122723042964935,
  -0.025695227086544037,
  -0.06860651075839996,
  -0.033865123987197876,
  0.010811582207679749,
  0.042086564004421234,
  -0.00916858296841383,
  -0.01597897708415985,
  0.018504686653614044,
  -0.0388907715678215,


Creating the `FAISS` index and adding the embeddings to it

In [28]:
# FAISS works on float32 matrices: one row per skill, one column per dimension.
embeddings = np.array(embeddings, dtype=np.float32)

# Flat (exact) inner-product index sized to the embedding dimension.
index = faiss.IndexFlatIP(embeddings.shape[1])
index.add(embeddings)

Here we create a **dictionary** that maps `i -> (skill, text)`, where:
* `i` is the unique key for that entry (its row position). It is needed because **FAISS** only stores the *vector embedding*, not the actual text, so we save the identifier to map each vector back to its entry.
* `(skill, text)` is a tuple with two elements: `skill` is the official name of the green skill given by the **ESCO taxonomy**, and `text` is the alternative label detected in the semantic search. The same **green skill** can be retrieved as a result of similar job descriptions.

In [18]:
# Row position -> (official skill name, alternative label), in dataframe order,
# so that an id returned by FAISS can be translated back to text.
skill_label_pairs = zip(df_green_skills["green_skill"], df_green_skills["alt_label"])
id_to_skill = dict(enumerate(skill_label_pairs))
id_to_skill[0]

('train staff to reduce food waste',
 'teach students food waste reduction practices')

Testing the index

In [None]:
query = "Ayudar a la reforestación"  # Test query

# Embed the query with the same model that produced the indexed vectors.
query_response = client.embeddings.create(
    model="text-embedding-3-large",
    input=query,
)
query_embedding = query_response.data[0].embedding

# index.search takes a 2-D float32 array: one row per query.
query_vec = np.asarray(query_embedding, dtype=np.float32).reshape(1, -1)

# D holds the scores and I the matching ids, one row per query.
D, I = index.search(query_vec, k=2)

for score, i in zip(D[0], I[0]):
    match = id_to_skill[i]
    print(f"Score: {score:.4f}\nSkill: {match[0]}\nText: {match[1]}\n")

Score: 0.4316
Skill: plant trees
Text: transplant trees

Score: 0.4180
Skill: care for the wildlife
Text: flora and fauna protecting



Saving the **index** and the **mapping**

In [30]:
# Persist the FAISS index and the id -> (skill, alt_label) mapping.
index_path = "../data/embeddings/esco_green_skills_text-embedding-3-large.index"
mapping_path = "../data/mapping/id_to_skill.json"

# Create the target folders if needed; on a fresh checkout both writes would
# otherwise fail because the directories do not exist yet.
for target_path in (index_path, mapping_path):
    os.makedirs(os.path.dirname(target_path), exist_ok=True)

faiss.write_index(index, index_path)

# NOTE: JSON object keys are always strings, so the integer ids are written as
# "0", "1", ... and the tuples as arrays. Convert keys back with int() on load.
with open(mapping_path, "w") as f:
    json.dump(id_to_skill, f, indent=4)


Sanity-checking that the **index** and the **mapping** are consistent. Since each vector in the index corresponds to one entry in the mapping (and one row of the dataframe), all three should have the same length.

In [None]:
# Sizes of the three structures that must stay aligned one-to-one.
n_mapping_entries = len(id_to_skill)    # Length of the mapping
n_dataframe_rows = df_green_skills.shape[0]
n_index_vectors = index.ntotal          # Number of vectors in the index

print(n_mapping_entries)
print(df_green_skills.shape)  # Shape of the dataframe
print(n_index_vectors)

# They should be the same
print(n_mapping_entries == n_dataframe_rows == n_index_vectors)

2539
(2539, 3)
2539
True


## Embeddings of jobs from January 2025 to April 2025
Here we generate the embeddings of the job descriptions from January 2025 to April 2025. They will be used later for predicting green skills in those job descriptions.

In [1]:
# Load the cleaned jobs dataset and display it.
df_jobs = pd.read_csv(
    filepath_or_buffer="../data/full_dataset/jul24_to_jul_2025_cleaned.csv"
)
df_jobs

NameError: name 'pd' is not defined

In [4]:
client = OpenAI(api_key=OPENAI_KEY)

# One (skills text, month, year) triple per job row, ordered by year then month.
tuples = list(zip(df_jobs["Skills"], df_jobs["month"], df_jobs["year"]))
tuples.sort(key=lambda x: (x[2], x[1]))

# NOTE(review): hard-coded offset; presumably resumes a previous run that had
# already embedded the first 51001 rows. Confirm before re-running from scratch.
tuples = tuples[51001:]

# (month, year) pairs that are not embedded in this run.
time_to_skip = {(1, 2025), (3, 2025), (4, 2025)}

first_part_flag = True       # True until the first skipped (month, year) is reached
job_skills_embeddings = []   # one float32 vector per embedded row of the current part
failed_indices = []          # loop positions whose API call failed (zero vector stored)
save_every = 200
embedding_dim = 3072

for i, (skill, month, year) in enumerate(tqdm(tuples, desc="Generating embeddings for job skills", unit="embedding")):
    if (month, year) in time_to_skip:
        if first_part_flag:
            # First skipped period reached: flush what was collected so far as
            # the "first part" and start a fresh buffer for the second part.
            np.save("../data/embeddings/job_skills_embeddings_first_part.npy",
                    np.array(job_skills_embeddings, dtype=np.float32))
            job_skills_embeddings = []
            first_part_flag = False
        continue

    # Only the API call is guarded, so a failing np.save is never mistaken for
    # an embedding failure.
    try:
        response = client.embeddings.create(
            input=skill,
            model="text-embedding-3-large"
        )
        # Store each vector as a float32 array rather than a list of Python
        # floats: far less memory and much faster checkpoint conversion.
        emb = np.asarray(response.data[0].embedding, dtype=np.float32)

        if not np.isfinite(emb).all():
            tqdm.write(f"Invalid embedding on {i} — inserting zeros")
            emb = np.zeros(embedding_dim, dtype=np.float32)

    except Exception as e:
        # tqdm.write reports the error without breaking the progress bar.
        tqdm.write(f"Error generating embedding for job skill at index {i}: {e}")
        failed_indices.append(i)
        emb = np.zeros(embedding_dim, dtype=np.float32)  # placeholder keeps rows aligned
        time.sleep(2)

    job_skills_embeddings.append(emb)

    # Periodic checkpoint of the current part.
    # NOTE: every checkpoint rewrites the whole accumulated array, so each save
    # gets more expensive as the run progresses.
    if i > 0 and i % save_every == 0:
        if first_part_flag:
            checkpoint_path = "../data/embeddings/job_skills_embeddings_first_part_partial_2.npy"
        else:
            checkpoint_path = "../data/embeddings/job_skills_embeddings_second_part_partial_1.npy"
        np.save(checkpoint_path, np.array(job_skills_embeddings, dtype=np.float32))
        tqdm.write(f"Saved {len(job_skills_embeddings)} embeddings.")

np.save("../data/embeddings/job_skills_embeddings_second_part.npy",
        np.array(job_skills_embeddings, dtype=np.float32))

if failed_indices:
    tqdm.write(f"{len(failed_indices)} rows failed and hold zero vectors: {failed_indices}")

print("Done:", len(job_skills_embeddings), "embeddings saved.")


Generating embeddings for job skills:   0%|          | 201/153371 [00:53<14:08:40,  3.01embedding/s]

Saved 201 embeddings.


Generating embeddings for job skills:   0%|          | 401/153371 [01:47<11:04:59,  3.83embedding/s]

Saved 401 embeddings.


Generating embeddings for job skills:   0%|          | 601/153371 [02:36<10:04:09,  4.21embedding/s]

Saved 601 embeddings.


Generating embeddings for job skills:   1%|          | 801/153371 [03:28<12:14:28,  3.46embedding/s]

Saved 801 embeddings.


Generating embeddings for job skills:   1%|          | 1001/153371 [04:20<15:08:25,  2.80embedding/s]

Saved 1001 embeddings.


Generating embeddings for job skills:   1%|          | 1201/153371 [05:24<15:10:12,  2.79embedding/s]

Saved 1201 embeddings.


Generating embeddings for job skills:   1%|          | 1401/153371 [06:25<14:14:59,  2.96embedding/s]

Saved 1401 embeddings.


Generating embeddings for job skills:   1%|          | 1601/153371 [07:27<15:01:29,  2.81embedding/s]

Saved 1601 embeddings.


Generating embeddings for job skills:   1%|          | 1801/153371 [08:30<14:41:05,  2.87embedding/s]

Saved 1801 embeddings.


Generating embeddings for job skills:   1%|▏         | 2001/153371 [09:35<17:54:41,  2.35embedding/s]

Saved 2001 embeddings.


Generating embeddings for job skills:   1%|▏         | 2201/153371 [10:38<15:31:14,  2.71embedding/s]

Saved 2201 embeddings.


Generating embeddings for job skills:   2%|▏         | 2401/153371 [11:55<14:12:33,  2.95embedding/s] 

Saved 2401 embeddings.


Generating embeddings for job skills:   2%|▏         | 2601/153371 [12:57<16:34:55,  2.53embedding/s]

Saved 2601 embeddings.


Generating embeddings for job skills:   2%|▏         | 2801/153371 [14:00<16:17:36,  2.57embedding/s]

Saved 2801 embeddings.


Generating embeddings for job skills:   2%|▏         | 3001/153371 [15:01<17:17:52,  2.41embedding/s]

Saved 3001 embeddings.


Generating embeddings for job skills:   2%|▏         | 3201/153371 [16:04<16:43:06,  2.50embedding/s]

Saved 3201 embeddings.


Generating embeddings for job skills:   2%|▏         | 3401/153371 [17:06<18:19:12,  2.27embedding/s]

Saved 3401 embeddings.


Generating embeddings for job skills:   2%|▏         | 3601/153371 [18:10<16:37:09,  2.50embedding/s]

Saved 3601 embeddings.


Generating embeddings for job skills:   2%|▏         | 3801/153371 [19:13<17:30:25,  2.37embedding/s]

Saved 3801 embeddings.


Generating embeddings for job skills:   3%|▎         | 4002/153371 [20:15<15:23:00,  2.70embedding/s]

Saved 4001 embeddings.


Generating embeddings for job skills:   3%|▎         | 4201/153371 [21:06<16:08:14,  2.57embedding/s]

Saved 4201 embeddings.


Generating embeddings for job skills:   3%|▎         | 4402/153371 [21:57<13:52:08,  2.98embedding/s]

Saved 4401 embeddings.


Generating embeddings for job skills:   3%|▎         | 4601/153371 [22:48<15:00:24,  2.75embedding/s]

Saved 4601 embeddings.


Generating embeddings for job skills:   3%|▎         | 4801/153371 [23:43<16:14:43,  2.54embedding/s]

Saved 4801 embeddings.


Generating embeddings for job skills:   3%|▎         | 5001/153371 [24:35<16:56:48,  2.43embedding/s]

Saved 5001 embeddings.


Generating embeddings for job skills:   3%|▎         | 5201/153371 [25:36<18:50:03,  2.19embedding/s]

Saved 5201 embeddings.


Generating embeddings for job skills:   4%|▎         | 5401/153371 [26:38<17:50:27,  2.30embedding/s]

Saved 5401 embeddings.


Generating embeddings for job skills:   4%|▎         | 5601/153371 [27:38<18:39:38,  2.20embedding/s]

Saved 5601 embeddings.


Generating embeddings for job skills:   4%|▍         | 5801/153371 [28:39<18:29:16,  2.22embedding/s]

Saved 5801 embeddings.


Generating embeddings for job skills:   4%|▍         | 6001/153371 [29:41<19:41:16,  2.08embedding/s]

Saved 6001 embeddings.


Generating embeddings for job skills:   4%|▍         | 6201/153371 [30:37<18:32:01,  2.21embedding/s]

Saved 6201 embeddings.


Generating embeddings for job skills:   4%|▍         | 6401/153371 [31:31<16:04:43,  2.54embedding/s]

Saved 6401 embeddings.


Generating embeddings for job skills:   4%|▍         | 6601/153371 [32:22<17:30:41,  2.33embedding/s]

Saved 6601 embeddings.


Generating embeddings for job skills:   4%|▍         | 6801/153371 [33:17<17:28:40,  2.33embedding/s]

Saved 6801 embeddings.


Generating embeddings for job skills:   5%|▍         | 7001/153371 [34:10<20:47:45,  1.96embedding/s]

Saved 7001 embeddings.


Generating embeddings for job skills:   5%|▍         | 7201/153371 [35:11<19:31:32,  2.08embedding/s]

Saved 7201 embeddings.


Generating embeddings for job skills:   5%|▍         | 7401/153371 [36:12<19:32:40,  2.07embedding/s]

Saved 7401 embeddings.


Generating embeddings for job skills:   5%|▍         | 7601/153371 [37:14<19:29:57,  2.08embedding/s]

Saved 7601 embeddings.


Generating embeddings for job skills:   5%|▌         | 7801/153371 [38:20<20:20:33,  1.99embedding/s]

Saved 7801 embeddings.


Generating embeddings for job skills:   5%|▌         | 8001/153371 [39:23<22:05:09,  1.83embedding/s]

Saved 8001 embeddings.


Generating embeddings for job skills:   5%|▌         | 8201/153371 [40:26<27:04:50,  1.49embedding/s] 

Saved 8201 embeddings.


Generating embeddings for job skills:   5%|▌         | 8401/153371 [41:21<19:23:57,  2.08embedding/s]

Saved 8401 embeddings.


Generating embeddings for job skills:   6%|▌         | 8601/153371 [42:11<19:26:00,  2.07embedding/s]

Saved 8601 embeddings.


Generating embeddings for job skills:   6%|▌         | 8801/153371 [43:03<19:10:40,  2.09embedding/s]

Saved 8801 embeddings.


Generating embeddings for job skills:   6%|▌         | 9001/153371 [43:55<22:13:24,  1.80embedding/s]

Saved 9001 embeddings.


Generating embeddings for job skills:   6%|▌         | 9201/153371 [45:00<20:18:32,  1.97embedding/s]

Saved 9201 embeddings.


Generating embeddings for job skills:   6%|▌         | 9401/153371 [46:00<22:24:53,  1.78embedding/s]

Saved 9401 embeddings.


Generating embeddings for job skills:   6%|▋         | 9601/153371 [47:01<21:37:09,  1.85embedding/s]

Saved 9601 embeddings.


Generating embeddings for job skills:   6%|▋         | 9801/153371 [48:02<21:59:42,  1.81embedding/s]

Saved 9801 embeddings.


Generating embeddings for job skills:   7%|▋         | 10001/153371 [49:03<23:32:56,  1.69embedding/s]

Saved 10001 embeddings.


Generating embeddings for job skills:   7%|▋         | 10201/153371 [49:58<22:16:59,  1.78embedding/s]

Saved 10201 embeddings.


Generating embeddings for job skills:   7%|▋         | 10401/153371 [50:57<20:11:35,  1.97embedding/s]

Saved 10401 embeddings.


Generating embeddings for job skills:   7%|▋         | 10601/153371 [51:49<20:16:09,  1.96embedding/s]

Saved 10601 embeddings.


Generating embeddings for job skills:   7%|▋         | 10801/153371 [52:43<21:14:30,  1.86embedding/s]

Saved 10801 embeddings.


Generating embeddings for job skills:   7%|▋         | 11001/153371 [53:36<25:25:49,  1.56embedding/s]

Saved 11001 embeddings.


Generating embeddings for job skills:   7%|▋         | 11201/153371 [54:32<26:43:25,  1.48embedding/s]

Saved 11201 embeddings.


Generating embeddings for job skills:   7%|▋         | 11401/153371 [55:26<21:57:56,  1.80embedding/s]

Saved 11401 embeddings.


Generating embeddings for job skills:   8%|▊         | 11601/153371 [56:22<21:38:43,  1.82embedding/s]

Saved 11601 embeddings.


Generating embeddings for job skills:   8%|▊         | 11801/153371 [57:16<23:41:17,  1.66embedding/s]

Saved 11801 embeddings.


Generating embeddings for job skills:   8%|▊         | 12001/153371 [58:11<24:45:58,  1.59embedding/s]

Saved 12001 embeddings.


Generating embeddings for job skills:   8%|▊         | 12201/153371 [59:28<25:22:48,  1.55embedding/s] 

Saved 12201 embeddings.


Generating embeddings for job skills:   8%|▊         | 12401/153371 [1:00:30<23:52:18,  1.64embedding/s]

Saved 12401 embeddings.


Generating embeddings for job skills:   8%|▊         | 12601/153371 [1:01:33<24:48:17,  1.58embedding/s]

Saved 12601 embeddings.


Generating embeddings for job skills:   8%|▊         | 12801/153371 [1:02:34<23:46:17,  1.64embedding/s]

Saved 12801 embeddings.


Generating embeddings for job skills:   8%|▊         | 13001/153371 [1:03:35<26:26:19,  1.47embedding/s]

Saved 13001 embeddings.


Generating embeddings for job skills:   9%|▊         | 13201/153371 [1:04:38<25:18:10,  1.54embedding/s]

Saved 13201 embeddings.


Generating embeddings for job skills:   9%|▊         | 13401/153371 [1:05:43<25:12:58,  1.54embedding/s]

Saved 13401 embeddings.


Generating embeddings for job skills:   9%|▉         | 13601/153371 [1:06:46<24:32:37,  1.58embedding/s]

Saved 13601 embeddings.


Generating embeddings for job skills:   9%|▉         | 13801/153371 [1:07:50<24:35:01,  1.58embedding/s]

Saved 13801 embeddings.


Generating embeddings for job skills:   9%|▉         | 14001/153371 [1:08:51<26:28:09,  1.46embedding/s]

Saved 14001 embeddings.


Generating embeddings for job skills:   9%|▉         | 14201/153371 [1:09:55<25:53:18,  1.49embedding/s]

Saved 14201 embeddings.


Generating embeddings for job skills:   9%|▉         | 14401/153371 [1:10:57<26:36:52,  1.45embedding/s]

Saved 14401 embeddings.


Generating embeddings for job skills:  10%|▉         | 14601/153371 [1:11:58<26:06:01,  1.48embedding/s]

Saved 14601 embeddings.


Generating embeddings for job skills:  10%|▉         | 14801/153371 [1:13:04<26:29:53,  1.45embedding/s]

Saved 14801 embeddings.


Generating embeddings for job skills:  10%|▉         | 15001/153371 [1:14:04<27:12:49,  1.41embedding/s]

Saved 15001 embeddings.


Generating embeddings for job skills:  10%|▉         | 15201/153371 [1:14:56<25:02:22,  1.53embedding/s]

Saved 15201 embeddings.


Generating embeddings for job skills:  10%|█         | 15401/153371 [1:15:48<27:07:37,  1.41embedding/s]

Saved 15401 embeddings.


Generating embeddings for job skills:  10%|█         | 15601/153371 [1:16:42<26:12:54,  1.46embedding/s]

Saved 15601 embeddings.


Generating embeddings for job skills:  10%|█         | 15801/153371 [1:17:45<31:42:33,  1.21embedding/s] 

Saved 15801 embeddings.


Generating embeddings for job skills:  10%|█         | 16001/153371 [1:18:40<25:51:36,  1.48embedding/s]

Saved 16001 embeddings.


Generating embeddings for job skills:  11%|█         | 16201/153371 [1:19:34<25:12:40,  1.51embedding/s]

Saved 16201 embeddings.


Generating embeddings for job skills:  11%|█         | 16402/153371 [1:20:31<19:53:27,  1.91embedding/s]

Saved 16401 embeddings.


Generating embeddings for job skills:  11%|█         | 16601/153371 [1:21:24<25:12:43,  1.51embedding/s]

Saved 16601 embeddings.


Generating embeddings for job skills:  11%|█         | 16802/153371 [1:22:18<19:26:48,  1.95embedding/s]

Saved 16801 embeddings.


Generating embeddings for job skills:  11%|█         | 17001/153371 [1:23:17<26:46:15,  1.41embedding/s]

Saved 17001 embeddings.


Generating embeddings for job skills:  11%|█         | 17201/153371 [1:24:09<24:57:20,  1.52embedding/s]

Saved 17201 embeddings.


Generating embeddings for job skills:  11%|█▏        | 17401/153371 [1:25:01<26:40:30,  1.42embedding/s]

Saved 17401 embeddings.


Generating embeddings for job skills:  11%|█▏        | 17601/153371 [1:25:55<25:44:04,  1.47embedding/s]

Saved 17601 embeddings.


Generating embeddings for job skills:  12%|█▏        | 17801/153371 [1:26:47<26:19:00,  1.43embedding/s]

Saved 17801 embeddings.


Generating embeddings for job skills:  12%|█▏        | 18001/153371 [1:27:42<27:53:34,  1.35embedding/s]

Saved 18001 embeddings.


Generating embeddings for job skills:  12%|█▏        | 18201/153371 [1:28:46<28:06:29,  1.34embedding/s]

Saved 18201 embeddings.


Generating embeddings for job skills:  12%|█▏        | 18401/153371 [1:29:52<28:39:23,  1.31embedding/s]

Saved 18401 embeddings.


Generating embeddings for job skills:  12%|█▏        | 18601/153371 [1:30:54<27:54:28,  1.34embedding/s]

Saved 18601 embeddings.


Generating embeddings for job skills:  12%|█▏        | 18801/153371 [1:31:56<28:46:17,  1.30embedding/s]

Saved 18801 embeddings.


Generating embeddings for job skills:  12%|█▏        | 19002/153371 [1:33:01<25:06:58,  1.49embedding/s]

Saved 19001 embeddings.


Generating embeddings for job skills:  13%|█▎        | 19201/153371 [1:33:52<27:11:39,  1.37embedding/s]

Saved 19201 embeddings.


Generating embeddings for job skills:  13%|█▎        | 19401/153371 [1:34:46<26:42:28,  1.39embedding/s]

Saved 19401 embeddings.


Generating embeddings for job skills:  13%|█▎        | 19601/153371 [1:35:39<28:50:37,  1.29embedding/s]

Saved 19601 embeddings.


Generating embeddings for job skills:  13%|█▎        | 19801/153371 [1:36:35<28:23:54,  1.31embedding/s]

Saved 19801 embeddings.


Generating embeddings for job skills:  13%|█▎        | 20001/153371 [1:37:31<32:52:27,  1.13embedding/s]

Saved 20001 embeddings.


Generating embeddings for job skills:  13%|█▎        | 20201/153371 [1:38:33<34:19:27,  1.08embedding/s]

Saved 20201 embeddings.


Generating embeddings for job skills:  13%|█▎        | 20401/153371 [1:39:36<31:06:00,  1.19embedding/s]

Saved 20401 embeddings.


Generating embeddings for job skills:  13%|█▎        | 20601/153371 [1:40:39<31:12:23,  1.18embedding/s]

Saved 20601 embeddings.


Generating embeddings for job skills:  14%|█▎        | 20801/153371 [1:41:48<30:10:04,  1.22embedding/s]

Saved 20801 embeddings.


Generating embeddings for job skills:  14%|█▎        | 21001/153371 [1:42:52<32:25:15,  1.13embedding/s]

Saved 21001 embeddings.


Generating embeddings for job skills:  14%|█▍        | 21201/153371 [1:43:54<30:10:15,  1.22embedding/s]

Saved 21201 embeddings.


Generating embeddings for job skills:  14%|█▍        | 21401/153371 [1:44:58<30:52:53,  1.19embedding/s]

Saved 21401 embeddings.


Generating embeddings for job skills:  14%|█▍        | 21601/153371 [1:46:01<33:48:36,  1.08embedding/s]

Saved 21601 embeddings.


Generating embeddings for job skills:  14%|█▍        | 21801/153371 [1:47:07<30:51:14,  1.18embedding/s]

Saved 21801 embeddings.


Generating embeddings for job skills:  14%|█▍        | 22001/153371 [1:48:09<32:29:15,  1.12embedding/s]

Saved 22001 embeddings.


Generating embeddings for job skills:  14%|█▍        | 22201/153371 [1:49:04<29:34:00,  1.23embedding/s]

Saved 22201 embeddings.


Generating embeddings for job skills:  15%|█▍        | 22401/153371 [1:49:59<31:13:19,  1.17embedding/s]

Saved 22401 embeddings.


Generating embeddings for job skills:  15%|█▍        | 22601/153371 [1:50:58<30:31:29,  1.19embedding/s]

Saved 22601 embeddings.


Generating embeddings for job skills:  15%|█▍        | 22801/153371 [1:51:54<30:51:53,  1.18embedding/s]

Saved 22801 embeddings.


Generating embeddings for job skills:  15%|█▍        | 23001/153371 [1:52:50<32:03:57,  1.13embedding/s]

Saved 23001 embeddings.


Generating embeddings for job skills:  15%|█▌        | 23202/153371 [1:53:44<23:02:34,  1.57embedding/s]

Saved 23201 embeddings.


Generating embeddings for job skills:  15%|█▌        | 23401/153371 [1:54:39<31:12:10,  1.16embedding/s]

Saved 23401 embeddings.


Generating embeddings for job skills:  15%|█▌        | 23601/153371 [1:55:35<31:53:29,  1.13embedding/s]

Saved 23601 embeddings.


Generating embeddings for job skills:  16%|█▌        | 23801/153371 [1:56:28<30:25:05,  1.18embedding/s]

Saved 23801 embeddings.


Generating embeddings for job skills:  16%|█▌        | 24001/153371 [1:57:25<33:14:35,  1.08embedding/s]

Saved 24001 embeddings.


Generating embeddings for job skills:  16%|█▌        | 24201/153371 [1:58:30<33:11:26,  1.08embedding/s]

Saved 24201 embeddings.


Generating embeddings for job skills:  16%|█▌        | 24401/153371 [1:59:36<33:24:04,  1.07embedding/s]

Saved 24401 embeddings.


Generating embeddings for job skills:  16%|█▌        | 24601/153371 [2:00:42<33:37:44,  1.06embedding/s]

Saved 24601 embeddings.


Generating embeddings for job skills:  16%|█▌        | 24801/153371 [2:01:48<34:39:51,  1.03embedding/s]

Saved 24801 embeddings.


Generating embeddings for job skills:  16%|█▋        | 25001/153371 [2:02:56<35:17:27,  1.01embedding/s]

Saved 25001 embeddings.


Generating embeddings for job skills:  16%|█▋        | 25201/153371 [2:03:46<33:29:23,  1.06embedding/s]

Saved 25201 embeddings.


Generating embeddings for job skills:  17%|█▋        | 25401/153371 [2:04:37<32:48:36,  1.08embedding/s]

Saved 25401 embeddings.


Generating embeddings for job skills:  17%|█▋        | 25601/153371 [2:05:30<33:59:52,  1.04embedding/s]

Saved 25601 embeddings.


Generating embeddings for job skills:  17%|█▋        | 25801/153371 [2:06:21<32:03:47,  1.11embedding/s]

Saved 25801 embeddings.


Generating embeddings for job skills:  17%|█▋        | 26001/153371 [2:07:13<34:47:18,  1.02embedding/s]

Saved 26001 embeddings.


Generating embeddings for job skills:  17%|█▋        | 26201/153371 [2:08:07<32:33:54,  1.08embedding/s]

Saved 26201 embeddings.


Generating embeddings for job skills:  17%|█▋        | 26402/153371 [2:09:03<25:15:25,  1.40embedding/s]

Saved 26401 embeddings.


Generating embeddings for job skills:  17%|█▋        | 26601/153371 [2:09:58<33:38:25,  1.05embedding/s]

Saved 26601 embeddings.


Generating embeddings for job skills:  17%|█▋        | 26801/153371 [2:10:55<33:56:31,  1.04embedding/s]

Saved 26801 embeddings.


Generating embeddings for job skills:  18%|█▊        | 27002/153371 [2:11:55<26:03:34,  1.35embedding/s]

Saved 27001 embeddings.


Generating embeddings for job skills:  18%|█▊        | 27201/153371 [2:12:47<33:00:18,  1.06embedding/s]

Saved 27201 embeddings.


Generating embeddings for job skills:  18%|█▊        | 27401/153371 [2:13:40<32:16:00,  1.08embedding/s]

Saved 27401 embeddings.


Generating embeddings for job skills:  18%|█▊        | 27601/153371 [2:14:34<32:52:35,  1.06embedding/s]

Saved 27601 embeddings.


Generating embeddings for job skills:  18%|█▊        | 27801/153371 [2:15:26<33:01:39,  1.06embedding/s]

Saved 27801 embeddings.


Generating embeddings for job skills:  18%|█▊        | 28001/153371 [2:16:20<36:00:51,  1.03s/embedding]

Saved 28001 embeddings.


Generating embeddings for job skills:  18%|█▊        | 28201/153371 [2:17:13<33:58:26,  1.02embedding/s]

Saved 28201 embeddings.


Generating embeddings for job skills:  19%|█▊        | 28401/153371 [2:18:16<34:39:27,  1.00embedding/s]

Saved 28401 embeddings.


Generating embeddings for job skills:  19%|█▊        | 28601/153371 [2:19:10<33:42:32,  1.03embedding/s]

Saved 28601 embeddings.


Generating embeddings for job skills:  19%|█▉        | 28801/153371 [2:20:07<37:32:51,  1.09s/embedding]

Saved 28801 embeddings.


Generating embeddings for job skills:  19%|█▉        | 29001/153371 [2:21:03<36:21:17,  1.05s/embedding]

Saved 29001 embeddings.


Generating embeddings for job skills:  19%|█▉        | 29201/153371 [2:22:09<37:40:54,  1.09s/embedding]

Saved 29201 embeddings.


Generating embeddings for job skills:  19%|█▉        | 29401/153371 [2:23:16<37:39:02,  1.09s/embedding]

Saved 29401 embeddings.


Generating embeddings for job skills:  19%|█▉        | 29601/153371 [2:24:22<35:20:24,  1.03s/embedding]

Saved 29601 embeddings.


Generating embeddings for job skills:  19%|█▉        | 29801/153371 [2:25:26<35:19:41,  1.03s/embedding]

Saved 29801 embeddings.


Generating embeddings for job skills:  20%|█▉        | 30001/153371 [2:26:32<37:08:36,  1.08s/embedding]

Saved 30001 embeddings.


Generating embeddings for job skills:  20%|█▉        | 30201/153371 [2:27:25<33:38:03,  1.02embedding/s]

Saved 30201 embeddings.


Generating embeddings for job skills:  20%|█▉        | 30401/153371 [2:28:18<35:37:26,  1.04s/embedding]

Saved 30401 embeddings.


Generating embeddings for job skills:  20%|█▉        | 30602/153371 [2:29:11<26:30:14,  1.29embedding/s]

Saved 30601 embeddings.


Generating embeddings for job skills:  20%|██        | 30802/153371 [2:30:06<25:43:37,  1.32embedding/s]

Saved 30801 embeddings.


Generating embeddings for job skills:  20%|██        | 31001/153371 [2:30:59<35:59:56,  1.06s/embedding]

Saved 31001 embeddings.


Generating embeddings for job skills:  20%|██        | 31201/153371 [2:31:58<36:00:48,  1.06s/embedding]

Saved 31201 embeddings.


Generating embeddings for job skills:  20%|██        | 31401/153371 [2:32:55<35:15:21,  1.04s/embedding]

Saved 31401 embeddings.


Generating embeddings for job skills:  21%|██        | 31601/153371 [2:33:50<36:32:32,  1.08s/embedding]

Saved 31601 embeddings.


Generating embeddings for job skills:  21%|██        | 31802/153371 [2:34:43<26:57:06,  1.25embedding/s]

Saved 31801 embeddings.


Generating embeddings for job skills:  21%|██        | 32001/153371 [2:35:38<39:19:46,  1.17s/embedding]

Saved 32001 embeddings.


Generating embeddings for job skills:  21%|██        | 32201/153371 [2:36:46<36:31:07,  1.08s/embedding] 

Saved 32201 embeddings.


Generating embeddings for job skills:  21%|██        | 32401/153371 [2:37:40<35:55:57,  1.07s/embedding]

Saved 32401 embeddings.


Generating embeddings for job skills:  21%|██▏       | 32601/153371 [2:38:35<37:20:27,  1.11s/embedding]

Saved 32601 embeddings.


Generating embeddings for job skills:  21%|██▏       | 32801/153371 [2:39:29<36:24:55,  1.09s/embedding]

Saved 32801 embeddings.


Generating embeddings for job skills:  22%|██▏       | 33001/153371 [2:40:27<39:05:19,  1.17s/embedding]

Saved 33001 embeddings.


Generating embeddings for job skills:  22%|██▏       | 33202/153371 [2:41:23<27:48:10,  1.20embedding/s]

Saved 33201 embeddings.


Generating embeddings for job skills:  22%|██▏       | 33401/153371 [2:42:16<36:34:31,  1.10s/embedding]

Saved 33401 embeddings.


Generating embeddings for job skills:  22%|██▏       | 33601/153371 [2:43:12<38:09:28,  1.15s/embedding]

Saved 33601 embeddings.


Generating embeddings for job skills:  22%|██▏       | 33801/153371 [2:44:09<37:08:26,  1.12s/embedding]

Saved 33801 embeddings.


Generating embeddings for job skills:  22%|██▏       | 34001/153371 [2:45:08<39:20:34,  1.19s/embedding]

Saved 34001 embeddings.


Generating embeddings for job skills:  22%|██▏       | 34201/153371 [2:46:08<36:17:52,  1.10s/embedding]

Saved 34201 embeddings.


Generating embeddings for job skills:  22%|██▏       | 34401/153371 [2:47:05<37:34:00,  1.14s/embedding]

Saved 34401 embeddings.


Generating embeddings for job skills:  23%|██▎       | 34601/153371 [2:48:02<41:15:31,  1.25s/embedding]

Saved 34601 embeddings.


Generating embeddings for job skills:  23%|██▎       | 34801/153371 [2:48:59<37:48:26,  1.15s/embedding]

Saved 34801 embeddings.


Generating embeddings for job skills:  23%|██▎       | 35001/153371 [2:49:59<38:52:06,  1.18s/embedding]

Saved 35001 embeddings.


Generating embeddings for job skills:  23%|██▎       | 35201/153371 [2:51:05<39:44:08,  1.21s/embedding]

Saved 35201 embeddings.


Generating embeddings for job skills:  23%|██▎       | 35401/153371 [2:52:11<40:59:30,  1.25s/embedding]

Saved 35401 embeddings.


Generating embeddings for job skills:  23%|██▎       | 35601/153371 [2:53:17<39:00:58,  1.19s/embedding]

Saved 35601 embeddings.


Generating embeddings for job skills:  23%|██▎       | 35801/153371 [2:54:24<40:00:03,  1.22s/embedding]

Saved 35801 embeddings.


Generating embeddings for job skills:  23%|██▎       | 36001/153371 [2:55:40<41:36:52,  1.28s/embedding] 

Saved 36001 embeddings.


Generating embeddings for job skills:  24%|██▎       | 36201/153371 [2:56:48<39:17:19,  1.21s/embedding]

Saved 36201 embeddings.


Generating embeddings for job skills:  24%|██▎       | 36401/153371 [2:57:54<39:54:46,  1.23s/embedding]

Saved 36401 embeddings.


Generating embeddings for job skills:  24%|██▍       | 36601/153371 [2:59:02<41:05:39,  1.27s/embedding]

Saved 36601 embeddings.


Generating embeddings for job skills:  24%|██▍       | 36801/153371 [3:00:08<39:39:42,  1.22s/embedding]

Saved 36801 embeddings.


Generating embeddings for job skills:  24%|██▍       | 37001/153371 [3:01:16<42:51:56,  1.33s/embedding]

Saved 37001 embeddings.


Generating embeddings for job skills:  24%|██▍       | 37201/153371 [3:02:20<39:28:09,  1.22s/embedding]

Saved 37201 embeddings.


Generating embeddings for job skills:  24%|██▍       | 37401/153371 [3:03:28<39:56:34,  1.24s/embedding]

Saved 37401 embeddings.


Generating embeddings for job skills:  25%|██▍       | 37601/153371 [3:04:31<40:26:41,  1.26s/embedding]

Saved 37601 embeddings.


Generating embeddings for job skills:  25%|██▍       | 37801/153371 [3:05:38<40:28:45,  1.26s/embedding]

Saved 37801 embeddings.


Generating embeddings for job skills:  25%|██▍       | 38001/153371 [3:06:43<41:58:48,  1.31s/embedding]

Saved 38001 embeddings.


Generating embeddings for job skills:  25%|██▍       | 38201/153371 [3:07:51<40:31:36,  1.27s/embedding]

Saved 38201 embeddings.


Generating embeddings for job skills:  25%|██▌       | 38401/153371 [3:08:58<40:46:02,  1.28s/embedding]

Saved 38401 embeddings.


Generating embeddings for job skills:  25%|██▌       | 38601/153371 [3:10:09<43:15:08,  1.36s/embedding]

Saved 38601 embeddings.


Generating embeddings for job skills:  25%|██▌       | 38801/153371 [3:11:22<41:44:02,  1.31s/embedding]

Saved 38801 embeddings.


Generating embeddings for job skills:  25%|██▌       | 39001/153371 [3:12:38<111:36:15,  3.51s/embedding]

Saved 39001 embeddings.


Generating embeddings for job skills:  26%|██▌       | 39201/153371 [3:13:44<41:09:51,  1.30s/embedding] 

Saved 39201 embeddings.


Generating embeddings for job skills:  26%|██▌       | 39401/153371 [3:14:54<42:01:44,  1.33s/embedding]

Saved 39401 embeddings.


Generating embeddings for job skills:  26%|██▌       | 39601/153371 [3:16:03<41:07:19,  1.30s/embedding]

Saved 39601 embeddings.


Generating embeddings for job skills:  26%|██▌       | 39801/153371 [3:17:12<41:58:46,  1.33s/embedding]

Saved 39801 embeddings.


Generating embeddings for job skills:  26%|██▌       | 40001/153371 [3:18:22<42:55:02,  1.36s/embedding]

Saved 40001 embeddings.


Generating embeddings for job skills:  26%|██▌       | 40201/153371 [3:19:33<42:25:14,  1.35s/embedding]

Saved 40201 embeddings.


Generating embeddings for job skills:  26%|██▋       | 40401/153371 [3:20:43<42:18:03,  1.35s/embedding]

Saved 40401 embeddings.


Generating embeddings for job skills:  26%|██▋       | 40601/153371 [3:21:49<42:12:13,  1.35s/embedding]

Saved 40601 embeddings.


Generating embeddings for job skills:  27%|██▋       | 40801/153371 [3:22:56<41:42:24,  1.33s/embedding]

Saved 40801 embeddings.


Generating embeddings for job skills:  27%|██▋       | 41001/153371 [3:24:04<42:31:04,  1.36s/embedding]

Saved 41001 embeddings.


Generating embeddings for job skills:  27%|██▋       | 41201/153371 [3:25:04<43:47:12,  1.41s/embedding]

Saved 41201 embeddings.


Generating embeddings for job skills:  27%|██▋       | 41401/153371 [3:26:01<41:28:18,  1.33s/embedding]

Saved 41401 embeddings.


Generating embeddings for job skills:  27%|██▋       | 41601/153371 [3:26:58<41:39:56,  1.34s/embedding]

Saved 41601 embeddings.


Generating embeddings for job skills:  27%|██▋       | 41801/153371 [3:27:58<41:36:25,  1.34s/embedding]

Saved 41801 embeddings.


Generating embeddings for job skills:  27%|██▋       | 42001/153371 [3:28:56<42:41:38,  1.38s/embedding]

Saved 42001 embeddings.


Generating embeddings for job skills:  28%|██▊       | 42201/153371 [3:29:53<44:49:12,  1.45s/embedding]

Saved 42201 embeddings.


Generating embeddings for job skills:  28%|██▊       | 42401/153371 [3:30:47<39:54:58,  1.29s/embedding]

Saved 42401 embeddings.


Generating embeddings for job skills:  28%|██▊       | 42601/153371 [3:31:42<40:48:38,  1.33s/embedding]

Saved 42601 embeddings.


Generating embeddings for job skills:  28%|██▊       | 42801/153371 [3:32:38<41:29:44,  1.35s/embedding]

Saved 42801 embeddings.


Generating embeddings for job skills:  28%|██▊       | 43001/153371 [3:33:33<42:55:45,  1.40s/embedding]

Saved 43001 embeddings.


Generating embeddings for job skills:  28%|██▊       | 43201/153371 [3:34:42<42:44:06,  1.40s/embedding]

Saved 43201 embeddings.


Generating embeddings for job skills:  28%|██▊       | 43401/153371 [3:35:50<43:54:32,  1.44s/embedding]

Saved 43401 embeddings.


Generating embeddings for job skills:  28%|██▊       | 43601/153371 [3:36:59<47:08:22,  1.55s/embedding]

Saved 43601 embeddings.


Generating embeddings for job skills:  29%|██▊       | 43801/153371 [3:38:09<56:45:24,  1.86s/embedding]

Saved 43801 embeddings.


Generating embeddings for job skills:  29%|██▊       | 44001/153371 [3:39:19<46:57:12,  1.55s/embedding]

Saved 44001 embeddings.


Generating embeddings for job skills:  29%|██▉       | 44201/153371 [3:40:26<42:45:44,  1.41s/embedding]

Saved 44201 embeddings.


Generating embeddings for job skills:  29%|██▉       | 44401/153371 [3:41:33<44:45:05,  1.48s/embedding]

Saved 44401 embeddings.


Generating embeddings for job skills:  29%|██▉       | 44601/153371 [3:42:39<44:03:21,  1.46s/embedding]

Saved 44601 embeddings.


Generating embeddings for job skills:  29%|██▉       | 44801/153371 [3:43:46<44:57:54,  1.49s/embedding]

Saved 44801 embeddings.


Generating embeddings for job skills:  29%|██▉       | 45001/153371 [3:44:51<44:07:40,  1.47s/embedding]

Saved 45001 embeddings.


Generating embeddings for job skills:  29%|██▉       | 45201/153371 [3:45:49<42:48:46,  1.42s/embedding]

Saved 45201 embeddings.


Generating embeddings for job skills:  30%|██▉       | 45401/153371 [3:46:46<42:37:11,  1.42s/embedding]

Saved 45401 embeddings.


Generating embeddings for job skills:  30%|██▉       | 45601/153371 [3:47:45<41:55:25,  1.40s/embedding]

Saved 45601 embeddings.


Generating embeddings for job skills:  30%|██▉       | 45801/153371 [3:48:47<42:51:13,  1.43s/embedding]

Saved 45801 embeddings.


Generating embeddings for job skills:  30%|██▉       | 46001/153371 [3:49:43<43:16:16,  1.45s/embedding]

Saved 46001 embeddings.


Generating embeddings for job skills:  30%|███       | 46201/153371 [3:50:39<41:45:30,  1.40s/embedding]

Saved 46201 embeddings.


Generating embeddings for job skills:  30%|███       | 46402/153371 [3:51:36<31:09:39,  1.05s/embedding]

Saved 46401 embeddings.


Generating embeddings for job skills:  30%|███       | 46601/153371 [3:52:31<43:36:47,  1.47s/embedding]

Saved 46601 embeddings.


Generating embeddings for job skills:  76%|███████▋  | 116959/153371 [3:52:46<00:02, 15178.72embedding/s]

Saved 43 embeddings.


Generating embeddings for job skills:  76%|███████▋  | 117146/153371 [3:53:44<01:46, 340.71embedding/s]  

Saved 243 embeddings.


Generating embeddings for job skills:  77%|███████▋  | 117399/153371 [3:55:05<29:22, 20.41embedding/s] 

Saved 443 embeddings.


Generating embeddings for job skills:  77%|███████▋  | 117579/153371 [3:56:04<1:54:56,  5.19embedding/s]

Saved 643 embeddings.


Generating embeddings for job skills:  77%|███████▋  | 117763/153371 [3:57:02<2:50:46,  3.48embedding/s]

Saved 843 embeddings.


Generating embeddings for job skills:  77%|███████▋  | 118001/153371 [3:58:17<3:21:29,  2.93embedding/s]

Saved 1043 embeddings.


Generating embeddings for job skills:  77%|███████▋  | 118201/153371 [3:59:21<2:50:38,  3.43embedding/s]

Saved 1243 embeddings.


Generating embeddings for job skills:  77%|███████▋  | 118401/153371 [4:00:25<3:25:57,  2.83embedding/s]

Saved 1443 embeddings.


Generating embeddings for job skills:  77%|███████▋  | 118601/153371 [4:01:29<3:56:42,  2.45embedding/s]

Saved 1643 embeddings.


Generating embeddings for job skills:  77%|███████▋  | 118801/153371 [4:02:31<3:15:55,  2.94embedding/s]

Saved 1843 embeddings.


Generating embeddings for job skills:  78%|███████▊  | 119001/153371 [4:03:34<3:34:37,  2.67embedding/s]

Saved 2043 embeddings.


Generating embeddings for job skills:  78%|███████▊  | 119201/153371 [4:04:28<3:04:32,  3.09embedding/s]

Saved 2243 embeddings.


Generating embeddings for job skills:  78%|███████▊  | 119401/153371 [4:05:20<2:40:32,  3.53embedding/s]

Saved 2443 embeddings.


Generating embeddings for job skills:  78%|███████▊  | 119601/153371 [4:06:12<4:02:20,  2.32embedding/s]

Saved 2643 embeddings.


Generating embeddings for job skills:  78%|███████▊  | 119801/153371 [4:07:06<3:05:52,  3.01embedding/s]

Saved 2843 embeddings.


Generating embeddings for job skills:  78%|███████▊  | 120001/153371 [4:08:00<3:27:34,  2.68embedding/s]

Saved 3043 embeddings.


Generating embeddings for job skills:  78%|███████▊  | 120201/153371 [4:08:50<3:01:47,  3.04embedding/s]

Saved 3243 embeddings.


Generating embeddings for job skills:  79%|███████▊  | 120401/153371 [4:09:44<3:38:36,  2.51embedding/s]

Saved 3443 embeddings.


Generating embeddings for job skills:  79%|███████▊  | 120601/153371 [4:10:46<3:25:41,  2.66embedding/s] 

Saved 3643 embeddings.


Generating embeddings for job skills:  79%|███████▉  | 120801/153371 [4:11:37<3:03:27,  2.96embedding/s]

Saved 3843 embeddings.


Generating embeddings for job skills:  79%|███████▉  | 121001/153371 [4:12:28<3:08:52,  2.86embedding/s]

Saved 4043 embeddings.


Generating embeddings for job skills:  79%|███████▉  | 121201/153371 [4:13:21<3:14:32,  2.76embedding/s]

Saved 4243 embeddings.


Generating embeddings for job skills:  79%|███████▉  | 121401/153371 [4:14:14<3:31:18,  2.52embedding/s]

Saved 4443 embeddings.


Generating embeddings for job skills:  79%|███████▉  | 121602/153371 [4:15:10<2:51:35,  3.09embedding/s]

Saved 4643 embeddings.


Generating embeddings for job skills:  79%|███████▉  | 121801/153371 [4:16:06<3:33:37,  2.46embedding/s]

Saved 4843 embeddings.


Generating embeddings for job skills:  80%|███████▉  | 122001/153371 [4:16:59<3:59:12,  2.19embedding/s]

Saved 5043 embeddings.


Generating embeddings for job skills:  80%|███████▉  | 122201/153371 [4:18:05<3:57:57,  2.18embedding/s]

Saved 5243 embeddings.


Generating embeddings for job skills:  80%|███████▉  | 122401/153371 [4:19:10<3:48:29,  2.26embedding/s]

Saved 5443 embeddings.


Generating embeddings for job skills:  80%|███████▉  | 122601/153371 [4:20:17<3:52:26,  2.21embedding/s]

Saved 5643 embeddings.


Generating embeddings for job skills:  80%|████████  | 122801/153371 [4:21:17<4:42:46,  1.80embedding/s]

Saved 5843 embeddings.


Generating embeddings for job skills:  80%|████████  | 123001/153371 [4:22:17<3:53:21,  2.17embedding/s]

Saved 6043 embeddings.


Generating embeddings for job skills:  80%|████████  | 123201/153371 [4:23:12<3:32:25,  2.37embedding/s]

Saved 6243 embeddings.


Generating embeddings for job skills:  80%|████████  | 123401/153371 [4:24:05<3:20:05,  2.50embedding/s]

Saved 6443 embeddings.


Generating embeddings for job skills:  81%|████████  | 123601/153371 [4:24:58<3:31:58,  2.34embedding/s]

Saved 6643 embeddings.


Generating embeddings for job skills:  81%|████████  | 123802/153371 [4:25:51<2:57:44,  2.77embedding/s]

Saved 6843 embeddings.


Generating embeddings for job skills:  81%|████████  | 124001/153371 [4:26:49<4:01:47,  2.02embedding/s]

Saved 7043 embeddings.


Generating embeddings for job skills:  81%|████████  | 124201/153371 [4:27:52<4:00:41,  2.02embedding/s]

Saved 7243 embeddings.


Generating embeddings for job skills:  81%|████████  | 124401/153371 [4:28:56<3:52:32,  2.08embedding/s]

Saved 7443 embeddings.


Generating embeddings for job skills:  81%|████████  | 124601/153371 [4:29:56<3:52:41,  2.06embedding/s]

Saved 7643 embeddings.


Generating embeddings for job skills:  81%|████████▏ | 124801/153371 [4:31:03<4:10:53,  1.90embedding/s]

Saved 7843 embeddings.


Generating embeddings for job skills:  82%|████████▏ | 125001/153371 [4:32:08<4:08:24,  1.90embedding/s]

Saved 8043 embeddings.


Generating embeddings for job skills:  82%|████████▏ | 125201/153371 [4:33:11<4:18:51,  1.81embedding/s]

Saved 8243 embeddings.


Generating embeddings for job skills:  82%|████████▏ | 125401/153371 [4:34:16<4:07:49,  1.88embedding/s]

Saved 8443 embeddings.


Generating embeddings for job skills:  82%|████████▏ | 125601/153371 [4:35:19<4:20:06,  1.78embedding/s]

Saved 8643 embeddings.


Generating embeddings for job skills:  82%|████████▏ | 125801/153371 [4:36:24<4:11:55,  1.82embedding/s]

Saved 8843 embeddings.


Generating embeddings for job skills:  82%|████████▏ | 126001/153371 [4:37:27<4:18:18,  1.77embedding/s]

Saved 9043 embeddings.


Generating embeddings for job skills:  82%|████████▏ | 126201/153371 [4:38:32<4:12:05,  1.80embedding/s]

Saved 9243 embeddings.


Generating embeddings for job skills:  82%|████████▏ | 126401/153371 [4:39:37<4:06:13,  1.83embedding/s]

Saved 9443 embeddings.


Generating embeddings for job skills:  83%|████████▎ | 126601/153371 [4:40:40<3:51:33,  1.93embedding/s]

Saved 9643 embeddings.


Generating embeddings for job skills:  83%|████████▎ | 126801/153371 [4:41:53<4:08:33,  1.78embedding/s] 

Saved 9843 embeddings.


Generating embeddings for job skills:  83%|████████▎ | 127001/153371 [4:42:55<4:16:19,  1.71embedding/s]

Saved 10043 embeddings.


Generating embeddings for job skills:  83%|████████▎ | 127201/153371 [4:43:58<4:28:51,  1.62embedding/s]

Saved 10243 embeddings.


Generating embeddings for job skills:  83%|████████▎ | 127401/153371 [4:45:03<4:10:20,  1.73embedding/s]

Saved 10443 embeddings.


Generating embeddings for job skills:  83%|████████▎ | 127601/153371 [4:46:10<4:11:12,  1.71embedding/s]

Saved 10643 embeddings.


Generating embeddings for job skills:  83%|████████▎ | 127801/153371 [4:47:17<4:37:26,  1.54embedding/s]

Saved 10843 embeddings.


Generating embeddings for job skills:  83%|████████▎ | 128001/153371 [4:48:22<4:12:18,  1.68embedding/s]

Saved 11043 embeddings.


Generating embeddings for job skills:  84%|████████▎ | 128201/153371 [4:49:25<4:16:41,  1.63embedding/s]

Saved 11243 embeddings.


Generating embeddings for job skills:  84%|████████▎ | 128401/153371 [4:50:36<3:59:16,  1.74embedding/s] 

Saved 11443 embeddings.


Generating embeddings for job skills:  84%|████████▍ | 128601/153371 [4:51:39<4:10:35,  1.65embedding/s]

Saved 11643 embeddings.


Generating embeddings for job skills:  84%|████████▍ | 128801/153371 [4:52:43<4:07:05,  1.66embedding/s]

Saved 11843 embeddings.


Generating embeddings for job skills:  84%|████████▍ | 129001/153371 [4:53:47<4:27:45,  1.52embedding/s]

Saved 12043 embeddings.


Generating embeddings for job skills:  84%|████████▍ | 129201/153371 [4:54:48<4:04:59,  1.64embedding/s]

Saved 12243 embeddings.


Generating embeddings for job skills:  84%|████████▍ | 129401/153371 [4:56:00<4:37:39,  1.44embedding/s]

Saved 12443 embeddings.


Generating embeddings for job skills:  85%|████████▍ | 129601/153371 [4:57:02<4:04:10,  1.62embedding/s]

Saved 12643 embeddings.


Generating embeddings for job skills:  85%|████████▍ | 129801/153371 [4:58:06<4:05:00,  1.60embedding/s]

Saved 12843 embeddings.


Generating embeddings for job skills:  85%|████████▍ | 130001/153371 [4:59:06<4:03:53,  1.60embedding/s]

Saved 13043 embeddings.


Generating embeddings for job skills:  85%|████████▍ | 130201/153371 [5:00:01<3:47:45,  1.70embedding/s]

Saved 13243 embeddings.


Generating embeddings for job skills:  85%|████████▌ | 130401/153371 [5:00:55<3:48:51,  1.67embedding/s]

Saved 13443 embeddings.


Generating embeddings for job skills:  85%|████████▌ | 130601/153371 [5:01:47<3:50:15,  1.65embedding/s]

Saved 13643 embeddings.


Generating embeddings for job skills:  85%|████████▌ | 130801/153371 [5:02:38<3:48:48,  1.64embedding/s]

Saved 13843 embeddings.


Generating embeddings for job skills:  85%|████████▌ | 131001/153371 [5:03:34<3:47:41,  1.64embedding/s]

Saved 14043 embeddings.


Generating embeddings for job skills:  86%|████████▌ | 131201/153371 [5:04:26<3:47:03,  1.63embedding/s]

Saved 14243 embeddings.


Generating embeddings for job skills:  86%|████████▌ | 131401/153371 [5:05:23<3:47:23,  1.61embedding/s]

Saved 14443 embeddings.


Generating embeddings for job skills:  86%|████████▌ | 131601/153371 [5:06:18<4:08:32,  1.46embedding/s]

Saved 14643 embeddings.


Generating embeddings for job skills:  86%|████████▌ | 131801/153371 [5:07:11<3:50:34,  1.56embedding/s]

Saved 14843 embeddings.


Generating embeddings for job skills:  86%|████████▌ | 132002/153371 [5:08:03<2:59:17,  1.99embedding/s]

Saved 15043 embeddings.


Generating embeddings for job skills:  86%|████████▌ | 132201/153371 [5:08:56<4:33:16,  1.29embedding/s]

Saved 15243 embeddings.


Generating embeddings for job skills:  86%|████████▋ | 132401/153371 [5:09:50<3:45:49,  1.55embedding/s]

Saved 15443 embeddings.


Generating embeddings for job skills:  86%|████████▋ | 132601/153371 [5:10:44<3:43:26,  1.55embedding/s]

Saved 15643 embeddings.


Generating embeddings for job skills:  87%|████████▋ | 132801/153371 [5:11:37<4:03:46,  1.41embedding/s]

Saved 15843 embeddings.


Generating embeddings for job skills:  87%|████████▋ | 133001/153371 [5:12:43<4:16:15,  1.32embedding/s] 

Saved 16043 embeddings.


Generating embeddings for job skills:  87%|████████▋ | 133201/153371 [5:14:00<3:59:34,  1.40embedding/s] 

Saved 16243 embeddings.


Generating embeddings for job skills:  87%|████████▋ | 133401/153371 [5:15:06<4:02:46,  1.37embedding/s]

Saved 16443 embeddings.


Generating embeddings for job skills:  87%|████████▋ | 133601/153371 [5:16:13<3:45:38,  1.46embedding/s]

Saved 16643 embeddings.


Generating embeddings for job skills:  87%|████████▋ | 133801/153371 [5:17:16<3:56:37,  1.38embedding/s]

Saved 16843 embeddings.


Generating embeddings for job skills:  87%|████████▋ | 134001/153371 [5:18:20<3:55:44,  1.37embedding/s]

Saved 17043 embeddings.


Generating embeddings for job skills:  88%|████████▊ | 134201/153371 [5:19:24<4:01:47,  1.32embedding/s]

Saved 17243 embeddings.


Generating embeddings for job skills:  88%|████████▊ | 134401/153371 [5:20:30<3:54:05,  1.35embedding/s]

Saved 17443 embeddings.


Generating embeddings for job skills:  88%|████████▊ | 134601/153371 [5:21:33<4:06:41,  1.27embedding/s]

Saved 17643 embeddings.


Generating embeddings for job skills:  88%|████████▊ | 134801/153371 [5:22:37<4:02:04,  1.28embedding/s]

Saved 17843 embeddings.


Generating embeddings for job skills:  88%|████████▊ | 135001/153371 [5:23:36<3:38:26,  1.40embedding/s]

Saved 18043 embeddings.


Generating embeddings for job skills:  88%|████████▊ | 135201/153371 [5:24:28<3:35:58,  1.40embedding/s]

Saved 18243 embeddings.


Generating embeddings for job skills:  88%|████████▊ | 135401/153371 [5:25:21<3:33:35,  1.40embedding/s]

Saved 18443 embeddings.


Generating embeddings for job skills:  88%|████████▊ | 135601/153371 [5:26:18<3:41:19,  1.34embedding/s]

Saved 18643 embeddings.


Generating embeddings for job skills:  89%|████████▊ | 135801/153371 [5:27:16<3:36:36,  1.35embedding/s]

Saved 18843 embeddings.


Generating embeddings for job skills:  89%|████████▊ | 136001/153371 [5:28:10<3:22:41,  1.43embedding/s]

Saved 19043 embeddings.


Generating embeddings for job skills:  89%|████████▉ | 136201/153371 [5:29:03<3:54:30,  1.22embedding/s]

Saved 19243 embeddings.


Generating embeddings for job skills:  89%|████████▉ | 136402/153371 [5:30:06<2:53:08,  1.63embedding/s] 

Saved 19443 embeddings.


Generating embeddings for job skills:  89%|████████▉ | 136602/153371 [5:30:57<2:40:20,  1.74embedding/s]

Saved 19643 embeddings.


Generating embeddings for job skills:  89%|████████▉ | 136801/153371 [5:31:46<3:28:36,  1.32embedding/s]

Saved 19843 embeddings.


Generating embeddings for job skills:  89%|████████▉ | 137001/153371 [5:32:39<3:41:41,  1.23embedding/s]

Saved 20043 embeddings.


Generating embeddings for job skills:  89%|████████▉ | 137201/153371 [5:33:44<3:48:32,  1.18embedding/s]

Saved 20243 embeddings.


Generating embeddings for job skills:  90%|████████▉ | 137401/153371 [5:34:47<3:43:10,  1.19embedding/s]

Saved 20443 embeddings.


Generating embeddings for job skills:  90%|████████▉ | 137601/153371 [5:35:54<3:35:53,  1.22embedding/s]

Saved 20643 embeddings.


Generating embeddings for job skills:  90%|████████▉ | 137801/153371 [5:37:01<4:02:47,  1.07embedding/s]

Saved 20843 embeddings.


Generating embeddings for job skills:  90%|████████▉ | 138001/153371 [5:38:04<3:19:54,  1.28embedding/s]

Saved 21043 embeddings.


Generating embeddings for job skills:  90%|█████████ | 138201/153371 [5:39:01<3:16:13,  1.29embedding/s]

Saved 21243 embeddings.


Generating embeddings for job skills:  90%|█████████ | 138401/153371 [5:39:52<3:10:03,  1.31embedding/s]

Saved 21443 embeddings.


Generating embeddings for job skills:  90%|█████████ | 138601/153371 [5:40:45<3:25:12,  1.20embedding/s]

Saved 21643 embeddings.


Generating embeddings for job skills:  91%|█████████ | 138801/153371 [5:41:40<3:19:36,  1.22embedding/s]

Saved 21843 embeddings.


Generating embeddings for job skills:  91%|█████████ | 139001/153371 [5:42:35<3:34:05,  1.12embedding/s]

Saved 22043 embeddings.


Generating embeddings for job skills:  91%|█████████ | 139201/153371 [5:43:41<3:33:47,  1.10embedding/s]

Saved 22243 embeddings.


Generating embeddings for job skills:  91%|█████████ | 139401/153371 [5:44:43<3:20:29,  1.16embedding/s]

Saved 22443 embeddings.


Generating embeddings for job skills:  91%|█████████ | 139601/153371 [5:45:48<3:22:44,  1.13embedding/s]

Saved 22643 embeddings.


Generating embeddings for job skills:  91%|█████████ | 139801/153371 [5:46:54<3:35:48,  1.05embedding/s]

Saved 22843 embeddings.


Generating embeddings for job skills:  91%|█████████▏| 140001/153371 [5:47:58<3:22:04,  1.10embedding/s]

Saved 23043 embeddings.


Generating embeddings for job skills:  91%|█████████▏| 140201/153371 [5:49:02<3:13:13,  1.14embedding/s]

Saved 23243 embeddings.


Generating embeddings for job skills:  92%|█████████▏| 140401/153371 [5:50:07<3:14:12,  1.11embedding/s]

Saved 23443 embeddings.


Generating embeddings for job skills:  92%|█████████▏| 140601/153371 [5:51:12<3:12:11,  1.11embedding/s]

Saved 23643 embeddings.


Generating embeddings for job skills:  92%|█████████▏| 140801/153371 [5:52:15<3:16:42,  1.07embedding/s]

Saved 23843 embeddings.


Generating embeddings for job skills:  92%|█████████▏| 141001/153371 [5:53:18<2:53:12,  1.19embedding/s]

Saved 24043 embeddings.


Generating embeddings for job skills:  92%|█████████▏| 141201/153371 [5:54:11<2:59:34,  1.13embedding/s]

Saved 24243 embeddings.


Generating embeddings for job skills:  92%|█████████▏| 141401/153371 [5:55:05<2:49:37,  1.18embedding/s]

Saved 24443 embeddings.


Generating embeddings for job skills:  92%|█████████▏| 141601/153371 [5:55:59<2:52:13,  1.14embedding/s]

Saved 24643 embeddings.


Generating embeddings for job skills:  92%|█████████▏| 141801/153371 [5:56:53<2:50:47,  1.13embedding/s]

Saved 24843 embeddings.


Generating embeddings for job skills:  93%|█████████▎| 142001/153371 [5:57:46<2:43:50,  1.16embedding/s]

Saved 25043 embeddings.


Generating embeddings for job skills:  93%|█████████▎| 142201/153371 [5:58:37<2:44:32,  1.13embedding/s]

Saved 25243 embeddings.


Generating embeddings for job skills:  93%|█████████▎| 142401/153371 [5:59:29<2:44:24,  1.11embedding/s]

Saved 25443 embeddings.


Generating embeddings for job skills:  93%|█████████▎| 142601/153371 [6:00:20<2:46:43,  1.08embedding/s]

Saved 25643 embeddings.


Generating embeddings for job skills:  93%|█████████▎| 142801/153371 [6:01:13<2:49:48,  1.04embedding/s]

Saved 25843 embeddings.


Generating embeddings for job skills:  93%|█████████▎| 143001/153371 [6:02:06<2:48:08,  1.03embedding/s]

Saved 26043 embeddings.


Generating embeddings for job skills:  93%|█████████▎| 143201/153371 [6:03:17<2:42:38,  1.04embedding/s]

Saved 26243 embeddings.


Generating embeddings for job skills:  93%|█████████▎| 143401/153371 [6:04:21<2:41:16,  1.03embedding/s]

Saved 26443 embeddings.


Generating embeddings for job skills:  94%|█████████▎| 143601/153371 [6:05:29<2:36:33,  1.04embedding/s]

Saved 26643 embeddings.


Generating embeddings for job skills:  94%|█████████▍| 143801/153371 [6:06:33<2:33:38,  1.04embedding/s]

Saved 26843 embeddings.


Generating embeddings for job skills:  94%|█████████▍| 144001/153371 [6:07:38<2:38:15,  1.01s/embedding]

Saved 27043 embeddings.


Generating embeddings for job skills:  94%|█████████▍| 144201/153371 [6:08:45<2:35:27,  1.02s/embedding]

Saved 27243 embeddings.


Generating embeddings for job skills:  94%|█████████▍| 144401/153371 [6:09:53<2:28:44,  1.01embedding/s]

Saved 27443 embeddings.


Generating embeddings for job skills:  94%|█████████▍| 144601/153371 [6:11:19<2:31:04,  1.03s/embedding]

Saved 27643 embeddings.


Generating embeddings for job skills:  94%|█████████▍| 144801/153371 [6:12:23<2:32:36,  1.07s/embedding]

Saved 27843 embeddings.


Generating embeddings for job skills:  95%|█████████▍| 145001/153371 [6:13:26<2:20:15,  1.01s/embedding]

Saved 28043 embeddings.


Generating embeddings for job skills:  95%|█████████▍| 145201/153371 [6:14:33<2:19:13,  1.02s/embedding]

Saved 28243 embeddings.


Generating embeddings for job skills:  95%|█████████▍| 145401/153371 [6:15:40<2:21:37,  1.07s/embedding]

Saved 28443 embeddings.


Generating embeddings for job skills:  95%|█████████▍| 145601/153371 [6:16:46<2:13:50,  1.03s/embedding]

Saved 28643 embeddings.


Generating embeddings for job skills:  95%|█████████▌| 145801/153371 [6:17:53<2:04:59,  1.01embedding/s]

Saved 28843 embeddings.


Generating embeddings for job skills:  95%|█████████▌| 146001/153371 [6:18:55<2:01:21,  1.01embedding/s]

Saved 29043 embeddings.


Generating embeddings for job skills:  95%|█████████▌| 146201/153371 [6:19:48<1:54:38,  1.04embedding/s]

Saved 29243 embeddings.


Generating embeddings for job skills:  95%|█████████▌| 146401/153371 [6:20:48<2:06:24,  1.09s/embedding]

Saved 29443 embeddings.


Generating embeddings for job skills:  96%|█████████▌| 146601/153371 [6:21:44<1:59:50,  1.06s/embedding]

Saved 29643 embeddings.


Generating embeddings for job skills:  96%|█████████▌| 146801/153371 [6:22:37<1:49:44,  1.00s/embedding]

Saved 29843 embeddings.


Generating embeddings for job skills:  96%|█████████▌| 147001/153371 [6:23:34<1:55:15,  1.09s/embedding]

Saved 30043 embeddings.


Generating embeddings for job skills:  96%|█████████▌| 147201/153371 [6:24:37<1:47:25,  1.04s/embedding]

Saved 30243 embeddings.


Generating embeddings for job skills:  96%|█████████▌| 147401/153371 [6:25:43<1:47:27,  1.08s/embedding]

Saved 30443 embeddings.


Generating embeddings for job skills:  96%|█████████▌| 147601/153371 [6:26:53<1:41:51,  1.06s/embedding]

Saved 30643 embeddings.


Generating embeddings for job skills:  96%|█████████▋| 147802/153371 [6:28:01<1:13:18,  1.27embedding/s]

Saved 30843 embeddings.


Generating embeddings for job skills:  96%|█████████▋| 148001/153371 [6:29:03<1:37:34,  1.09s/embedding]

Saved 31043 embeddings.


Generating embeddings for job skills:  97%|█████████▋| 148201/153371 [6:30:10<1:33:46,  1.09s/embedding]

Saved 31243 embeddings.


Generating embeddings for job skills:  97%|█████████▋| 148401/153371 [6:31:14<1:33:53,  1.13s/embedding]

Saved 31443 embeddings.


Generating embeddings for job skills:  97%|█████████▋| 148601/153371 [6:32:18<1:29:47,  1.13s/embedding]

Saved 31643 embeddings.


Generating embeddings for job skills:  97%|█████████▋| 148801/153371 [6:33:24<1:25:17,  1.12s/embedding]

Saved 31843 embeddings.


Generating embeddings for job skills:  97%|█████████▋| 149002/153371 [6:34:27<59:00,  1.23embedding/s]  

Saved 32043 embeddings.


Generating embeddings for job skills:  97%|█████████▋| 149202/153371 [6:35:21<57:18,  1.21embedding/s]  

Saved 32243 embeddings.


Generating embeddings for job skills:  97%|█████████▋| 149402/153371 [6:36:17<52:59,  1.25embedding/s]  

Saved 32443 embeddings.


Generating embeddings for job skills:  98%|█████████▊| 149601/153371 [6:37:10<1:05:49,  1.05s/embedding]

Saved 32643 embeddings.


Generating embeddings for job skills:  98%|█████████▊| 149801/153371 [6:38:04<1:04:00,  1.08s/embedding]

Saved 32843 embeddings.


Generating embeddings for job skills:  98%|█████████▊| 150001/153371 [6:38:54<1:03:58,  1.14s/embedding]

Saved 33043 embeddings.


Generating embeddings for job skills:  98%|█████████▊| 150201/153371 [6:39:47<59:02,  1.12s/embedding]  

Saved 33243 embeddings.


Generating embeddings for job skills:  98%|█████████▊| 150401/153371 [6:40:39<55:05,  1.11s/embedding]

Saved 33443 embeddings.


Generating embeddings for job skills:  98%|█████████▊| 150601/153371 [6:41:48<1:29:49,  1.95s/embedding]

Saved 33643 embeddings.


Generating embeddings for job skills:  98%|█████████▊| 150802/153371 [6:42:41<36:32,  1.17embedding/s]  

Saved 33843 embeddings.


Generating embeddings for job skills:  98%|█████████▊| 151001/153371 [6:43:34<46:13,  1.17s/embedding]

Saved 34043 embeddings.


Generating embeddings for job skills:  99%|█████████▊| 151201/153371 [6:44:37<43:16,  1.20s/embedding]

Saved 34243 embeddings.


Generating embeddings for job skills:  99%|█████████▊| 151401/153371 [6:45:42<41:25,  1.26s/embedding]

Saved 34443 embeddings.


Generating embeddings for job skills:  99%|█████████▉| 151601/153371 [6:46:49<34:27,  1.17s/embedding]

Saved 34643 embeddings.


Generating embeddings for job skills:  99%|█████████▉| 151801/153371 [6:47:59<32:48,  1.25s/embedding]

Saved 34843 embeddings.


Generating embeddings for job skills:  99%|█████████▉| 152001/153371 [6:49:05<28:19,  1.24s/embedding]

Saved 35043 embeddings.


Generating embeddings for job skills:  99%|█████████▉| 152201/153371 [6:50:17<24:30,  1.26s/embedding]

Saved 35243 embeddings.


Generating embeddings for job skills:  99%|█████████▉| 152401/153371 [6:51:21<19:02,  1.18s/embedding]

Saved 35443 embeddings.


Generating embeddings for job skills:  99%|█████████▉| 152601/153371 [6:52:26<16:22,  1.28s/embedding]

Saved 35643 embeddings.


Generating embeddings for job skills: 100%|█████████▉| 152801/153371 [6:53:32<11:34,  1.22s/embedding]

Saved 35843 embeddings.


Generating embeddings for job skills: 100%|█████████▉| 153001/153371 [6:54:41<07:23,  1.20s/embedding]

Saved 36043 embeddings.


Generating embeddings for job skills: 100%|█████████▉| 153201/153371 [6:55:45<03:33,  1.26s/embedding]

Saved 36243 embeddings.


Generating embeddings for job skills: 100%|██████████| 153371/153371 [6:56:38<00:00,  6.14embedding/s]


Done: 36413 embeddings saved.


## Join the three datasets
Since we divided the dataset into three parts:
* July 2024 to December 2024 (newly embedded)
* January 2025 to April 2025 (previously embedded for validation)
* May 2025 to July 2025 (newly embedded)

we need to join them into a single dataset for further processing.

In [None]:
# Load the separately generated embedding chunks. They are stacked below in exactly
# this order, and row i of the stacked matrix is later paired with row i of the
# job-skills table, so the order matters.
# NOTE(review): presumably parts 1-2 cover Jul-Dec 2024, part 3 is the earlier Jan-Apr 2025
# run and part 4 covers May-Jul 2025 — confirm against the cells that wrote these files.
npy_array_1 = np.load("../data/embeddings/job_skills_embeddings_first_part_fixed_51002.npy")
npy_array_2 = np.load("../data/embeddings/job_skills_embeddings_first_part.npy")
npy_array_3 = np.load("../data/embeddings/old_embeddings/job_skills_embeddings.npy")
npy_array_4 = np.load("../data/embeddings/job_skills_embeddings_second_part.npy")
embedding_parts = [npy_array_1, npy_array_2, npy_array_3, npy_array_4]

print("Shapes:", npy_array_1.shape, npy_array_2.shape, npy_array_3.shape, npy_array_4.shape)
total_embeddings = sum(part.shape[0] for part in embedding_parts)
print("Total embeddings:", total_embeddings)

# We eliminated one row in the jan-apr 2025 period because it was corrupted,
# so the embeddings are expected to be exactly one short of df_jobs.
counts_match = total_embeddings + 1 == len(df_jobs)
print("Assertion:", counts_match)
print(len(df_jobs))
# Fail loudly instead of just printing False: a joined file with the wrong number
# of rows must not be written to disk.
assert counts_match, (
    f"Expected {len(df_jobs) - 1} embeddings (len(df_jobs) - 1), got {total_embeddings}"
)

final_embeddings = np.concatenate(embedding_parts, axis=0)
print("Final shape:", final_embeddings.shape)
np.save("../data/embeddings/full_job_skills_embeddings.npy", final_embeddings)

Shapes: (51002, 3072) (46639, 3072) (70318, 3072) (36413, 3072)
Total embeddings: 204372
Assertion: True
204373
Final shape: (204372, 3072)


In [37]:
# Read the cleaned full dataset and remove the 2025 rows whose month is 1, 3 or 4.
# NOTE(review): month 2 is not in the exclusion list — confirm this is intended
# (e.g. the file contains no February 2025 rows).
full_dataset = pd.read_csv("../data/full_dataset/jul24_to_jul_2025_cleaned.csv")
validation_rows = (full_dataset["year"] == 2025) & full_dataset["month"].isin([1, 3, 4])
full_dataset = full_dataset[~validation_rows]
print("Final dataset shape:", full_dataset.shape)

Final dataset shape: (134054, 6)


In [39]:
# Read the Jan-Apr 2025 validation dataset and set its `year` column to 2025
# for every row.
df_jan_apr_2025 = pd.read_csv(
    "../data/validation_green_skill_dataset/full_jan_apr_2025_dataset/jan_to_apr_2025_with_languages_cleaned.csv"
).assign(year=2025)
print("Jan-Apr 2025 dataset shape:", df_jan_apr_2025.shape)

Jan-Apr 2025 dataset shape: (70318, 7)


In [40]:
# Append the Jan-Apr 2025 rows after the rows already in full_dataset and renumber
# the index from 0. Because full_dataset is reassigned from itself, re-running this
# cell appends the Jan-Apr rows a second time.
full_dataset = pd.concat([full_dataset, df_jan_apr_2025], ignore_index=True)

In [41]:
# The combined table must have exactly one row per embedding, because rows and
# embeddings are later paired purely by position.
print("Combined dataset shape:", full_dataset.shape)
rows_match_embeddings = full_dataset.shape[0] == final_embeddings.shape[0]
print("Final assertion:", rows_match_embeddings)
# Stop the notebook here on a mismatch instead of only printing False.
assert rows_match_embeddings, (
    f"Dataset has {full_dataset.shape[0]} rows but there are "
    f"{final_embeddings.shape[0]} embeddings"
)

Combined dataset shape: (204372, 7)
Final assertion: True


In [42]:
# Order the rows by year then month, renumber the index, and drop the
# `detected_language` column before saving.
# `errors="ignore"` keeps the cell re-runnable: on a second run the column is
# already gone and a plain drop would raise KeyError.
# NOTE(review): row i of this sorted table is later paired with embedding row i.
# That only holds if the embeddings were generated in this same order and rows with
# equal (year, month) keep their relative order through the sort — confirm.
full_dataset = (
    full_dataset
    .sort_values(by=["year", "month"])
    .reset_index(drop=True)
    .drop(columns=["detected_language"], errors="ignore")
)
full_dataset.to_csv("../data/full_dataset/jul24_to_jul_2025_cleaned_sorted.csv", index=False)

Create the **FAISS** index and add the embeddings to it. Also save the index to disk.

In [43]:
# Create the embedding index: load the joined embeddings, add them to a flat
# inner-product FAISS index, and write the index to disk.

job_skills_embeddings = np.load("../data/embeddings/full_job_skills_embeddings.npy")

# FAISS needs a C-contiguous float32 matrix. Convert once; no copy is made when the
# loaded array already has that dtype and layout (the previous
# np.array(...).astype(...) always made a redundant full copy of the matrix).
job_skills_embeddings = np.ascontiguousarray(job_skills_embeddings, dtype=np.float32)
if job_skills_embeddings.ndim != 2:
    raise ValueError(
        f"Expected a 2-D (n_vectors, dim) embedding matrix, got shape {job_skills_embeddings.shape}"
    )

# IndexFlatIP ranks by raw inner product.
# NOTE(review): this equals cosine similarity only if the vectors are unit-length — confirm.
embedding_dim = job_skills_embeddings.shape[1]
index_job_skills = faiss.IndexFlatIP(embedding_dim)
index_job_skills.add(job_skills_embeddings)

faiss.write_index(index_job_skills, "../data/embeddings/full_job_skills_embeddings.index")

In [44]:
# Sanity check: number of vectors currently stored in the index; it should equal
# the number of embedding rows added to it.
print(index_job_skills.ntotal)

204372


Create a mapping between the index and the skill, similar to the one created for the green skills. Here we build a dictionary whose entries have the form `i: (job_id, job_skill, month, year)`, where:
* `i` is the unique key for that entry, useful because **FAISS** only stores the *vector embedding*, not the actual text, so we need to save the identifier to map the vector to the entry.
* `(job_id, job_skill, month, year)`: a tuple with four elements, `job_id` is the unique identifier of the job posting, `job_skill` is the skill extracted from the job posting, and `month` and `year` are the values of the `month` and `year` columns for that row.

In [6]:
# Index for Job Skills
# Key each row's (Job_ID, Skills, month, year) tuple by its 0-based row position,
# which serves as the lookup key for vectors stored in FAISS.
id_to_job = dict(
    enumerate(
        zip(
            full_dataset["Job_ID"],
            full_dataset["Skills"],
            full_dataset["month"],
            full_dataset["year"],
        )
    )
)
# Show the first five entries as a quick visual check.
for i in range(5):
    print(id_to_job[i])

('job_624fd0a8f34770fb', 'experiencia en atencion al cliente', 7.0, 2024)
('job_624fd0a8f34770fb', 'gusto por el servicio y atencion al cliente', 7.0, 2024)
('job_624fd0a8f34770fb', 'preparatoria terminada', 7.0, 2024)
('job_624fd0a8f34770fb', 'mayor de anos', 7.0, 2024)
('job_624fd0a8f34770fb', 'experiencia en empleos presenciales', 7.0, 2024)


Save the mapping to a json file.

In [7]:
# Persist the mapping as pretty-printed JSON.
# Note: JSON has no integer keys or tuples, so the keys are written as strings and
# each tuple as a list; convert them back when loading the file.
with open("../data/mapping/id_to_jobs.json", "w") as mapping_file:
    json.dump(id_to_job, mapping_file, indent=4)