In [4]:
import os
import random
import re
import string

import nltk
import numpy as np
import pandas as pd

import gensim
from gensim.models import Word2Vec, KeyedVectors

from nltk import word_tokenize
from nltk.corpus import stopwords
import spacy

nlp = spacy.load('en_core_web_lg')
#nltk.download("stopwords")

In [5]:
#!pip install gensim
#!pip install --upgrade pip

In [6]:
df_raw=pd.read_excel("Injury Triage 2022.xlsx")

In [7]:
def clean_text(text, tokenizer, stopwords):
    """Pre-process text and generate tokens

    Args:
        text: Text to tokenize.

    Returns:
        Tokenized text.
    """
    text = str(text).lower()  # Lowercase words
    text = re.sub(r"\[(.*?)\]", "", text)  # Remove [+XYZ chars] in content
    text = re.sub(r"\s+", " ", text)  # Remove multiple spaces in content
    text = re.sub(r"\w+…|…", "", text)  # Remove ellipsis (and last word)
    text = re.sub(r"(?<=\w)-(?=\w)", " ", text)  # Replace dash between words
    text = re.sub(f"[{re.escape(string.punctuation)}]", "", text)  # Remove punctuation

    tokens = tokenizer(text)  # Get tokens from text
    tokens = [t for t in tokens if not t in stopwords]  # Remove stopwords
    tokens = ["" if t.isdigit() else t for t in tokens]  # Remove digits
    tokens = [t for t in tokens if len(t) > 1]  # Remove short tokens
    return tokens

In [8]:
custom_stopwords = set(stopwords.words("english"))
#text_columns = ["title", "description", "content"]

df = df_raw.copy()
# df["content"] = df["content"].fillna("")

# for col in text_columns:
#     df[col] = df[col].astype(str)

# Create text column based on title, description, and content
#df["text"] = df[text_columns].apply(lambda x: " | ".join(x), axis=1)
df["tokens"] = df["Description of Event"].map(lambda x: clean_text(x, word_tokenize, custom_stopwords))

# Remove duplicated after preprocessing
_, idx = np.unique(df["tokens"], return_index=True)
df = df.iloc[idx, :]

# Remove empty values and keep relevant columns
df = df.loc[df.tokens.map(lambda x: len(x) > 0), ["Description of Event", "tokens"]]

docs = df["Description of Event"].values
tokenized_docs = df["tokens"].values

print(f"Original dataframe: {df_raw.shape}")
print(f"Pre-processed dataframe: {df.shape}")

Original dataframe: (720, 19)
Pre-processed dataframe: (720, 2)


In [9]:
pd.set_option('display.max_colwidth',None)

In [10]:
df.head()

Unnamed: 0,Description of Event,tokens
301,"AT 12:10pm EE checked in pax with tote/toolbox weighing 92lbs. EE marked the bag with two heavy tags. EE asked for assistance from another agent, to lift the heavy bag on to the ticket counter bag belt. EE stood with his left side closest to the belt. Together with the other agent, he lifted the bag with two hands moving the bag across his body from right to left. As EE moved the bag across his body, he heard a “pop” and immediately felt pain.","[1210pm, ee, checked, pax, totetoolbox, weighing, 92lbs, ee, marked, bag, two, heavy, tags, ee, asked, assistance, another, agent, lift, heavy, bag, ticket, counter, bag, belt, ee, stood, left, side, closest, belt, together, agent, lifted, bag, two, hands, moving, bag, across, body, right, left, ee, moved, bag, across, body, heard, pop, immediately, felt, pain]"
157,"On 14JAN22, Katy parked the Porsche at gate 37 to greet a passenger off DL3851 PDX-LAX. She parked at the left tail side of the aircraft with the rear of the Porsche facing south. After parking, ERMC arrived and backed up to ship 252 to service the lavatories. At approximately 1027p Katy and the passenger returned to the Porsche. The passenger got into the car and Katy proceeded to load the luggage into the trunk. As she was closing the trunk, the ABM lavatory service driver begins to pull away. Katy gets pushed up onto the side of the Porsche by the still connected lavatory service hose. She quickly pulls it away enough to get under it. Then proceeds to waive down the driver to stop.","[14jan22, katy, parked, porsche, gate, greet, passenger, dl3851, pdx, lax, parked, left, tail, side, aircraft, rear, porsche, facing, south, parking, ermc, arrived, backed, ship, service, lavatories, approximately, 1027p, katy, passenger, returned, porsche, passenger, got, car, katy, proceeded, load, luggage, trunk, closing, trunk, abm, lavatory, service, driver, begins, pull, away, katy, gets, pushed, onto, side, porsche, still, connected, lavatory, service, hose, quickly, pulls, away, enough, get, proceeds, waive, driver, stop]"
662,advised Josephine Calautti informed me this morning her left foot was slightly swollen and the bottom of her heel is in pain. The direct cause of this injury is unknown to employee as it was not contributed to any type of twisting or of that nature. On 06/18 @ approx. 11a Josephine was walking down the jet bridge on gate 21 when she felt sharp pains on the bottom of her heel through to her Achilles. This morning her lower left ankle became slightly swollen and she was hurting while putting pressure on heel while walking.,"[advised, josephine, calautti, informed, morning, left, foot, slightly, swollen, bottom, heel, pain, direct, cause, injury, unknown, employee, contributed, type, twisting, nature, approx, 11a, josephine, walking, jet, bridge, gate, felt, sharp, pains, bottom, heel, achilles, morning, lower, left, ankle, became, slightly, swollen, hurting, putting, pressure, heel, walking]"
495,Agent was assisting in lifting aisle chair with heavy passenger over the aircraft lip of door onto the jet bridge. She felt pain down her mid to lower back.,"[agent, assisting, lifting, aisle, chair, heavy, passenger, aircraft, lip, door, onto, jet, bridge, felt, pain, mid, lower, back]"
4,"Agent was assisting a passenger deplaning from remote ops spot 5C, he climbed up the steps and stopped at the second step to retrieve carry on item from passenger and he walked down the steps backwards. When he stepped down he felt his left ankle slightly roll but felt no pain. He continued working and clocked out at 2230. He woke up this morning (23NOV) and felt a sharp pain starting from his ankle bone shooting up his leg and his ankle was swollen as well. He is also limping as he walks","[agent, assisting, passenger, deplaning, remote, ops, spot, 5c, climbed, steps, stopped, second, step, retrieve, carry, item, passenger, walked, steps, backwards, stepped, felt, left, ankle, slightly, roll, felt, pain, continued, working, clocked, woke, morning, 23nov, felt, sharp, pain, starting, ankle, bone, shooting, leg, ankle, swollen, well, also, limping, walks]"


In [None]:
#print(gensim.utils.simple_preprocess("AT 12:10pm EE checked in pax with tote/toolbox weighing 92lbs. EE marked the bag with two heavy tags. EE asked for assistance from another agent, to lift the heavy bag on to the ticket counter bag belt. EE stood with his left side closest to the belt. Together with the other agent, he lifted the bag with two hands moving the bag across his body from right to left. As EE moved the bag across his body, he heard a pop and immediately felt pain."))

In [None]:
df['text'] = df['text'].str.replace(r'\W', ' ', case = False)
df['text'] = df['text'].str.replace(r'[.,?<>-]', '')

In [14]:
model = Word2Vec(sentences=tokenized_docs, vector_size=300, workers=1)
#wv = api.load('word2vec-google-news-300')
model.build_vocab(tokenized_docs, progress_per=1000)
#model = KeyedVectors.load_word2vec_format=('word2vec-google-news-300')

In [15]:
def vectorize(list_of_docs, model):
    """Generate vectors for list of documents using a Word Embedding

    Args:
        list_of_docs: List of documents
        model: Gensim's Word Embedding

    Returns:
        List of document vectors
    """
    features = []

    for tokens in list_of_docs:
        zero_vector = np.zeros(model.vector_size)
        vectors = []
        for token in tokens:
            if token in model.wv:
                try:
                    vectors.append(model.wv[token])
                except KeyError:
                    continue
        if vectors:
            vectors = np.asarray(vectors)
            avg_vec = vectors.mean(axis=0)
            features.append(avg_vec)
        else:
            features.append(zero_vector)
    return features    


In [16]:
vectorized_docs = vectorize(tokenized_docs, model=model)
len(vectorized_docs), len(vectorized_docs[0])

(720, 300)

In [17]:
df["vectorized_docs"] =vectorized_docs

In [18]:
#model.similarity(w1="slip",w2="fall")

In [19]:
df.head()

Unnamed: 0,Description of Event,tokens,vectorized_docs
301,"AT 12:10pm EE checked in pax with tote/toolbox weighing 92lbs. EE marked the bag with two heavy tags. EE asked for assistance from another agent, to lift the heavy bag on to the ticket counter bag belt. EE stood with his left side closest to the belt. Together with the other agent, he lifted the bag with two hands moving the bag across his body from right to left. As EE moved the bag across his body, he heard a “pop” and immediately felt pain.","[1210pm, ee, checked, pax, totetoolbox, weighing, 92lbs, ee, marked, bag, two, heavy, tags, ee, asked, assistance, another, agent, lift, heavy, bag, ticket, counter, bag, belt, ee, stood, left, side, closest, belt, together, agent, lifted, bag, two, hands, moving, bag, across, body, right, left, ee, moved, bag, across, body, heard, pop, immediately, felt, pain]","[0.02078223, 0.28733596, 0.040639326, 0.14662439, 0.053288974, -0.23149274, 0.1421007, 0.5295764, 0.08089438, -0.105023816, 0.008849435, -0.32476515, 0.06760511, -0.123851515, -0.11228678, -0.18282238, 0.15936929, 0.045607127, 0.0934885, -0.014152064, -0.17246228, 0.0772793, 0.16892964, 0.06377067, 0.022525078, -0.0154312365, -0.34821892, 0.056510925, -0.16030723, -0.19473821, 0.06710984, -0.18251911, 0.07434252, -0.059736427, -0.07203949, 0.16165294, 0.055306293, -0.3178666, -0.12057288, 0.09298358, -0.101624966, 0.09543545, -0.055783104, -0.14444922, 0.20872512, 0.25495723, 0.06222806, 0.0071749776, -0.01998309, 0.15665336, 0.067331165, -0.013482443, -0.12223459, 0.06347043, -0.027874587, 0.23429559, 0.004018198, -0.112019174, 0.047091495, -0.006398526, -0.034146193, -0.08964167, 0.002964169, 0.047649346, -0.025708806, 0.09972803, -0.056284558, -0.0042402847, -0.11530547, -0.15914607, -0.037736095, 0.08804078, 0.30009148, -0.18848133, -0.043457553, 0.16498247, -0.30944487, 0.099162765, -0.08905923, 0.18599838, -0.2031958, -0.20715837, 0.08662451, 0.42640746, 0.055079065, -0.0001251785, -0.31122673, 0.056625664, 0.2427255, 0.14296466, 0.31089833, -0.14139344, 0.15720321, 0.000899923, 0.114472345, 0.2605898, 0.24237971, -0.011944372, -0.06241723, 0.15385789, ...]"
157,"On 14JAN22, Katy parked the Porsche at gate 37 to greet a passenger off DL3851 PDX-LAX. She parked at the left tail side of the aircraft with the rear of the Porsche facing south. After parking, ERMC arrived and backed up to ship 252 to service the lavatories. At approximately 1027p Katy and the passenger returned to the Porsche. The passenger got into the car and Katy proceeded to load the luggage into the trunk. As she was closing the trunk, the ABM lavatory service driver begins to pull away. Katy gets pushed up onto the side of the Porsche by the still connected lavatory service hose. She quickly pulls it away enough to get under it. Then proceeds to waive down the driver to stop.","[14jan22, katy, parked, porsche, gate, greet, passenger, dl3851, pdx, lax, parked, left, tail, side, aircraft, rear, porsche, facing, south, parking, ermc, arrived, backed, ship, service, lavatories, approximately, 1027p, katy, passenger, returned, porsche, passenger, got, car, katy, proceeded, load, luggage, trunk, closing, trunk, abm, lavatory, service, driver, begins, pull, away, katy, gets, pushed, onto, side, porsche, still, connected, lavatory, service, hose, quickly, pulls, away, enough, get, proceeds, waive, driver, stop]","[0.018185027, 0.24103673, 0.033950515, 0.12280439, 0.045673896, -0.19350724, 0.11986755, 0.44350573, 0.0674236, -0.08736359, 0.005948549, -0.27228317, 0.05648091, -0.10418014, -0.09363291, -0.15337855, 0.13365251, 0.038733337, 0.07763914, -0.01128005, -0.14527127, 0.0646305, 0.14240187, 0.05349175, 0.018938713, -0.012338214, -0.29150653, 0.046295498, -0.13494223, -0.16322286, 0.056475136, -0.15268292, 0.062198758, -0.05005803, -0.05995316, 0.13622993, 0.04683847, -0.26655528, -0.10114395, 0.078702785, -0.085272424, 0.07932398, -0.046804763, -0.12131594, 0.17498948, 0.21467352, 0.052842136, 0.0060765697, -0.016691461, 0.13065454, 0.056775503, -0.0116650555, -0.10153768, 0.05269577, -0.024186354, 0.19787341, 0.003183882, -0.09300643, 0.039216407, -0.0051485547, -0.02816534, -0.07574181, 0.0026814814, 0.040118862, -0.020854734, 0.08353546, -0.04681781, -0.0039473586, -0.095757976, -0.13348508, -0.03162648, 0.07318173, 0.251829, -0.15766856, -0.037656594, 0.13920265, -0.25963736, 0.08323799, -0.07456138, 0.15739939, -0.17026408, -0.17362772, 0.073558286, 0.35675853, 0.04649602, 0.0005329356, -0.2611394, 0.048367016, 0.2039431, 0.11888104, 0.26183498, -0.11786316, 0.13248833, 0.0005679205, 0.0942889, 0.21819885, 0.20413387, -0.010660792, -0.052618306, 0.12911649, ...]"
662,advised Josephine Calautti informed me this morning her left foot was slightly swollen and the bottom of her heel is in pain. The direct cause of this injury is unknown to employee as it was not contributed to any type of twisting or of that nature. On 06/18 @ approx. 11a Josephine was walking down the jet bridge on gate 21 when she felt sharp pains on the bottom of her heel through to her Achilles. This morning her lower left ankle became slightly swollen and she was hurting while putting pressure on heel while walking.,"[advised, josephine, calautti, informed, morning, left, foot, slightly, swollen, bottom, heel, pain, direct, cause, injury, unknown, employee, contributed, type, twisting, nature, approx, 11a, josephine, walking, jet, bridge, gate, felt, sharp, pains, bottom, heel, achilles, morning, lower, left, ankle, became, slightly, swollen, hurting, putting, pressure, heel, walking]","[0.016328685, 0.2256951, 0.033180457, 0.11531844, 0.04282044, -0.18142179, 0.11186939, 0.41607228, 0.0644237, -0.08212799, 0.0062495936, -0.25514024, 0.052961648, -0.098467685, -0.088590324, -0.14420919, 0.12463156, 0.036530126, 0.07345489, -0.010700663, -0.13663276, 0.060632195, 0.13355803, 0.049449347, 0.017990539, -0.011791407, -0.27324322, 0.043646775, -0.1259502, -0.15244682, 0.05282398, -0.14315903, 0.05819357, -0.046632156, -0.056390196, 0.12773165, 0.04330261, -0.24970125, -0.09465945, 0.07353329, -0.08012435, 0.07457813, -0.0437913, -0.11351191, 0.16442634, 0.20084937, 0.049714714, 0.004977336, -0.014987365, 0.123220876, 0.054138403, -0.011011182, -0.09537837, 0.049327083, -0.023159064, 0.18532114, 0.004077584, -0.08820557, 0.036875267, -0.005986343, -0.027765522, -0.07125113, 0.0019059335, 0.03743845, -0.01925724, 0.078131564, -0.044472132, -0.002971681, -0.09051495, -0.12545396, -0.030323375, 0.0686107, 0.23635921, -0.14763792, -0.03509579, 0.13046348, -0.24316925, 0.0781866, -0.070375636, 0.14739442, -0.15939394, -0.16309509, 0.06809446, 0.33392745, 0.04289276, 0.00052493694, -0.24496213, 0.04445139, 0.19028884, 0.1123167, 0.24528658, -0.11081952, 0.1233372, 0.00056087447, 0.08945822, 0.20474158, 0.19141296, -0.009350534, -0.04992808, 0.12021206, ...]"
495,Agent was assisting in lifting aisle chair with heavy passenger over the aircraft lip of door onto the jet bridge. She felt pain down her mid to lower back.,"[agent, assisting, lifting, aisle, chair, heavy, passenger, aircraft, lip, door, onto, jet, bridge, felt, pain, mid, lower, back]","[0.018408585, 0.25044417, 0.035839386, 0.12729527, 0.04721288, -0.2008638, 0.12484263, 0.4610085, 0.07113667, -0.09052149, 0.0064197374, -0.28270215, 0.058109805, -0.10907129, -0.09748259, -0.16043998, 0.13760701, 0.040821828, 0.081521414, -0.011674487, -0.15033413, 0.06863006, 0.14769606, 0.05587868, 0.020457035, -0.012854615, -0.30275092, 0.048281793, -0.13931437, -0.16981423, 0.05817873, -0.1592388, 0.06374463, -0.05253346, -0.062377255, 0.14066613, 0.04852417, -0.276407, -0.10487449, 0.08075166, -0.0882594, 0.08219226, -0.04864668, -0.1253697, 0.18129693, 0.22312692, 0.05512938, 0.006633375, -0.017259793, 0.13569397, 0.058851797, -0.012214874, -0.10572791, 0.05471304, -0.025892535, 0.20585449, 0.004279928, -0.09670791, 0.0411664, -0.005450506, -0.02984078, -0.07854408, 0.0021120664, 0.04155751, -0.021728322, 0.085840836, -0.0491499, -0.0041287984, -0.100466706, -0.13897525, -0.033119902, 0.076778725, 0.26216522, -0.16421126, -0.03905717, 0.14464219, -0.27002174, 0.086238615, -0.07821316, 0.1628882, -0.17730637, -0.18025939, 0.07630676, 0.37070873, 0.048024915, 0.00011938936, -0.27231008, 0.05044693, 0.21221653, 0.12390254, 0.27189428, -0.12190017, 0.13705061, -3.3140153e-05, 0.09853614, 0.22773719, 0.21240161, -0.01019749, -0.05449572, 0.13392751, ...]"
4,"Agent was assisting a passenger deplaning from remote ops spot 5C, he climbed up the steps and stopped at the second step to retrieve carry on item from passenger and he walked down the steps backwards. When he stepped down he felt his left ankle slightly roll but felt no pain. He continued working and clocked out at 2230. He woke up this morning (23NOV) and felt a sharp pain starting from his ankle bone shooting up his leg and his ankle was swollen as well. He is also limping as he walks","[agent, assisting, passenger, deplaning, remote, ops, spot, 5c, climbed, steps, stopped, second, step, retrieve, carry, item, passenger, walked, steps, backwards, stepped, felt, left, ankle, slightly, roll, felt, pain, continued, working, clocked, woke, morning, 23nov, felt, sharp, pain, starting, ankle, bone, shooting, leg, ankle, swollen, well, also, limping, walks]","[0.015928483, 0.21274167, 0.030344835, 0.10737581, 0.039439436, -0.1705585, 0.10599091, 0.39093864, 0.06071554, -0.07772164, 0.0055750185, -0.24023338, 0.050636843, -0.092864335, -0.08327067, -0.13493466, 0.11743128, 0.035261497, 0.06905873, -0.01022689, -0.12777908, 0.05748125, 0.12542081, 0.04717495, 0.01596261, -0.010630482, -0.25733674, 0.040324733, -0.1179388, -0.14379792, 0.049869023, -0.13518688, 0.054325182, -0.044130042, -0.053065863, 0.11923597, 0.040429354, -0.23491083, -0.08850951, 0.068782136, -0.07469316, 0.07037449, -0.041685518, -0.10684141, 0.15418434, 0.18870728, 0.04620371, 0.0048914473, -0.014651528, 0.115186565, 0.0500248, -0.010810094, -0.08900032, 0.046327483, -0.021726329, 0.17380825, 0.0031376856, -0.08199419, 0.03446861, -0.005147065, -0.024957277, -0.06687637, 0.001941849, 0.035208143, -0.018773176, 0.07372322, -0.0415528, -0.0042032357, -0.085401215, -0.11807943, -0.028426174, 0.06470798, 0.22239466, -0.13930188, -0.033197645, 0.12273723, -0.22922392, 0.07283851, -0.066055164, 0.13830309, -0.14985788, -0.1528843, 0.06392536, 0.3140196, 0.040795434, 0.0001531002, -0.23076008, 0.04242212, 0.1789225, 0.10582331, 0.23071913, -0.10469456, 0.11630662, 7.300104e-05, 0.0844925, 0.1929244, 0.17957298, -0.009614417, -0.0458864, 0.11288259, ...]"


In [None]:
#similar_words = model.corpus_total_words('ergonomics')

In [23]:
word_vectors = model.wv
centroid = np.average([word_vectors[w] for w in ['agent', 'assisting', 'passenger']],axis=0)
centroid                       

array([ 0.02135359,  0.27528015,  0.03755542,  0.1407682 ,  0.05171154,
       -0.22131716,  0.13771528,  0.5068462 ,  0.07737219, -0.09927314,
        0.00730698, -0.31211594,  0.0642892 , -0.12010213, -0.1076315 ,
       -0.17503841,  0.1524787 ,  0.04305016,  0.08934892, -0.01259414,
       -0.16573702,  0.07508815,  0.16334355,  0.0623648 ,  0.02150093,
       -0.01418434, -0.3343953 ,  0.05211757, -0.15306143, -0.187674  ,
        0.06369116, -0.17471845,  0.06993175, -0.05852129, -0.0680497 ,
        0.15505655,  0.05287779, -0.30352917, -0.11293057,  0.08768768,
       -0.09864753,  0.08945364, -0.05320227, -0.13870548,  0.19737919,
        0.24444766,  0.05795242,  0.00734502, -0.01826408,  0.14981748,
        0.06492586, -0.01516534, -0.11391661,  0.05802617, -0.0274608 ,
        0.22703218,  0.00598049, -0.10588481,  0.04385154, -0.0051788 ,
       -0.03304135, -0.08638885,  0.00303132,  0.04693333, -0.02352046,
        0.09493224, -0.05498859, -0.00389691, -0.11005169, -0.15

In [28]:
# for label in label_names:
#     label_vectors = model[label]
# label_vectors

In [25]:
#print(model.wv['remote'])

In [29]:
label_names = ['ergonomics','sliptripfall','other']
vectorized_lbls = []
for label in label_names:
    if label in model.wv:
        #vectorized_lbls.append(model.wv[label])
        print(label)
        print(vectorized_lbls)

In [30]:
def embed(tokens, nlp):
    """Return the centroid of the embeddings for the given tokens.

    Out-of-vocabulary tokens are cast aside. Stop words are also
    discarded. An array of 0s is returned if none of the tokens
    are valid.

    """

    lexemes = (nlp.vocab[token] for token in tokens)

    vectors = np.asarray([
        lexeme.vector
        for lexeme in lexemes
        if lexeme.has_vector
        and not lexeme.is_stop
        and len(lexeme.text) > 1
    ])

    if len(vectors) > 0:
        centroid = vectors.mean(axis=0)
    else:
        width = nlp.meta['vectors']['width']  # typically 300
        centroid = np.zeros(width)

    return centroid

In [36]:
label_names = [['ergonomics'], ['sliptripfall'],['other']]
label_vectors = np.asarray([
    embed(label, nlp)
    for label in label_names
])
label_vectors.shape

(3, 300)

In [38]:
for label in label_names:
    print(label)

['ergonomics']
['sliptripfall']
['other']


In [37]:
label_vectors

array([[-3.23149991,  1.39170003,  0.61522001,  1.24890006,  1.30079997,
         0.34608001, -0.42649999,  2.47309995, -1.6142    ,  0.17183   ,
         3.1861999 ,  1.09389997, -5.05770016,  0.65094   , -0.44475999,
         4.7196002 ,  4.15439987,  0.76629001, -3.30240011, -1.20410001,
        -1.37010002,  0.41666999,  0.16638   ,  1.2306    , -0.95186001,
        -1.2507    , -1.23740005, -0.89238   , -1.50530005,  0.26585999,
        -1.1013    , -1.23699999,  2.91829991, -1.77740002, -0.085541  ,
         0.072318  ,  2.81509995, -0.15484001,  0.72745001,  1.45640004,
         2.03069997,  0.96011001, -1.02540004, -1.34809995,  0.19598   ,
        -0.19997001, -1.18040001, -2.09579992, -0.44082001, -5.31619978,
         1.90470004, -0.12857001, -0.35438001, -1.43079996,  0.14883   ,
         1.15690005, -1.73619998,  3.42499995, -1.4993    ,  0.93362999,
         0.69845998,  0.98834997, -0.26402   , -1.41960001,  1.2902    ,
         1.31850004, -0.86610001, -4.1704998 ,  0.9

In [40]:
#print([item for current in label_names for item in current.split(',')])

In [41]:
label_names

[['ergonomics'], ['sliptripfall'], ['other']]

In [42]:
label_names = ['ergonomics','sliptripfall','other']
label_vectors = np.asarray([embed(label.split(' '), nlp) for label in label_names.str])
label_vectors.shape

AttributeError: 'list' object has no attribute 'str'

In [None]:
label_vectors

In [None]:
label_vectors

In [None]:
from sklearn.neighbors import NearestNeighbors
#neighbors=NearestNeighbors()

In [None]:
neigh = NearestNeighbors(n_neighbors=1)
neigh.fit(label_vectors)


In [None]:
closest_label = neigh.kneighbors(vectorized_docs, return_distance=False)[5, 0]

In [None]:
label_names[closest_label]

In [None]:
closest_label

In [None]:
vectorized_lbs = vectorize(label_names, model=model)
len(vectorized_lbs), len(vectorized_lbs[0])

In [None]:
vectorized_lbs[0]