In [15]:
from sentence_transformers import SentenceTransformer
from numpy import dot
from math import sqrt
import json

def get_tweets(path="tweets-utf-8.json"):
    """Load tweet texts from a file holding one JSON object per line.

    Every non-blank line is parsed with ``json.loads`` and the value stored
    under its "text" key is collected. The number of collected texts is
    printed before returning.

    Args:
        path: Location of the UTF-8 encoded file to read. Defaults to
            "tweets-utf-8.json", resolved against the working directory.

    Returns:
        A list with the "text" value of every record, in file order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a parsed record has no "text" key.
    """
    tweets = []

    # read via utf-8; the context manager closes the handle even when a
    # line fails to parse.
    with open(path, "r", encoding="utf-8") as file:
        for raw_line in file:
            # A blank line carries no record, so skip it instead of letting
            # json.loads fail on it.
            if not raw_line.strip():
                continue
            tweets.append(json.loads(raw_line)["text"])

    print("Read tweets: " + str(len(tweets)))
    return tweets

def mag(vec):
    """Return the Euclidean length of ``vec``.

    The length is the square root of the sum of the squared components;
    an empty ``vec`` yields 0.0.
    """
    squares = [component ** 2 for component in vec]
    return sqrt(sum(squares))

def truncate_25(arr):
    """Return the first 25 items of ``arr`` as a new list.

    Args:
        arr: A sliceable sequence (for example a list).

    Returns:
        A new list holding at most 25 leading items of ``arr``. When ``arr``
        has fewer than 25 items, all of them are returned.
    """
    # Slicing clamps to the sequence length, so short inputs no longer raise
    # IndexError the way indexing 0..24 one by one did.
    return list(arr[:25])

def sort_by_sim(query_embedding,document_embeddings,documents):
    """Rank documents by cosine similarity to a query embedding.

    For each document the similarity is the dot product of the query and
    document embeddings divided by the product of their magnitudes.

    Args:
        query_embedding: Vector for the query.
        document_embeddings: Indexable collection where position ``i`` holds
            the vector for ``documents[i]``.
        documents: The documents to rank.

    Returns:
        A list of ``(similarity, document)`` tuples ordered from highest to
        lowest similarity. Documents whose magnitude product with the query
        is zero are left out, because the similarity is undefined for them.
    """
    # The query magnitude is the same for every document, so compute it once
    # instead of once per loop iteration.
    query_norm = mag(query_embedding)

    result = []
    for i, document in enumerate(documents):
        embedding = document_embeddings[i]
        normprod = query_norm * mag(embedding)

        # if normalization is zero, it is impossible to divide, ignore it
        if normprod == 0:
            continue

        sim = dot(query_embedding, embedding) / normprod
        result.append((sim, document))

    # Highest similarity first; the sort is stable, so ties keep input order.
    result.sort(key=lambda pair: pair[0], reverse=True)
    return result
    
def glove_top25(query,documents):
    """Rank ``documents`` against ``query`` with the GloVe sentence model.

    Loads the 'sentence-transformers/average_word_embeddings_glove.840B.300d'
    model, encodes the query and the documents with it, orders the documents
    with ``sort_by_sim`` and returns that ranking passed through
    ``truncate_25``.
    """
    encoder = SentenceTransformer('sentence-transformers/average_word_embeddings_glove.840B.300d')
    # Encode the query first, then the whole corpus, with the same model.
    query_vector = encoder.encode(query)
    document_vectors = encoder.encode(documents)
    ranked = sort_by_sim(query_vector, document_vectors, documents)
    return truncate_25(ranked)
    

def minilm_top25(query,documents):
    """Rank ``documents`` against ``query`` with the MiniLM sentence model.

    Loads the 'sentence-transformers/all-MiniLM-L6-v2' model, encodes the
    query and the documents with it, orders the documents with
    ``sort_by_sim`` and returns that ranking passed through ``truncate_25``.
    """
    encoder = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
    # Encode the query first, then the whole corpus, with the same model.
    query_vector = encoder.encode(query)
    document_vectors = encoder.encode(documents)
    ranked = sort_by_sim(query_vector, document_vectors, documents)
    return truncate_25(ranked)
        
## Test Code
# Load the tweet texts once and reuse them for both models.
tweets = get_tweets()

# Print the ranked results of the GloVe model for the query.
print("**************GLOVE*****************")
for p in glove_top25("I am looking for a job.", tweets):
    print(p)

# Print the ranked results of the MiniLM model for the same query.
print("**************MINILM*****************")
for p in minilm_top25("I am looking for a job.", tweets):
    print(p)


Read tweets: 110474
**************GLOVE*****************
(np.float32(0.8553331), 'I love my job. 😂😐😭')
(np.float32(0.7629978), '@SusanSherring You guys are doing a great job.')
(np.float32(0.7313472), '@realDonaldTrump my grandpa wanted to say good job. So good job. From Phil')
(np.float32(0.7054985), '@JoannaLidback @ChittendenNate @cabotcheese Good job. Other than you look like you wish you were somewhere else...')
(np.float32(0.6776877), 'Getting ready for another day on the job. @tutordoctor https://t.co/nlDTfKUN8J')
(np.float32(0.66975015), 'Yes I did! I liked it a lot and looking forward to more!! https://t.co/foQccgQjFX')
(np.float32(0.65675926), "@carbonfixated I can't imagine anyone having the patience for it but I bet she'd do an excellent job.")
(np.float32(0.64907837), '@EdBrown19 @ThatFishCreigh @Fffeisty Wow Ed, nice job.')
(np.float32(0.647988), '@anjacks0n Perish the thought, seriously! Working on this stuff is the favourite part of my job. :)')
(np.float32(0.638776), '

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


(np.float32(0.81600475), 'I need a job')
(np.float32(0.6099426), "Does anyone know of anywhere that's hiring 🆘🙂")
(np.float32(0.5766299), 'Can you recommend anyone for this #job? Retail Clerk (Part-Time) - 6001 Highland Road, Whie Lake, MI 48836 - https://t.co/aZkkb2v11x')
(np.float32(0.56360316), "If you're looking for work in #Shelby, MI, check out this #job: https://t.co/0wP0eSzAS7 #cfgjobs #Hiring")
(np.float32(0.56006175), "If you're looking for work in #Shelby, MI, check out this #job: https://t.co/SRBQ9IPq97 #Retail #Hiring")
(np.float32(0.55992186), "If you're looking for work in #Shelby, MI, check out this #job: https://t.co/Js7LQEpgxX #Hiring")
(np.float32(0.55390894), 'Can you recommend anyone for this #job? Receiving Clerk - 10 Mile Rd NE, Rockford MI - https://t.co/8ozG1BTp5d #SupplyChain #Rockford, MI')
(np.float32(0.5424327), "If you're looking for work in #UNION, MI, check out this #job: https://t.co/PfYe1OaG20 #Retail #Hiring #CareerArc https://t.co/kDUxbtb7Gh")
(np.fl