In [None]:
#!/usr/bin/env python3
import sys
import unicodedata

In [None]:
import numpy as np
import pandas as pd
from collections import defaultdict, Counter
import pickle

In [525]:
import emoji as em

In [None]:
from tqdm.notebook import tqdm

In [None]:
supported_models = (
    pd.DataFrame(TextEmbedding.list_supported_models())
    .sort_values("size_in_GB")
    .drop(columns="sources")
    .reset_index(drop=True)
)
supported_models

	

In [None]:
supported_models.loc[9]['model']

In [510]:
with open('emoji2dict.pkl', 'rb') as file:
    emoji_dict = pickle.load(file)

In [511]:
for emoji in emoji_dict:
    emoji_dict[emoji]['emoji_char'] = emoji

In [512]:
emoji_dict[emoji]['emoji_char']

'🇦🇽'

In [513]:
from qdrant_client import models, QdrantClient
from sentence_transformers import SentenceTransformer


In [514]:
sentence_encoder = SentenceTransformer('paraphrase-multilingual-MiniLM-L12-v2')



In [515]:
vector_dict = {}

for emoji in tqdm(emoji_dict):
    vector_dict[emoji] = sentence_encoder.encode(
        emoji_dict[emoji]["Description"] + 
        ' '.join(str(s) for s in emoji_dict[emoji]["Semantic_Tags"])
    )

  0%|          | 0/5034 [00:00<?, ?it/s]

In [516]:
client = QdrantClient(":memory:")

In [517]:
client.recreate_collection(
    collection_name="EMOJIS",
    vectors_config=models.VectorParams(
        size=sentence_encoder.get_sentence_embedding_dimension(),
        distance=models.Distance.COSINE,
    ),
)


  client.recreate_collection(


True

In [519]:
client.upload_points(
    collection_name="EMOJIS",
    points=[
        models.PointStruct(
            id=idx, 
            vector=vector_dict[emoji].tolist(),
            payload=emoji_dict[emoji]
        )
        for idx, emoji in enumerate(emoji_dict)
    ],
)

In [520]:
emoji_dict['🧬']

{'Emoji': '🧬',
 'Description': 'This emoji represents DNA, the genetic material found in living organisms, symbolizing the blueprint of life and the basis of heredity and evolution.',
 'Semantic_Tags': ['DNA',
  'genetics',
  'heredity',
  'evolution',
  'biology',
  'molecule',
  'science'],
 'emoji_char': '🧬'}

In [526]:
def return_simialr_emojis(query):
    hits = client.search(
        collection_name="EMOJIS",
        query_vector=sentence_encoder.encode(query).tolist(),
        limit=40,
    )

    hit_emojis = set()

    for hit in hits:
        emoji_char = hit.payload['emoji_char']
        score = hit.score

        _ord = ''
        for c in emoji_char:
            _ord += str(ord(c)) + ' '

        _spec = len(emoji_char) + 3
        unicode_desc = ' '.join(em.demojize(emoji_char).split('_'))[1:-1].upper()

        if emoji_char not in hit_emojis: 
            print(f"{emoji_char:<{_spec}}", end='')
            print(f"{score:<7.3f}", end= '')
            print(f"{unicode_desc:<55}")
                #emoji_char + '\t' +  , end= '\n') #, "score:", hit.score
            #display(Markdown('***{}***'.format(emoji_char)))
            
        hit_emojis.add(emoji_char)

    #     for i in today_links:
    # display(Markdown('*{}*'.format(i[0], i[1])))

        


In [527]:
return_simialr_emojis(
    "DNA Biologie Labor Forschung"
)

🧬   0.484  DNA                                                    
👩🏾‍🔬   0.314  WOMAN SCIENTIST MEDIUM-DARK SKIN TONE                  
🅾   0.310  O BUTTON (BLOOD TYPE)                                  
🅾️   0.310  O BUTTON (BLOOD TYPE)                                  
👩‍🔬   0.306  WOMAN SCIENTIST                                        
🧪   0.299  TEST TUBE                                              
👩🏿‍🔬   0.298  WOMAN SCIENTIST DARK SKIN TONE                         
👨‍🔬   0.292  MAN SCIENTIST                                          
👩🏽‍🔬   0.289  WOMAN SCIENTIST MEDIUM SKIN TONE                       
🅱   0.289  B BUTTON (BLOOD TYPE)                                  
👨🏾‍🔬   0.289  MAN SCIENTIST MEDIUM-DARK SKIN TONE                    
🅰   0.287  A BUTTON (BLOOD TYPE)                                  
👩🏻‍🔬   0.285  WOMAN SCIENTIST LIGHT SKIN TONE                        
🧑🏽‍🔬   0.284  SCIENTIST MEDIUM SKIN TONE                             
🆎   0.279  AB BUTTON (BLOOD TYPE)      

In [528]:
return_simialr_emojis(
    "جينات بيولوجيا بحث"
)

🧬   0.580  DNA                                                    
🦠   0.413  MICROBE                                                
👨‍🔬   0.338  MAN SCIENTIST                                          
👩🏾‍🔬   0.332  WOMAN SCIENTIST MEDIUM-DARK SKIN TONE                  
🧑‍🔬   0.325  SCIENTIST                                              
👩‍🔬   0.322  WOMAN SCIENTIST                                        
🧑🏽‍🔬   0.311  SCIENTIST MEDIUM SKIN TONE                             
🅾   0.304  O BUTTON (BLOOD TYPE)                                  
🅾️   0.304  O BUTTON (BLOOD TYPE)                                  
👩🏿‍🔬   0.303  WOMAN SCIENTIST DARK SKIN TONE                         
🧑🏾‍🔬   0.298  SCIENTIST MEDIUM-DARK SKIN TONE                        
🧑🏿‍🔬   0.297  SCIENTIST DARK SKIN TONE                               
🅱   0.295  B BUTTON (BLOOD TYPE)                                  
🧪   0.291  TEST TUBE                                              
👨🏿‍🔬   0.290  MAN SCIENTIST DARK SKIN TO

In [529]:
return_simialr_emojis("علوم الفلك  و الفضاء")

🪐   0.562  RINGED PLANET                                          
🌌   0.518  MILKY WAY                                              
👨‍🚀   0.499  MAN ASTRONAUT                                          
☄   0.489  COMET                                                  
🔭   0.480  TELESCOPE                                              
☄️   0.473  COMET                                                  
👩‍🚀   0.458  WOMAN ASTRONAUT                                        
🧑‍🚀   0.440  ASTRONAUT                                              
🧑🏻‍🚀   0.437  ASTRONAUT LIGHT SKIN TONE                              
🧑🏽‍🚀   0.422  ASTRONAUT MEDIUM SKIN TONE                             
👨🏽‍🚀   0.421  MAN ASTRONAUT MEDIUM SKIN TONE                         
🌃   0.420  NIGHT WITH STARS                                       
👩🏻‍🚀   0.417  WOMAN ASTRONAUT LIGHT SKIN TONE                        
🛰️   0.416  SATELLITE                                              
👩🏿‍🚀   0.413  WOMAN ASTRONAUT DARK SKIN TO

In [530]:
return_simialr_emojis("science astronomy space")

🪐   0.479  RINGED PLANET                                          
🔭   0.469  TELESCOPE                                              
🌌   0.429  MILKY WAY                                              
☄️   0.402  COMET                                                  
☄   0.397  COMET                                                  
👨‍🚀   0.378  MAN ASTRONAUT                                          
🔬   0.349  MICROSCOPE                                             
🧑‍🚀   0.344  ASTRONAUT                                              
🛰️   0.342  SATELLITE                                              
🛰   0.341  SATELLITE                                              
🧑🏻‍🚀   0.341  ASTRONAUT LIGHT SKIN TONE                              
👩🏿‍🚀   0.339  WOMAN ASTRONAUT DARK SKIN TONE                         
👩‍🚀   0.339  WOMAN ASTRONAUT                                        
🌃   0.332  NIGHT WITH STARS                                       
🧑🏾‍🚀   0.329  ASTRONAUT MEDIUM-DARK SKIN TONE   

In [531]:
return_simialr_emojis("snow winter sport")

🎿   0.668  SKIS                                                   
🏂🏽   0.658  SNOWBOARDER MEDIUM SKIN TONE                           
⛷   0.630  SKIER                                                  
⛷️   0.623  SKIER                                                  
🏂🏼   0.618  SNOWBOARDER MEDIUM-LIGHT SKIN TONE                     
🛷   0.615  SLED                                                   
🏂   0.606  SNOWBOARDER                                            
🏂🏿   0.602  SNOWBOARDER DARK SKIN TONE                             
🏂🏾   0.591  SNOWBOARDER MEDIUM-DARK SKIN TONE                      
🏂🏻   0.585  SNOWBOARDER LIGHT SKIN TONE                            
☃️   0.581  SNOWMAN                                                
☃   0.581  SNOWMAN                                                
❄   0.570  SNOWFLAKE                                              
⛸   0.555  ICE SKATE                                              
⛄   0.533  SNOWMAN WITHOUT SNOW                        

In [532]:
return_simialr_emojis("virus health pandemic")
# mexican asian french italina food

😷   0.297  FACE WITH MEDICAL MASK                                 
🤒   0.269  FACE WITH THERMOMETER                                  
🌪   0.262  TORNADO                                                
🌪️   0.262  TORNADO                                                
🌩   0.258  CLOUD WITH LIGHTNING                                   
🌩️   0.258  CLOUD WITH LIGHTNING                                   
🦠   0.247  MICROBE                                                
⛈️   0.234  CLOUD WITH LIGHTNING AND RAIN                          
🦟   0.231  MOSQUITO                                               
⛈   0.230  CLOUD WITH LIGHTNING AND RAIN                          
🤮   0.222  FACE VOMITING                                          
🌀   0.210  CYCLONE                                                
🐅   0.206  TIGER                                                  
🦹🏿   0.206  SUPERVILLAIN DARK SKIN TONE                            
💉   0.203  SYRINGE                                        

In [533]:
return_simialr_emojis("new beginnings")
# ambition

🌅   0.443  SUNRISE                                                
🥚   0.429  EGG                                                    
🌄   0.416  SUNRISE OVER MOUNTAINS                                 
👶   0.415  BABY                                                   
🌚   0.415  NEW MOON FACE                                          
🌱   0.415  SEEDLING                                               
🌑   0.388  NEW MOON                                               
🔰   0.384  JAPANESE SYMBOL FOR BEGINNER                           
🐣   0.377  HATCHING CHICK                                         
🚪   0.359  DOOR                                                   
🆕   0.332  NEW BUTTON                                             
🌼   0.332  BLOSSOM                                                
👶🏽   0.329  BABY MEDIUM SKIN TONE                                  
👶🏼   0.325  BABY MEDIUM-LIGHT SKIN TONE                            
👶🏻   0.324  BABY LIGHT SKIN TONE                            

In [561]:
return_simialr_emojis(" Katze")
# ambition

🐈   0.602  CAT                                                    
🐆   0.592  LEOPARD                                                
😸   0.588  GRINNING CAT WITH SMILING EYES                         
😼   0.581  CAT WITH WRY SMILE                                     
🐅   0.578  TIGER                                                  
🐱   0.568  CAT FACE                                               
😻   0.564  SMILING CAT WITH HEART-EYES                            
🐈‍⬛   0.559  BLACK CAT                                              
😽   0.555  KISSING CAT                                            
😺   0.552  GRINNING CAT                                           
😹   0.522  CAT WITH TEARS OF JOY                                  
😾   0.508  POUTING CAT                                            
😿   0.486  CRYING CAT                                             
🙀   0.476  WEARY CAT                                              
🐹   0.393  HAMSTER                                          

In [542]:
return_simialr_emojis("ambition growth success")

🚀   0.356  ROCKET                                                 
📈   0.355  CHART INCREASING                                       
🌱   0.337  SEEDLING                                               
💹   0.336  CHART INCREASING WITH YEN                              
🏗️   0.307  BUILDING CONSTRUCTION                                  
💸   0.286  MONEY WITH WINGS                                       
🧗   0.284  PERSON CLIMBING                                        
🚧   0.281  CONSTRUCTION                                           
🎯   0.272  BULLSEYE                                               
↗️   0.271  UP-RIGHT ARROW                                         
🥅   0.271  GOAL NET                                               
🏗   0.270  BUILDING CONSTRUCTION                                  
↗   0.265  UP-RIGHT ARROW                                         
🌟   0.252  GLOWING STAR                                           
💯   0.244  HUNDRED POINTS                                   

In [566]:
return_simialr_emojis("hippo")
# 

🦛   0.577  HIPPOPOTAMUS                                           
🐿️   0.410  CHIPMUNK                                               
🦥   0.405  SLOTH                                                  
🐫   0.400  TWO-HUMP CAMEL                                         
🐿   0.389  CHIPMUNK                                               
♑   0.389  CAPRICORN                                              
🐏   0.384  RAM                                                    
🐪   0.376  CAMEL                                                  
🦓   0.373  ZEBRA                                                  
🦕   0.371  SAUROPOD                                               
🐸   0.370  FROG                                                   
🇸🇹   0.366  SÃO TOMÉ & PRÍNCIPE                                    
🪬   0.354  HAMSA                                                  
🦔   0.351  HEDGEHOG                                               
🇮🇷   0.350  IRAN                                            