# Imports

In [1]:
import pandas as pd
import re


import struct
import sys

from custom_functions import *
import _pickle as cPickle
import pickle as pkl
# To access Google Drive
# from google.colab import drive, files
# drive.mount('/content/gdrive')


from gensim.models import KeyedVectors
from gensim.models.wrappers import FastText

import pickle
import numpy as np
import fasttext

# Utils

In [9]:
def save_fasttext_embeddings(model, output_file):
    file = open(output_file, "w",encoding='utf')
    embedding_dict = {}
    
    words = model.get_words()
    print('Input Vocab:\t',str(len(words)), "\nModel Dimensions: ",str(model.get_dimension()))
    cnt = 0
    for w in words:
        v = model.get_word_vector(w)
        embedding_dict[w] = v
        
        vstr = ""
        for vi in v:
            vstr += " " + str(vi)
        try:
            row = w + vstr + "\n"
            file.write(row)
            cnt = cnt + 1
        except IOError as e:
            if e.errno == errno.EPIPE:
                pass
            
    file.close()
    
    with open(output_file.replace(".vec", "") + ".pkl", 'wb') as handle:
        pickle.dump(embedding_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)
        
    print('Total words processed: ',cnt)

def clean_text(text):
    
    # Eliminate unprintable characters
    text = ''.join(x for x in text if x.isprintable())

    # Eliminate urls
    text = re.sub(r'https?://\S+|www\.\S+', '', text)

    # Eliminate html elements
    text = re.sub(r'<.*?>', '', text)

    # Eliminate punctuations
    text = re.sub('[!#?,.:";-@#$%^&*_~<>()-]', '', text)
  
    # Eliminating hashtags (keeping original word)
    text = re.sub(r'#', '', text) 
      
    # Eliminating special characters like retweet text "RT" 
    text = re.sub(r'(\\x(.)*)', '',text)
    text = re.sub(r'\\n|\\t|\\n\\n', ' ', text)
    text = re.sub(r"b'RT|b'|b RT|b\"RT|b", "", text)
    text = re.sub("[@#$%^&*)(}{|/><=+=_:\"\\\\]+"," ",text).strip()

    return text

# def get_word_embeddings(filepath, vocab, embedding_dimension):
#   word_embeddings = np.zeros((len(vocab) + 1, embedding_dimension))
#   embedding_file = open(filepath, "r", encoding = "utf8")
#   count = 0
#   exep = 0
#   for line in embedding_file:
#     try:
#         line = line.split()
#         word = line[0]
#         if word in vocab:
#           word_vector = np.asarray(line[1:], dtype = "float32")
#           if len(word_vector) == embedding_dimension:
#             word_embeddings[vocab[word]] = word_vector
#           else:
#             print('\nVector size does not match with embedding_dimension:\t', word_vector)
#           count+=1
   
#     except ValueError as e:
#         print("Exception:",e,"\n\n",line)
#         exep += 1
#         continue
  
#   print("Total word embeddings read: {}\n".format(count),exep)
#   embedding_file.close()
#   return word_embeddings

def convert_bin_to_vec(FILE_NAME):
  
    # coding: utf-8

    MAX_VECTORS = 2000000 # This script takes a lot of RAM (>2GB for 200K vectors), if you want to use the full 3M embeddings then you probably need to insert the vectors into some kind of database
    FLOAT_SIZE = 4 # 32bit float

    vectors = dict()

    with open(FILE_NAME, 'rb') as f:

        c = None

        # read the header
        header = ""
        while c != "\n":
            c = f.read(1)
            header += c

        total_num_vectors, vector_len = (int(x) for x in header.split())
        num_vectors = min(MAX_VECTORS, total_num_vectors)

        print ("Number of vectors: %d/%d" % (num_vectors, total_num_vectors))
        print ("Vector size: %d" % vector_len)

        while len(vectors) < num_vectors:

            word = ""        
            while True:
                c = f.read(1)
                if c == " ":
                    break
                word += c

            binary_vector = f.read(FLOAT_SIZE * vector_len)
            vectors[word] = [ struct.unpack_from('f', binary_vector, i)[0] 
                              for i in xrange(0, len(binary_vector), FLOAT_SIZE) ]

            sys.stdout.write("%d%%\r" % (len(vectors) / num_vectors * 100))
            sys.stdout.flush()

    print("\nSaving...")
    with open(FILE_NAME[:-3] + "pcl", 'wb') as f:
        cPickle.dump(vectors, f, cPickle.HIGHEST_PROTOCOL)
        
        
def get_word_embeddings(filepath, vocab, ft = False, save_embeddings = False):
  
  binary = False
  embedding_dimension = 0
  embedding_dict = {}

  if ft == True:
    word_vectors = fasttext.load_model(filepath)
    embedding_dimension = len(get_word_vector(list(word_vectors.get_words())[0]))
    print("File loaded. Total Words: {},\t Embedding Dimension: {}".format(len(word_vectors.get_words()), embedding_dimension))

    for word in vocab:
      try:
        wv = word_vectors.get_word_vector[word]
        embedding_dict[word] = wv

      except Exception as e:
        print("Exception reading vector for word:  {}, \n Exception : {} \n".format(word, e))
        continue

    print("Total embeddings found: {}\n\n".format(len(embedding_dict)))

  else:
    if ".bin" in filepath :
      print("Processing binary file")
      binary = True
      
    print("Loading vectors from: {} \n".format(filepath))
    word_vectors =  KeyedVectors.load_word2vec_format(filepath,binary=binary)

    embedding_dimension = len(list(word_vectors.vectors)[0])
    print("File loaded. Total Words: {},\t Embedding Dimension: {}".format(len(word_vectors.vocab), embedding_dimension))
  
  for word in vocab:
    try:
      wv = word_vectors.wv[word]
      embedding_dict[word] = wv

    except Exception as e:
      print("Exception reading vector for word:  {}, \n Exception : {} \n".format(word, e))
      continue

  print("Total embeddings found: {}\n\n".format(len(embedding_dict)))

  if save_embeddings == True:
    output_file = filepath.split('.')[0] + '.pkl'
    with open(output_file, 'wb') as handle:
        pickle.dump(embedding_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)
    print("Word embeddings saved to {} \n\n".format(output_file))

  return embedding_dict

# Load Dataset

In [3]:
data = pd.read_csv("../Arabic Sentiment Identification/dataset/ASI_preprocessed.csv",usecols = ["tweet_preprocessed","sarcasm","sentiment"])
data

Unnamed: 0,sarcasm,sentiment,tweet_preprocessed
0,False,NEU,""" د . # محمود _ العلايلي : أرى أن الفريق # أحم..."
1,False,NEU,""" مع فيدرر يا آجا والكبار وجه مبتسم بعيون قلب ..."
2,True,NEG,“ الداعون لمبدأ الاختلاط بين الجنسين ؛ كالداعي...
3,True,NEG,[بريد] [مستخدم] [مستخدم] [مستخدم] [مستخدم] [مس...
4,False,NEU,""" قل شرق حلب ولا تقل حلب الشرقية . . . . وقل غ..."
...,...,...,...
12532,True,NEG,هلأ صاير انت يا # فلعوط بدك تعطي محاضرات ع توي...
12533,False,NEU,""" لا اله الا اللهقلب بنفسجي # أيفون _ البروفيس..."
12534,False,NEU,""" RT [مستخدم] : # أردوغان : إذا كان المرتكب غي..."
12535,False,POS,RT [مستخدم] : هاري بوتر ؟ نظارات قبعة عالية [ر...


In [8]:
file1 = open("arabic_data_preprocessed.txt","w",encoding="utf-8")
cnt = 0
for tweet in data.tweet_preprocessed.values.tolist():
    try:
      cnt += 1
      file1.write(tweet + '\n')
    except:
      print(tweet)
print(cnt)
file1.close()

12537


# Word2Vec

# Glove

# FastText

In [1]:
!pip install fasttext



In [3]:
import fasttext
import fasttext.util

In [4]:
# Path to input text file
input_file = "../Arabic Sentiment Identification/dataset/arabic_data_preprocessed.txt"

# Model Hyperparameters
model_type = 'skipgram'
embedding_dimensions = 300
minCount = 1
neg = 20 ,
epochs = 50

In [6]:
# Train model
model= fasttext.train_unsupervised(input = input_file , model = model_type, minCount = minCount, dim = embedding_dimensions, epoch = epochs)
print("Model Trained. Total tokens {}".format(len(model.get_words())))

ValueError: Unrecognized model name

In [10]:
# Save embeddings to bin file
output_file = "../Arabic Sentiment Identification/word embeddings/fasttext_model-{}_dim-{}" + ".vec".format(model_type, embedding_dimensions)
save_fasttext_embeddings(model = model_type, output_file = output_file)
# model.save_model(output_file + ".bin")

Input Vocab:	 48696 
Model Dimensions:  300
Total words processed:  48696





# Bert

# Mazajak

In [18]:
from gensim.models import KeyedVectors
from gensim.models.wrappers import FastText

import pickle
import numpy as np
import fasttext

# def get_word_embeddings(filepath, vocab, ft = False, save_embeddings = False):
  
#   binary = False
#   embedding_dimension = 0
#   embedding_dict = {}

#   if ft == True:
#     word_vectors = fasttext.load_model(filepath)
#     embedding_dimension = len(get_word_vector(list(word_vectors.get_words())[0]))
#     print("File loaded. Total Words: {},\t Embedding Dimension: {}".format(len(word_vectors.get_words()), embedding_dimension))

#     for word in vocab:
#       try:
#         wv = word_vectors.get_word_vector[word]
#         embedding_dict[word] = wv

#       except Exception as e:
#         print("Exception reading vector for word:  {}, \n Exception : {} \n".format(word, e))
#         continue

#     print("Total embeddings found: {}\n\n".format(len(embedding_dict)))

#   else:
#     if ".bin" in filepath :
#       print("Processing binary file")
#       binary = True
      
#     print("Loading vectors from: {} \n".format(filepath))
#     word_vectors =  KeyedVectors.load_word2vec_format(filepath,binary=binary)

#     embedding_dimension = len(list(word_vectors.vectors)[0])
#     print("File loaded. Total Words: {},\t Embedding Dimension: {}".format(len(word_vectors.vocab), embedding_dimension))
  
#   for word in vocab:
#     try:
#       wv = word_vectors.wv[word]
#       embedding_dict[word] = wv

#     except Exception as e:
#       print("Exception reading vector for word:  {}, \n Exception : {} \n".format(word, e))
#       continue

#   print("Total embeddings found: {}\n\n".format(len(embedding_dict)))

#   if save_embeddings == True:
#     output_file = filepath.split('.')[2] + '.pickle'
#     with open(output_file, 'wb') as handle:
#         pickle.dump(embedding_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)
#     print("Word embeddings saved to {} \n\n".format(output_file))

#   return embedding_dict

def get_embedding_matrix(embedding_dict, embedding_dimension):

  vocab_size = len(embedding_dict.keys()) + 1
  embedding_matrix = np.zeros((vocab_size, embedding_dimension))

  for i, word in enumerate(embedding_dict):
    try:
      embedding_matrix[i] = embedding_dict[word]
    except Exception as e:
      print("Exception reading vector for word:  {}, \n Exception : {} \n".format(word, e))
      continue

  print("Total words processed: {}\n\n".format(len(embedding_matrix)))
  return embedding_matrix

In [19]:
# filepath = "/content/gdrive/My Drive/Shared Tasks/Arabic Sentiment Identification shared task 2021/Word Embeddings/fasttext_arabic.vec"
filepath = "../Arabic Sentiment Identification/word embeddings/mazajak_100.bin"
data = pd.read_csv("../Arabic Sentiment Identification/dataset/ASI_training.csv")
data.dropna()

corpus = data.tweet.values.tolist()

tokenizer, x, y = tokenize_text(corpus, corpus, corpus)
vocab = tokenizer.word_index
print("Total Vocab: ",len(vocab))  

Total Vocab:  62336


In [20]:
embedding_dict = get_word_embeddings(filepath, vocab)

Processing binary file
Loading vectors from: ../Arabic Sentiment Identification/word embeddings/mazajak_100.bin 



UnicodeDecodeError: 'utf-8' codec can't decode byte 0xd9 in position 58: unexpected end of data

In [None]:
embeddings.shap

(52264, 300)

In [24]:
mazajak = open(filepath, "r",encoding = "latin-1")

In [26]:
embeddings = mazajak.readlines()

In [None]:
len(embeddings)

In [None]:
for embedding in embeddings:
    print(type(embedding))
    break

In [23]:
from gensim.models.keyedvectors import KeyedVectors

models = KeyedVectors.load_word2vec_format("../Arabic Sentiment Identification/word embeddings/fasttext_arabic.txt")

KeyboardInterrupt: 

In [29]:
embed = {}
with open("../Arabic Sentiment Identification/word embeddings/Mazajak100m.pkl", "rb") as f:
    embed = pkl.load(f)

In [30]:
len(embed)

83721

In [22]:
tokens = pd.read_csv("../Arabic Sentiment Identification/dataset/vocab.csv", usecols=["token"])
tokens = [str(x) for x in tokens.token.values.tolist()]
vocab = {x:y for y,x in enumerate(tokens)}
print(len(vocab))

99442


In [26]:
embeddings_file = "../Arabic Sentiment Identification/word embeddings/fasttext_arabic.vec"
embedding_dict = get_word_embeddings(embeddings_file, vocab, save_embeddings=True)

Loading vectors from: ../Arabic Sentiment Identification/word embeddings/fasttext_arabic.vec 

File loaded. Total Words: 610977,	 Embedding Dimension: 300
Exception reading vector for word:  [بريد], 
 Exception : "word '[بريد]' not in vocabulary" 

Exception reading vector for word:  [مستخدم], 
 Exception : "word '[مستخدم]' not in vocabulary" 

Exception reading vector for word:  هههههه, 
 Exception : "word 'هههههه' not in vocabulary" 

Exception reading vector for word:  2006, 
 Exception : "word '2006' not in vocabulary" 

Exception reading vector for word:  RT, 
 Exception : "word 'RT' not in vocabulary" 

Exception reading vector for word:  :, 
 Exception : "word ':' not in vocabulary" 

Exception reading vector for word:  صامو, 
 Exception : "word 'صامو' not in vocabulary" 

Exception reading vector for word:  بقولكم, 
 Exception : "word 'بقولكم' not in vocabulary" 

Exception reading vector for word:  ناسيين, 
 Exception : "word 'ناسيين' not in vocabulary" 

Exception reading vec




Exception reading vector for word:  اعيشه, 
 Exception : "word 'اعيشه' not in vocabulary" 

Exception reading vector for word:  نصلى, 
 Exception : "word 'نصلى' not in vocabulary" 

Exception reading vector for word:  الاندبندت, 
 Exception : "word 'الاندبندت' not in vocabulary" 

Exception reading vector for word:  تسريبا, 
 Exception : "word 'تسريبا' not in vocabulary" 

Exception reading vector for word:  يحرزكاس, 
 Exception : "word 'يحرزكاس' not in vocabulary" 

Exception reading vector for word:  إسطنبولشارك, 
 Exception : "word 'إسطنبولشارك' not in vocabulary" 

Exception reading vector for word:  ألنبضه, 
 Exception : "word 'ألنبضه' not in vocabulary" 

Exception reading vector for word:  تشعرني, 
 Exception : "word 'تشعرني' not in vocabulary" 

Exception reading vector for word:  انطلقتملئ, 
 Exception : "word 'انطلقتملئ' not in vocabulary" 

Exception reading vector for word:  لكنهالم, 
 Exception : "word 'لكنهالم' not in vocabulary" 

Exception reading vector for word:  بيق


Exception reading vector for word:  اتجبر, 
 Exception : "word 'اتجبر' not in vocabulary" 

Exception reading vector for word:  يتشطر, 
 Exception : "word 'يتشطر' not in vocabulary" 

Exception reading vector for word:  73, 
 Exception : "word '73' not in vocabulary" 

Exception reading vector for word:  دولارفي, 
 Exception : "word 'دولارفي' not in vocabulary" 

Exception reading vector for word:  القيادةللدكتور, 
 Exception : "word 'القيادةللدكتور' not in vocabulary" 

Exception reading vector for word:  سلاماعلى, 
 Exception : "word 'سلاماعلى' not in vocabulary" 

Exception reading vector for word:  الانتصارالعربى, 
 Exception : "word 'الانتصارالعربى' not in vocabulary" 

Exception reading vector for word:  تستقطبان, 
 Exception : "word 'تستقطبان' not in vocabulary" 

Exception reading vector for word:  والآتي, 
 Exception : "word 'والآتي' not in vocabulary" 

Exception reading vector for word:  ياااا, 
 Exception : "word 'ياااا' not in vocabulary" 

Exception reading vector for wo


Exception reading vector for word:  Apple, 
 Exception : "word 'Apple' not in vocabulary" 

Exception reading vector for word:  Samsung, 
 Exception : "word 'Samsung' not in vocabulary" 

Exception reading vector for word:  شهداءالحرية, 
 Exception : "word 'شهداءالحرية' not in vocabulary" 

Exception reading vector for word:  سيلاحققك, 
 Exception : "word 'سيلاحققك' not in vocabulary" 

Exception reading vector for word:  فلسط, 
 Exception : "word 'فلسط' not in vocabulary" 

Exception reading vector for word:  وحيات, 
 Exception : "word 'وحيات' not in vocabulary" 

Exception reading vector for word:  اصييل, 
 Exception : "word 'اصييل' not in vocabulary" 

Exception reading vector for word:  اشتموا, 
 Exception : "word 'اشتموا' not in vocabulary" 

Exception reading vector for word:  أدبكم, 
 Exception : "word 'أدبكم' not in vocabulary" 

Exception reading vector for word:  ومياده, 
 Exception : "word 'ومياده' not in vocabulary" 

Exception reading vector for word:  تسعدنا, 
 Exception


Exception reading vector for word:  وقدام, 
 Exception : "word 'وقدام' not in vocabulary" 

Exception reading vector for word:  وهت, 
 Exception : "word 'وهت' not in vocabulary" 

Exception reading vector for word:  قنفوذة, 
 Exception : "word 'قنفوذة' not in vocabulary" 

Exception reading vector for word:  القدسشاهد, 
 Exception : "word 'القدسشاهد' not in vocabulary" 

Exception reading vector for word:  خليفتنا, 
 Exception : "word 'خليفتنا' not in vocabulary" 

Exception reading vector for word:  وخبيريؤكد, 
 Exception : "word 'وخبيريؤكد' not in vocabulary" 

Exception reading vector for word:  ومواطنةلقد, 
 Exception : "word 'ومواطنةلقد' not in vocabulary" 

Exception reading vector for word:  أسعدتنا, 
 Exception : "word 'أسعدتنا' not in vocabulary" 

Exception reading vector for word:  مقدسااتنا, 
 Exception : "word 'مقدسااتنا' not in vocabulary" 

Exception reading vector for word:  ينادوننا, 
 Exception : "word 'ينادوننا' not in vocabulary" 

Exception reading vector for word

Exception reading vector for word:  اندمآج, 
 Exception : "word 'اندمآج' not in vocabulary" 

Exception reading vector for word:  وحمآس, 
 Exception : "word 'وحمآس' not in vocabulary" 

Exception reading vector for word:  إيتونز, 
 Exception : "word 'إيتونز' not in vocabulary" 

Exception reading vector for word:  يتابكون, 
 Exception : "word 'يتابكون' not in vocabulary" 

Exception reading vector for word:  وتناسوا, 
 Exception : "word 'وتناسوا' not in vocabulary" 

Exception reading vector for word:  تزامننا, 
 Exception : "word 'تزامننا' not in vocabulary" 

Exception reading vector for word:  Beasts, 
 Exception : "word 'Beasts' not in vocabulary" 

Exception reading vector for word:  مختلطهلن, 
 Exception : "word 'مختلطهلن' not in vocabulary" 

Exception reading vector for word:  فدولتنا, 
 Exception : "word 'فدولتنا' not in vocabulary" 

Exception reading vector for word:  لناالكثير, 
 Exception : "word 'لناالكثير' not in vocabulary" 

Exception reading vector for word:  دعوكم, 



Exception reading vector for word:  رمشق, 
 Exception : "word 'رمشق' not in vocabulary" 

Exception reading vector for word:  مكوة, 
 Exception : "word 'مكوة' not in vocabulary" 

Exception reading vector for word:  القروبات, 
 Exception : "word 'القروبات' not in vocabulary" 

Exception reading vector for word:  رائعةوهذا, 
 Exception : "word 'رائعةوهذا' not in vocabulary" 

Exception reading vector for word:  بقوقلما, 
 Exception : "word 'بقوقلما' not in vocabulary" 

Exception reading vector for word:  اعرفهاأكتب, 
 Exception : "word 'اعرفهاأكتب' not in vocabulary" 

Exception reading vector for word:  لقطتها, 
 Exception : "word 'لقطتها' not in vocabulary" 

Exception reading vector for word:  تعزرغم, 
 Exception : "word 'تعزرغم' not in vocabulary" 

Exception reading vector for word:  المعلنةشهداء, 
 Exception : "word 'المعلنةشهداء' not in vocabulary" 

Exception reading vector for word:  ماحملنا, 
 Exception : "word 'ماحملنا' not in vocabulary" 

Exception reading vector for word

Exception reading vector for word:  أتزكر, 
 Exception : "word 'أتزكر' not in vocabulary" 

Exception reading vector for word:  مسؤوليكم, 
 Exception : "word 'مسؤوليكم' not in vocabulary" 

Exception reading vector for word:  وأبايع, 
 Exception : "word 'وأبايع' not in vocabulary" 

Exception reading vector for word:  يوفقهم, 
 Exception : "word 'يوفقهم' not in vocabulary" 

Exception reading vector for word:  الهايفة, 
 Exception : "word 'الهايفة' not in vocabulary" 

Exception reading vector for word:  واتصدر, 
 Exception : "word 'واتصدر' not in vocabulary" 

Exception reading vector for word:  هسمع, 
 Exception : "word 'هسمع' not in vocabulary" 

Exception reading vector for word:  المهترئه, 
 Exception : "word 'المهترئه' not in vocabulary" 

Exception reading vector for word:  ملناش, 
 Exception : "word 'ملناش' not in vocabulary" 

Exception reading vector for word:  مبارتى, 
 Exception : "word 'مبارتى' not in vocabulary" 

Exception reading vector for word:  للاشقاء, 
 Exception :


Exception reading vector for word:  والحمقين, 
 Exception : "word 'والحمقين' not in vocabulary" 

Exception reading vector for word:  مبرووووووك, 
 Exception : "word 'مبرووووووك' not in vocabulary" 

Exception reading vector for word:  تفكروني, 
 Exception : "word 'تفكروني' not in vocabulary" 

Exception reading vector for word:  افكركم, 
 Exception : "word 'افكركم' not in vocabulary" 

Exception reading vector for word:  مسمهاش, 
 Exception : "word 'مسمهاش' not in vocabulary" 

Exception reading vector for word:  الفارغين, 
 Exception : "word 'الفارغين' not in vocabulary" 

Exception reading vector for word:  اخبارالعالم, 
 Exception : "word 'اخبارالعالم' not in vocabulary" 

Exception reading vector for word:  1951, 
 Exception : "word '1951' not in vocabulary" 

Exception reading vector for word:  سهوكه, 
 Exception : "word 'سهوكه' not in vocabulary" 

Exception reading vector for word:  ينقهر, 
 Exception : "word 'ينقهر' not in vocabulary" 

Exception reading vector for word:  الا


Exception reading vector for word:  928313424584630272, 
 Exception : "word '928313424584630272' not in vocabulary" 

Exception reading vector for word:  سلمانكل, 
 Exception : "word 'سلمانكل' not in vocabulary" 

Exception reading vector for word:  926604120244572161, 
 Exception : "word '926604120244572161' not in vocabulary" 

Exception reading vector for word:  929893865670237952, 
 Exception : "word '929893865670237952' not in vocabulary" 

Exception reading vector for word:  باعواالغاز, 
 Exception : "word 'باعواالغاز' not in vocabulary" 

Exception reading vector for word:  وبيفرطوا, 
 Exception : "word 'وبيفرطوا' not in vocabulary" 

Exception reading vector for word:  بشوال, 
 Exception : "word 'بشوال' not in vocabulary" 

Exception reading vector for word:  928529773923618816, 
 Exception : "word '928529773923618816' not in vocabulary" 

Exception reading vector for word:  SmackDownshowInunitedkingdom, 
 Exception : "word 'SmackDownshowInunitedkingdom' not in vocabulary" 

E


Exception reading vector for word:  اخده, 
 Exception : "word 'اخده' not in vocabulary" 

Exception reading vector for word:  اتلهي, 
 Exception : "word 'اتلهي' not in vocabulary" 

Exception reading vector for word:  نيلك, 
 Exception : "word 'نيلك' not in vocabulary" 

Exception reading vector for word:  929435294646045952, 
 Exception : "word '929435294646045952' not in vocabulary" 

Exception reading vector for word:  930521486233948160, 
 Exception : "word '930521486233948160' not in vocabulary" 

Exception reading vector for word:  حيقضي, 
 Exception : "word 'حيقضي' not in vocabulary" 

Exception reading vector for word:  926536984050520064, 
 Exception : "word '926536984050520064' not in vocabulary" 

Exception reading vector for word:  فمجتش, 
 Exception : "word 'فمجتش' not in vocabulary" 

Exception reading vector for word:  المولخيه, 
 Exception : "word 'المولخيه' not in vocabulary" 

Exception reading vector for word:  ياجاد, 
 Exception : "word 'ياجاد' not in vocabulary" 


Exception reading vector for word:  ابوكم, 
 Exception : "word 'ابوكم' not in vocabulary" 

Exception reading vector for word:  استرجلو, 
 Exception : "word 'استرجلو' not in vocabulary" 

Exception reading vector for word:  DAS17, 
 Exception : "word 'DAS17' not in vocabulary" 

Exception reading vector for word:  ADSD, 
 Exception : "word 'ADSD' not in vocabulary" 

Exception reading vector for word:  وسايبيه, 
 Exception : "word 'وسايبيه' not in vocabulary" 

Exception reading vector for word:  بمزاجهم, 
 Exception : "word 'بمزاجهم' not in vocabulary" 

Exception reading vector for word:  ساعت, 
 Exception : "word 'ساعت' not in vocabulary" 

Exception reading vector for word:  الإيذيدية, 
 Exception : "word 'الإيذيدية' not in vocabulary" 

Exception reading vector for word:  التصقيف, 
 Exception : "word 'التصقيف' not in vocabulary" 

Exception reading vector for word:  سينهون, 
 Exception : "word 'سينهون' not in vocabulary" 

Exception reading vector for word:  وغيروو, 
 Exception : 

Exception reading vector for word:  معدش, 
 Exception : "word 'معدش' not in vocabulary" 

Exception reading vector for word:  سيرافرات, 
 Exception : "word 'سيرافرات' not in vocabulary" 

Exception reading vector for word:  بيولعوا, 
 Exception : "word 'بيولعوا' not in vocabulary" 

Exception reading vector for word:  فنقر, 
 Exception : "word 'فنقر' not in vocabulary" 

Exception reading vector for word:  ولاتبيهم, 
 Exception : "word 'ولاتبيهم' not in vocabulary" 

Exception reading vector for word:  ادمى, 
 Exception : "word 'ادمى' not in vocabulary" 

Exception reading vector for word:  واراق, 
 Exception : "word 'واراق' not in vocabulary" 

Exception reading vector for word:  توقفون, 
 Exception : "word 'توقفون' not in vocabulary" 

Exception reading vector for word:  قولتكم, 
 Exception : "word 'قولتكم' not in vocabulary" 

Exception reading vector for word:  تناقضون, 
 Exception : "word 'تناقضون' not in vocabulary" 

Exception reading vector for word:  والمعيبه, 
 Exception : "w


Exception reading vector for word:  عليكو, 
 Exception : "word 'عليكو' not in vocabulary" 

Exception reading vector for word:  فضحانا, 
 Exception : "word 'فضحانا' not in vocabulary" 

Exception reading vector for word:  وبتقول, 
 Exception : "word 'وبتقول' not in vocabulary" 

Exception reading vector for word:  اتقو, 
 Exception : "word 'اتقو' not in vocabulary" 

Exception reading vector for word:  مطبلاتيه, 
 Exception : "word 'مطبلاتيه' not in vocabulary" 

Exception reading vector for word:  بتحكى, 
 Exception : "word 'بتحكى' not in vocabulary" 

Exception reading vector for word:  بنحميها, 
 Exception : "word 'بنحميها' not in vocabulary" 

Exception reading vector for word:  الفسادأتمنى, 
 Exception : "word 'الفسادأتمنى' not in vocabulary" 

Exception reading vector for word:  استرجعنا, 
 Exception : "word 'استرجعنا' not in vocabulary" 

Exception reading vector for word:  وشروه, 
 Exception : "word 'وشروه' not in vocabulary" 

Exception reading vector for word:  والموافقةلأثي

Exception reading vector for word:  خمتها, 
 Exception : "word 'خمتها' not in vocabulary" 

Exception reading vector for word:  مختطفه, 
 Exception : "word 'مختطفه' not in vocabulary" 

Exception reading vector for word:  يارخمه, 
 Exception : "word 'يارخمه' not in vocabulary" 

Exception reading vector for word:  فوراواللي, 
 Exception : "word 'فوراواللي' not in vocabulary" 

Exception reading vector for word:  927594341484449792, 
 Exception : "word '927594341484449792' not in vocabulary" 

Exception reading vector for word:  928711178934775808, 
 Exception : "word '928711178934775808' not in vocabulary" 

Exception reading vector for word:  لمركزدولي, 
 Exception : "word 'لمركزدولي' not in vocabulary" 

Exception reading vector for word:  929814023335563264, 
 Exception : "word '929814023335563264' not in vocabulary" 

Exception reading vector for word:  929043183807483008, 
 Exception : "word '929043183807483008' not in vocabulary" 

Exception reading vector for word:  ومنزلق, 
 Ex

Exception reading vector for word:  أخبعاجل, 
 Exception : "word 'أخبعاجل' not in vocabulary" 

Exception reading vector for word:  كدةيانبت, 
 Exception : "word 'كدةيانبت' not in vocabulary" 

Exception reading vector for word:  للريدز, 
 Exception : "word 'للريدز' not in vocabulary" 

Exception reading vector for word:  WHULIV, 
 Exception : "word 'WHULIV' not in vocabulary" 

Exception reading vector for word:  وبالفم, 
 Exception : "word 'وبالفم' not in vocabulary" 

Exception reading vector for word:  المغيبين, 
 Exception : "word 'المغيبين' not in vocabulary" 

Exception reading vector for word:  معندهوش, 
 Exception : "word 'معندهوش' not in vocabulary" 

Exception reading vector for word:  دعمو, 
 Exception : "word 'دعمو' not in vocabulary" 

Exception reading vector for word:  يساهمو, 
 Exception : "word 'يساهمو' not in vocabulary" 

Exception reading vector for word:  نستفيق, 
 Exception : "word 'نستفيق' not in vocabulary" 

Exception reading vector for word:  للانقلابات, 
 Ex

Exception reading vector for word:  عينيهم, 
 Exception : "word 'عينيهم' not in vocabulary" 

Exception reading vector for word:  اخلاقهم, 
 Exception : "word 'اخلاقهم' not in vocabulary" 

Exception reading vector for word:  مشغلهم, 
 Exception : "word 'مشغلهم' not in vocabulary" 

Exception reading vector for word:  كفوووو, 
 Exception : "word 'كفوووو' not in vocabulary" 

Exception reading vector for word:  مانيفيكتور, 
 Exception : "word 'مانيفيكتور' not in vocabulary" 

Exception reading vector for word:  أوباماينغ, 
 Exception : "word 'أوباماينغ' not in vocabulary" 

Exception reading vector for word:  أحمدموسي, 
 Exception : "word 'أحمدموسي' not in vocabulary" 

Exception reading vector for word:  لوفرابوظبي, 
 Exception : "word 'لوفرابوظبي' not in vocabulary" 

Exception reading vector for word:  شسالفه, 
 Exception : "word 'شسالفه' not in vocabulary" 

Exception reading vector for word:  ماترتب, 
 Exception : "word 'ماترتب' not in vocabulary" 

Exception reading vector for wor

Exception reading vector for word:  للثورتين, 
 Exception : "word 'للثورتين' not in vocabulary" 

Exception reading vector for word:  الكارطة, 
 Exception : "word 'الكارطة' not in vocabulary" 

Exception reading vector for word:  فإنتبهوا, 
 Exception : "word 'فإنتبهوا' not in vocabulary" 

Exception reading vector for word:  سهوكةابو, 
 Exception : "word 'سهوكةابو' not in vocabulary" 

Exception reading vector for word:  فلاتربلحةعواتشفطبيب, 
 Exception : "word 'فلاتربلحةعواتشفطبيب' not in vocabulary" 

Exception reading vector for word:  الفلاسفةالانقلابي, 
 Exception : "word 'الفلاسفةالانقلابي' not in vocabulary" 

Exception reading vector for word:  وشهادتها, 
 Exception : "word 'وشهادتها' not in vocabulary" 

Exception reading vector for word:  وتابت, 
 Exception : "word 'وتابت' not in vocabulary" 

Exception reading vector for word:  وإنصافهم, 
 Exception : "word 'وإنصافهم' not in vocabulary" 

Exception reading vector for word:  وغيرتهم, 
 Exception : "word 'وغيرتهم' not in voca


Exception reading vector for word:  متسمعش, 
 Exception : "word 'متسمعش' not in vocabulary" 

Exception reading vector for word:  GulfCrisis, 
 Exception : "word 'GulfCrisis' not in vocabulary" 

Exception reading vector for word:  مافكرتوا, 
 Exception : "word 'مافكرتوا' not in vocabulary" 

Exception reading vector for word:  تبغوه, 
 Exception : "word 'تبغوه' not in vocabulary" 

Exception reading vector for word:  يانعجه, 
 Exception : "word 'يانعجه' not in vocabulary" 

Exception reading vector for word:  هتحررها, 
 Exception : "word 'هتحررها' not in vocabulary" 

Exception reading vector for word:  ايمتى, 
 Exception : "word 'ايمتى' not in vocabulary" 

Exception reading vector for word:  مادمر, 
 Exception : "word 'مادمر' not in vocabulary" 

Exception reading vector for word:  نشوفها, 
 Exception : "word 'نشوفها' not in vocabulary" 

Exception reading vector for word:  عاملوهم, 
 Exception : "word 'عاملوهم' not in vocabulary" 

Exception reading vector for word:  القزافي, 
 Ex

Exception reading vector for word:  ولقطتوجه, 
 Exception : "word 'ولقطتوجه' not in vocabulary" 

Exception reading vector for word:  بعفويتك, 
 Exception : "word 'بعفويتك' not in vocabulary" 

Exception reading vector for word:  تتصنع, 
 Exception : "word 'تتصنع' not in vocabulary" 

Exception reading vector for word:  عليشه, 
 Exception : "word 'عليشه' not in vocabulary" 

Exception reading vector for word:  قرآتها, 
 Exception : "word 'قرآتها' not in vocabulary" 

Exception reading vector for word:  رينمبوو, 
 Exception : "word 'رينمبوو' not in vocabulary" 

Exception reading vector for word:  رينبمبو, 
 Exception : "word 'رينبمبو' not in vocabulary" 

Exception reading vector for word:  اذاني, 
 Exception : "word 'اذاني' not in vocabulary" 

Exception reading vector for word:  ااماذا, 
 Exception : "word 'ااماذا' not in vocabulary" 

Exception reading vector for word:  تناطحني, 
 Exception : "word 'تناطحني' not in vocabulary" 

Exception reading vector for word:  تؤذيني, 
 Exceptio


Exception reading vector for word:  للكابوريا, 
 Exception : "word 'للكابوريا' not in vocabulary" 

Exception reading vector for word:  المعافي, 
 Exception : "word 'المعافي' not in vocabulary" 

Exception reading vector for word:  شفائك, 
 Exception : "word 'شفائك' not in vocabulary" 

Exception reading vector for word:  الدفعةبدلة, 
 Exception : "word 'الدفعةبدلة' not in vocabulary" 

Exception reading vector for word:  سيرقى, 
 Exception : "word 'سيرقى' not in vocabulary" 

Exception reading vector for word:  يتشافوا, 
 Exception : "word 'يتشافوا' not in vocabulary" 

Exception reading vector for word:  والرغي, 
 Exception : "word 'والرغي' not in vocabulary" 

Exception reading vector for word:  يتشاف, 
 Exception : "word 'يتشاف' not in vocabulary" 

Exception reading vector for word:  ناسنا, 
 Exception : "word 'ناسنا' not in vocabulary" 

Exception reading vector for word:  بكله, 
 Exception : "word 'بكله' not in vocabulary" 

Exception reading vector for word:  داريت, 
 Exceptio

Exception reading vector for word:  يبرطعوا, 
 Exception : "word 'يبرطعوا' not in vocabulary" 

Exception reading vector for word:  ونضفتها, 
 Exception : "word 'ونضفتها' not in vocabulary" 

Exception reading vector for word:  عالتايم, 
 Exception : "word 'عالتايم' not in vocabulary" 

Exception reading vector for word:  انجلط, 
 Exception : "word 'انجلط' not in vocabulary" 

Exception reading vector for word:  تنظيفية, 
 Exception : "word 'تنظيفية' not in vocabulary" 

Exception reading vector for word:  لقطوا, 
 Exception : "word 'لقطوا' not in vocabulary" 

Exception reading vector for word:  زبالتكم, 
 Exception : "word 'زبالتكم' not in vocabulary" 

Exception reading vector for word:  لستوري, 
 Exception : "word 'لستوري' not in vocabulary" 

Exception reading vector for word:  بنهوهوهو, 
 Exception : "word 'بنهوهوهو' not in vocabulary" 

Exception reading vector for word:  يحبسوا, 
 Exception : "word 'يحبسوا' not in vocabulary" 

Exception reading vector for word:  يطيارات, 
 Exc


Exception reading vector for word:  مصدومه, 
 Exception : "word 'مصدومه' not in vocabulary" 

Exception reading vector for word:  وربييي, 
 Exception : "word 'وربييي' not in vocabulary" 

Exception reading vector for word:  اتحداااا, 
 Exception : "word 'اتحداااا' not in vocabulary" 

Exception reading vector for word:  قيمتك, 
 Exception : "word 'قيمتك' not in vocabulary" 

Exception reading vector for word:  مدعس, 
 Exception : "word 'مدعس' not in vocabulary" 

Exception reading vector for word:  بيطردوك, 
 Exception : "word 'بيطردوك' not in vocabulary" 

Exception reading vector for word:  اخرسسس, 
 Exception : "word 'اخرسسس' not in vocabulary" 

Exception reading vector for word:  عزيزين, 
 Exception : "word 'عزيزين' not in vocabulary" 

Exception reading vector for word:  بتتوقعوا, 
 Exception : "word 'بتتوقعوا' not in vocabulary" 

Exception reading vector for word:  بيكونوا, 
 Exception : "word 'بيكونوا' not in vocabulary" 

Exception reading vector for word:  ورأيكم, 
 Excepti

Exception reading vector for word:  بعملوله, 
 Exception : "word 'بعملوله' not in vocabulary" 

Exception reading vector for word:  اليمعه, 
 Exception : "word 'اليمعه' not in vocabulary" 

Exception reading vector for word:  هالساحات, 
 Exception : "word 'هالساحات' not in vocabulary" 

Exception reading vector for word:  فيغنيك, 
 Exception : "word 'فيغنيك' not in vocabulary" 

Exception reading vector for word:  بمطرك, 
 Exception : "word 'بمطرك' not in vocabulary" 

Exception reading vector for word:  اسق, 
 Exception : "word 'اسق' not in vocabulary" 

Exception reading vector for word:  إندبح, 
 Exception : "word 'إندبح' not in vocabulary" 

Exception reading vector for word:  تعرنا, 
 Exception : "word 'تعرنا' not in vocabulary" 

Exception reading vector for word:  الفضايح, 
 Exception : "word 'الفضايح' not in vocabulary" 

Exception reading vector for word:  نطول, 
 Exception : "word 'نطول' not in vocabulary" 

Exception reading vector for word:  بالنومة, 
 Exception : "word 'با

Word embeddings saved to .pkl 




In [32]:
for w in embed:
    if not len(embed[w]) == 300:
        print(w)