In [1]:
import pandas as pd
import csv
import re
import concurrent.futures
import tqdm
from collections import Counter 

### Utility function for multi-threading

In [2]:
# def create_multi_range(start, num_total, workers):
#     excess = num_total % workers 
#     split = int(num_total / workers)
#     ranges = []

#     for x in range(0, workers):
#         if excess > 0:
#             end = start + split + 1
#             excess -= 1
#         else:
#             end = start + split

#         ranges.append([start, end])
#         start = end
    
#     return ranges

# Lexicon

In [3]:
# Load the emoji sentiment lexicon from a CSV located in the working directory.
# Every column is read; the commented-out lines are an earlier variant that
# read only a subset of the columns and added a zeroed 'Occurrence' column.
# cols = ['Emoji', 'Position', 'Unicode name']
# lexicon_df = pd.read_csv('Emoji_Sentiment_Data_v1.0.csv',  usecols = cols)
lexicon_df = pd.read_csv('Emoji_Sentiment_Data_v1.0.csv')
# lexicon_df['Occurrence'] = 0
lexicon_df.head()  # preview the first rows

Unnamed: 0,Emoji,Unicode codepoint,Occurrences,Position,Negative,Neutral,Positive,Unicode name,Unicode block
0,😂,0x1f602,14622,0.805101,3614,4163,6845,FACE WITH TEARS OF JOY,Emoticons
1,❤,0x2764,8050,0.746943,355,1334,6361,HEAVY BLACK HEART,Dingbats
2,♥,0x2665,7144,0.753806,252,1942,4950,BLACK HEART SUIT,Miscellaneous Symbols
3,😍,0x1f60d,6359,0.765292,329,1390,4640,SMILING FACE WITH HEART-SHAPED EYES,Emoticons
4,😭,0x1f62d,5526,0.803352,2412,1218,1896,LOUDLY CRYING FACE,Emoticons


In [4]:
# Sanity check: substitute the first lexicon emoji inside a sample string.
text = '😂 lmao haha 😂 😂 😂'
first_emoji = lexicon_df['Emoji'].iloc[0]
# '[^ ]*' also swallows any non-space characters glued to the emoji
emoji_token = first_emoji + '[^ ]*'
text = re.sub(emoji_token, 'it worked', text)
text

## CONCLUSION
# the emoji character itself works as a regex; no need to convert it to unicode escapes

'it worked lmao haha it worked it worked it worked'

## Only keep Top 751 out of 969 

In [5]:
# Keep only the most frequently used emojis of the lexicon and give each a
# sentiment score in [-1, 1]: share of positive uses minus share of negative uses.
LEXICON_TOP_N = 751  # number of emojis kept (see the section heading)

# sort_values returns a NEW frame, so the result has to be assigned for the
# ordering to take effect. mergesort is stable: emojis with equal
# 'Occurrences' keep their file order, so the cut-off is deterministic.
# .copy() detaches the subset from the full lexicon, so adding the 'Score'
# column below does not raise SettingWithCopyWarning.
lexicon_df = (
    lexicon_df
    .sort_values('Occurrences', ascending=False, kind='mergesort')
    .head(LEXICON_TOP_N)
    .copy()
)
lexicon_df['Score'] = (
    lexicon_df['Positive'] / lexicon_df['Occurrences']
    - lexicon_df['Negative'] / lexicon_df['Occurrences']
)
lexicon_df

Unnamed: 0,Emoji,Unicode codepoint,Occurrences,Position,Negative,Neutral,Positive,Unicode name,Unicode block,Score
0,😂,0x1f602,14622,0.805101,3614,4163,6845,FACE WITH TEARS OF JOY,Emoticons,0.220968
1,❤,0x2764,8050,0.746943,355,1334,6361,HEAVY BLACK HEART,Dingbats,0.746087
2,♥,0x2665,7144,0.753806,252,1942,4950,BLACK HEART SUIT,Miscellaneous Symbols,0.657615
3,😍,0x1f60d,6359,0.765292,329,1390,4640,SMILING FACE WITH HEART-SHAPED EYES,Emoticons,0.677937
4,😭,0x1f62d,5526,0.803352,2412,1218,1896,LOUDLY CRYING FACE,Emoticons,-0.093377
...,...,...,...,...,...,...,...,...,...,...
746,♮,0x266e,5,0.936640,0,4,1,MUSIC NATURAL SIGN,Miscellaneous Symbols,0.200000
747,🅾,0x1f17e,5,0.977469,2,2,1,NEGATIVE SQUARED LATIN CAPITAL LETTER O,Enclosed Alphanumeric Supplement,-0.200000
748,🔄,0x1f504,5,0.971014,0,5,0,ANTICLOCKWISE DOWNWARDS AND UPWARDS OPEN CIRCL...,Miscellaneous Symbols and Pictographs,0.000000
749,☄,0x2604,5,0.435374,0,5,0,COMET,Miscellaneous Symbols,0.000000


In [6]:
# ## REGEX but specifically all emojis from the lexicon
# ## This is super slower

# EMOJI_PATTERN = ''
# OR = '|'

# for ti, tr in lexicon_df[:750].iterrows():
#     EMOJI_PATTERN = EMOJI_PATTERN + tr['Emoji'] + OR

# EMOJI_PATTERN = EMOJI_PATTERN + lexicon_df['Emoji'][750]
# print(EMOJI_PATTERN) 

# Emoji processing

In [7]:
# Emoji -> sentiment score lookup table (turned into a dict further down
# for fast access): keep the two relevant columns, order by emoji, index by it.
emo_score = (
    lexicon_df[['Emoji', 'Score']]
    .sort_values('Emoji')
    .set_index('Emoji')
)
emo_score

Unnamed: 0_level_0,Score
Emoji,Unnamed: 1_level_1
¦,0.625000
©,0.117788
®,0.284672
۞,0.000000
۩,0.000000
...,...
🚹,0.769231
🚺,0.200000
🚼,0.666667
🚿,0.705882


In [8]:
# Nested mapping {'Score': {emoji: score}}; scores are read via es_dict['Score']
es_dict = emo_score.to_dict(orient='dict')
es_dict

{'Score': {'¦': 0.625,
  '©': 0.11778846153846154,
  '®': 0.2846715328467153,
  '۞': 0.0,
  '۩': 0.0,
  '↪': 0.125,
  '↳': 0.0,
  '↾': 0.6666666666666666,
  '↿': 0.6666666666666666,
  '⇧': 0.14285714285714285,
  '⇨': 0.5263157894736842,
  '⇩': 0.0,
  '⌒': 0.7,
  '⌚': 0.23529411764705885,
  '⌛': 0.14285714285714285,
  '⏩': 0.16666666666666666,
  '⏰': 0.5384615384615384,
  '⏳': 0.0,
  'Ⓐ': -0.14285714285714285,
  'Ⓔ': 0.5,
  'Ⓛ': 0.5,
  'Ⓜ': 0.39999999999999997,
  '─': 0.14893617021276595,
  '━': 0.17948717948717946,
  '│': 0.35074626865671643,
  '┃': 0.5,
  '┈': -0.7142857142857142,
  '┊': 1.0,
  '┐': -0.2,
  '┓': 0.6666666666666666,
  '┛': 0.75,
  '┣': 0.6666666666666666,
  '┳': -0.4,
  '┻': -0.5,
  '┼': 0.0,
  '═': 0.016129032258064516,
  '║': 0.1506849315068493,
  '╔': 0.3076923076923077,
  '╗': 0.42857142857142855,
  '╚': 0.3333333333333333,
  '╝': 0.5384615384615384,
  '╠': 0.23076923076923078,
  '╣': 0.0,
  '╥': 0.125,
  '╦': 0.45454545454545453,
  '╩': 0.22727272727272727,
  '╬':

In [9]:
# Spot check: look up the score of a single emoji.
# dict.get() returns None when the emoji is not a key of the mapping.
e = '☹'
es_dict['Score'].get(e)

-0.6

### Convert Emoji Variants to base form

### Variation Selector-16
Variation Selector-16 (VS-16, U+FE0F) is appended to a base character to request its emoji presentation, e.g. it turns ☹ into ☹️.
It does the same for some other emojis, such as ❤ to ❤️. What we need is the unmodified version, because that is the one in the lexicon.

In [10]:
# Look up the same character with and without variation selector-16 (U+FE0F)
# appended, printing each glyph followed by its lexicon score.
sad_face_unicode = '\U00002639'
yellow_sad_face_unicode = sad_face_unicode + '\U0000FE0F'
for label, glyph in (
    ('Sad face without Variant 16 modifier: ', sad_face_unicode),
    ('Sad face with Variant 16 modifier: ', yellow_sad_face_unicode),
):
    print(label, glyph)
    print(es_dict['Score'].get(glyph))
# A printed None means the glyph has no sentiment score in the Emoji Sentiment Ranking

Sad face without Variant 16 modifier:  ☹
-0.6
Sad face with Variant 16 modifier:  ☹️
None


# Re-start from here
# Open the tweets file (change the `fn` variable to switch input files)

In [11]:
### tweets1.csv is a copy of tweets3 that is not utf-8 encoded and lacks full quoting;
### it was used for all the tests.

fn = 'tweets1utf8'   # base name of the CLEANED version of the file
extension = '.csv'
tweets_df = (
    pd.read_csv(fn + extension, index_col=0)
    .assign(score=0.0)             # per-tweet emoji score column, starts at zero
    .assign(contains_emoji=False)  # per-tweet flag column, starts as False
)
tweets_df.head()

Unnamed: 0,text,date,lang,author,score,contains_emoji
0,@jmmmmramos Yonnn ikaw ang alay tsong lol,2020-03-25T01:50:43.000Z,tl,76292344,0.0,False
1,Panoorin ko ulit gangnam beauty,2020-03-25T01:50:44.000Z,tl,836197243757592578,0.0,False
2,@kathantonielle_ HAHAHAHAHAHAHA I KNOW U HAVE,2020-03-25T01:50:46.000Z,tl,2918002014,0.0,False
3,@JshuaCsyc No more hotel accomm. Puno na tanan...,2020-03-25T01:50:46.000Z,tl,339543717,0.0,False
4,Way kahumanang throwback lang sa..,2020-03-25T01:50:47.000Z,tl,74405457,0.0,False


In [12]:
#### FOR TEST ONLY
#### Shows which tweets carry emoji modifiers and what they look like once
#### the modifiers are removed (nothing is written back to tweets_df here)

SKIN_MODS = r'[\U0001F3FB-\U0001F3FF]' # unicode range for light to dark skin tone
MOD1 = r'\U0000FE0F'                   # variant selector-16
MOD_PATTERN = "|".join([SKIN_MODS, MOD1])

# Only the first 1000 tweets are previewed
for ti, tr in tweets_df[:1000].iterrows():
    if re.search(MOD_PATTERN, tr.text) is None:
        continue  # no modifier in this tweet
    print(ti, tr.text)                           # tweet as stored
    print(ti, re.sub(MOD_PATTERN, '', tr.text))  # same tweet, modifiers stripped
        

14 HAPPY HAPPY BIRTHDAY MOMMY!!! I LOVE YOU VERY MUCH ❤️❤️❤️❤️😘🎊🎉🥳 https://t.co/kvf74bmuOS
14 HAPPY HAPPY BIRTHDAY MOMMY!!! I LOVE YOU VERY MUCH ❤❤❤❤😘🎊🎉🥳 https://t.co/kvf74bmuOS
26 Maghapon nanaman ituu! 🙈🤷‍♀️
26 Maghapon nanaman ituu! 🙈🤷‍♀
47 Day 5❗️ workout 🏋🏻‍♀️ https://t.co/UNebF2vd2B
47 Day 5❗ workout 🏋‍♀ https://t.co/UNebF2vd2B
48 Gigising ng maaga para ayusin ulit yung shop🤦🏼‍♂️
48 Gigising ng maaga para ayusin ulit yung shop🤦‍♂
57 Kakainis, swe-sweldo nanaman ngayung araw. Tsk tsk tsk mag aaway-away nanaman ang mga tao sa wallet ko. 😭😂🤦‍♂️
57 Kakainis, swe-sweldo nanaman ngayung araw. Tsk tsk tsk mag aaway-away nanaman ang mga tao sa wallet ko. 😭😂🤦‍♂
100 No gym tas puro foodz ,its my time to fat🤦🏽‍♀️
100 No gym tas puro foodz ,its my time to fat🤦‍♀
127 ALL 👏🏻 OF 👏🏻 IT 👏🏻 https://t.co/pDLIbWs9yT
127 ALL 👏 OF 👏 IT 👏 https://t.co/pDLIbWs9yT
129 Let’s do something positive!Uploaf - pic of yourself. ONLY YOU! 

Kapuy tag☺️ https://t.co/IyVc36j0KY https://t.co/iZecxvRkBB
129 Let’s do

In [13]:
# Score lookup for a skin-toned emoji, then for its base form
for glyph in ('👋🏻', '👋'):
    print(es_dict['Score'].get(glyph))

None
0.41623036649214656


In [14]:
# Same comparison for another emoji: skin-toned variant first, base form second
for glyph in ('👆🏻', '👆'):
    print(es_dict['Score'].get(glyph))

None
0.3333333333333333


In [15]:
# Print two reference tweets (selected by position) for later comparison
for position in (144, 24443):
    print(tweets_df.iloc[position])

text              Ung mga wala daw time for their skin care rout...
date                                       2020-03-25T01:53:04.000Z
lang                                                             tl
author                                                     34573083
score                                                           0.0
contains_emoji                                                False
Name: 144, dtype: object
text              No rice sa gabi, tapos tiyan nalang papaliitin ✔️
date                                       2020-03-25T09:52:24.000Z
lang                                                             tl
author                                                   2839890229
score                                                           0.0
contains_emoji                                                False
Name: 24443, dtype: object


In [16]:
# %%time
# for ti, tr in tweets_df.iterrows():
#     tweets_df.at[ti, 'text'] = re.sub('\U0000FE0F', '', tr.text)

### runs around 55 secs - 550k tweets
### iterrows is very slow

In [17]:
%%time

# convert to record array, then iterrate is faster than df.iterrows
df_rec = tweets_df.to_records()
for row in df_rec:
    tweets_df.at[row[0], 'text'] = re.sub(MOD_PATTERN, '', row[1])
    
print(tweets_df.iloc[144])
print(tweets_df.iloc[24443]) 

text              Ung mga wala daw time for their skin care rout...
date                                       2020-03-25T01:53:04.000Z
lang                                                             tl
author                                                     34573083
score                                                           0.0
contains_emoji                                                False
Name: 144, dtype: object
text              No rice sa gabi, tapos tiyan nalang papaliitin ✔
date                                      2020-03-25T09:52:24.000Z
lang                                                            tl
author                                                  2839890229
score                                                          0.0
contains_emoji                                               False
Name: 24443, dtype: object
Wall time: 10.3 s


# Emoji Mapping (Single-Thread)

In [18]:
# # EMOJI REGEX
# EMOJIS1 = r'[\U000021aa-\U0000fffd]' # ↪ [0x21aa] until � [0xfffd]
# EMOJIS2 = r'[\U0000fffd-\U0001F6c0]' # 🃏 [0x1f0cf] until 🛀 [0x1f6c0]
# PATTERN = EMOJIS1 + "|" + EMOJIS2 
# BY SIR ED

In [19]:
# EMOJI REGEX
# Code-point ranges used to detect emoji characters in a tweet.
# These ranges are more precise than one catch-all span,
# but they still have many gaps.
EMOJIS_REG = [
    #r'[\U000000a6-\U000000ae]',   # kept out of the list: it is the leading alternative of PATTERN
    r'[\U000006de-\U000006e9]',
    r'[\U000021aa-\U00002b50]',
    r'[\U0000fffc-\U0000fffd]',
    r'[\U0001f0cf-\U0001f4fb]',
    r'[\U0001f504-\U0001f64f]',
    r'[\U0001f680-\U0001f6c0]',
]

# Single alternation of character classes: each match is one code point
PATTERN = '|'.join([r'[\U000000a6-\U000000ae]'] + EMOJIS_REG)


In [20]:
%%time
# tweet ind, row (ti, tr)
# emoji ind, row (ei, er)
# es_dict: dictionary of emoji-score pair for faster lookup
Ctr = Counter()
Unique_ctr = Counter()

for row in df_rec:
    emoji_found = re.findall(PATTERN, row[1]) # row[1] is text
    if len(emoji_found) > 0:
        # Count Unique Emojis
        unique_emojis = set(emoji_found)
        for ue in unique_emojis:
            Unique_ctr[ue] += 1
        # Score Emojis
        tweets_df.at[row[0], 'contains_emoji'] = True # row[0] is index
        for emoji in emoji_found:
            if es_dict['Score'].get(emoji) != None:
                Ctr[emoji] += 1
                tweets_df.at[row[0], 'score'] = tweets_df.at[row[0], 'score'] + es_dict['Score'].get(emoji)

# iterating on records is faster than df.iterrows()
# 9.43 old

Wall time: 9.92 s


In [21]:
# Preview the five tweets with the highest emoji score
tweets_df.sort_values('score', ascending=False).head()

Unnamed: 0,text,date,lang,author,score,contains_emoji
187771,🇱🇦🇱🇧🇱🇨🇱🇮🇱🇰🇱🇷🇱🇸🇱🇹🇱🇺🇱🇻🇱🇾🇲🇦🇲🇨🇲🇩🇲🇪🇲🇫🇲🇬🇲🇭🇲🇰🇲🇱🇲🇲🇲🇳🇲🇴...,2020-03-27T12:16:55.000Z,und,977237372105453568,89.326708,True
152403,@erjanomasakit @abelardojulius5 💕💕💕💕💕💕💕💕💕💕💕💕💕💕...,2020-03-27T03:22:01.000Z,und,736078177,88.608333,True
187655,WE HEAL AS ONE🙏🇦🇨🇦🇩🇦🇪🇦🇫🇦🇬🇦🇮🇦🇱🇦🇲🇦🇴🇦🇶🇦🇷🇦🇸🇦🇹🇦🇺🇦🇼🇦...,2020-03-27T12:15:45.000Z,en,977237372105453568,79.256046,True
231057,@Khaannssa 😍😍😍😍😍😍😍😍😍😍😍😍😍😍😍😍😍😍😍😍😍😍😍😍😍😍😍😍😍😍😍😍😍😍😍...,2020-03-28T06:53:13.000Z,und,779890766952804352,56.94669,True
389188,#GodHealOurLand\n\n💙💙💙💙💙💙💙💙💙💙\n⭐ 💙💙💙💙💙💙💙💙💙\n ...,2020-03-30T10:55:02.000Z,und,1075956891396669440,47.288363,True


In [22]:
# es_dict['Score'].get('👏')

In [23]:
# Inspect one tweet (index label 212): its text, how many symbols PATTERN
# finds in it, and the list of matches.
sample = tweets_df.loc[212, 'text']
test = re.findall(PATTERN, sample)
print(sample, len(test), sep='\n')
test
### Tweets that repeat an emoji 100+ times end up with a very high score,
### but the score is converted to positive/negative afterwards, so it is not a big deal

🙄 https://t.co/pYITJbwO6O
1


['🙄']

In [24]:
# Preview the five tweets with the lowest emoji score (ascending sort)
tweets_df.sort_values('score').head()

Unnamed: 0,text,date,lang,author,score,contains_emoji
267445,☹☹☹☹☹☹☹☹☹☹☹☹☹☹☹☹☹☹☹☹☹☹☹☹☹☹☹☹☹☹☹☹☹☹☹☹☹☹☹ https:...,2020-03-28T13:36:42.000Z,und,848381430002401280,-23.4,True
42705,😶😶😶😶😶😶😶😶😶😶😶😶😶😶😶😶😶😶😶😶😶😶😶😶😶😶😶😶😶😶😶😶😶😶😶😶😶😶😶😶😶😶😶😶😶😶...,2020-03-25T12:30:26.000Z,und,1008479406,-20.314485,True
213669,LAMI KAAYO SA TNAN MAG TELEPORT DUNG DAVAO RN ...,2020-03-27T20:40:23.000Z,tl,78216350,-14.485714,True
236796,┳┻|\n┻┳|\n┳┻|\n┻┳|\n┳┻|\n┻┳|\n┳┻|\n┻┳|\n┳┻|\n┻...,2020-03-28T08:16:48.000Z,et,2998554614,-14.4,True
449555,"hindi maintindihan yung google translate, hind...",2020-03-31T07:19:31.000Z,tl,1181218051280134147,-12.506606,True


### Separate tweets with emoji and tweets without emoji
If a tweet only has emoji that is not in the lexicon, it is counted as tweet w/out emoji

In [25]:
# Rows flagged as containing an emoji; .copy() gives an independent frame
# so columns can be added to it without touching tweets_df.
has_emoji_mask = tweets_df['contains_emoji'].eq(True)
tweets_emoji_df = tweets_df.loc[has_emoji_mask].copy()
tweets_emoji_df.head()

Unnamed: 0,text,date,lang,author,score,contains_emoji
8,@aisesantos Wow nauumay sa babae jiba na😆🤧,2020-03-25T01:50:50.000Z,tl,1031902770,0.411765,True
10,Momsh angge @angelica_114 ung kita tlga ung ki...,2020-03-25T01:50:52.000Z,tl,831308328,1.379691,True
14,HAPPY HAPPY BIRTHDAY MOMMY!!! I LOVE YOU VERY ...,2020-03-25T01:50:54.000Z,en,939403524688900096,5.15293,True
15,"eto na naman tayo, umaga na naman 😊",2020-03-25T01:50:55.000Z,tl,971561419970830336,0.644696,True
18,You so hot💙 https://t.co/ls5HJaM71v,2020-03-25T01:50:56.000Z,en,1185625215599927297,0.732456,True


### Map score into positive or negative sentiment

In [26]:
# Snapshot the rows as a record array. It is taken before 'is_positive' is
# added, so each record holds the index followed by the existing columns only.
tweets_emoji_rec = tweets_emoji_df.to_records()
# Every tweet starts out labelled as positive
tweets_emoji_df['is_positive'] = True
tweets_emoji_rec[0]  # show the first record

(8, '@aisesantos Wow nauumay sa babae jiba na😆🤧', '2020-03-25T01:50:50.000Z', 'tl', 1031902770, 0.41176471, True)

In [27]:
# Map the score to a sentiment label: a tweet is negative only when its summed
# emoji score is below zero; a score of exactly 0 stays positive.
# The label is computed from the named 'score' column in one vectorised step,
# rather than looping over records and reading field position 5, which would
# silently pick the wrong field if the column order ever changed.
tweets_emoji_df['is_positive'] = ~tweets_emoji_df['score'].lt(0)

tweets_emoji_df.sort_values('score', ascending=False)

Wall time: 765 ms


Unnamed: 0,text,date,lang,author,score,contains_emoji,is_positive
187771,🇱🇦🇱🇧🇱🇨🇱🇮🇱🇰🇱🇷🇱🇸🇱🇹🇱🇺🇱🇻🇱🇾🇲🇦🇲🇨🇲🇩🇲🇪🇲🇫🇲🇬🇲🇭🇲🇰🇲🇱🇲🇲🇲🇳🇲🇴...,2020-03-27T12:16:55.000Z,und,977237372105453568,89.326708,True,True
152403,@erjanomasakit @abelardojulius5 💕💕💕💕💕💕💕💕💕💕💕💕💕💕...,2020-03-27T03:22:01.000Z,und,736078177,88.608333,True,True
187655,WE HEAL AS ONE🙏🇦🇨🇦🇩🇦🇪🇦🇫🇦🇬🇦🇮🇦🇱🇦🇲🇦🇴🇦🇶🇦🇷🇦🇸🇦🇹🇦🇺🇦🇼🇦...,2020-03-27T12:15:45.000Z,en,977237372105453568,79.256046,True,True
231057,@Khaannssa 😍😍😍😍😍😍😍😍😍😍😍😍😍😍😍😍😍😍😍😍😍😍😍😍😍😍😍😍😍😍😍😍😍😍😍...,2020-03-28T06:53:13.000Z,und,779890766952804352,56.946690,True,True
389188,#GodHealOurLand\n\n💙💙💙💙💙💙💙💙💙💙\n⭐ 💙💙💙💙💙💙💙💙💙\n ...,2020-03-30T10:55:02.000Z,und,1075956891396669440,47.288363,True,True
...,...,...,...,...,...,...,...
449555,"hindi maintindihan yung google translate, hind...",2020-03-31T07:19:31.000Z,tl,1181218051280134147,-12.506606,True,False
236796,┳┻|\n┻┳|\n┳┻|\n┻┳|\n┳┻|\n┻┳|\n┳┻|\n┻┳|\n┳┻|\n┻...,2020-03-28T08:16:48.000Z,et,2998554614,-14.400000,True,False
213669,LAMI KAAYO SA TNAN MAG TELEPORT DUNG DAVAO RN ...,2020-03-27T20:40:23.000Z,tl,78216350,-14.485714,True,False
42705,😶😶😶😶😶😶😶😶😶😶😶😶😶😶😶😶😶😶😶😶😶😶😶😶😶😶😶😶😶😶😶😶😶😶😶😶😶😶😶😶😶😶😶😶😶😶...,2020-03-25T12:30:26.000Z,und,1008479406,-20.314485,True,False


### Occurrence counts
`Ctr` counts every occurrence of a lexicon emoji, so duplicates within a single tweet are counted; it is not the number of tweets the emoji appeared in. `Unique_ctr` counts each matched symbol at most once per tweet.

In [28]:
# Total occurrences per lexicon emoji (repeats inside one tweet are all counted)
Ctr

Counter({'😆': 2203,
         '😍': 11602,
         '😘': 5836,
         '❤': 23354,
         '🎊': 174,
         '🎉': 908,
         '😊': 4869,
         '💙': 2620,
         '😋': 1899,
         '💕': 3608,
         '🙈': 1469,
         '😇': 1591,
         '😭': 18780,
         '❗': 458,
         '♂': 4037,
         '😂': 35030,
         '💖': 3568,
         '😥': 888,
         '💓': 1514,
         '🍔': 29,
         '🍟': 51,
         '😬': 992,
         '✨': 2046,
         '💁': 339,
         '💜': 1745,
         '🌺': 37,
         '😅': 6100,
         '😁': 2688,
         '😢': 3308,
         '👏': 2252,
         '☺': 2284,
         '💩': 208,
         '😪': 985,
         '✌': 1486,
         '😚': 815,
         '😞': 1357,
         '💪': 1467,
         '💔': 2823,
         '👌': 1221,
         '🔥': 2263,
         '😟': 317,
         '👇': 335,
         '😎': 547,
         '🙏': 8702,
         '😑': 1083,
         '☑': 144,
         '🙌': 1032,
         '👍': 1037,
         '😹': 291,
         '😜': 788,
         '👀': 687

In [29]:
# Number of tweets per matched symbol; includes symbols that PATTERN matches
# but that have no lexicon score
Unique_ctr

Counter({'😆': 1810,
         '😘': 4598,
         '😍': 7686,
         '❤': 17005,
         '🎊': 163,
         '🎉': 748,
         '😊': 4162,
         '💙': 1983,
         '😋': 1510,
         '💕': 2881,
         '🙈': 1205,
         '♀': 5224,
         '😇': 1412,
         '😭': 10543,
         '🏻': 9458,
         '🏋': 151,
         '❗': 201,
         '♂': 3348,
         '🏼': 2762,
         '😂': 23796,
         '💖': 2700,
         '😥': 739,
         '💓': 1231,
         '🍟': 46,
         '🍔': 27,
         '😬': 844,
         '💜': 1157,
         '💁': 308,
         '✨': 1711,
         '🌺': 33,
         '🏽': 812,
         '😅': 5211,
         '😁': 2205,
         '😢': 2645,
         '🇵': 611,
         '🇭': 645,
         '👏': 1180,
         '☺': 1888,
         '💩': 146,
         '😪': 882,
         '✌': 1275,
         '🙂': 1159,
         '😚': 703,
         '😞': 1142,
         '🙄': 3986,
         '💪': 1196,
         '💔': 2165,
         '🏿': 91,
         '🔥': 1536,
         '👌': 1076,
         '😟': 263,

In [30]:
# Complement of the emoji subset: rows whose contains_emoji flag is False
no_emoji_mask = tweets_df['contains_emoji'].eq(False)
tweets_no_emoji_df = tweets_df.loc[no_emoji_mask]
tweets_no_emoji_df

Unnamed: 0,text,date,lang,author,score,contains_emoji
0,@jmmmmramos Yonnn ikaw ang alay tsong lol,2020-03-25T01:50:43.000Z,tl,76292344,0.0,False
1,Panoorin ko ulit gangnam beauty,2020-03-25T01:50:44.000Z,tl,836197243757592578,0.0,False
2,@kathantonielle_ HAHAHAHAHAHAHA I KNOW U HAVE,2020-03-25T01:50:46.000Z,tl,2918002014,0.0,False
3,@JshuaCsyc No more hotel accomm. Puno na tanan...,2020-03-25T01:50:46.000Z,tl,339543717,0.0,False
4,Way kahumanang throwback lang sa..,2020-03-25T01:50:47.000Z,tl,74405457,0.0,False
...,...,...,...,...,...,...
542447,@maraiisabella @Foodwithdom Isnt that how you ...,2020-04-01T08:56:15.000Z,en,880044986431021056,0.0,False
542448,"Di ako sinuyo, naghanap ng iba. Hahaha kasi na...",2020-04-01T08:56:16.000Z,tl,1208114580,0.0,False
542450,@YeahitsDunhill's account is temporarily unava...,2020-04-01T08:56:17.000Z,tl,1010871456303964160,0.0,False
542451,Gi kapoy nakos akong kinabuhi 🥺,2020-04-01T08:56:17.000Z,tl,851267043017895936,0.0,False


### Save into csv

In [31]:
# to add
#tweets_df.to_csv(filename+'_cleaned'+extension, encoding='utf-8', quoting=csv.QUOTE_ALL)
# Write both subsets to files named after the input file, utf-8 encoded with
# every field quoted.
for suffix, frame in (('_emoji', tweets_emoji_df), ('_no_emoji', tweets_no_emoji_df)):
    frame.to_csv(fn + suffix + extension, encoding='utf-8', quoting=csv.QUOTE_ALL)