# Create Emoji-Prompt Mappings

Objective: Map each emoji to relevant keywords, phrases, and concepts using sentiment and category data.

### 1. Load the Merged Emoji Data

In [1]:
import pandas as pd
import os


data_path = os.path.join('..', 'data', 'processed', 'emoji_data_merged.csv')
merged_df = pd.read_csv(data_path)

merged_df.head()

Unnamed: 0,codepoint,occurrences,position,negative,neutral,positive,name_sentiment,block,codepoint_std,group,subgroup,codepoints,status,name_emoji,emoji,category
0,0x1f602,14622,0.805101,3614,4163,6845,FACE WITH TEARS OF JOY,Emoticons,1F602,Smileys & Emotion,face-smiling,1F602,fully-qualified,E0.6 face with tears of joy,😂,Smileys & Emotion
1,0x2764,8050,0.746943,355,1334,6361,HEAVY BLACK HEART,Dingbats,2764,Smileys & Emotion,heart,2764,unqualified,E0.6 red heart,❤,Symbols
2,0x2665,7144,0.753806,252,1942,4950,BLACK HEART SUIT,Miscellaneous Symbols,2665,Activities,game,2665,unqualified,E0.6 heart suit,♥,Symbols
3,0x1f60d,6359,0.765292,329,1390,4640,SMILING FACE WITH HEART-SHAPED EYES,Emoticons,1F60D,Smileys & Emotion,face-affection,1F60D,fully-qualified,E0.6 smiling face with heart-eyes,😍,Smileys & Emotion
4,0x1f62d,5526,0.803352,2412,1218,1896,LOUDLY CRYING FACE,Emoticons,1F62D,Smileys & Emotion,face-concerned,1F62D,fully-qualified,E0.6 loudly crying face,😭,Smileys & Emotion


### 2. Extract Relevant Columns

In [6]:
# Select relevant columns and create a copy to avoid SettingWithCopyWarning
emoji_df = merged_df[['emoji', 'name_emoji', 'category', 'positive', 'neutral', 'negative']].copy()

emoji_df.head()


Unnamed: 0,emoji,name_emoji,category,positive,neutral,negative
0,😂,E0.6 face with tears of joy,Smileys & Emotion,6845,4163,3614
1,❤,E0.6 red heart,Symbols,6361,1334,355
2,♥,E0.6 heart suit,Symbols,4950,1942,252
3,😍,E0.6 smiling face with heart-eyes,Smileys & Emotion,4640,1390,329
4,😭,E0.6 loudly crying face,Smileys & Emotion,1896,1218,2412


### 3. Process Emoji Names

Split the name column into keywords by replacing hyphens and splitting on spaces.

In [7]:
# Import regular expressions module
import re

# Function to clean 'name_emoji' column
def clean_name(name):
    # Remove patterns like 'E0.6 ', 'E1.0 ', etc.
    name = re.sub(r'^E\d+\.\d+\s', '', name)
    return name

# Apply the function to 'name_emoji' column
emoji_df['clean_name'] = emoji_df['name_emoji'].apply(clean_name)

# Process 'clean_name' column to extract keywords
emoji_df['keywords'] = emoji_df['clean_name'].str.replace('-', ' ').str.lower().str.split()

emoji_df[['emoji', 'keywords']].head()


Unnamed: 0,emoji,keywords
0,😂,"[face, with, tears, of, joy]"
1,❤,"[red, heart]"
2,♥,"[heart, suit]"
3,😍,"[smiling, face, with, heart, eyes]"
4,😭,"[loudly, crying, face]"


# 4. Add Category as a Keyword

In [8]:
# Include category in keywords
emoji_df['category_keywords'] = emoji_df['category'].str.lower().str.replace('&', '').str.replace(',', '').str.split()

# Combine keywords and category keywords
emoji_df['all_keywords'] = emoji_df['keywords'] + emoji_df['category_keywords']

# Remove duplicates in 'all_keywords'
emoji_df['all_keywords'] = emoji_df['all_keywords'].apply(lambda x: list(set(x)))

emoji_df[['emoji', 'all_keywords']].head()


Unnamed: 0,emoji,all_keywords
0,😂,"[emotion, joy, smileys, tears, with, face, of]"
1,❤,"[red, symbols, heart]"
2,♥,"[heart, symbols, suit]"
3,😍,"[emotion, smileys, smiling, with, face, heart,..."
4,😭,"[emotion, smileys, face, loudly, crying]"


# 5. Add Sentiments as Keywords

In [9]:
# function to assign sentiment labels
def get_sentiment_label(row):
    if row['positive'] > row['negative'] and row['positive'] > row['neutral']:
        return 'positive'
    elif row['negative'] > row['positive'] and row['negative'] > row['neutral']:
        return 'negative'
    else:
        return 'neutral'

emoji_df['sentiment_label'] = emoji_df.apply(get_sentiment_label, axis=1)

# Add sentiment label to keywords
emoji_df['all_keywords'] = emoji_df.apply(lambda row: row['all_keywords'] + [row['sentiment_label']], axis=1)

emoji_df[['emoji', 'all_keywords']].head()


Unnamed: 0,emoji,all_keywords
0,😂,"[emotion, joy, smileys, tears, with, face, of,..."
1,❤,"[red, symbols, heart, positive]"
2,♥,"[heart, symbols, suit, positive]"
3,😍,"[emotion, smileys, smiling, with, face, heart,..."
4,😭,"[emotion, smileys, face, loudly, crying, negat..."
