<a href="https://colab.research.google.com/github/PawanTony/emoji-to-text-converter-/blob/main/emoji_to_text_converter.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import random
import re
import string

def generate_emoji_dataset(num_samples=5000):
    """Generates a synthetic dataset with more intentional text-emoji associations."""
    emoji_categories = {
        "happy": (["😊", "😂", "😄", "😃", "😁", "😆", "😇", "🥰", "😍", "🤩"],
                  ["I'm feeling so happy today!", "This makes me happy.", "Pure joy!", "So glad!", "What a happy day.",
                   "Feeling good!", "Life is good!", "So cheerful!", "Happy vibes!", "Smiling all day."]),
        "sad": (["😭", "😢", "😔", "🥺", "😓", "😞", "😟", "🙁", "😫"],
                ["I'm feeling really sad.", "This makes me so sad.", "Feeling down and blue.", "So upset.", "Wish I wasn't so sad.",
                 "Feeling gloomy.", "Heartbroken.", "Tears in my eyes.", "So lonely.", "Just want to cry."]),
        "love": (["❤️", "💖", "💞", "💕", "💘", "💌", "😘", "😍", "🥰"],
                 ["I love you!", "Feeling so much love.", "Love is in the air.", "So romantic!", "Sending love.",
                  "You mean the world to me.", "Crazy in love.", "My heart beats for you.", "So affectionate.", "Love you always."]),
        "angry": (["😠", "😡", "😤", "🤬", "👿", "😾"],
                  ["I'm so angry right now!", "This makes me furious.", "Feeling so frustrated.", "I'm really mad!", "This is infuriating.",
                   "So annoyed!", "Beyond angry!", "Seeing red!", "Fuming!", "Absolutely livid!"]),
        "surprise": (["😮", "😲", "🤯", "😳", "😯", "😱"],
                     ["Wow!", "I'm so surprised!", "What a surprise!", "Unbelievable!", "Are you serious?",
                      "Can't believe it!", "Mind blown!", "Totally shocked!", "Unexpected!", "What the...?!"]),
        "funny": (["🤪", "😜", "😝", "🤣", "😹", "🤡"],
                  ["That's hilarious!", "So funny!", "I'm laughing so hard.", "What a joke!", "That's comedy gold.",
                   "ROFL!", "LOL!", "So amusing!", "What a comedian!", "Cracking up!"]),
        "thinking": (["🤔", "🤨", "🧐", "🙄"],
                     ["I'm thinking about it.", "Let me think...", "Just pondering.", "Considering...", "What do you think?",
                      "Hmm...", "Let me see...", "Trying to figure it out.", "Just wondering...", "Deep in thought."]),
        "celebration": (["🎉", "🎊", "🎈", "🎁", "🍾"],
                        ["Let's celebrate!", "Time for a party!", "Celebrating good times!", "Cheers to that!", "So festive!",
                         "Hip hip hooray!", "Party time!", "So exciting!", "Making memories!", "Feeling celebratory!"]),
        "food": (["😋", "🤤", "🍕", "🍔", "🍟", "🍣", "🍜", "🍝", "🍦", "🍰"],
                 ["This food is delicious!", "I'm so hungry for this.", "Craving some good food.", "Time to eat!", "Yummy!",
                  "So tasty!", "Food coma incoming!", "Delicious!", "My favorite!", "Can't get enough!"]),
        "travel": (["✈️", "🚗", "🚂", "🚌", "🚢", "🚀", "🌍", "🗺️", "🏖️", "⛰️"],
                   ["I love to travel.", "Dreaming of my next trip.", "Exploring new places.", "Adventure time!", "Wish I was there.",
                    "Wanderlust!", "On the road again!", "Soaking up the sun!", "Enjoying the view!", "Making memories abroad!"]),
        "music": (["🎶", "🎵", "🎧", "🎤", "🎸", "🎹"],
                  ["Listening to some great music.", "Enjoying the tunes.", "Love this song!", "Making some music.", "The power of music.",
                   "Soothing melodies.", "Upbeat rhythm!", "Lost in the music!", "Playing my favorite track!", "The sound of happiness!"]),
        "miss": (["😢", "😔", "🥺"],
                 ["I miss you so much.", "Wish you were here.", "Missing you a lot.", "Thinking of you.", "Can't wait to see you again.",
                  "Feeling your absence.", "Longing for you.", "You're always in my thoughts.", "So lonely without you.", "Hoping to see you soon."]),
        "hungry": (["😋", "🤤"],
                   ["I'm so hungry!", "Starving!", "Time to eat.", "I need some food.", "So hungry right now.",
                    "My stomach is rumbling!", "Food, please!", "Can't wait to eat!", "Ready for a feast!", "Need to grab a bite!"]),
    }

    data = []
    for category, (emojis, texts) in emoji_categories.items():
        for emoji in emojis:
            for text in texts:
                data.append({"text": text, "emoji": emoji})

    df = pd.DataFrame(random.sample(data, min(num_samples, len(data))))
    return df

# Generate the dataset
emoji_df = generate_emoji_dataset(num_samples=5000)
print(f"Generated dataset with {len(emoji_df)} samples and {emoji_df['emoji'].nunique()} unique emojis.")
print(emoji_df['emoji'].value_counts().head(20))
print(emoji_df.head())

Generated dataset with 860 samples and 79 unique emojis.
emoji
🥰     20
🥺     20
😔     20
🤤     20
😋     20
😍     20
😢     20
❤️    10
💘     10
😝     10
😭     10
🤨     10
😮     10
🎧     10
😞     10
😂     10
🗺️    10
🤯     10
🌍     10
💖     10
Name: count, dtype: int64
                      text emoji
0                 So glad!     🥰
1  The sound of happiness!     🎧
2               So lonely.     😞
3              Unexpected!     😮
4      That's comedy gold.     😝


In [3]:
from sklearn.feature_extraction.text import TfidfVectorizer
import re
import string

def clean_text(text):
    text = text.lower()
    text = re.sub(r'[%s]' % re.escape(string.punctuation), '', text)
    return text

emoji_df['cleaned_text'] = emoji_df['text'].apply(clean_text)
print("\nCleaned Text Examples:")
print(emoji_df[['text', 'cleaned_text']].head())

vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(emoji_df['cleaned_text'])
y = emoji_df['emoji']
print("\nShape of Feature Matrix (X):", X.shape)
print("Shape of Target Variable (y):", y.shape)


Cleaned Text Examples:
                      text            cleaned_text
0                 So glad!                 so glad
1  The sound of happiness!  the sound of happiness
2               So lonely.               so lonely
3              Unexpected!              unexpected
4      That's comedy gold.       thats comedy gold

Shape of Feature Matrix (X): (860, 197)
Shape of Target Variable (y): (860,)


In [4]:
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = MultinomialNB()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
print(f"\nAccuracy of the model: {accuracy:.2f}")
print("\nClassification Report:")
print(classification_report(y_test, y_pred, zero_division=0))


Accuracy of the model: 0.01

Classification Report:
              precision    recall  f1-score   support

          ⛰️       0.00      0.00      0.00         1
          ✈️       0.00      0.00      0.00         2
          ❤️       0.00      0.00      0.00         1
           🌍       0.00      0.00      0.00         2
           🍔       0.00      0.00      0.00         1
           🍕       0.00      0.00      0.00         1
           🍜       0.00      0.00      0.00         1
           🍝       0.00      0.00      0.00         3
           🍟       0.00      0.00      0.00         2
           🍣       0.00      0.00      0.00         4
           🍦       0.00      0.00      0.00         2
           🍰       0.00      0.00      0.00         1
           🍾       0.00      0.00      0.00         1
           🎁       0.00      0.00      0.00         4
           🎈       0.00      0.00      0.00         1
           🎉       0.00      0.00      0.00         2
           🎊       0.00     

In [5]:
def predict_emoji(text, model, vectorizer):
    cleaned_text = clean_text(text)
    text_vectorized = vectorizer.transform([cleaned_text])
    predicted_emoji = model.predict(text_vectorized)[0]
    return predicted_emoji

print("\nInteractive Emoji Prediction:")
while True:
    user_input = input("Enter text (or type 'exit' to quit): ")
    if user_input.lower() == 'exit':
        break
    if user_input:
        predicted_emoji = predict_emoji(user_input, model, vectorizer)
        print(f"Predicted emoji: {predicted_emoji}")


Interactive Emoji Prediction:
Enter text (or type 'exit' to quit): i love you 
Predicted emoji: 😍
Enter text (or type 'exit' to quit): i miss you 
Predicted emoji: 🥺
Enter text (or type 'exit' to quit): i am hungry 
Predicted emoji: 🤤
Enter text (or type 'exit' to quit): i want to meet you 
Predicted emoji: 🥺
Enter text (or type 'exit' to quit): i am studying 
Predicted emoji: 😔


KeyboardInterrupt: Interrupted by user

In [6]:
import joblib

model_filename = 'emoji_predictor_model.joblib'
vectorizer_filename = 'tfidf_vectorizer.joblib'
joblib.dump(model, model_filename)
joblib.dump(vectorizer, vectorizer_filename)
print(f"\nModel saved as {model_filename}")
print(f"Vectorizer saved as {vectorizer_filename}")

# To load the model and vectorizer later:
# loaded_model = joblib.load(model_filename)
# loaded_vectorizer = joblib.load(vectorizer_filename)


Model saved as emoji_predictor_model.joblib
Vectorizer saved as tfidf_vectorizer.joblib
