# Installation: Libraries and Modules

In [None]:
# Download Japanese Word Vector
!wget -c "https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.ja.300.vec.gz"
!gzip -d cc.ja.300.vec.gz

# Install FastText
!git clone https://github.com/facebookresearch/fastText.git
!cd fastText && pip install .

# Install MeCab
!apt-get install mecab libmecab-dev mecab-ipadic-utf8
!pip install mecab-python3
!pip install unidic-lite

# Download the Dataset
!wget https://github.com/SysWOW64UwU/Emoji_Predict/raw/main/SomethingSomething.zip
!unzip -P "慰獳潷摲" SomethingSomething.zip
!rm *.zip

# Training and Saving the Model

In [None]:
import json
import numpy as np
import tensorflow as tf
import MeCab
import gensim

# Load the dataset.
# The encoding is passed explicitly: JSON text is UTF-8, while open() would
# otherwise use the platform's locale encoding and can fail on non-ASCII content.
with open('dataset.json', 'r', encoding='utf-8') as f:
    dataset = json.load(f)

# Define the maximum length of the input text (used as the fixed sequence length).
# NOTE(review): this is measured in characters, whereas text_to_sequence pads and
# truncates by MeCab token count. The character count should be an upper bound on
# the token count, so nothing is cut off, but sequences carry extra zero padding -
# consider measuring tokens instead.
MAX_LENGTH = max(len(entry['text']) for entry in dataset['data'])

# Define the path to the pre-trained Japanese word embeddings
EMBEDDINGS_PATH = 'cc.ja.300.vec'

# Load the pre-trained Japanese word embeddings (text word2vec format)
embeddings = gensim.models.KeyedVectors.load_word2vec_format(EMBEDDINGS_PATH, binary=False, encoding='utf-8')

# Define the vocabulary size
VOCAB_SIZE = len(embeddings.key_to_index)

# Define the dimensionality of the word embeddings.
# Read from the loaded vectors so it always matches the file actually used
# (300 for cc.ja.300.vec).
EMBEDDING_DIM = embeddings.vector_size

# Define the number of unique emojis in the dataset (one output class per emoji)
NUM_EMOJIS = len({entry['emoji'] for entry in dataset['data']})

# Define the MeCab tokenizer; '-Owakati' selects space-separated output,
# which tokenize() splits on whitespace
tokenizer = MeCab.Tagger('-Owakati')

# Tokenize the input text
def tokenize(text):
    """Split `text` into tokens with the module-level MeCab tagger.

    The tagger's parse() result is split on whitespace, so the return value
    is a list of token strings (empty when parse() yields only whitespace).
    """
    wakati_line = tokenizer.parse(text)
    return wakati_line.split()

# Pad or truncate the input text to the maximum length
def pad_or_truncate(tokens):
    """Return a new list of exactly MAX_LENGTH tokens.

    Longer inputs keep their first MAX_LENGTH tokens; shorter inputs are
    right-padded with empty strings. The input list is not modified.
    """
    shortfall = MAX_LENGTH - len(tokens)
    if shortfall < 0:
        return tokens[:MAX_LENGTH]
    return tokens + [''] * shortfall

# Convert the input text to a sequence of word embeddings
def text_to_sequence(text):
    """Encode `text` as a (MAX_LENGTH, EMBEDDING_DIM) array of word vectors.

    The text is tokenized, then padded or truncated to MAX_LENGTH tokens.
    Row i holds the embedding of token i; tokens missing from the embedding
    vocabulary (including padding, if '' is not a vocabulary entry) stay as
    all-zero rows.
    """
    padded = pad_or_truncate(tokenize(text))
    matrix = np.zeros((MAX_LENGTH, EMBEDDING_DIM))
    vocabulary = embeddings.key_to_index
    for row, word in enumerate(padded):
        if word not in vocabulary:
            continue  # unknown token: leave the zero row in place
        matrix[row] = embeddings.get_vector(word)
    return matrix

# Convert the input emoji to a one-hot vector
def emoji_to_one_hot(emoji):
    """Encode `emoji` as a one-hot vector of length NUM_EMOJIS.

    The position of the 1 is the emoji's index when iterating over the set of
    distinct dataset['data'] emojis. An emoji that is not in the dataset
    yields an all-zero vector.

    NOTE(review): the index depends on set iteration order. predict_emoji
    rebuilds the same set to map indices back to emojis, so the two agree
    within one interpreter session, but string hashing may differ between
    sessions - a saved model should not be paired with labels rebuilt in
    another process. TODO: consider a sorted label list shared by both.
    """
    labels = set([item['emoji'] for item in dataset['data']])
    encoded = np.zeros(NUM_EMOJIS)
    for slot, label in enumerate(labels):
        if emoji == label:
            encoded[slot] = 1
    return encoded

# Seed NumPy and TensorFlow so weight initialisation and batch shuffling are
# repeatable from run to run (as far as the compute backend allows).
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Build the model: a bidirectional LSTM over the pre-computed word vectors,
# followed by a small dense head with one softmax output per emoji.
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(MAX_LENGTH, EMBEDDING_DIM)),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(NUM_EMOJIS, activation='softmax')
])

# Compile the model (categorical cross-entropy matches the one-hot targets)
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Prepare the training data.
# X_train is stored as float32: Keras computes in float32 by default, so keeping
# the (samples, MAX_LENGTH, EMBEDDING_DIM) array in float64 only doubles its memory.
X_train = np.array([text_to_sequence(entry['text']) for entry in dataset['data']], dtype=np.float32)
y_train = np.array([emoji_to_one_hot(entry['emoji']) for entry in dataset['data']])

# Train the model
model.fit(X_train, y_train, epochs=50, batch_size=32)

# Save the model (HDF5 file, reloaded by the prediction cell)
model.save('emoji_translator.h5')

# Load the Saved Model and Define a Function for Predicting Emojis

In [None]:
# Same file name that the training cell passes to model.save().
MODEL_PATH = 'emoji_translator.h5'
# Reload from disk. This rebinds `model`, so predict_emoji below uses the saved
# artifact rather than the in-memory object left over from training.
model = tf.keras.models.load_model(MODEL_PATH)

# Define a function that takes an input text and returns the predicted emoji
def predict_emoji(text):
    """Return the emoji the loaded model scores highest for `text`.

    The text is encoded with text_to_sequence, wrapped into a batch of one,
    and the arg-max of the model output is mapped back to an emoji through the
    list of distinct dataset['data'] emojis.

    NOTE(review): the label list is rebuilt from a set here, mirroring
    emoji_to_one_hot. The index-to-emoji mapping is therefore only reliable
    in the same interpreter session that produced the training targets.
    """
    batch = np.array([text_to_sequence(text)])
    scores = model.predict(batch)
    labels = list(set([item['emoji'] for item in dataset['data']]))
    return labels[np.argmax(scores)]

# Test the Model

In [None]:
# Japanese sample sentences passed to predict_emoji one at a time.
# Index-aligned with expected_emojis and test_translations: the report loop at
# the end of this cell zips the three lists together, so entries must stay in
# the same order and the lists the same length.
test_texts = [
    '今日はいい天気ですね',
    '明日は試験があります',
    '昨日はとても悲しい出来事がありました',
    '今晩はパーティーに行きます',
    '最近は忙しくてストレスがたまっています',
    '旅行に行きたいです',
    '今日はとても疲れています',
    '友達と一緒に映画を見に行きます',
    '新しいレストランに行ってみたいです',
    '今日はとても暑いです',
    '明日から新しい仕事が始まります',
    '友達と一緒に運動をしました',
    '今日はとても幸せな気分です',
    '新しい趣味を始めました',
    '最近は食べ過ぎて太ってしまいました',
    '昨日はとても楽しいイベントに参加しました',
    '今週末は家族と旅行に行く予定です',
    '明日は大事なプレゼンテーションがあります',
    '友達と一緒にカラオケに行きました',
    '今日はとても忙しい日でした',
    '明日から旅行に行く予定です',
    '最近は寒くてコートが手放せません',
    '新しい本を読み始めました',
    '今日はとても快晴で気持ちがいいです',
    '友達と一緒に料理を作りました',
    '今日はとても暇で何をしようか迷っています',
    '新しい映画が公開されるのを楽しみにしています',
    '最近は運動不足で体が重いです',
    '昨日はとても眠かったので早く寝ました',
    '今日は家でゆっくり過ごすつもりです',
    '新しい音楽を聴き始めました',
    '明日の天気予報は雨が降るそうです',
    '今週末は友達と遊園地に行く予定です',
    '最近は忙しくて友達と会う時間がなかなか取れません',
    '昨日はとても暑くて外に出るのが辛かったです',
    '今日は仕事が休みなのでゆっくり寝坊しました',
    '新しいレシピに挑戦してみたいです',
    '最近はスマホを使いすぎて目が疲れてきました',
    '明日は友達の誕生日なのでプレゼントを買いに行きます',
    '今日はとても悲しいニュースがありました',
    '今日は家族と一緒にピクニックに行く予定です',
    '最近は仕事が忙しくてストレスがたまっています',
    '新しいスポーツを始めてみたいです',
    '昨日はとても楽しかったです',
    '今日はとても寒いのでおうちでまったりしています',
    '新しい映画を見るのが楽しみです',
    '今日は何か特別なことをしたい気分です',
    '最近は食事のバランスが悪くて体調がすぐれません',
    '昨日は友達とおしゃべりするのに夢中になりました',
    '今晩は家でゆっくりして過ごすつもりです',
    '新しいアプリをダウンロードしました',
    '明日から新しいダイエットを始めます',
    '最近は映画をよく見ます',
    '今週末は家族で温泉旅行に行く予定です',
    '昨日は雨が降っていたので家で過ごしました',
    '今日は何か美味しいものを食べたい気分です',
    '最近は本を読む時間がなかなか取れません',
    '明日はとても大切なプレゼンテーションがあります',
    '今日はとても眠いのでコーヒーを飲んで目を覚まします',
    '新しい趣味を見つけたいです',
]

# Hand-picked reference emoji for each sentence in test_texts (same index).
# Only printed next to the prediction by the report loop; nothing in this cell
# scores the match automatically.
# NOTE(review): a few entries combine two emoji in one string. predict_emoji
# returns a single label taken from dataset['data'], so those rows can match
# only if the dataset contains the identical combined string - confirm.
expected_emojis = [
    '☀️',
    '📚',
    '😢',
    '🎉',
    '😫',
    '✈️',
    '😴',
    '🎥',
    '🍴',
    '🔥',
    '💼',
    '🏋️‍♂️',
    '😊',
    '🎨',
    '🐷',
    '🎊',
    '🚗',
    '💼',
    '🎤',
    '💻',
    '✈️',
    '🧥',
    '📖',
    '☀️',
    '👩‍🍳',
    '🤔',
    '🎬',
    '🏋️‍♂️',
    '💤',
    '🏠',
    '🎶',
    '☔',
    '🎢',
    '😔',
    '🔥',
    '😴',
    '🍴',
    '😪',
    '🎁',
    '😢',
    '🧺',
    '😫',
    '🏀',
    '😄',
    '🏠❄️',
    '🍿',
    '🎉',
    '🍎',
    '💬',
    '🏠',
    '📱',
    '🥗',
    '🎥',
    '♨️',
    '🌧️',
    '🍽️',
    '📚',
    '👔💼',
    '😴☕️',
    '🎨',
]

# English glosses of the sentences in test_texts, aligned by index.
# They are printed in the report so the output is readable without Japanese.
test_translations = [
    "Today is good weather, isn't it?",
    "There is an exam tomorrow.",
    "There was a very sad incident yesterday.",
    "I'm going to a party tonight.",
    "I've been busy lately and stressed out.",
    "I want to go on a trip.",
    "I'm very tired today.",
    "I'm going to see a movie with my friend.",
    "I want to go to a new restaurant.",
    "It's very hot today.",
    "I'm starting a new job from tomorrow.",
    "I exercised with my friend.",
    "I'm very happy today.",
    "I started a new hobby.",
    "I've been eating too much and gained weight.",
    "I participated in a very fun event yesterday.",
    "I'm planning to go on a trip with my family this weekend.",
    "There is an important presentation tomorrow.",
    "I went to karaoke with my friend.",
    "Today was a very busy day.",
    "I am planning to go on a trip from tomorrow.",
    "I've been feeling cold lately and can't let go of my coat.",
    "I started reading a new book.",
    "Today is very clear and feels good.",
    "I cooked with my friend.",
    "I'm very bored today and don't know what to do.",
    "I'm looking forward to the release of a new movie.",
    "I've been inactive lately and my body feels heavy.",
    "I slept early yesterday because I was very sleepy.",
    "I plan to spend a relaxing day at home today.",
    "I started listening to new music.",
    "The weather forecast for tomorrow says it will rain.",
    "I plan to go to an amusement park with my friends this weekend.",
    "I've been very busy lately and haven't had time to meet my friends.",
    "Yesterday was very hot and it was hard to go outside.",
    "I slept in today because I had a day off from work.",
    "I want to try a new recipe.",
    "I've been using my smartphone too much lately and my eyes are getting tired.",
    "I will go to buy a present for my friend's birthday tomorrow.",
    "There was very sad news today.",
    "I am planning to go on a picnic with my family today.",
    "I have been busy with work lately and have been accumulating stress.",
    "I want to try starting a new sport.",
    "Yesterday was a lot of fun.",
    "It is very cold today, so I am relaxing at home.",
    "I am looking forward to watching a new movie.",
    "I feel like doing something special today.",
    "My diet has been unbalanced lately and my health is not good.",
    "I got carried away talking with my friend yesterday.",
    "I plan to spend a relaxing evening at home tonight.",
    "I downloaded a new app.",
    "I will start a new diet from tomorrow.",
    "I have been watching a lot of movies lately.",
    "I plan to go on a hot spring trip with my family this weekend.",
    "It was raining yesterday, so I spent time at home.",
    "I feel like eating something delicious today.",
    "I haven't had time to read books lately.",
    "I have a very important presentation tomorrow.",
    "I am very sleepy today, so I will drink coffee to wake up.",
    "I want to find a new hobby.",
]

# Predict one emoji per sample sentence, in test_texts order
predicted_emojis = [predict_emoji(sentence) for sentence in test_texts]

# Print one row per sample: running number (from 1), English gloss,
# expected emoji, predicted emoji. zip() stops at the shortest list.
for number, (gloss, expected, predicted) in enumerate(
        zip(test_translations, expected_emojis, predicted_emojis), start=1):
    print(number, gloss, expected, predicted)