In [12]:
import os
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import Sequence, to_categorical
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.layers import Dropout, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, Callback, ModelCheckpoint

In [13]:
# --- Load Labels ---
augmented_df = pd.read_csv("D:/SE/Data/screenshots/augmented_data/labels.txt", header=None, names=["filename", "emoji", "unicode"], sep=",\s*", engine='python')
original_df = pd.read_csv("D:/SE/Data/screenshots/screenshots10cropped/labels.txt", header=None, names=["filename", "emoji", "unicode"], sep=",\s*", engine='python')

In [15]:
# Label Encoding
le = LabelEncoder()
augmented_df['encoded_unicode'] = le.fit_transform(augmented_df['unicode'])
original_df['encoded_unicode'] = le.transform(original_df['unicode'])  # Match encodings
num_classes = len(le.classes_)

In [16]:
# --- Custom Dataset Class ---
class EmojiDataset(Sequence):
    def __init__(self, df, img_dir, batch_size=32, img_size=(64, 64), shuffle=True):
        self.df = df
        self.img_dir = img_dir
        self.batch_size = batch_size
        self.img_size = img_size
        self.shuffle = shuffle
        self.indexes = np.arange(len(df))
        self.on_epoch_end()

    def __len__(self):
        return int(np.ceil(len(self.df) / self.batch_size))

    def __getitem__(self, index):
        batch_indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]
        batch_data = self.df.iloc[batch_indexes]

        images = []
        labels = []

        for _, row in batch_data.iterrows():
            img_path = os.path.join(self.img_dir, row['filename'])
            image = load_img(img_path, target_size=self.img_size)
            image = img_to_array(image) / 255.0
            images.append(image)
            labels.append(row['encoded_unicode'])

        return np.array(images), to_categorical(np.array(labels), num_classes=num_classes)

    def on_epoch_end(self):
        if self.shuffle:
            np.random.shuffle(self.indexes)


In [17]:
# --- Model Definition ---
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(64, 64, 3)),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),

    Conv2D(64, (3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),

    Flatten(),
    Dense(128, activation='relu'),
    BatchNormalization(),
    Dropout(0.5),

    Dense(num_classes, activation='softmax')
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# --- Dataset Generators ---
train_generator = EmojiDataset(augmented_df, img_dir='D:/SE/Data/screenshots/augmented_data/', batch_size=32)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [18]:
# --- Manual Testing ---
def test_model_accuracy(model, df, img_dir):
    correct_predictions = 0
    total_predictions = len(df)

    for index, row in df.iterrows():
        img_path = os.path.join(img_dir, row['filename'])
        img = load_img(img_path, target_size=(64, 64))
        img = img_to_array(img) / 255.0
        img = np.expand_dims(img, axis=0)

        predicted_class = np.argmax(model.predict(img, verbose=0), axis=-1)[0]
        actual_class = row['encoded_unicode']

        if predicted_class == actual_class:
            correct_predictions += 1

    accuracy = correct_predictions / total_predictions * 100
    print(f"\nModel Accuracy (on original images): {accuracy:.2f}%")


In [19]:
# --- Accuracy Callback ---
class AccuracyTestingCallback(Callback):
    def __init__(self, df, img_dir, interval=10):
        super().__init__()
        self.df = df
        self.img_dir = img_dir
        self.interval = interval

    def on_epoch_end(self, epoch, logs=None):
        if (epoch + 1) % self.interval == 0:
            print(f"\nTesting accuracy after epoch {epoch + 1}...")
            test_model_accuracy(self.model, self.df, self.img_dir)

In [21]:
from tensorflow.keras.callbacks import Callback
import os

class SaveEveryNEpochs(Callback):
    def __init__(self, save_freq, save_path_template):
        super().__init__()
        self.save_freq = save_freq
        self.save_path_template = save_path_template
        os.makedirs(os.path.dirname(save_path_template), exist_ok=True)

    def on_epoch_end(self, epoch, logs=None):
        if (epoch + 1) % self.save_freq == 0:
            path = self.save_path_template.format(epoch=epoch + 1)
            self.model.save(path)
            print(f'\n✅ Saved model at: {path}')

# --- Callbacks ---
accuracy_callback = AccuracyTestingCallback(original_df, 'D:/SE/Data/screenshots/screenshots10cropped/', interval=10)
early_stop = EarlyStopping(monitor='loss', patience=5, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='loss', factor=0.5, patience=3, min_lr=1e-6)
checkpoint_callback = SaveEveryNEpochs(save_freq=5, save_path_template='model_checkpoints/model_epoch_{epoch:02d}.h5')


In [22]:
# --- Train Model ---
model.fit(
    train_generator,
    epochs=30,
    callbacks=[accuracy_callback, early_stop, reduce_lr, checkpoint_callback]
)

  self._warn_if_super_not_called()


Epoch 1/30
[1m1238/1238[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m466s[0m 374ms/step - accuracy: 0.0392 - loss: 6.7501 - learning_rate: 0.0010
Epoch 2/30
[1m1238/1238[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m199s[0m 160ms/step - accuracy: 0.4448 - loss: 2.6172 - learning_rate: 0.0010
Epoch 3/30
[1m1238/1238[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m202s[0m 163ms/step - accuracy: 0.6733 - loss: 1.2728 - learning_rate: 0.0010
Epoch 4/30
[1m1238/1238[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m203s[0m 164ms/step - accuracy: 0.7538 - loss: 0.8928 - learning_rate: 0.0010
Epoch 5/30
[1m1238/1238[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 162ms/step - accuracy: 0.7838 - loss: 0.7458




✅ Saved model at: model_checkpoints/model_epoch_05.h5
[1m1238/1238[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m201s[0m 162ms/step - accuracy: 0.7838 - loss: 0.7458 - learning_rate: 0.0010
Epoch 6/30
[1m1238/1238[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m199s[0m 161ms/step - accuracy: 0.8170 - loss: 0.6195 - learning_rate: 0.0010
Epoch 7/30
[1m1238/1238[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m200s[0m 161ms/step - accuracy: 0.8330 - loss: 0.5556 - learning_rate: 0.0010
Epoch 8/30
[1m1238/1238[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m201s[0m 162ms/step - accuracy: 0.8502 - loss: 0.4925 - learning_rate: 0.0010
Epoch 9/30
[1m1238/1238[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m201s[0m 162ms/step - accuracy: 0.8607 - loss: 0.4530 - learning_rate: 0.0010
Epoch 10/30
[1m1238/1238[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 163ms/step - accuracy: 0.8712 - loss: 0.4153
Testing accuracy after epoch 10...





Model Accuracy (on original images): 94.22%

✅ Saved model at: model_checkpoints/model_epoch_10.h5
[1m1238/1238[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m374s[0m 302ms/step - accuracy: 0.8712 - loss: 0.4153 - learning_rate: 0.0010
Epoch 11/30
[1m1238/1238[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m198s[0m 160ms/step - accuracy: 0.8803 - loss: 0.3883 - learning_rate: 0.0010
Epoch 12/30
[1m1238/1238[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m200s[0m 161ms/step - accuracy: 0.8860 - loss: 0.3716 - learning_rate: 0.0010
Epoch 13/30
[1m1238/1238[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m200s[0m 162ms/step - accuracy: 0.8914 - loss: 0.3497 - learning_rate: 0.0010
Epoch 14/30
[1m1238/1238[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m200s[0m 161ms/step - accuracy: 0.8986 - loss: 0.3286 - learning_rate: 0.0010
Epoch 15/30
[1m1238/1238[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 163ms/step - accuracy: 0.9025 - loss: 0.3113




✅ Saved model at: model_checkpoints/model_epoch_15.h5
[1m1238/1238[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m202s[0m 164ms/step - accuracy: 0.9025 - loss: 0.3113 - learning_rate: 0.0010
Epoch 16/30
[1m1238/1238[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m198s[0m 160ms/step - accuracy: 0.9058 - loss: 0.3005 - learning_rate: 0.0010
Epoch 17/30
[1m1238/1238[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m197s[0m 159ms/step - accuracy: 0.9105 - loss: 0.2868 - learning_rate: 0.0010
Epoch 18/30
[1m1238/1238[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m198s[0m 160ms/step - accuracy: 0.9169 - loss: 0.2673 - learning_rate: 0.0010
Epoch 19/30
[1m1238/1238[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m200s[0m 161ms/step - accuracy: 0.9192 - loss: 0.2639 - learning_rate: 0.0010
Epoch 20/30
[1m1238/1238[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 161ms/step - accuracy: 0.9236 - loss: 0.2480
Testing accuracy after epoch 20...





Model Accuracy (on original images): 95.12%

✅ Saved model at: model_checkpoints/model_epoch_20.h5
[1m1238/1238[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m348s[0m 281ms/step - accuracy: 0.9236 - loss: 0.2480 - learning_rate: 0.0010
Epoch 21/30
[1m1238/1238[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m196s[0m 159ms/step - accuracy: 0.9278 - loss: 0.2355 - learning_rate: 0.0010
Epoch 22/30
[1m1238/1238[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m199s[0m 161ms/step - accuracy: 0.9277 - loss: 0.2311 - learning_rate: 0.0010
Epoch 23/30
[1m1238/1238[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m199s[0m 160ms/step - accuracy: 0.9271 - loss: 0.2379 - learning_rate: 0.0010
Epoch 24/30
[1m1238/1238[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m200s[0m 161ms/step - accuracy: 0.9363 - loss: 0.2090 - learning_rate: 0.0010
Epoch 25/30
[1m1238/1238[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 161ms/step - accuracy: 0.9316 - loss: 0.2178




✅ Saved model at: model_checkpoints/model_epoch_25.h5
[1m1238/1238[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m199s[0m 161ms/step - accuracy: 0.9316 - loss: 0.2178 - learning_rate: 0.0010
Epoch 26/30
[1m1238/1238[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m197s[0m 159ms/step - accuracy: 0.9341 - loss: 0.2054 - learning_rate: 0.0010
Epoch 27/30
[1m1238/1238[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m197s[0m 159ms/step - accuracy: 0.9406 - loss: 0.1852 - learning_rate: 0.0010
Epoch 28/30
[1m1238/1238[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m199s[0m 160ms/step - accuracy: 0.9412 - loss: 0.1953 - learning_rate: 0.0010
Epoch 29/30
[1m1238/1238[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m200s[0m 162ms/step - accuracy: 0.9404 - loss: 0.1893 - learning_rate: 0.0010
Epoch 30/30
[1m1238/1238[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 161ms/step - accuracy: 0.9419 - loss: 0.1810
Testing accuracy after epoch 30...





Model Accuracy (on original images): 95.76%

✅ Saved model at: model_checkpoints/model_epoch_30.h5
[1m1238/1238[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m347s[0m 280ms/step - accuracy: 0.9419 - loss: 0.1810 - learning_rate: 0.0010


<keras.src.callbacks.history.History at 0x2b7957a8510>