# Emoji classification
## Imports
### Libraries

In [None]:
import os
import numpy as np
import pandas as pd 

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory
# Preview the dataset layout: show at most five files from every folder.
for dirname, _, filenames in os.walk('/kaggle/input'):
    shown = [os.path.join(dirname, filename) for filename in filenames[:5]]
    if shown:  # skip folders without files so no blank line is printed
        print("\n".join(shown))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
from pathlib import Path
from PIL import Image
from skimage import io, color
import matplotlib.pyplot as plt
from tensorflow.keras import layers, models

### Functions

In [None]:
def load_single_image(path):
    """
    Load one image file and return it as a normalized RGB array.

    The file is decoded with Pillow and forced to RGBA, then the alpha
    channel is blended away with ``skimage.color.rgba2rgb`` (using its
    default background). The result is quantized to the 256 8-bit levels
    and rescaled to [0, 1].

    Parameters
    ----------
    path : str or path-like
        Location of the image file, passed to ``PIL.Image.open``.

    Returns
    -------
    numpy.ndarray
        ``float32`` array of shape (height, width, 3) with values in [0, 1].
    """
    # Pillow opens files lazily; the context manager guarantees the file
    # handle is released once the pixels have been copied into the array,
    # including for formats Pillow would otherwise keep open.
    with Image.open(path) as pil_img:
        rgba = np.array(pil_img.convert("RGBA"))

    # After convert("RGBA") the array always has 4 channels, so the alpha
    # blend applies to every image (grayscale inputs included).
    rgb = color.rgba2rgb(rgba)

    # uint8 conversion: round to the nearest level instead of truncating, so
    # a product that lands just below an integer (e.g. 254.999...) is not
    # pushed down to the previous level.
    img = np.rint(rgb * 255).astype(np.uint8)

    # here you could add additional preprocessing

    return img.astype("float32") / 255.0


def imageLoader(files, labels, batch_size):
    """
    Create an endless generator that yields the data batch by batch.

    Each pass walks ``files`` in order, in slices of ``batch_size`` (the last
    slice of a pass may be shorter), then starts again from the first file.

    Parameters
    ----------
    files : sequence of str or path-like
        Image paths; must support ``len`` and slicing.
    labels : mapping
        Maps an image id (the file name without its extension) to its label.
    batch_size : int
        Maximum number of images per batch; must be at least 1.

    Yields
    ------
    tuple of numpy.ndarray
        ``(X, Y)``: the stacked images returned by ``load_single_image`` and
        the array of matching labels.

    Raises
    ------
    ValueError
        If ``files`` is empty or ``batch_size`` is smaller than 1. Because
        this is a generator, the check runs on the first ``next()`` call.
    KeyError
        If an image id is missing from ``labels``.
    """
    n_files = len(files)
    if n_files == 0:
        # Without this guard the outer loop would spin forever and never yield.
        raise ValueError("files must contain at least one path")
    if batch_size < 1:
        # A zero or negative step can never advance through the file list.
        raise ValueError("batch_size must be at least 1")

    while True:
        for batch_start in range(0, n_files, batch_size):
            # Slicing clamps at the end of the list, so the last batch of a
            # pass is simply shorter.
            batch_files = files[batch_start:batch_start + batch_size]

            X_batch = []
            y_batch = []

            for f in batch_files:
                img_id = Path(f).stem  # file name without extension
                X_batch.append(load_single_image(f))
                y_batch.append(labels[img_id])

            # np.stack needs every image in the batch to have the same shape.
            yield np.stack(X_batch), np.array(y_batch)
# Source for the batch-generator pattern: https://stackoverflow.com/questions/47200146/keras-load-images-batch-wise-for-large-dataset

### Dataset

In [None]:
# Root folder of the competition data on Kaggle; train/, test/ and train_labels.csv are read from it.
PATH = "/kaggle/input/2-computer-vision-2025-b-sc-aidams-final-proj/"

In [None]:
# Sorted list of every regular file in the training folder, as plain strings.
train_dir = Path(PATH, "train/")
train_files = sorted(str(entry) for entry in train_dir.iterdir() if entry.is_file())

In [None]:
# Sorted list of every regular file in the test folder, plus the matching ids
# (file names without extension), kept in the same order as the files.
test_dir = Path(PATH, "test/")
test_files = sorted(str(entry) for entry in test_dir.iterdir() if entry.is_file())
test_ids = [Path(test_file).stem for test_file in test_files]

In [None]:
# Training labels: one row per image with its "Id" and "Label".
y_train_df = pd.read_csv(f"{PATH}train_labels.csv")

# Lookup table from image id to label.
# NOTE(review): imageLoader looks labels up by file stem (a str) - confirm the
# "Id" column is not parsed as a number by read_csv.
y_train_dct = {
    image_id: label
    for image_id, label in zip(y_train_df["Id"], y_train_df["Label"])
}

In [None]:
# Endless batch generator over the training images, 32 images per batch.
gen = imageLoader(files=train_files, labels=y_train_dct, batch_size=32)

## Training

In [None]:
# Distinct class labels found in the training CSV, in order of first appearance.
unique_labels = y_train_df["Label"].unique().tolist()

In [None]:
def predict_with_my_amazing_model(test_files):
    """
    Placeholder model: draw a random label for every test file.

    Labels are sampled with ``np.random.choice`` from the module-level
    ``unique_labels`` list, so the output changes from run to run unless the
    NumPy random seed is fixed beforehand.

    Parameters
    ----------
    test_files : sequence
        Test image paths; only the length is used.

    Returns
    -------
    numpy.ndarray
        One predicted label per entry of ``test_files``, in the same order.
    """
    return np.random.choice(unique_labels, size=len(test_files))

# Predict a label for every test file with the placeholder model.
y_test_pred = predict_with_my_amazing_model(test_files)
# Bare expression: the notebook displays the predictions as the cell output.
y_test_pred

In [None]:
# Assemble the submission table: one row per test image with its id and predicted label.
test_ids_sr = pd.Series(test_ids, name="Id")
y_test_pred_sr = pd.Series(y_test_pred, name="Label")
submission_df = pd.DataFrame({"Id": test_ids_sr, "Label": y_test_pred_sr})
submission_df.head()

In [None]:
# Write the submission file to the working directory, without the DataFrame index column.
submission_df.to_csv("submission.csv", index=False)