# Exercise 1: Introduction to GenAI with Microsoft Fluent UI Emojis

## Part 1: Preparing the Microsoft Fluent UI Emojis Dataset

In this exercise, we will use the Microsoft Fluent UI Emojis dataset to train a classification model. This dataset contains modern emoji icons in various styles.

In [1]:
# Necessary imports
import warnings
import os
import math
import random
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
import requests
import zipfile
import io
from PIL import Image
from torchvision import transforms, datasets
from torch.utils.data import DataLoader, Dataset, random_split

# Silence library warnings so the notebook output stays readable.
warnings.filterwarnings("ignore")

# Seed every random number generator used in this notebook (Python, NumPy,
# PyTorch CPU and all CUDA devices) with the same value.
SEED = 42
for seed_rng in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed_all):
    seed_rng(SEED)

# cuDNN settings that go with the fixed seed: no benchmark mode, deterministic
# mode switched on.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

# Use the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Device:", device)

ModuleNotFoundError: No module named 'numpy'

## Download and Prepare the Microsoft Fluent UI Emojis Dataset

We will download the Microsoft Fluent UI Emojis dataset from GitHub and prepare it for training.

In [None]:
# The dataset lives in a "fluent_emojis" folder inside the current working
# directory; create it up front so the archive can be extracted into it.
working_dir = os.getcwd()
dataset_path = os.path.join(working_dir, 'fluent_emojis')
os.makedirs(dataset_path, exist_ok=True)
print(f"Dataset will be stored in: {dataset_path}")

# Download Microsoft Fluent UI Emojis dataset
def download_fluent_emojis():
    """Download the Fluent UI emoji repository archive and unpack it.

    The ZIP archive of the repository's ``main`` branch is fetched from GitHub
    and extracted into the module-level ``dataset_path`` directory.

    Returns:
        The path of the extracted ``fluentui-emoji-main`` folder, or ``None``
        when the download or the extraction failed (an error is printed).
    """
    # URL to Microsoft Fluent UI Emojis GitHub repository
    url = 'https://github.com/microsoft/fluentui-emoji/archive/refs/heads/main.zip'

    print("Downloading Microsoft Fluent UI Emojis dataset...")
    try:
        # (connect, read) timeouts: without them a stalled connection would
        # block the notebook indefinitely.
        response = requests.get(url, timeout=(10, 300))
    except requests.RequestException as exc:
        # Network failures follow the same "return None" contract as a bad
        # HTTP status instead of aborting the cell with a traceback.
        print(f"Error downloading: {exc}")
        return None

    if response.status_code != 200:
        print(f"Error downloading: {response.status_code}")
        return None

    print("Download complete. Extracting files...")
    try:
        # The context manager closes the archive even if extraction fails.
        with zipfile.ZipFile(io.BytesIO(response.content)) as archive:
            archive.extractall(dataset_path)
    except zipfile.BadZipFile as exc:
        print(f"Error extracting: {exc}")
        return None
    print("Extraction complete.")

    # NOTE(review): the upstream repository appears to keep the per-emoji
    # folders under an "assets" sub-directory - confirm that callers really
    # want the repository root returned here.
    return os.path.join(dataset_path, 'fluentui-emoji-main')

# Download and unpack the dataset; emoji_path is None if the download failed
emoji_path = download_fluent_emojis()

## Explore the Dataset

Let's explore the structure of the dataset and look at some examples.

In [None]:
def explore_dataset(emoji_path):
    """List the category folders under *emoji_path* and preview a few images.

    Every non-hidden sub-directory of *emoji_path* counts as a category. For
    up to five categories, the first PNG found in each of the first two style
    sub-directories is drawn in a matplotlib grid.

    Args:
        emoji_path: Root directory of the extracted dataset, or ``None``.

    Returns:
        The alphabetically sorted list of category folder names, or ``None``
        when *emoji_path* is ``None`` or does not exist.
    """
    if emoji_path is None or not os.path.exists(emoji_path):
        print("Dataset is not available.")
        return

    # os.listdir returns entries in arbitrary order; sorting keeps the category
    # list (and any class indices derived from it) identical on every run.
    categories = sorted(
        d for d in os.listdir(emoji_path)
        if os.path.isdir(os.path.join(emoji_path, d)) and not d.startswith('.')
    )
    print(f"Number of emoji categories: {len(categories)}")
    print(f"Examples of categories: {categories[:10]}")

    if not categories:
        # Nothing to preview, so do not open an empty figure.
        return categories

    # Show some examples of emojis
    plt.figure(figsize=(15, 10))
    sample_count = min(5, len(categories))
    for i, category in enumerate(categories[:sample_count]):
        category_path = os.path.join(emoji_path, category)
        style_dirs = sorted(
            d for d in os.listdir(category_path)
            if os.path.isdir(os.path.join(category_path, d))
        )

        for j, style in enumerate(style_dirs[:2]):  # Show 2 styles per category
            style_path = os.path.join(category_path, style)
            png_files = sorted(f for f in os.listdir(style_path) if f.endswith('.png'))

            if png_files:
                img_path = os.path.join(style_path, png_files[0])
                plt.subplot(sample_count, 2, i * 2 + j + 1)
                # Image.open keeps the file open; the with-block releases the
                # handle once the pixels have been handed to matplotlib.
                with Image.open(img_path) as img:
                    plt.imshow(img)
                plt.title(f"{category} - {style}")
                plt.axis('off')

    plt.tight_layout()
    plt.show()

    return categories

# Explore the dataset; categories is None when the dataset is not available
categories = explore_dataset(emoji_path)

## Prepare the Dataset for Training

Now we'll prepare the dataset for training by creating a custom PyTorch Dataset class.

In [None]:
class FluentEmojiDataset(Dataset):
    """Image-classification dataset built from a category/style/PNG folder tree.

    Each entry of *categories* is a folder below *emoji_path* and becomes one
    class. The PNG files found in that folder's style sub-directories are the
    samples of the class.

    Args:
        emoji_path: Root directory that contains the category folders.
        categories: Category folder names. ``None`` or an empty sequence
            yields an empty dataset.
        transform: Optional callable applied to every loaded RGB image.
        max_per_category: Maximum number of images collected per category.
        max_categories: Only the first ``max_categories`` entries of
            *categories* are used; ``None`` uses all of them.
    """

    def __init__(self, emoji_path, categories=None, transform=None, max_per_category=50,
                 max_categories=20):
        self.transform = transform
        self.samples = []       # list of (image path, class index) pairs
        self.class_to_idx = {}  # category name -> class index

        if not categories:
            return

        # Limit to a manageable number of categories for training
        selected_categories = categories[:max_categories]

        for idx, category in enumerate(selected_categories):
            self.class_to_idx[category] = idx
            category_path = os.path.join(emoji_path, category)

            count = 0
            for style_path in self._style_dirs(category_path):
                for png_file in self._png_files(style_path):
                    if count >= max_per_category:
                        break
                    self.samples.append((os.path.join(style_path, png_file), idx))
                    count += 1

                if count >= max_per_category:
                    break

        print(f"Total number of images: {len(self.samples)}")
        print(f"Number of classes: {len(self.class_to_idx)}")

    @staticmethod
    def _style_dirs(category_path):
        """Return the sub-directory paths of *category_path* in sorted order."""
        # os.listdir order is arbitrary; sorting makes the selection of samples
        # under max_per_category reproducible across runs and machines.
        return [
            os.path.join(category_path, name)
            for name in sorted(os.listdir(category_path))
            if os.path.isdir(os.path.join(category_path, name))
        ]

    @staticmethod
    def _png_files(style_path):
        """Return the names of the ``.png`` files in *style_path*, sorted."""
        return sorted(name for name in os.listdir(style_path) if name.endswith('.png'))

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for sample *idx*, with the transform applied."""
        img_path, label = self.samples[idx]
        # convert() returns a new, fully loaded image, so the source file can
        # be closed right away instead of leaking one handle per sample.
        with Image.open(img_path) as source:
            image = source.convert('RGB')

        if self.transform:
            image = self.transform(image)

        return image, label

# Preprocessing applied to every image: resize to 64x64, convert to a tensor,
# then normalise each channel (the mean/std values look like the commonly used
# ImageNet statistics - confirm they suit this dataset).
preprocessing_steps = [
    transforms.Resize((64, 64)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
]
transform = transforms.Compose(preprocessing_steps)

# Build the dataset from the categories found during exploration
dataset = FluentEmojiDataset(emoji_path, categories=categories, transform=transform)

# 80% of the samples go to training, the remainder to validation
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

# Only the training loader shuffles its samples
batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size)

for split_name, split in (("Training", train_dataset), ("Validation", val_dataset)):
    print(f"{split_name} set size: {len(split)}")

## Define the CNN Model

Now we'll define a simple CNN model to classify the emojis.

In [None]:
class EmojiCNN(nn.Module):
    """Small convolutional classifier for 3-channel images.

    Three ``Conv2d -> ReLU -> MaxPool2d`` stages (32, 64 and 128 channels) are
    followed by a two-layer fully connected head with dropout.

    Args:
        num_classes: Number of output logits.
        input_size: Spatial size of the input images, either a single int for
            square images or a ``(height, width)`` pair. The default of 64
            gives the original ``128 * 8 * 8`` classifier input.

    Raises:
        ValueError: If the input is smaller than 8 pixels in either dimension,
            which would leave no feature map after the three poolings.
    """

    def __init__(self, num_classes, input_size=64):
        super().__init__()

        if isinstance(input_size, int):
            height = width = input_size
        else:
            height, width = input_size

        # Each of the three 2x2, stride-2 poolings halves the spatial size
        # (rounding down), so the final feature map is size // 8 per dimension.
        feature_h, feature_w = height // 8, width // 8
        if feature_h < 1 or feature_w < 1:
            raise ValueError(
                f"input_size {input_size!r} is too small; need at least 8 pixels per dimension"
            )

        self.features = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )

        self.classifier = nn.Sequential(
            nn.Linear(128 * feature_h * feature_w, 512),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(512, num_classes)
        )

    def forward(self, x):
        """Return the class logits for a batch of images."""
        x = self.features(x)
        # Flatten everything except the batch dimension for the linear head.
        x = torch.flatten(x, 1)
        return self.classifier(x)

# One output logit per class indexed by the dataset; the network is then moved
# to the selected device.
num_classes = len(dataset.class_to_idx)
model = EmojiCNN(num_classes)
model = model.to(device)
print(model)

## Train the Model

Now we'll train the model on the dataset.

In [None]:
# Cross-entropy loss on the class logits, optimised with Adam (step size 1e-3)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

# Training function
def _model_device(model):
    """Return the device holding *model*'s parameters (CPU if it has none)."""
    first_param = next(model.parameters(), None)
    return first_param.device if first_param is not None else torch.device("cpu")


def _run_epoch(model, loader, criterion, target_device, optimizer=None):
    """Run one pass over *loader* and return ``(mean loss, accuracy)``.

    The model is trained when *optimizer* is given and only evaluated (no
    gradients) otherwise. Both values are ``nan`` when the loader yields no
    samples.
    """
    training = optimizer is not None
    model.train(training)

    loss_sum = 0.0
    correct = 0
    seen = 0
    with torch.set_grad_enabled(training):
        for inputs, labels in loader:
            inputs, labels = inputs.to(target_device), labels.to(target_device)

            if training:
                optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            if training:
                loss.backward()
                optimizer.step()

            # The criterion returns a batch mean; weight it by the batch size
            # so that the epoch value is a per-sample mean.
            loss_sum += loss.item() * inputs.size(0)
            _, predicted = torch.max(outputs, 1)
            seen += labels.size(0)
            correct += (predicted == labels).sum().item()

    if seen == 0:
        # An empty loader would otherwise raise ZeroDivisionError.
        return float("nan"), float("nan")
    # Loss and accuracy are both averaged over the samples actually processed,
    # so they stay consistent even if the loader drops or skips samples.
    return loss_sum / seen, correct / seen


def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=10):
    """Train *model* for *num_epochs* epochs, validating after every epoch.

    Batches are moved to the device that holds the model's parameters.

    Args:
        model: Network to train.
        train_loader: Loader yielding ``(inputs, labels)`` training batches.
        val_loader: Loader yielding ``(inputs, labels)`` validation batches.
        criterion: Loss called as ``criterion(outputs, labels)``.
        optimizer: Optimizer that updates the model's parameters.
        num_epochs: Number of passes over the training data.

    Returns:
        Four lists with one entry per epoch: training losses, validation
        losses, training accuracies and validation accuracies. An entry is
        ``nan`` when the corresponding loader produced no samples.
    """
    train_losses = []
    val_losses = []
    train_accuracies = []
    val_accuracies = []

    target_device = _model_device(model)

    for epoch in range(num_epochs):
        epoch_loss, epoch_acc = _run_epoch(
            model, train_loader, criterion, target_device, optimizer=optimizer)
        train_losses.append(epoch_loss)
        train_accuracies.append(epoch_acc)

        val_epoch_loss, val_epoch_acc = _run_epoch(
            model, val_loader, criterion, target_device)
        val_losses.append(val_epoch_loss)
        val_accuracies.append(val_epoch_acc)

        print(f"Epoch {epoch+1}/{num_epochs} => "
              f"Training: Loss: {epoch_loss:.4f}, Accuracy: {epoch_acc:.4f} | "
              f"Validation: Loss: {val_epoch_loss:.4f}, Accuracy: {val_epoch_acc:.4f}")

    return train_losses, val_losses, train_accuracies, val_accuracies

# Run the training loop and unpack the per-epoch loss/accuracy histories
num_epochs = 10
history = train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=num_epochs)
train_losses, val_losses, train_accuracies, val_accuracies = history

## Visualize Training Results

Let's visualize the training results.

In [None]:
# Draw two side-by-side panels (loss on the left, accuracy on the right), each
# comparing the training curve with the validation curve.
plt.figure(figsize=(12, 5))

panels = (
    ('Loss', train_losses, val_losses),
    ('Accuracy', train_accuracies, val_accuracies),
)
for panel_index, (metric, train_curve, val_curve) in enumerate(panels, start=1):
    plt.subplot(1, 2, panel_index)
    plt.plot(train_curve, label='Training')
    plt.plot(val_curve, label='Validation')
    plt.title(f'{metric} over epochs')
    plt.xlabel('Epoch')
    plt.ylabel(metric)
    plt.legend()

plt.tight_layout()
plt.show()

## Test the Model on Some Examples

Let's test the model on some examples from the validation set.

In [None]:
# Invert the dataset's name -> index mapping so predictions can be shown by name
idx_to_class = {class_index: class_name for class_name, class_index in dataset.class_to_idx.items()}

# Function to show predictions
def show_predictions(model, val_loader, idx_to_class, num_samples=5):
    """Plot images from the first validation batch with true and predicted labels.

    Args:
        model: Trained classifier; it is switched to evaluation mode.
        val_loader: Loader yielding ``(images, labels)`` batches.
        idx_to_class: Mapping from class index to class name.
        num_samples: Maximum number of images to display. Fewer are shown
            when the first batch is smaller.
    """
    model.eval()
    first_batch = next(iter(val_loader), None)
    if first_batch is None:
        # An empty loader has nothing to display.
        print("No validation samples to display.")
        return
    images, labels = first_batch
    images, labels = images[:num_samples].to(device), labels[:num_samples].to(device)

    # The batch can hold fewer images than requested; iterate over what is
    # actually there to avoid indexing past the end.
    shown = images.size(0)
    if shown == 0:
        print("No validation samples to display.")
        return

    with torch.no_grad():
        outputs = model(images)
        _, preds = torch.max(outputs, 1)

    # Convert images back to display format
    images = images.cpu().numpy()
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])

    # Denormalization: move channels last, then undo the per-channel scaling
    images = std * images.transpose(0, 2, 3, 1) + mean
    images = np.clip(images, 0, 1)

    # Show images with predictions
    plt.figure(figsize=(15, 3))
    for i in range(shown):
        plt.subplot(1, shown, i + 1)
        plt.imshow(images[i])
        true_label = idx_to_class[labels[i].item()]
        pred_label = idx_to_class[preds[i].item()]
        # Two-line title; the newline must be an escape inside the literal.
        title = f"True: {true_label}\nPred: {pred_label}"
        plt.title(title)
        plt.axis('off')

    plt.tight_layout()
    plt.show()

# Plot predictions for up to five images taken from the first validation batch
show_predictions(model, val_loader, idx_to_class)

## Save the Model

Let's save the trained model for later use.

In [None]:
# Write a checkpoint to the current working directory containing the model and
# optimizer state together with the class mapping and the class count.
model_path = os.path.join(os.getcwd(), 'emoji_classifier_model.pth')
checkpoint = {
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
    'class_to_idx': dataset.class_to_idx,
    'num_classes': num_classes,
}
torch.save(checkpoint, model_path)

print(f"Model saved to {model_path}")

## Conclusion

In this exercise, we have:
1. Downloaded and explored the Microsoft Fluent UI Emojis dataset
2. Prepared the dataset for training by creating a custom PyTorch Dataset class
3. Defined a CNN model to classify the emojis
4. Trained the model on the dataset
5. Visualized the training results
6. Tested the model on some examples
7. Saved the model for later use

This is a basic implementation that can be extended in several ways:
- Use more advanced CNN architectures like ResNet or EfficientNet
- Implement data augmentation to improve model robustness
- Use transfer learning from pre-trained models
- Experiment with different hyperparameters