# Emoji Prediction

In [None]:
# Mount Google Drive; the font, CSV and image paths used below all live under /content/drive.
from google.colab import drive
drive.mount('/content/drive')
# drive.flush_and_unmount()

In [None]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from PIL import Image, ImageFont, ImageDraw,ImageFilter
from matplotlib import pyplot as plt
from skimage.feature import hog
import cv2
%pip install torch == 1.7.0 torchvision == 0.8.0 torchaudio == 0.7.0
%pip install -U opencv-python
%pip install -U opencv-contrib-python

# Dealing with unicode emoji

In [None]:
#Convolution through different direction
def conv_horizon(img):
  """Filter a PIL image with a 3x3 Sobel-style kernel whose rows are (-1,-2,-1), (0,0,0), (1,2,1)."""
  weights = (-1, -2, -1,
              0,  0,  0,
              1,  2,  1)
  # scale is given explicitly because the weights sum to zero.
  sobel_rows = ImageFilter.Kernel((3, 3), weights, scale=1, offset=0)
  return img.filter(sobel_rows)
def conv_vert(img):
  """Filter a PIL image with a 3x3 Sobel-style kernel whose rows are (1,0,-1), (2,0,-2), (1,0,-1)."""
  weights = (1, 0, -1,
             2, 0, -2,
             1, 0, -1)
  # scale is given explicitly because the weights sum to zero.
  sobel_cols = ImageFilter.Kernel((3, 3), weights, scale=1, offset=0)
  return img.filter(sobel_cols)
def conv_edge(img):
  """Filter a PIL image with a 3x3 kernel that weights the centre by 8 and every neighbour by -1."""
  weights = (-1, -1, -1,
             -1,  8, -1,
             -1, -1, -1)
  # scale is given explicitly because the weights sum to zero.
  outline = ImageFilter.Kernel((3, 3), weights, scale=1, offset=0)
  return img.filter(outline)

In [None]:
def emoji_to_image(
    emoji: str,
    size: int = 64,
    font_path: str = "/content/drive/MyDrive/Colab Notebooks/Assignment1/LiuYuzhou_A1/NotoEmoji-VariableFont_wght.ttf",
) -> np.ndarray:
  """Render one emoji as a black glyph on a white 80x80 grayscale canvas.

  Params:
      emoji: the character(s) to draw.
      size: accepted for backward compatibility but NOT used; the canvas is always
          80x80 and the font size is always 60.
      font_path: TrueType font used for rendering (defaults to the Noto Emoji font on Drive).

  Returns:
      The rendered mode-"L" image converted to a numpy array.
  """
  image = Image.new("L", (80, 80), 255)
  font = ImageFont.truetype(font_path, 60, encoding='unic')
  # The former `draw.textbbox(...)` call only measured the text and its result was discarded,
  # so it has been dropped; the glyph is still drawn with its origin at the top-left corner.
  ImageDraw.Draw(image).text((0, 0), emoji, fill=0, font=font)
  # The canvas is already mode "L", so no further conversion is needed.
  return np.array(image)

# Summarization of the dataset

Includes label visualization (based on the Kaggle Unicode emoji dataset):
https://www.kaggle.com/datasets/thedevastator/analyzing-emoji-characteristics-through-unicode

In [None]:
import csv
import pandas as pd
# Location of the emoji table on the mounted Drive.
csv_file_path = '/content/drive/MyDrive/Colab Notebooks/Assignment1/LiuYuzhou_A1/emoji_df_v2.csv'
#Extract both columns
# NOTE(review): column1_data / column2_data are never read in the visible notebook.
column1_data = []
column2_data = []

# Load the whole table; later cells read its "emoji" and "sub_group" columns.
data = pd.read_csv(csv_file_path)

In [None]:
data

For training purpose, we need to convert all the labels into numeric value, for example:

labels={0: 'face-smiling',1: 'face-affection',2: 'face-tongue',3: 'face-hand',4:'face-neutral-skeptical'}

In [None]:
# `labels` starts empty here and is filled with numeric ids by the next cell.
labels = {}
# Map each emoji to its sub-group name (a repeated emoji keeps its last sub-group).
categories = dict(zip(data["emoji"], data["sub_group"]))

In [None]:
# Give every distinct sub-group name a consecutive integer id, in order of first appearance.
index = 0
for k in data["sub_group"]:
  if k in labels:
    continue
  labels[k] = index
  index += 1

We are going to wrap every training object in a `Sample` class for convenience; each sample represents one emoji image.

In [None]:
class Sample:
    """One image together with its class id and the features later cells attach to it."""

    def __init__(self, idx=0, cat=-1, img=None, lbp_feat=None, label=None):
        '''
        idx: index of the object.
        cat: numeric category id the image belongs to (-1 until assigned).
        img: grayscale image as a numpy array, or None for an empty placeholder.
        lbp_feat: LBP feature map of the image (filled in later).
        label: free-form tag stored as given (per the original note: which set the sample belongs to).
        '''
        self.idx = idx
        self.cat = cat
        self.img = img
        self.lbp_feat = lbp_feat
        self.label = label
        if img is None:
            # The default img=None used to crash inside Image.fromarray; leave the
            # filtered views empty instead so a placeholder Sample can be created.
            self.conv_vert = None
            self.conv_horizon = None
            self.conv_edge = None
        else:
            pil_img = Image.fromarray(img)
            self.conv_vert = conv_vert(pil_img)
            self.conv_horizon = conv_horizon(pil_img)
            self.conv_edge = conv_edge(pil_img)
        self.histogram = []     # per-feature-map row sums, filled by the histogram cell
        self.conv = None        # CNN feature maps, filled after training
        self.pred = None
        self.sift_des = None    # SIFT descriptors
        self.bovw_vec = None    # bag-of-visual-words histogram

    def get_cat(self, cat):
        """Return the numeric id of sub-group name `cat` from the global `labels` dict (KeyError if unknown)."""
        return labels[cat]

    def plot_images(self):
        """Show the original image and its three filtered views side by side."""
        # One row with four columns: original + three filter responses.
        fig, axs = plt.subplots(1, 4)
        panels = [
            ('original', self.img),
            ('conv_vert', np.array(self.conv_vert)),
            ('conv_horizon', np.array(self.conv_horizon)),
            ('conv_edge', np.array(self.conv_edge)),
        ]
        for ax, (title, image) in zip(axs, panels):
            ax.imshow(image, cmap='gray')
            ax.axis('off')
            ax.set_title(title)

        # Adjust the layout of the subplots
        plt.tight_layout()
        plt.show()

    def display_sift_results(self):
        """Detect SIFT keypoints on the original and filtered views and draw them."""
        sift = cv2.SIFT_create()

        fig, axs = plt.subplots(1, 4, figsize=(15, 5))  # Adjust the figsize as needed

        images = [self.img, self.conv_vert, self.conv_horizon, self.conv_edge]
        titles = ['origin', 'conv_vert', 'conv_horizon', 'conv_edge']

        for i, image in enumerate(images):
            pixels = np.array(image)  # PIL views are converted; ndarray is copied

            # Detect keypoints and compute descriptors
            keypoints, descriptors = sift.detectAndCompute(pixels, None)

            # Draw keypoints (with size/orientation) on the image
            image_with_keypoints = cv2.drawKeypoints(pixels, keypoints, None, flags=cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)

            axs[i].imshow(image_with_keypoints, cmap='gray')
            axs[i].set_title(titles[i])
            axs[i].axis('off')

        plt.show()
def get_label(num):
  """Reverse lookup in the global `labels` dict: first name whose id equals `num`, else None."""
  return next((name for name, label_id in labels.items() if label_id == num), None)

# Training(Execute this part when training the model)
Following is to put all the images together

In [None]:
# Render every emoji in the table and tag it with its numeric sub-group id.
samples = []
index = 0
for k, sub_group in categories.items():
  sample = Sample(img=emoji_to_image(k))
  sample.cat = sample.get_cat(sub_group)
  samples.append(sample)
  index += 1
# 80/20 split with a fixed seed so the partition is repeatable.
train, validation = train_test_split(samples, test_size=0.2, random_state=42)

In [None]:
labels

# Following part is about testing in emoji kitchen (Do not execute if you are in training phase)

To build the emoji kitchen test set(Test part for unseen emoji generated by emoji kitchen)

In [None]:
# Render every emoji in the table and tag it with its numeric sub-group id (no split here).
samples = []
index = 0
for k, sub_group in categories.items():
  sample = Sample(img=emoji_to_image(k))
  sample.cat = sample.get_cat(sub_group)
  samples.append(sample)
  index += 1

In [None]:
# Add two extra class names to `labels` with hard-coded ids.
# NOTE(review): ids 2 and 24 may already belong to other sub-groups assigned by the
# label-building cell — confirm the collision is intended.
labels["face-neutral"]=2
labels["emotion"]=24

In [None]:
import os
src="/content/drive/MyDrive/Colab Notebooks/Assignment1/LiuYuzhou_A1/test_emojis"
file_name=["animal-mamal1.png","hand-fingers-closed1.png","face-negative1.png","face-negative2.png","face-positive1.png",
"face-positive2.png","face-positive3.png","sky&weather1.png"]
class_name=["animal-mammal","hand-fingers-closed","face-negative","face-negative","face-positive","face-positive","face-positive","sky & weather"]
# The two lists are parallel: class_name[i] is the expected class of file_name[i].
assert len(file_name) == len(class_name)
test=[]
for fname, cname in zip(file_name, class_name):
  fpath = os.path.join(src, fname)
  img_bgr = cv2.imread(fpath, cv2.IMREAD_COLOR)
  if img_bgr is None:
    # cv2.imread returns None for a missing/unreadable file instead of raising;
    # fail with the offending path rather than an opaque TypeError on indexing.
    raise FileNotFoundError(f"Could not read test image: {fpath}")
  img = img_bgr[..., ::-1]  # BGR -> RGB
  img_gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
  img_gray = cv2.resize(img_gray, (80, 80))
  # The thresholded copy is only displayed; the Sample keeps the un-thresholded grayscale image.
  _, binary_img = cv2.threshold(img_gray, 125, 255, cv2.THRESH_BINARY)
  test.append(Sample(img=img_gray))
  test[-1].cat=labels[cname]
  display(binary_img)

Load self collected emoji kitchen data

In [None]:
import os
import cv2
folder_name=["face-positive","face-negative","face-neutral","animal-mammal","sky & weather"]
# NOTE(review): human_labels is not read below; class ids come from the global `labels` dict.
human_labels={"face-positive":0,"face-negative":1,"face-neutral":2,"animal-mammal":15,"sky & weather":17}
folder_path = '/content/drive/MyDrive/Colab Notebooks/Assignment1/LiuYuzhou_A1/test_emojis/emoji_kitchen_set'

# Append every readable image of each class folder to the existing `samples` list.
for sub_group in folder_name:
  file_path = os.path.join(folder_path, sub_group)
  # os.listdir returns entries in arbitrary order; sort so the seeded split below is repeatable.
  for fname in sorted(os.listdir(file_path)):
      fpath = os.path.join(file_path, fname)
      img_bgr = cv2.imread(fpath, cv2.IMREAD_COLOR)
      if img_bgr is None:
          # cv2.imread returns None for non-image or unreadable files; skip them visibly.
          print(f"Skipping unreadable file: {fpath}")
          continue
      img = img_bgr[..., ::-1]  # BGR -> RGB
      img_gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
      img_gray = cv2.resize(img_gray, (80, 80))
      samples.append(Sample(img=img_gray, cat=labels[sub_group]))
train,validation= train_test_split(samples, test_size=0.2, random_state=42)

In [None]:
# Flat list of every sample, in validation, train, test order.
samples = [*validation, *train, *test]

# Test by using human images (do not execute this part if you are in the training phase) https://www.kaggle.com/datasets/sudarshanvaidya/random-images-for-face-emotion-recognition

In [None]:
import numpy as np

def mask_upper_half(gray_img, split_row=45):
    """Keep the top rows of a grayscale image and black out the rest.

    Params:
        gray_img: 2-D image array (any height/width; previously only 80x80 worked).
        split_row: rows [0, split_row) are kept, rows from split_row on become 0.
            The default 45 reproduces the original fixed 80x80 mask.

    Returns:
        A new array of the same shape and dtype; the input is not modified.
    """
    masked_img = np.zeros_like(gray_img)
    masked_img[:split_row, :] = gray_img[:split_row, :]
    return masked_img

def mask_lower_half(gray_img, split_row=45):
    """Keep the bottom rows of a grayscale image and black out the rest.

    Params:
        gray_img: 2-D image array (any height/width; previously only 80x80 worked).
        split_row: rows from split_row on are kept, rows [0, split_row) become 0.
            The default 45 reproduces the original fixed 80x80 mask.

    Returns:
        A new array of the same shape and dtype; the input is not modified.
    """
    masked_img = np.zeros_like(gray_img)
    masked_img[split_row:, :] = gray_img[split_row:, :]
    return masked_img

In [None]:
# Add two extra class names to `labels` with hard-coded ids.
# NOTE(review): ids 2 and 24 may already belong to other sub-groups assigned by the
# label-building cell — confirm the collision is intended.
labels["face-neutral"]=2
labels["emotion"]=24

load self collected facial data

In [None]:
samples=[]
import os
import cv2
folder_name=["face-positive","face-negative","face-neutral"]
# NOTE(review): human_labels is not read below; class ids come from the global `labels` dict.
human_labels={"face-positive":0,"face-negative":1,"face-neutral":2}
# NOTE(review): this is the same emoji_kitchen_set directory as the emoji-kitchen cell,
# although the heading says "facial data" — confirm the path.
folder_path = '/content/drive/MyDrive/Colab Notebooks/Assignment1/LiuYuzhou_A1/test_emojis/emoji_kitchen_set'

# Build `samples` from scratch out of every readable image in the three face folders.
for sub_group in folder_name:
  file_path = os.path.join(folder_path, sub_group)
  # os.listdir returns entries in arbitrary order; sort so the seeded split below is repeatable.
  for fname in sorted(os.listdir(file_path)):
      fpath = os.path.join(file_path, fname)
      img_bgr = cv2.imread(fpath, cv2.IMREAD_COLOR)
      if img_bgr is None:
          # cv2.imread returns None for non-image or unreadable files; skip them visibly.
          print(f"Skipping unreadable file: {fpath}")
          continue
      img = img_bgr[..., ::-1]  # BGR -> RGB
      img_gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
      img_gray = cv2.resize(img_gray, (80, 80))
      samples.append(Sample(img=img_gray, cat=labels[sub_group]))
train,validation= train_test_split(samples, test_size=0.2, random_state=42)

load human facial data

In [None]:
import os
import cv2
folder_name=["face-positive","face-negative","face-neutral"]
# NOTE(review): human_labels is not read below; class ids come from the global `labels` dict.
human_labels={"face-positive":0,"face-negative":1,"face-neutral":2}
folder_path = '/content/drive/MyDrive/Colab Notebooks/Assignment1/LiuYuzhou_A1/human_test'
test=[]
# Build the test set from every readable image in the three class folders.
for sub_group in folder_name:
  file_path = os.path.join(folder_path, sub_group)
  # Sorted so the order of `test` (and of every prediction list) is repeatable.
  for fname in sorted(os.listdir(file_path)):
      fpath = os.path.join(file_path, fname)
      img_bgr = cv2.imread(fpath, cv2.IMREAD_COLOR)
      if img_bgr is None:
          # cv2.imread returns None for non-image or unreadable files; skip them visibly.
          print(f"Skipping unreadable file: {fpath}")
          continue
      img = img_bgr[..., ::-1]  # BGR -> RGB
      img_gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
      img_gray = cv2.resize(img_gray, (80, 80))
      test.append(Sample(img=img_gray, cat=labels[sub_group]))

#Mask Strategy
If you want to try the mask strategy, execute the cells below; it only masks the training samples.

In [None]:
# Start the augmented set with a copy of each training image that keeps only its top rows.
incoming = [Sample(img=mask_upper_half(sample.img), cat=sample.cat) for sample in train]

In [None]:
# Add a copy of each training image that keeps only its bottom rows.
incoming.extend(
  Sample(img=mask_lower_half(sample.img), cat=sample.cat) for sample in train
)

In [None]:
# Final training set: masked copies first, then the original images.
# A new list is built on purpose. Previously `train` was rebound to `incoming` itself,
# so re-running this cell appended to the very list it was iterating and never finished.
train = incoming + list(train)

In [None]:
# Flat list of every sample, in train, validation, test order.
samples = [*train, *validation, *test]

In [None]:
len(samples)

# SVM, Random Forest Classifier, and KNN Classifier

SVM
Just simply train it based on a linear SVM classifier.

In [None]:
# Raw-pixel baseline: each image is flattened into one long vector.
train_feats, train_cats = [], []
for sample in train:
  train_feats.append(np.array(sample.img).flatten())
  train_cats.append(sample.cat)

val_feats, val_cats = [], []
for sample in validation:
  val_feats.append(np.array(sample.img).flatten())
  val_cats.append(sample.cat)

test_feats, test_cats = [], []
for sample in test:
  test_feats.append(np.array(sample.img).flatten())
  test_cats.append(sample.cat)

# Fit a linear-kernel SVM on the training vectors.
svm = SVC(kernel='linear')
svm.fit(train_feats, train_cats)

# Score it on the validation split.
y_pred = svm.predict(val_feats)
accuracy = accuracy_score(val_cats, y_pred)
print(f'Accuracy: {accuracy * 100:.2f}%')

Test part

In [None]:
# Score the fitted SVM on the test samples.
y_pred = svm.predict(test_feats)
accuracy = accuracy_score(test_cats, y_pred)
print(f'Accuracy: {accuracy * 100:.2f}%')

10-Fold Validation

In [None]:
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.svm import SVC

X = train_feats
y = train_cats

# Create an SVM classifier
svm = SVC(kernel='linear')

# Define the number of folds for cross-validation
k = 10

# Shuffled K-fold with a fixed seed so the folds are repeatable.
kfold = KFold(n_splits=k, shuffle=True, random_state=42)

# Run the cross-validation once (it used to be executed twice with identical settings,
# doubling the runtime for the same result) and compute the mean accuracy.
accuracy_scores = cross_val_score(svm, X, y, cv=kfold, scoring='accuracy')
mean_accuracy = accuracy_scores.mean()

# Print the accuracy for each fold and the mean accuracy
for i, score in enumerate(accuracy_scores, start=1):
  print("Accuracy for fold {} : {}".format(i,score))
print(mean_accuracy)

Random Forest Classifier

In [None]:
# Train a Random Forest classifier on the current train_feats / train_cats.
from sklearn.ensemble import RandomForestClassifier
# Seeded like the data splits (42) so the reported accuracy is repeatable between runs.
classifier = RandomForestClassifier(n_estimators=100, max_depth=10, random_state=42)

# Train the classifier
classifier.fit(train_feats, train_cats)

# Predict on the validation split
y_pred = classifier.predict(val_feats)

# Evaluate the model
accuracy = accuracy_score(val_cats, y_pred)
print(f'Accuracy: {accuracy * 100:.2f}%')

KNN Classifier

In [None]:
from sklearn.neighbors import KNeighborsClassifier

# 5-nearest-neighbour classifier with Euclidean distance, fitted on the training features.
classifier = KNeighborsClassifier(n_neighbors=5, metric='euclidean')
classifier.fit(train_feats, train_cats)

# Validation accuracy.
y_pred = classifier.predict(val_feats)
accuracy = accuracy_score(val_cats, y_pred)
print(f'Accuracy: {accuracy * 100:.2f}%')

# Bag of Visual Words

In [None]:
# Extract SIFT descriptors for every sample ("train by combination"): the descriptors of
# the three filtered views and of the original image are stacked into one array per sample.

from scipy.cluster.vq import *
from sklearn import preprocessing
import cv2

sift = cv2.SIFT_create()

for sample in samples:
    # Same stacking order as before: conv_vert, conv_horizon, conv_edge, original.
    views = (
        np.array(sample.conv_vert),
        np.array(sample.conv_horizon),
        np.array(sample.conv_edge),
        sample.img,
    )
    descriptors_list = []
    for view in views:
        _, des = sift.detectAndCompute(view, None)
        # detectAndCompute yields None when the view has no keypoints.
        if des is not None:
            descriptors_list.append(des)
    if descriptors_list:
        sample.sift_des = np.concatenate(descriptors_list, axis=0)
    else:
        # np.concatenate([]) raises; store an empty (0, 128) float32 array instead so the
        # later stacking step still works (SIFT descriptors are 128-dimensional).
        sample.sift_des = np.empty((0, 128), dtype=np.float32)

In [None]:
test[0].sift_des

In [None]:
# do the clustering
# construct the dictionary (visual vocabulary)
from scipy.cluster.vq import vq,kmeans
numWords = 1000

# Stack all descriptors with a single vstack; growing the array inside a loop
# re-copied everything on each iteration (quadratic in the number of samples).
descriptors = np.vstack([sample.sift_des for sample in samples])

# Cluster the descriptors into at most numWords centroids (third argument: one k-means run).
print("Start k-means: %d words, %d key points" %(numWords, descriptors.shape[0]))
voc, variance = kmeans(descriptors,numWords,1)

In [None]:
# build histograms: one count per visual word for every sample

for sample in samples:
    des = sample.sift_des
    if des is None or len(des) == 0:
        # No keypoints: an all-zero histogram, and vq is never called on an empty array.
        sample.bovw_vec = np.zeros((numWords), "float32")
        continue
    # vq assigns each descriptor the index of its nearest codebook entry.
    words, distance = vq(des, voc)
    sample.bovw_vec = np.bincount(words, minlength=numWords).astype("float32")
# NOTE: the histograms are left un-normalised (the L2 normalisation step was disabled).

In [None]:
# Collect the bag-of-visual-words histogram and class id of every sample, per split.
train_feats, train_cats = [], []
for sample in train:
  train_feats.append(sample.bovw_vec)
  train_cats.append(sample.cat)

val_feats, val_cats = [], []
for sample in validation:
  val_feats.append(sample.bovw_vec)
  val_cats.append(sample.cat)

test_feats, test_cats = [], []
for sample in test:
  test_feats.append(sample.bovw_vec)
  test_cats.append(sample.cat)

# Linear SVM on the histograms.
svm = SVC(kernel='linear')
svm.fit(train_feats, train_cats)

# Validation accuracy.
y_pred = svm.predict(val_feats)
accuracy = accuracy_score(val_cats, y_pred)
print(f'Accuracy: {accuracy * 100:.2f}%')

test part

In [None]:
# predict on the test set.
y_pred = svm.predict(test_feats)

# calculate accuracy.
accuracy = accuracy_score(test_cats, y_pred)
print(f'Accuracy: {accuracy * 100:.2f}%')

In [None]:
y_pred

K-Fold validation

In [None]:
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.svm import SVC
from sklearn.datasets import load_iris

# Cross-validate on the bag-of-visual-words histograms of ALL samples.
# NOTE(review): `samples` also contains the validation and test images (and any masked
# copies), so this score is not independent of the hold-out results above.
X = [sample.bovw_vec for sample in samples]
y = [sample.cat for sample in samples]

# Create an SVM classifier
svm = SVC(kernel='linear')

# Define the number of folds for cross-validation
k = 10

# Shuffled K-fold with a fixed seed so the folds are repeatable.
kfold = KFold(n_splits=k, shuffle=True, random_state=42)

# Perform cross-validation and compute the mean accuracy
accuracy_scores = cross_val_score(svm, X, y, cv=kfold, scoring='accuracy')
mean_accuracy = accuracy_scores.mean()

# Print the accuracy for each fold and the mean accuracy.
# Percentages are formatted directly: round(score, 2) * 100 printed float artefacts
# such as 56.99999999999999.
print("Totolly {} images".format(len(samples)))
for i, score in enumerate(accuracy_scores, start=1):
  print("SVM(combination) in fold {}'s accuracy : {:.2f}%".format(i, score * 100))
print("The overall mean score is {:.2f}%".format(mean_accuracy * 100))

In [None]:
mean_accuracy

In [None]:
# Random Forest on the current train_feats; seeded like the data splits (42) so the
# reported accuracy is repeatable between runs.
classifier = RandomForestClassifier(n_estimators=100, max_depth=10, random_state=42)
classifier.fit(train_feats, train_cats)
y_pred = classifier.predict(val_feats)

# Evaluate the model on the validation split
accuracy = accuracy_score(val_cats, y_pred)
print(f'Accuracy: {accuracy * 100:.2f}%')

In [None]:
# 2-nearest-neighbour classifier with Euclidean distance, fitted on the current training features.
classifier = KNeighborsClassifier(n_neighbors=2, metric='euclidean')
classifier.fit(train_feats, train_cats)

# Validation accuracy.
y_pred = classifier.predict(val_feats)
accuracy = accuracy_score(val_cats, y_pred)
print(f'Accuracy: {accuracy * 100:.2f}%')

# Train with LBP+CNN

1.   We first try to convert all the data points by using LBP method
2.   Then we build a CNN to train the model



In [None]:
# draw the LBP pattern images and feature vectors

def unfold(img : np.array, ksize : int = 3) -> np.array:
    """ Gather the neighbours of every pixel (centre excluded); odd kernel sizes only.

    Params:
        img:
            An image with size of H x W.
        ksize:
            The kernel size

    Returns:
        An H x W x (ksize**2 - 1) array with the dtype of `img`; neighbours that fall
        outside the image are 0.
    """
    assert ksize % 2 == 1
    assert img.ndim == 2
    rows, cols = img.shape
    radius = ksize // 2
    # Zero border wide enough for every shift of the window.
    padded = np.pad(img, radius)
    neighbours = np.zeros((rows, cols, ksize**2 - 1), dtype=img.dtype)
    offsets = [(dy, dx) for dy in range(ksize) for dx in range(ksize)
               if not (dy == radius and dx == radius)]
    for channel, (dy, dx) in enumerate(offsets):
        # Offset (dy, dx) reads the pixel at (y + radius - dy, x + radius - dx).
        top = ksize - 1 - dy
        left = ksize - 1 - dx
        neighbours[:, :, channel] = padded[top:top + rows, left:left + cols]
    return neighbours


def original_LBP(img : np.array) -> np.array:
    """ Compute the basic 3x3 local binary pattern code of every pixel.

    Params:
        img:
            An image with size of H x W.

    Pattern (bit position of each neighbour, bit 1 = weight 1 ... bit 8 = weight 128):
        4 3 2
        5 / 1
        6 7 8

    Returns:
        An H x W integer array; a neighbour contributes its weight when it is >= the centre.
    """
    # Weights are listed in the channel order produced by unfold().
    weights = np.array([128, 64, 32, 1, 16, 2, 4, 8], dtype=np.int32)
    neighbours = unfold(img)
    not_darker = neighbours >= img[:, :, np.newaxis]
    return (not_darker * weights).sum(axis=-1)
# Attach the LBP code map of every sample's image to the sample.
for s in samples:
  s.lbp_feat=original_LBP(s.img)

In [None]:
def normalize_gray_image(image):
    """Standardise a grayscale image to zero mean and unit standard deviation.

    Params:
        image: numpy array of pixel values.

    Returns:
        A float32 array of the same shape. A constant image (standard deviation 0,
        e.g. a blank canvas) is returned as all zeros instead of NaN/inf.
    """
    # Convert the image to float32 data type
    image = image.astype(np.float32)

    centered = image - np.mean(image)
    std = np.std(image)
    if std == 0:
        # Dividing by zero would fill the image with NaN and poison the CNN loss.
        return centered
    return centered / std

Split feature and label of the train set and validation set

In [None]:
# Standardised images and integer class ids for each split.
train_feats, train_cats = [], []
for sample in train:
  train_feats.append(normalize_gray_image(sample.img))
  train_cats.append(sample.cat)

val_feats, val_cats = [], []
for sample in validation:
  val_feats.append(normalize_gray_image(sample.img))
  val_cats.append(sample.cat)

test_feats, test_cats = [], []
for sample in test:
  test_feats.append(normalize_gray_image(sample.img))
  test_cats.append(sample.cat)

In [None]:
test_cats

Use one hot encoding in order to use the softmax for prediction in CNN

In [None]:
# Width of the one-hot vectors and of the CNN output layer.
#Adjust it according to the number of categories of current class
num_cats=25

In [None]:
# Replace each integer class id with a one-hot list of length num_cats.
# Note: this overwrites train_cats / val_cats / test_cats, so the cell must not be run twice.
temp_cats = []
for cat in train_cats:
  row = [0] * num_cats
  row[cat] = 1
  temp_cats.append(row)
train_cats = temp_cats

temp_cats = []
for cat in val_cats:
  row = [0] * num_cats
  row[cat] = 1
  temp_cats.append(row)
val_cats = temp_cats

temp_cats = []
for cat in test_cats:
  row = [0] * num_cats
  row[cat] = 1
  temp_cats.append(row)
test_cats = temp_cats

Following is the EMONN model

In [None]:
from torch.functional import Tensor
import torch.nn.functional as F
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torch.autograd import Variable
import numpy as np
import matplotlib.pyplot as plt
import torch.optim as optim

# Convert the feature lists through one numpy array first; torch.tensor() on a list of
# ndarrays converts element by element and is very slow.
x_train = torch.tensor(np.array(train_feats)).to(torch.float32)
x_train = torch.reshape(x_train,[x_train.shape[0],1,80,80])
y_train=torch.tensor(train_cats).to(torch.float32)
# x_val / x_test stay (N, 80, 80) here; the evaluation cell reshapes them.
x_val = torch.tensor(np.array(val_feats)).to(torch.float32)
y_val = torch.tensor(val_cats).to(torch.float32)
x_test = torch.tensor(np.array(test_feats)).to(torch.float32)
y_test = torch.tensor(test_cats).to(torch.float32)
# Specify batch size
batch_size = 32
# Ceiling division: the final, shorter batch is trained on too (floor division used to
# drop it every epoch, so those samples were never seen).
num_batches = (len(x_train) + batch_size - 1) // batch_size
num_epochs=50
output_channel=num_cats
class EMONN(nn.Module):
    """Small LeNet-style CNN for 1x80x80 inputs that outputs `output_channel` class scores."""

    def __init__(self):
        super(EMONN, self).__init__()
        # Grayscale input, so the first convolution has a single input channel.
        self.conv1 = nn.Conv2d(1,6,kernel_size=5)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(6,16, kernel_size=5)
        self.pool2 = nn.MaxPool2d(kernel_size=2,stride=2)
        # 80 -> conv5 -> 76 -> pool -> 38 -> conv5 -> 34 -> pool -> 17, with 16 channels.
        self.linear1 = nn.Linear(16*17*17, 80)
        self.linear2 = nn.Linear(80,output_channel)
        # Not used in forward(): CrossEntropyLoss applies log-softmax to the raw scores itself.
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Return (class scores, ReLU-activated conv1 feature maps) for a batch x of shape (N, 1, 80, 80)."""
        y_pred = self.conv1(x)
        y_pred = F.relu(y_pred)
        train_conv=y_pred  # kept so later cells can inspect the first-layer feature maps
        y_pred = self.pool1(y_pred)
        y_pred = F.relu(self.conv2(y_pred))
        y_pred = self.pool2(y_pred)
        y_pred = y_pred.view(-1,16*17*17)
        y_pred = F.relu(self.linear1(y_pred))
        y_pred = self.linear2(y_pred)
        return y_pred,train_conv

# The model must live on the same device as its inputs. Previously only the batches were
# moved to CUDA, which raises a device-mismatch error on any GPU runtime.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = EMONN().to(device)

criterion = nn.CrossEntropyLoss()

# create optimizer. 1st parameter: the parameters will be optimized; 2nd: learning rate
optimizer = torch.optim.SGD(model.parameters(), lr=0.05)
train_conv=[]
# Training loop. NOTE(review): batches are taken in the stored order of `train` (no shuffling).
for epoch in range(num_epochs):
    print(f"Epoch {epoch+1}/{num_epochs}")

    for batch_idx in range(num_batches):
        start_idx = batch_idx * batch_size
        end_idx = start_idx + batch_size

        inputs = x_train[start_idx:end_idx].to(device)
        # CrossEntropyLoss expects integer class indices (float one-hot targets are not
        # accepted by the pinned torch 1.7); argmax of a one-hot row is that index and
        # yields the same loss.
        target = y_train[start_idx:end_idx].argmax(dim=1).to(device)

        # Clear gradients
        optimizer.zero_grad()
        # Forward pass
        out,_= model(inputs)
        # Calculate loss
        loss = criterion(out, target)

        # Backward propagation
        loss.backward()
        # Updating parameters via SGD
        optimizer.step()
        # Print training progress once per epoch, after the last batch
        if(batch_idx==num_batches-1):
          print(f"Batch {batch_idx+1}/{num_batches} - Loss: {loss.item()}")

# Later cells call the model directly on CPU tensors, so hand it back on the CPU.
model.to("cpu")


Get the intermediate feature maps (the first-layer convolution outputs returned by the model)

In [None]:
# Rebuild the standardised images and restore the plain integer class ids
# (the one-hot versions were only needed for training).
train_feats, train_cats = [], []
for sample in train:
  train_feats.append(normalize_gray_image(sample.img))
  train_cats.append(sample.cat)

val_feats, val_cats = [], []
for sample in validation:
  val_feats.append(normalize_gray_image(sample.img))
  val_cats.append(sample.cat)

test_feats, test_cats = [], []
for sample in test:
  test_feats.append(normalize_gray_image(sample.img))
  test_cats.append(sample.cat)

In [None]:
# Inference only: without no_grad() every forward pass records an autograd graph for the
# whole dataset, which wastes memory and is never used.
with torch.no_grad():
  # Each call returns (class scores, first-layer feature maps).
  _, train_conv=model(x_train)
  result1,val_conv=model(torch.reshape(x_val,[x_val.shape[0],1,80,80]))
  test_result,test_conv=model(torch.reshape(x_test,[x_test.shape[0],1,80,80]))

Validation part

In [None]:
# Predicted class of each validation sample = index of its highest score.
result1 = np.array(torch.detach(result1))
max_indices = np.argmax(result1, axis=1)
max_values = max_indices


In [None]:
# Fraction of validation samples whose predicted class matches the true class id.
accuracy = accuracy_score(val_cats, max_values)
accuracy

Test part

In [None]:
# Predicted class of each test sample = index of its highest score.
result = np.array(torch.detach(test_result))
max_indices = np.argmax(result, axis=1)
max_values = max_indices

# Test accuracy of the CNN.
accuracy = accuracy_score(test_cats, max_values)
accuracy

In [None]:
max_values

# Midway method

In [None]:
# Turn the three feature-map tensors into numpy arrays (detached from autograd).
train_conv, val_conv, test_conv = (
  np.array(torch.detach(feature_maps))
  for feature_maps in (train_conv, val_conv, test_conv)
)

In [None]:
conv_edge(Image.fromarray(train[0].img))

In [None]:
# Store each sample's own stack of feature maps on the sample.
for group, feature_maps in ((train, train_conv), (validation, val_conv), (test, test_conv)):
  for sample, imgs in zip(group, feature_maps):
    sample.conv = imgs

In [None]:
train[0].conv[1].shape

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# 5 x 6 grid: one row per sample (the first five in `samples`), one column per feature map.
fig, axs = plt.subplots(5, 6, figsize=(20, 20))  # Adjust the figsize as needed
n_maps = train[0].conv.shape[0]
for j, row_axes in enumerate(axs):
  for i in range(n_maps):
      # Scale by 255 and convert to 8-bit for display.
      picture = cv2.convertScaleAbs(samples[j].conv[i], alpha=(255.0))
      row_axes[i].imshow(picture, cmap='gray')
      row_axes[i].axis('off')

plt.show()

In [None]:
# Extract SIFT descriptors from every CNN feature map of every sample.

from scipy.cluster.vq import *
from sklearn import preprocessing
import cv2

sift = cv2.SIFT_create()

for sample in samples:
    per_map = []
    for fmap in sample.conv:
      # Scale by 255 and convert to 8-bit before running SIFT.
      new_img=cv2.convertScaleAbs(fmap, alpha=(255.0))
      kpt, des = sift.detectAndCompute(new_img, None)
      if des is None:
        # Keep one entry per feature map. Skipping maps without keypoints shifted the
        # later entries, so sift_des[ind] could refer to the wrong map or be out of range.
        des = np.empty((0, 128), dtype=np.float32)
      per_map.append(des)
    # sift_des[i] now always belongs to feature map i.
    sample.sift_des = per_map

In [None]:
# Per feature map: build a visual vocabulary, histogram every sample against it, then
# train and score one Random Forest. Predictions are collected for the voting step.
from scipy.cluster.vq import vq,kmeans
numWords = 125
all_pred=[]
all_test_pred=[]

def _bovw_histogram(descriptors, codebook, num_words):
  """Count descriptors per nearest codebook entry; all zeros when there are no descriptors."""
  if descriptors is None or len(descriptors) == 0:
    return np.zeros((num_words), "float32")
  words, _ = vq(descriptors, codebook)
  return np.bincount(words, minlength=num_words).astype("float32")

# NOTE(review): only the first 5 feature maps are used here, while the histogram
# experiment uses 6 — confirm which is intended.
for ind in range(5):
  # One vstack instead of growing the array inside a loop (which was quadratic).
  # NOTE(review): the vocabulary is built from all samples, including validation and test.
  descriptors = np.vstack([sample.sift_des[ind] for sample in samples])
  print("Start k-means: %d words, %d key points" %(numWords, descriptors.shape[0]))
  voc, variance = kmeans(descriptors,numWords,1)

  # Same histogram computation for all three splits (test is only meaningful when testing).
  for group in (train, validation, test):
    for sample in group:
      sample.bovw_vec = _bovw_histogram(sample.sift_des[ind], voc, numWords)

  test_feats=[sample.bovw_vec for sample in test]
  test_cats=[sample.cat for sample in test]
  train_feats=[sample.bovw_vec for sample in train]
  train_cats=[sample.cat for sample in train]
  val_feats=[sample.bovw_vec for sample in validation]
  val_cats=[sample.cat for sample in validation]

  # Seeded like the data splits (42) so the votes are repeatable between runs.
  classifier = RandomForestClassifier(random_state=42)
  classifier.fit(train_feats, train_cats)

  # predict on the validation set.
  y_pred = classifier.predict(val_feats)
  all_pred.append(y_pred)
  accuracy = accuracy_score(val_cats, y_pred)
  print(f'Val Accuracy: {accuracy * 100:.2f}%')

  # predict on the test set.
  test_pred=classifier.predict(test_feats)
  all_test_pred.append(test_pred)
  accuracy = accuracy_score(test_cats, test_pred)
  print(f'Test Accuracy: {accuracy * 100:.2f}%')


# Histogram extraction

In [None]:
import numpy as np
import matplotlib.pyplot as plt
# For every feature map of every sample, store the sum of each row as a list.
for sample in samples:
  # Built from scratch, so re-running the cell no longer appends a second copy, and the
  # builtin `sum` is no longer overwritten by a loop accumulator.
  # (Vectorised sums may differ from the old sequential float32 sums in the last digits.)
  sample.histogram = [np.asarray(fmap).sum(axis=1).tolist() for fmap in sample.conv]

In [None]:
y_pred=[]
all_pred=[]
all_test_pred=[]
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier

# The class ids do not depend on the feature map, so collect them once.
train_cats=[sample.cat for sample in train]
val_cats=[sample.cat for sample in validation]
test_cats=[sample.cat for sample in test]

# One Random Forest per feature map (6 maps), trained on that map's row-sum histogram.
for i in range(6):
  # Historical name: `svm` holds a Random Forest. Seeded (42) so the votes are repeatable.
  svm = RandomForestClassifier(random_state=42)
  train_feats=[sample.histogram[i] for sample in train]
  val_feats=[sample.histogram[i] for sample in validation]
  svm.fit(train_feats, train_cats)

  # predict on the validation set.
  y_pred = svm.predict(val_feats)
  all_pred.append(y_pred)

  # calculate accuracy.
  accuracy = accuracy_score(val_cats, y_pred)
  print(f'Random Forest: Accuracy for sub-group {i+1}: {accuracy * 100:.2f}%')

  # predict on the test set.
  test_feats=[sample.histogram[i] for sample in test]
  y_pred = svm.predict(test_feats)
  all_test_pred.append(y_pred)
  accuracy = accuracy_score(test_cats, y_pred)
  print(f'Random Forest: Accuracy for test-group {i+1}: {accuracy * 100:.2f}%')

# Implement a Voting strategy

In [None]:
all_test_pred

Validation vote


In [None]:
# Transpose the predictions: vote[i] lists what every classifier predicted for validation sample i.
vote = []
for i in range(len(all_pred[0])):
  pred_for_each = [per_classifier[i] for per_classifier in all_pred]
  vote.append(pred_for_each)

In [None]:
np.array(vote).shape

In [None]:
pred_for_each

In [None]:
import numpy as np
# Majority vote per validation sample: the most frequent class id wins
# (argmax returns the smallest id on a tie).
vote_result = [np.argmax(np.bincount(np.array(ballots))) for ballots in vote]

In [None]:
# Accuracy of the majority vote on the validation split.
# (accuracy_score takes y_true first; the swapped order here does not change the value.)
accuracy = accuracy_score(vote_result, val_cats)
print(f'Accuracy: {accuracy * 100:.2f}%')

In [None]:
y_pred

Test vote

In [None]:
# Transpose the predictions: test_vote[i] lists what every classifier predicted for test sample i.
test_vote = []
for i in range(len(all_test_pred[0])):
  pred_for_each = [per_classifier[i] for per_classifier in all_test_pred]
  test_vote.append(pred_for_each)

In [None]:
import numpy as np
# Majority vote per test sample: the most frequent class id wins
# (argmax returns the smallest id on a tie).
vote_result = [np.argmax(np.bincount(np.array(ballots))) for ballots in test_vote]

In [None]:
# Accuracy of the majority vote on the test set.
test_accuracy = accuracy_score(vote_result, test_cats)
# Print the value just computed; this line used to print the stale `accuracy` variable.
print(f'Accuracy: {test_accuracy * 100:.2f}%')

Visualization of test and validation result

In [None]:
import numpy as np
import matplotlib.pyplot as plt
# Create a figure and subplots (room for up to 8 test images)
fig, axes = plt.subplots(2, 4, figsize=(10, 6))
# id -> name lookup (if several names share an id, the last one in `labels` wins)
switched_dict = {value: key for key, value in labels.items()}
print("Validate vote accuracy is {}%".format(accuracy*100))
print("Test vote accuracy is {}%".format(test_accuracy*100))
# Hide every axis first so unused panels stay blank.
for ax in axes.flat:
    ax.axis('off')
# zip stops at the shortest input, so fewer than 8 test images no longer raises IndexError.
for ax, sample, predicted in zip(axes.flat, test, vote_result):
    # Grayscale images are shown in gray, like the other plots in this notebook.
    ax.imshow(sample.img, cmap='gray')
    ax.set_title(switched_dict[predicted])

plt.show()