# Assignment 1 #
Karthik Karavatt
2061996
5

In [148]:
import cv2 as cv
# Read the digit sheet. cv.imread does not raise when the file is missing or
# unreadable -- it returns None -- so check explicitly and fail with a clear
# message instead of an obscure error inside cvtColor.
IMAGE_PATH = "digits.png"
img_bgr = cv.imread(IMAGE_PATH)
if img_bgr is None:
    raise FileNotFoundError(f"Could not read image file: {IMAGE_PATH!r}")
# Convert the image to grayscale (kept under a separate name so the raw
# BGR image is not overwritten)
img = cv.cvtColor(img_bgr, cv.COLOR_BGR2GRAY)

## Task 2.1 Data Preparation 

### 2.1.1
Split the image into 5000 images of size 20x20 pixels each. Each image contains a digit.
Store this data in a numpy array

Each digit occupies a 20x20-pixel cell, so the image can be split into 5000 cells of 20x20 pixels each.
As the image is 2000x1000 pixels (width x height), this gives 50 rows and 100 columns of cells.

In [149]:
import numpy as np
# Split the image into 5000 cells of size 20x20 pixels each.
# np.vsplit cuts the sheet into 50 horizontal strips and np.hsplit cuts each
# strip into 100 cells, so the result is indexed as [row, column, y, x].
data_cells = np.array([np.hsplit(row, 100) for row in np.vsplit(img, 50)])

# data_cells is already an ndarray, so a second np.array() copy is unnecessary.
data = data_cells
# Guard against a sheet of unexpected size before the split is used downstream.
assert data.shape == (50, 100, 20, 20), f"unexpected grid shape {data.shape}"

### 2.1.2
Split the data into training and test sets: 4000 images for training and 1000 images for testing.
The digit changes every 5 rows: each digit from 0 to 9 occupies 5 consecutive rows. We hold out one row of each digit (every 5th row, starting with the first) for testing and use the remaining four rows of each digit for training.

In [150]:
# Mark every 5th grid row (rows 0, 5, 10, ...) as held out
row_ids = np.arange(data.shape[0])
held_out = row_ids % 5 == 0

# Held-out rows form the test set
test_data = data[held_out]

# All remaining rows form the training set
train_data = data[~held_out]

### 2.1.3
Write the images to folders. The training images are written to the `train` directory and the test images to the `test` directory.

In [151]:
import os
import shutil
import cv2 as cv

# Define your directory paths
train_dir = 'train'
test_dir = 'test'

# Start from empty output directories: remove any existing directory (and
# everything in it) so files from an earlier run are not left behind.
for dir_path in [train_dir, test_dir]:
    if os.path.exists(dir_path) and os.path.isdir(dir_path):
        shutil.rmtree(dir_path)

    # Create the directories
    os.makedirs(dir_path)


def _write_image(path, image):
    """Write ``image`` to ``path``, raising OSError if OpenCV reports failure."""
    # cv.imwrite signals failure through its boolean return value rather than
    # by raising, so an unchecked call can silently produce no file.
    if not cv.imwrite(path, image):
        raise OSError(f'Failed to write image: {path}')


# Save your train and test data to the respective directories as JPEGs
# NOTE(review): JPEG is a lossy format, so pixels read back from these files
# may differ slightly from the original cells -- confirm JPEG is required.

for i in range(train_data.shape[0]):
    # Four training rows remain per digit: the quotient is the digit and the
    # remainder is the row within that digit. Computed once per row.
    digit, row = divmod(i, 4)
    for j in range(train_data.shape[1]):
        _write_image(os.path.join(train_dir, f'train_{digit}_{row}_{j}.jpg'), train_data[i, j])

for i in range(test_data.shape[0]):
    for j in range(test_data.shape[1]):
        _write_image(os.path.join(test_dir, f'test_{i}_{j}.jpg'), test_data[i, j])

def load_images_to_array(path):
    """Load every file in a directory as a grayscale image.

    Files are read in lexicographically sorted filename order, so the order
    of the returned images is determined by the filenames alone.

    Args:
        path: Directory containing the image files.

    Returns:
        A numpy array built with ``np.array`` from the loaded images, in
        sorted-filename order.

    Raises:
        ValueError: If OpenCV cannot read one of the directory entries.
    """
    images = []
    for file in sorted(os.listdir(path)):
        file_path = os.path.join(path, file)
        image = cv.imread(file_path, cv.IMREAD_GRAYSCALE)
        # cv.imread returns None instead of raising when an entry cannot be
        # read; fail here rather than packing None into the array.
        if image is None:
            raise ValueError(f'Could not read image file: {file_path}')
        images.append(image)
    return np.array(images)

## Task 2.2: Nearest Neighbor method for image classification 

In this task we will use the k-nearest neighbor method to classify the images in the test set.
### 2.2.1
Label the images in the training set. The label of each image is the digit it contains.

In [152]:
# Read the saved images back from disk, then flatten every image into a
# 400-value float32 row (one row per sample)
train_data = load_images_to_array(train_dir).reshape(-1, 400).astype(np.float32)
test_data = load_images_to_array(test_dir).reshape(-1, 400).astype(np.float32)

# Labels as column vectors: 400 consecutive training samples and
# 100 consecutive test samples for each digit 0-9
train_labels = np.arange(10).repeat(400).reshape(-1, 1)
test_labels = np.arange(10).repeat(100).reshape(-1, 1)



### 2.2.2 Implement the k-nearest neighbor method to classify the images in the test set.

In [153]:
def get_accuracy_and_confusion_matrix(k):
    """Classify the test set with k-nearest neighbours and print the results.

    Trains an OpenCV KNearest model on the notebook-level ``train_data`` and
    ``train_labels``, predicts ``test_data`` with ``k`` neighbours, and prints
    the accuracy followed by the confusion matrix as a LaTeX ``bmatrix``
    (row index = true label, column index = predicted label).

    Args:
        k: Number of neighbours passed to ``findNearest``.

    Returns:
        None. The accuracy and confusion matrix are printed.
    """
    # Create and train the kNN model
    knn = cv.ml.KNearest_create()
    knn.train(train_data, cv.ml.ROW_SAMPLE, train_labels)
    _, result, _, _ = knn.findNearest(test_data, k=k)

    # Calculate the accuracy of classification. Multiplying before dividing
    # avoids floating-point noise such as 93.10000000000001.
    correct = np.count_nonzero(np.equal(result, test_labels))
    accuracy = 100.0 * correct / result.size
    print(f'Accuracy is {accuracy}%')

    # Flatten the true labels and predictions to 1-D integer arrays
    true_ids = test_labels.astype(int).ravel()
    predicted_ids = result.astype(int).ravel()

    # Size the matrix from the largest label on either side, so a predicted
    # class that never occurs in the test labels cannot index out of bounds.
    num_classes = int(max(true_ids.max(), predicted_ids.max())) + 1

    # Count every (true, predicted) pair; np.add.at accumulates repeated
    # index pairs, which plain fancy-index assignment would not.
    confusion_matrix = np.zeros((num_classes, num_classes), dtype=np.int32)
    np.add.at(confusion_matrix, (true_ids, predicted_ids), 1)

    # Print the confusion matrix in latex format for markdown:
    # cells separated by '&', each row terminated by a LaTeX line break
    print(f'k = {k}')
    print(r'$\begin{bmatrix}')
    for row in confusion_matrix:
        print('&'.join(str(col) for col in row) + '\\\\')
    print(r'\end{bmatrix}$')
    print('\n')

# Run the evaluation with k = 3; the accuracy and confusion matrix are printed
get_accuracy_and_confusion_matrix(3)
    

Accuracy is 93.10000000000001%
k = 3
$\begin{bmatrix}
100&0&0&0&0&0&0&0&0&0\\
0&98&1&1&0&0&0&0&0&0\\
2&5&88&0&0&1&0&4&0&0\\
1&0&2&91&0&3&1&2&0&0\\
0&1&0&0&90&1&1&0&0&7\\
0&2&1&6&0&88&1&0&0&2\\
1&0&0&0&0&0&99&0&0&0\\
0&3&0&0&1&0&0&92&0&4\\
0&4&1&1&0&0&0&0&93&1\\
4&0&0&1&2&0&0&1&0&92\\
\end{bmatrix}$


| k  | Accuracy |
|----|----------|
| 1  | 92.6%    |
| 2  | 92.2%    |
| 3  | 93.1%    |
| 4  | 92.7%    |
| 5  | 92.1%    |
| 6  | 92.0%    |
| 7  | 92.0%    |
| 8  | 91.9%    |
| 9  | 91.8%    |
| 10 | 92.0%    |
| 11 | 90.9%    |
| 12 | 90.9%    |
| 13 | 90.8%    |
| 14 | 90.8%    |
| 15 | 90.6%    |
| 16 | 90.1%    |
| 17 | 90.2%    |
| 18 | 90.3%    |
| 19 | 90.1%    |
| 20 | 89.9%    |


k = 1
$\begin{bmatrix}
98&0&2&0&0&0&0&0&0&0\\
0&97&2&1&0&0&0&0&0&0\\
1&3&86&3&1&1&0&4&0&1\\
0&0&0&93&0&2&1&2&2&0\\
0&0&0&0&90&1&0&1&0&8\\
0&1&1&4&0&89&1&0&0&4\\
1&0&0&0&1&1&97&0&0&0\\
0&3&0&0&1&0&0&91&0&5\\
0&2&0&2&0&1&0&0&95&0\\
3&0&0&0&2&0&1&4&0&90\\
\end{bmatrix}$


k = 2
$\begin{bmatrix}
100&0&0&0&0&0&0&0&0&0\\
0&98&2&0&0&0&0&0&0&0\\
3&5&87&0&0&1&0&4&0&0\\
1&0&3&91&0&1&1&2&1&0\\
0&1&0&0&95&0&0&2&0&2\\
0&1&1&9&2&86&0&0&0&1\\
1&1&0&0&1&2&95&0&0&0\\
0&4&0&0&2&0&0&93&0&1\\
0&5&1&1&0&3&0&0&90&0\\
4&0&0&1&4&0&0&4&0&87\\
\end{bmatrix}$


k = 3
$\begin{bmatrix}
100&0&0&0&0&0&0&0&0&0\\
0&98&1&1&0&0&0&0&0&0\\
2&5&88&0&0&1&0&4&0&0\\
1&0&2&91&0&3&1&2&0&0\\
0&1&0&0&90&1&1&0&0&7\\
0&2&1&6&0&88&1&0&0&2\\
1&0&0&0&0&0&99&0&0&0\\
0&3&0&0&1&0&0&92&0&4\\
0&4&1&1&0&0&0&0&93&1\\
4&0&0&1&2&0&0&1&0&92\\
\end{bmatrix}$


k = 4
$\begin{bmatrix}
100&0&0&0&0&0&0&0&0&0\\
0&97&1&1&1&0&0&0&0&0\\
2&7&86&0&0&1&0&4&0&0\\
1&0&2&92&0&1&1&2&1&0\\
0&1&0&0&93&0&1&0&0&5\\
0&2&1&4&0&90&1&0&0&2\\
1&1&0&0&0&0&98&0&0&0\\
0&5&0&0&1&0&0&91&0&3\\
0&3&0&2&0&0&0&0&94&1\\
1&0&1&2&5&0&2&3&0&86\\
\end{bmatrix}$


k = 5
$\begin{bmatrix}
99&0&0&0&0&0&1&0&0&0\\
0&98&0&1&0&1&0&0&0&0\\
2&6&86&0&0&1&0&5&0&0\\
1&0&2&91&0&2&1&2&1&0\\
0&2&0&0&90&1&2&0&0&5\\
0&1&1&5&0&88&2&0&0&3\\
1&1&0&0&0&0&98&0&0&0\\
0&4&0&0&1&0&0&92&0&3\\
0&3&1&3&0&1&1&0&90&1\\
1&0&1&1&2&0&2&4&0&89\\
\end{bmatrix}$


k = 6
$\begin{bmatrix}
99&0&0&0&0&0&1&0&0&0\\
0&98&0&1&0&1&0&0&0&0\\
3&7&86&0&0&0&0&4&0&0\\
0&0&2&92&0&1&1&2&1&1\\
0&2&0&0&92&0&2&0&0&4\\
0&1&1&6&1&88&1&0&0&2\\
1&1&0&0&0&0&98&0&0&0\\
0&5&0&0&1&0&0&90&0&4\\
0&4&1&2&0&2&1&0&89&1\\
3&0&0&2&2&1&0&4&0&88\\
\end{bmatrix}$


k = 7
$\begin{bmatrix}
98&0&0&0&0&2&0&0&0&0\\
0&98&1&1&0&0&0&0&0&0\\
2&7&84&0&0&1&0&6&0&0\\
0&0&2&92&0&1&1&2&1&1\\
0&2&0&0&92&0&2&0&0&4\\
0&1&1&5&0&88&2&0&0&3\\
1&1&0&0&0&0&98&0&0&0\\
0&5&0&0&0&0&0&92&0&3\\
0&4&0&4&0&1&1&0&89&1\\
2&0&0&1&2&1&0&5&0&89\\
\end{bmatrix}$


k = 8
$\begin{bmatrix}
99&0&0&0&0&0&1&0&0&0\\
0&97&2&0&1&0&0&0&0&0\\
2&8&85&0&0&1&0&4&0&0\\
0&0&2&92&0&1&1&2&1&1\\
0&3&0&0&90&0&2&0&0&5\\
0&1&1&7&0&87&1&0&0&3\\
1&1&0&0&0&0&98&0&0&0\\
0&5&0&0&0&0&0&92&0&3\\
0&4&0&2&0&1&1&0&91&1\\
3&0&0&1&2&0&1&5&0&88\\
\end{bmatrix}$


k = 9
$\begin{bmatrix}
97&0&0&0&0&2&1&0&0&0\\
0&98&0&1&0&1&0&0&0&0\\
2&9&84&0&0&1&0&4&0&0\\
0&0&2&91&0&2&1&2&1&1\\
0&3&0&0&90&0&2&0&0&5\\
0&1&1&6&0&88&1&0&0&3\\
1&1&0&0&0&0&98&0&0&0\\
0&5&0&0&0&0&0&92&0&3\\
0&4&0&2&0&2&1&0&90&1\\
2&0&0&1&1&1&0&5&0&90\\
\end{bmatrix}$


k = 10
$\begin{bmatrix}
98&0&0&0&0&1&1&0&0&0\\
0&98&0&1&0&1&0&0&0&0\\
2&9&84&0&0&1&0&4&0&0\\
1&0&2&92&0&1&1&2&0&1\\
0&3&0&0&90&0&2&0&0&5\\
0&1&1&7&1&87&1&0&0&2\\
1&1&0&0&0&0&98&0&0&0\\
0&5&0&0&0&0&0&92&0&3\\
0&4&0&2&0&2&1&0&90&1\\
2&0&0&1&1&1&0&4&0&91\\
\end{bmatrix}$


k = 11
$\begin{bmatrix}
96&0&0&0&0&2&2&0&0&0\\
0&98&0&0&0&1&0&0&0&1\\
3&9&82&0&0&1&0&5&0&0\\
1&1&1&90&0&2&1&2&0&2\\
0&3&0&0&90&0&2&0&0&5\\
0&1&1&7&0&86&1&0&1&3\\
1&1&0&0&0&0&98&0&0&0\\
0&6&0&0&1&0&0&90&0&3\\
0&4&0&2&0&2&1&0&90&1\\
2&0&0&1&2&1&0&5&0&89\\
\end{bmatrix}$


k = 12
$\begin{bmatrix}
96&0&0&0&0&3&1&0&0&0\\
0&97&0&0&1&1&0&0&0&1\\
3&8&82&0&0&1&0&6&0&0\\
1&1&2&90&0&2&1&2&0&1\\
0&4&0&0&88&1&2&0&0&5\\
0&1&1&5&0&89&1&0&0&3\\
1&1&0&0&0&0&98&0&0&0\\
0&6&0&0&1&0&0&90&0&3\\
0&4&0&2&0&2&1&0&90&1\\
2&0&0&1&3&0&1&4&0&89\\
\end{bmatrix}$


k = 13
$\begin{bmatrix}
96&0&0&0&0&2&2&0&0&0\\
0&98&0&0&0&1&0&0&0&1\\
3&9&80&0&0&1&0&7&0&0\\
1&1&1&91&0&1&1&3&0&1\\
0&4&0&0&90&0&2&0&0&4\\
0&1&1&5&1&88&1&0&0&3\\
1&1&0&0&0&0&98&0&0&0\\
0&6&0&0&0&0&0&91&0&3\\
0&3&0&4&0&3&1&0&88&1\\
2&0&1&1&2&0&1&5&0&88\\
\end{bmatrix}$


k = 14
$\begin{bmatrix}
96&0&0&0&0&2&2&0&0&0\\
0&98&0&0&0&1&0&1&0&0\\
2&9&81&0&0&1&0&6&1&0\\
1&1&1&91&0&1&1&3&0&1\\
0&4&0&0&87&1&2&0&0&6\\
0&1&1&5&1&88&1&0&0&3\\
1&1&0&0&0&0&98&0&0&0\\
0&6&0&0&0&0&0&91&0&3\\
0&3&0&3&0&3&1&0&89&1\\
2&0&0&1&3&0&1&4&0&89\\
\end{bmatrix}$


k = 15
$\begin{bmatrix}
96&0&0&0&0&2&2&0&0&0\\
0&97&0&0&1&1&0&0&0&1\\
2&9&81&0&0&1&0&6&0&1\\
1&1&1&91&0&1&1&3&0&1\\
0&4&0&0&90&0&2&0&0&4\\
0&1&1&6&0&86&2&0&0&4\\
1&1&0&0&0&0&98&0&0&0\\
0&7&0&0&1&0&0&88&0&4\\
0&3&0&3&0&3&1&0&89&1\\
2&0&0&1&1&0&1&5&0&90\\
\end{bmatrix}$


k = 16
$\begin{bmatrix}
96&0&0&0&0&2&2&0&0&0\\
0&97&0&0&1&1&0&0&0&1\\
2&9&80&0&0&1&0&6&1&1\\
1&1&1&89&0&2&1&3&0&2\\
0&4&0&0&88&0&2&0&0&6\\
0&1&1&5&0&88&1&0&0&4\\
1&1&0&0&0&1&97&0&0&0\\
0&7&0&0&1&0&0&88&0&4\\
0&5&0&4&0&2&1&0&87&1\\
2&0&0&1&1&0&1&4&0&91\\
\end{bmatrix}$


k = 17
$\begin{bmatrix}
96&0&0&0&0&2&2&0&0&0\\
0&97&0&0&1&1&0&1&0&0\\
3&9&79&0&0&1&0&6&1&1\\
1&1&2&87&0&2&1&3&0&3\\
0&4&0&0&90&0&2&0&0&4\\
0&1&1&6&1&86&2&0&0&3\\
1&1&0&0&0&1&97&0&0&0\\
0&6&0&0&1&0&0&90&0&3\\
0&3&0&3&0&3&1&0&89&1\\
2&0&0&1&1&0&1&4&0&91\\
\end{bmatrix}$


k = 18
$\begin{bmatrix}
96&0&0&0&0&2&2&0&0&0\\
0&97&0&0&1&1&0&1&0&0\\
2&9&81&0&0&1&0&5&1&1\\
1&1&2&85&0&3&1&3&0&4\\
0&4&0&0&90&0&2&0&0&4\\
0&1&1&6&1&87&1&0&0&3\\
1&1&0&0&0&1&97&0&0&0\\
0&6&0&0&1&0&0&90&0&3\\
0&3&0&3&0&3&1&0&89&1\\
2&1&0&1&1&0&1&3&0&91\\
\end{bmatrix}$


k = 19
$\begin{bmatrix}
96&0&0&0&0&2&2&0&0&0\\
0&97&0&0&1&1&0&1&0&0\\
2&9&81&0&0&1&0&5&1&1\\
1&1&2&85&0&3&1&3&0&4\\
0&4&0&0&91&0&2&0&0&3\\
0&1&1&6&1&86&2&0&0&3\\
1&1&0&0&0&1&97&0&0&0\\
0&7&0&0&1&0&0&89&0&3\\
0&3&0&3&0&3&1&0&89&1\\
2&1&0&1&1&0&1&4&0&90\\
\end{bmatrix}$


k = 20
$\begin{bmatrix}
96&0&1&0&0&2&1&0&0&0\\
0&97&0&0&1&1&0&0&0&1\\
2&9&81&0&0&1&0&5&1&1\\
1&1&2&85&0&3&1&3&0&4\\
0&4&0&0&90&0&2&0&0&4\\
0&1&1&6&1&87&1&0&0&3\\
1&1&0&0&0&1&97&0&0&0\\
0&7&0&0&1&0&0&89&0&3\\
0&4&0&3&0&4&1&0&87&1\\
2&1&0&1&1&0&1&4&0&90\\
\end{bmatrix}$



### 2.3 Linear classifier for image classification
We will use support vector machines to classify the images in the test set.

In [163]:
# load the data from disk again so this cell starts from the saved images
# rather than from arrays modified by earlier cells
train_data = load_images_to_array(train_dir)
test_data = load_images_to_array(test_dir)
# create the labels: 400 training / 100 test samples per digit 0-9,
# as column vectors
train_labels = np.repeat(np.arange(10), 400).reshape(-1, 1)
test_labels = np.repeat(np.arange(10), 100).reshape(-1, 1)
# reshape the data: one flattened 400-value float32 row per image
train_data = train_data.reshape(-1, 400)
train_data = np.float32(train_data)
test_data = test_data.reshape(-1, 400)
test_data = np.float32(test_data)

# train SVM: C-support vector classification with a linear kernel
svm = cv.ml.SVM_create()
svm.setKernel(cv.ml.SVM_LINEAR)
svm.setType(cv.ml.SVM_C_SVC)
# NOTE(review): only the iteration-count criterion is enabled here, with a
# cap of 100 iterations -- confirm this is enough for training to converge.
svm.setTermCriteria((cv.TERM_CRITERIA_MAX_ITER, 100, 1e-6))
svm.train(train_data, cv.ml.ROW_SAMPLE, train_labels)

# predict the labels (the predictions are the second element returned)
result = svm.predict(test_data)[1]
# calculate the accuracy; multiplying before dividing avoids floating-point
# noise such as 87.10000000000001
matches = np.equal(result, test_labels)
correct = np.count_nonzero(matches)
accuracy = 100.0 * correct / result.size
print(f'Accuracy is {accuracy}% ')


Accuracy is 87.10000000000001%


The linear SVM's accuracy (87.1%) is lower than that of the k-nearest-neighbor method at k = 3 (93.1%).

### 2.4 Image classification using a bag of visual words