In [1]:
import cv2
import math
import multiprocessing
import nbimporter
import numpy as np
import os, time
import queue
import random
import scipy.ndimage
import scipy.spatial.distance
import skimage
from skimage import io
import sklearn.cluster
import threading

from ipynb.fs.defs.p1 import compute_dictionary_one_image, get_visual_words
from ipynb.fs.defs.p2 import get_image_feature, distance_to_set
from ipynb.fs.defs.p3 import evaluate_recognition_system
from util import display_filter_responses

### Q3.1.3 [Extra Credit] (10 points) Manually Graded:

Now that you have seen how well your recognition system can perform on a set of real images, you can experiment with different ways of improving this baseline system. Here are a few suggestions:

* Hyperparameter Tuning: here is a list of hyperparameters in the system that you can tune to get better performance:

    * `filter_scales`: a list of filter scales used in extracting filter responses;
    * `K`: the number of visual words and also the size of the dictionary;
    * `alpha`: the number of sampled pixels in each image when creating the dictionary;
    * `L`: the number of spatial pyramid layers used in feature extraction.

* Image manipulation: Try using image augmentation techniques such as random-crop, flipping, etc. to obtain more training data for your system. You can also try resizing the images, subtracting the mean color, etc. 

* Better classifier: in part 2 we used the nearest-neighbor classifier to classify test images. However, with our extracted SPM features from training images, we can use other classifiers such as multi-class logistic regression, multi-class support vector machines, etc. to gain better performance. For this, you can use implementations of these algorithms from `scipy`.


Tune the system you build to reach around 65\% accuracy on the provided test set (``data/test_data.npz``). <font color="blue">**In your writeup, document what you did to achieve such performance: (1) what you did, (2) what you expected would happen, and (3) what actually happened.** Also, include a file called ``custom.py/ipynb`` for running your code. </font>

In [2]:
# Hyperparameters for the custom (extra-credit) system.
# Not changing anything about filters for now
# alpha: number of pixels sampled from each image when building the dictionary.
ALPHA = 150
# K: number of visual words, which is also the number of k-means clusters.
N_CLUSTERS = 600  # Was 200
# Number of spatial pyramid layers passed to feature extraction.
SPM_LAYER_NUM = 3  # L is SPM_LAYER_NUM-1

In [3]:
def compute_dictionary(num_workers=6, random_state=None):
    '''
    Creates the dictionary of visual words by clustering using k-means.

    Every training image is handed to compute_dictionary_one_image in a
    worker pool; the per-image results are then read back from
    tmp/<index>.npy, stacked, and clustered into N_CLUSTERS centers.

    [input]
    * num_workers: number of workers to process in parallel
    * random_state: optional seed forwarded to sklearn.cluster.KMeans so a
      run can be reproduced; the default (None) leaves clustering unseeded

    [saved]
    * dictionary: numpy.ndarray of shape (K,3F), written to
      custom_dictionary.npy
    '''

    # np.load on an .npz returns an archive object that keeps the file open;
    # the context manager closes it as soon as the file list is copied out.
    with np.load("./data/train_data.npz") as train_data:
        image_names = train_data['files']

    num_images = image_names.shape[0]
    print("Processing {} images total".format(num_images))
    print("=" * 80)

    # One argument list per image: [index, alpha, path to the image file].
    list_of_args = [
        [i, ALPHA, './data/' + image_names[i]]
        for i in range(num_images)
    ]

    # The per-image responses are exchanged through tmp/<index>.npy, so make
    # sure the directory is there before any worker tries to write into it.
    os.makedirs('tmp', exist_ok=True)

    # Use multiprocessing for parallel processing of the images.
    with multiprocessing.Pool(num_workers) as p:
        p.map(compute_dictionary_one_image, list_of_args)

    # Load the tmp files and stack the responses stored as npy.
    filter_responses = np.concatenate(
        [np.load('tmp/%05d.npy' % i) for i in range(num_images)],
        axis=0,
    )

    # Cluster with sklearn.cluster.KMeans; the dictionary is cluster_centers_.
    print("Starting the cluster operation")
    kmeans = sklearn.cluster.KMeans(
        n_clusters=N_CLUSTERS,
        random_state=random_state,
    ).fit(filter_responses)
    np.save('custom_dictionary.npy', kmeans.cluster_centers_)
    print("Finished clustering, wrote dictionary")


# Build the dictionary now; this writes custom_dictionary.npy, which
# build_recognition_system loads.
compute_dictionary()

Processing 1000 images total
Saved 42, 150, ./data/park/sun_argnmdrhdmnzdmza.jpg to tmp/00042.npy
Saved 168, 150, ./data/desert/sun_bcorncjicjbueeqd.jpg to tmp/00168.npy
Saved 84, 150, ./data/desert/sun_bwbeayteffpmoluy.jpg to tmp/00084.npy
Saved 0, 150, ./data/windmill/sun_agudhwulyxcizdjv.jpg to tmp/00000.npy
Saved 210, 150, ./data/laundromat/sun_ailkrcpigmkhnqkm.jpg to tmp/00210.npy
Saved 126, 150, ./data/highway/sun_bejktkrdtjvdxhpz.jpg to tmp/00126.npy
Saved 211, 150, ./data/aquarium/sun_aunixsaxzlrueygg.jpg to tmp/00211.npy
Saved 127, 150, ./data/desert/sun_afferxhafrjnpuri.jpg to tmp/00127.npy
Saved 43, 150, ./data/kitchen/sun_aomzbprctpzqwjzj.jpg to tmp/00043.npy
Saved 169, 150, ./data/desert/sun_byobuhsgvryskzzl.jpg to tmp/00169.npy
Saved 1, 150, ./data/laundromat/sun_aodzngrfoqxcuxiq.jpg to tmp/00001.npy
Saved 85, 150, ./data/highway/sun_boyvqutltxispgdo.jpg to tmp/00085.npy
Saved 44, 150, ./data/waterfall/sun_apjthvdrlhsmywmi.jpg to tmp/00044.npy
Saved 170, 150, ./data/park/

Saved 17, 150, ./data/park/labelme_jsjzaunwfazsmjx.jpg to tmp/00017.npy
Saved 60, 150, ./data/highway/sun_asexmybwhofzjwno.jpg to tmp/00060.npy
Saved 102, 150, ./data/highway/sun_avpwctedcdxoikod.jpg to tmp/00102.npy
Saved 18, 150, ./data/windmill/sun_bbhwehaaapkoyhqm.jpg to tmp/00018.npy
Saved 188, 150, ./data/aquarium/sun_ahbydirwmqznizud.jpg to tmp/00188.npy
Saved 145, 150, ./data/park/labelme_earnkljoflmdqyb.jpg to tmp/00145.npy
Saved 229, 150, ./data/kitchen/sun_ajgisfszhervnyxr.jpg to tmp/00229.npy
Saved 61, 150, ./data/kitchen/sun_abubevclcmcjqgae.jpg to tmp/00061.npy
Saved 189, 150, ./data/waterfall/sun_begqmgtlnyhedlpy.jpg to tmp/00189.npy
Saved 103, 150, ./data/aquarium/sun_aegjsohwfecizxjd.jpg to tmp/00103.npy
Saved 230, 150, ./data/aquarium/sun_aswmxnajswweurre.jpg to tmp/00230.npy
Saved 146, 150, ./data/waterfall/sun_ajkybsokdhevbyaw.jpg to tmp/00146.npy
Saved 19, 150, ./data/windmill/sun_bsngeuxxmgmcsesp.jpg to tmp/00019.npy
Saved 62, 150, ./data/highway/sun_beakjawckqywu

Saved 78, 150, ./data/kitchen/sun_aqmitamtngunumli.jpg to tmp/00078.npy
Saved 246, 150, ./data/kitchen/sun_azkalqubuavyhanp.jpg to tmp/00246.npy
Saved 165, 150, ./data/highway/sun_bhcqciazzelbazep.jpg to tmp/00165.npy
Saved 123, 150, ./data/laundromat/sun_arhskcrgwkddthtx.jpg to tmp/00123.npy
Saved 35, 150, ./data/waterfall/sun_aubqmdekaxxzvkig.jpg to tmp/00035.npy
Saved 206, 150, ./data/park/labelme_wjxputzultzgxie.jpg to tmp/00206.npy
Saved 166, 150, ./data/laundromat/sun_agpkwtvivpjrnjjq.jpg to tmp/00166.npy
Saved 79, 150, ./data/windmill/sun_bgberbtekrbujrju.jpg to tmp/00079.npy
Saved 247, 150, ./data/aquarium/sun_avuuhyyjdrhcfzlu.jpg to tmp/00247.npy
Saved 124, 150, ./data/aquarium/sun_awgplcderwsdwkqd.jpg to tmp/00124.npy
Saved 80, 150, ./data/kitchen/sun_afqpfzmhhuegedtn.jpg to tmp/00080.npy
Saved 36, 150, ./data/aquarium/sun_albheujeaogefcsy.jpg to tmp/00036.npy
Saved 248, 150, ./data/laundromat/sun_akherkavsiteuceb.jpg to tmp/00248.npy
Saved 207, 150, ./data/laundromat/sun_avr

Saved 435, 150, ./data/waterfall/sun_aumtoinjhttkanhi.jpg to tmp/00435.npy
Saved 267, 150, ./data/kitchen/sun_azgxmfkamfoixcqm.jpg to tmp/00267.npy
Saved 349, 150, ./data/desert/sun_bnvjjlyzatkvovei.jpg to tmp/00349.npy
Saved 308, 150, ./data/kitchen/sun_aiwjjxhtgtrwjdsf.jpg to tmp/00308.npy
Saved 473, 150, ./data/aquarium/sun_auhbzrixgbnkynar.jpg to tmp/00473.npy
Saved 392, 150, ./data/laundromat/sun_azajlzmaakxwpzks.jpg to tmp/00392.npy
Saved 268, 150, ./data/windmill/sun_bdtchkepcotagfds.jpg to tmp/00268.npy
Saved 436, 150, ./data/highway/sun_aagkjhignpmigxkv.jpg to tmp/00436.npy
Saved 269, 150, ./data/waterfall/sun_aghcxbzinxkqdans.jpg to tmp/00269.npy
Saved 350, 150, ./data/kitchen/sun_aobyfgpmnvnnuolx.jpg to tmp/00350.npy
Saved 474, 150, ./data/park/labelme_hmiuzegsjzzqwkq.jpg to tmp/00474.npy
Saved 309, 150, ./data/aquarium/sun_akjpgxvhcvynabhn.jpg to tmp/00309.npy
Saved 393, 150, ./data/highway/sun_amgfowdqviytyqct.jpg to tmp/00393.npy
Saved 351, 150, ./data/desert/sun_akvwwvuz

Saved 452, 150, ./data/aquarium/sun_ahyyislfsjgqxbmo.jpg to tmp/00452.npy
Saved 287, 150, ./data/kitchen/sun_ayakdnbiuvnibwap.jpg to tmp/00287.npy
Saved 366, 150, ./data/desert/sun_aloizwznuhnkgfby.jpg to tmp/00366.npy
Saved 409, 150, ./data/waterfall/sun_awfkpifyxaxuwswq.jpg to tmp/00409.npy
Saved 327, 150, ./data/windmill/sun_bhexonkjpkebkqou.jpg to tmp/00327.npy
Saved 494, 150, ./data/windmill/sun_bjdnqlyksnkbjcap.jpg to tmp/00494.npy
Saved 328, 150, ./data/aquarium/sun_akyriexvlsmqmlcr.jpg to tmp/00328.npy
Saved 367, 150, ./data/aquarium/sun_afvxbfryrzzpcifr.jpg to tmp/00367.npy
Saved 453, 150, ./data/highway/sun_aohtjpajmfyipfbo.jpg to tmp/00453.npy
Saved 410, 150, ./data/kitchen/sun_aqrdtsxisrulfusj.jpg to tmp/00410.npy
Saved 288, 150, ./data/highway/sun_blghdewmpheqljet.jpg to tmp/00288.npy
Saved 495, 150, ./data/park/labelme_ibbymekfwcwqmqe.jpg to tmp/00495.npy
Saved 454, 150, ./data/waterfall/sun_aicxciesluvbngzg.jpg to tmp/00454.npy
Saved 368, 150, ./data/waterfall/sun_apepjy

Saved 554, 150, ./data/highway/sun_aikloxuincjpmnby.jpg to tmp/00554.npy
Saved 513, 150, ./data/laundromat/sun_auassvantyxikpvl.jpg to tmp/00513.npy
Saved 640, 150, ./data/desert/sun_ajpunlbvbjnwwoxn.jpg to tmp/00640.npy
Saved 681, 150, ./data/highway/sun_bwojhutovsywaapp.jpg to tmp/00681.npy
Saved 596, 150, ./data/laundromat/sun_avvffykmzlawufhs.jpg to tmp/00596.npy
Saved 722, 150, ./data/desert/sun_bygjynspaxnvwvyk.jpg to tmp/00722.npy
Saved 555, 150, ./data/laundromat/sun_aykyvbyvywaysnkx.jpg to tmp/00555.npy
Saved 514, 150, ./data/windmill/sun_bbeuudsvoakejtcw.jpg to tmp/00514.npy
Saved 597, 150, ./data/aquarium/sun_akmrxdeeztdccihv.jpg to tmp/00597.npy
Saved 682, 150, ./data/laundromat/sun_aqtxuwpwcozkpjsu.jpg to tmp/00682.npy
Saved 641, 150, ./data/windmill/sun_beeuabqrqyboliqz.jpg to tmp/00641.npy
Saved 723, 150, ./data/windmill/sun_buoutmofnezgokzo.jpg to tmp/00723.npy
Saved 556, 150, ./data/laundromat/sun_aqvlbpodygadjhqg.jpg to tmp/00556.npy
Saved 515, 150, ./data/kitchen/sun

Saved 571, 150, ./data/aquarium/sun_ahkoddwbeunoomwa.jpg to tmp/00571.npy
Saved 699, 150, ./data/windmill/sun_bjjwhpajbvwfimfc.jpg to tmp/00699.npy
Saved 740, 150, ./data/windmill/sun_bybdvwzyfhxtuvtf.jpg to tmp/00740.npy
Saved 660, 150, ./data/waterfall/sun_bbfntiruonnefmkt.jpg to tmp/00660.npy
Saved 531, 150, ./data/highway/sun_bpfnisagawgmjznp.jpg to tmp/00531.npy
Saved 700, 150, ./data/laundromat/sun_akledoavsnmncxkd.jpg to tmp/00700.npy
Saved 741, 150, ./data/aquarium/sun_axpzpvgtvwzunxhy.jpg to tmp/00741.npy
Saved 615, 150, ./data/highway/sun_baoqmontgzgcvklg.jpg to tmp/00615.npy
Saved 532, 150, ./data/waterfall/sun_ariqkpbnyitlvwdi.jpg to tmp/00532.npy
Saved 572, 150, ./data/windmill/sun_bimpqkhrzlokwldq.jpg to tmp/00572.npy
Saved 616, 150, ./data/waterfall/sun_advmohdjsqjtynxr.jpg to tmp/00616.npy
Saved 742, 150, ./data/kitchen/sun_anzbjguxdgpobefx.jpg to tmp/00742.npy
Saved 661, 150, ./data/highway/sun_byrykbmnxeackski.jpg to tmp/00661.npy
Saved 701, 150, ./data/kitchen/sun_al

Saved 762, 150, ./data/desert/sun_bnyuunzbgpuyrcxm.jpg to tmp/00762.npy
Saved 966, 150, ./data/windmill/sun_bvtbnfulhkixbvev.jpg to tmp/00966.npy
Saved 802, 150, ./data/kitchen/sun_aenotlpyftoioydp.jpg to tmp/00802.npy
Saved 763, 150, ./data/park/sun_bjmcizwdiixltjaw.jpg to tmp/00763.npy
Saved 887, 150, ./data/desert/sun_bssrzuslposilujg.jpg to tmp/00887.npy
Saved 926, 150, ./data/waterfall/sun_aqlrhdmixnakvcfq.jpg to tmp/00926.npy
Saved 844, 150, ./data/kitchen/sun_aqlpfaorsvofqfuy.jpg to tmp/00844.npy
Saved 803, 150, ./data/kitchen/sun_agorklincwxrdtga.jpg to tmp/00803.npy
Saved 967, 150, ./data/aquarium/sun_auqfioruanbaaarh.jpg to tmp/00967.npy
Saved 888, 150, ./data/highway/sun_bckmblqgdjkaojjd.jpg to tmp/00888.npy
Saved 845, 150, ./data/aquarium/sun_absqijdywpmqtdtj.jpg to tmp/00845.npy
Saved 764, 150, ./data/kitchen/sun_afbibxsnpvkmvhxn.jpg to tmp/00764.npy
Saved 927, 150, ./data/park/labelme_entwioeipyqzcaa.jpg to tmp/00927.npy
Saved 889, 150, ./data/waterfall/sun_advocchixohdoi

Saved 944, 150, ./data/highway/sun_bnvaatlldzndddpg.jpg to tmp/00944.npy
Saved 905, 150, ./data/highway/sun_btvskgegczcrfwwa.jpg to tmp/00905.npy
Saved 821, 150, ./data/laundromat/sun_ahatmjfslrvdayyf.jpg to tmp/00821.npy
Saved 781, 150, ./data/waterfall/sun_aecgdxztcovcpyvx.jpg to tmp/00781.npy
Saved 906, 150, ./data/waterfall/sun_aastyysdvtnkdcvt.jpg to tmp/00906.npy
Saved 862, 150, ./data/highway/sun_bvmqekshrpdclajj.jpg to tmp/00862.npy
Saved 985, 150, ./data/kitchen/sun_arffihzwrfsleobm.jpg to tmp/00985.npy
Saved 945, 150, ./data/aquarium/sun_aashbnpnxflmstxu.jpg to tmp/00945.npy
Saved 907, 150, ./data/kitchen/sun_atgzwnczaxwddrai.jpg to tmp/00907.npy
Saved 822, 150, ./data/aquarium/sun_aezldsdrgzfkuwhw.jpg to tmp/00822.npy
Saved 782, 150, ./data/highway/sun_bvgsyrrrvqwmclew.jpg to tmp/00782.npy
Saved 908, 150, ./data/laundromat/sun_aodwvqchicnebypb.jpg to tmp/00908.npy
Saved 863, 150, ./data/desert/sun_awhsntyjdsqbntyr.jpg to tmp/00863.npy
Saved 986, 150, ./data/highway/sun_bloie

Saved 965, 150, ./data/waterfall/sun_bjgujnjeakwxtjtz.jpg to tmp/00965.npy
Starting the cluster operation
Finished clustering, wrote dictionary


In [4]:
# def write_image_feature(args):
#     file_path, dictionary, spm_layer_num, K, i, ordered = args
#     _, feature = get_image_feature(file_path,
#                                    dictionary,
#                                    spm_layer_num,
#                                    K)
#     ordered[i, :] = feature
#     print(f"Processed {i}")


def build_recognition_system(num_workers=6):
    '''
    Creates a trained recognition system by generating training features from all training images.

    [input]
    * num_workers: number of worker processes used for feature extraction;
      a value of 1 or less extracts the features serially in this process

    [saved] (all written to custom_trained_system.npz)
    * features: numpy.ndarray of shape (N,M)
    * labels: numpy.ndarray of shape (N)
    * dictionary: numpy.ndarray of shape (K,3F)
    * SPM_layer_num: number of spatial pyramid layers
    '''

    # np.load on an .npz keeps the archive open and re-reads an array on every
    # key access, so copy out what is needed once and close the file.
    with np.load("./data/train_data.npz") as train_data:
        image_files = train_data["files"]
        labels = train_data["labels"]
    dictionary = np.load("custom_dictionary.npy")

    num_images = labels.shape[0]
    num_words = dictionary.shape[0]
    # 1 + 4 + ... + 4**(SPM_LAYER_NUM - 1) == (4**SPM_LAYER_NUM - 1) / 3, and
    # that quotient is always a whole number, so integer division is exact.
    # Presumably this counts the pyramid cells, each holding one histogram of
    # num_words bins -- TODO confirm against get_image_feature.
    feature_len = num_words * ((4**SPM_LAYER_NUM - 1) // 3)

    ordered_features = np.zeros((num_images, feature_len))
    image_paths = ["./data/" + file_path for file_path in image_files]

    def _store(results):
        # Results arrive in image order, so row i belongs to image i.
        for i, (_, feature) in enumerate(results):
            ordered_features[i, :] = feature
            print(f"Processed {i}")

    if num_workers is not None and num_workers <= 1:
        _store(
            get_image_feature(path, dictionary, SPM_LAYER_NUM, K=num_words)
            for path in image_paths
        )
    else:
        # Pool workers receive pickled copies of their arguments, so writing
        # into an array passed to a worker only changes that worker's copy.
        # Each worker therefore returns its feature and the parent process
        # fills ordered_features itself.
        with multiprocessing.Pool(num_workers) as pool:
            pending = [
                pool.apply_async(
                    get_image_feature,
                    (path, dictionary, SPM_LAYER_NUM),
                    {"K": num_words},
                )
                for path in image_paths
            ]
            # .get() blocks until that job is done and re-raises any worker
            # error; collect inside the with-block because leaving it
            # terminates the pool.
            _store(job.get() for job in pending)

    np.savez('custom_trained_system.npz',
             features=ordered_features,
             labels=labels,
             dictionary=dictionary,
             SPM_layer_num=SPM_LAYER_NUM)


# Extract the training features and write custom_trained_system.npz, the file
# name that is passed to evaluate_recognition_system.
build_recognition_system()

Processed 0
Processed 1
Processed 2
Processed 3
Processed 4
Processed 5
Processed 6
Processed 7
Processed 8
Processed 9
Processed 10
Processed 11
Processed 12
Processed 13
Processed 14
Processed 15
Processed 16
Processed 17
Processed 18
Processed 19
Processed 20
Processed 21
Processed 22
Processed 23
Processed 24
Processed 25
Processed 26
Processed 27
Processed 28
Processed 29
Processed 30
Processed 31
Processed 32
Processed 33
Processed 34
Processed 35
Processed 36
Processed 37
Processed 38
Processed 39
Processed 40
Processed 41
Processed 42
Processed 43
Processed 44
Processed 45
Processed 46
Processed 47
Processed 48
Processed 49
Processed 50
Processed 51
Processed 52
Processed 53
Processed 54
Processed 55
Processed 56
Processed 57
Processed 58
Processed 59
Processed 60
Processed 61
Processed 62
Processed 63
Processed 64
Processed 65
Processed 66
Processed 67
Processed 68
Processed 69
Processed 70
Processed 71
Processed 72
Processed 73
Processed 74
Processed 75
Processed 76
Processed

Processed 594
Processed 595
Processed 596
Processed 597
Processed 598
Processed 599
Processed 600
Processed 601
Processed 602
Processed 603
Processed 604
Processed 605
Processed 606
Processed 607
Processed 608
Processed 609
Processed 610
Processed 611
Processed 612
Processed 613
Processed 614
Processed 615
Processed 616
Processed 617
Processed 618
Processed 619
Processed 620
Processed 621
Processed 622
Processed 623
Processed 624
Processed 625
Processed 626
Processed 627
Processed 628
Processed 629
Processed 630
Processed 631
Processed 632
Processed 633
Processed 634
Processed 635
Processed 636
Processed 637
Processed 638
Processed 639
Processed 640
Processed 641
Processed 642
Processed 643
Processed 644
Processed 645
Processed 646
Processed 647
Processed 648
Processed 649
Processed 650
Processed 651
Processed 652
Processed 653
Processed 654
Processed 655
Processed 656
Processed 657
Processed 658
Processed 659
Processed 660
Processed 661
Processed 662
Processed 663
Processed 664
Proces

In [5]:
# Run the evaluation against the saved custom system and report both results.
conf_matrix, accuracy = evaluate_recognition_system(
    trained_sys="custom_trained_system.npz",
)
# A single print with a newline separator puts each label and each value on
# its own line.
print("confusion matrix", conf_matrix, "accuracy", accuracy, sep="\n")

Trained features shape:  (1000, 12600)
Processing 160 test images
Processed index 0
Processed index 1
Processed index 2
Processed index 3
Processed index 4
Processed index 5
Processed index 6
Processed index 7
Processed index 8
Processed index 9
Processed index 10
Processed index 11
Processed index 12
Processed index 13
Processed index 14
Processed index 15
Processed index 16
Processed index 17
Processed index 18
Processed index 19
Processed index 20
Processed index 21
Processed index 22
Processed index 23
Processed index 24
Processed index 25
Processed index 26
Processed index 27
Processed index 28
Processed index 29
Processed index 30
Processed index 31
Processed index 32
Processed index 33
Processed index 34
Processed index 35
Processed index 36
Processed index 37
Processed index 38
Processed index 39
Processed index 40
Processed index 41
Processed index 42
Processed index 43
Processed index 44
Processed index 45
Processed index 46
Processed index 47
Processed index 48
Processed ind