In [None]:
#Load Modules
import os       
import cv2
import math
import random
import numpy as np

import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.model_selection import train_test_split

import pandas as pd

import matplotlib.image as mpimg
%matplotlib inline

from google.colab import drive    
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
def center_crop(img, dim):
    """Return the centered crop of ``img``.

    Args:
        img: image array indexed as (rows, columns[, channels]); only
            ``img.shape[0]`` (height) and ``img.shape[1]`` (width) are read.
        dim: requested crop size as (width, height). A side larger than the
            image is clamped to the image's own size.

    Returns:
        The slice of ``img`` that is exactly ``min(dim[0], width)`` columns by
        ``min(dim[1], height)`` rows, centered on the image. When the leftover
        margin is odd, the extra pixel ends up on the right/bottom side.
    """
    height, width = img.shape[0], img.shape[1]

    # Clamp the requested size to what the image can actually provide.
    crop_width = min(int(dim[0]), width)
    crop_height = min(int(dim[1]), height)

    # Crop from the top-left corner instead of "middle +/- half size": halving
    # an odd size and doubling it again silently lost one row/column. For even
    # sizes both formulas select exactly the same window.
    left = (width - crop_width) // 2
    top = (height - crop_height) // 2
    return img[top:top + crop_height, left:left + crop_width]

In [None]:
#Load Training Images

# Side lengths (in pixels) of the resized image; both values are passed to
# cv2.resize by the dataset loaders before the image is flattened.
IMG_WIDTH=16
IMG_HEIGHT=16
# Directory of the training images on the mounted Google Drive.
img_folder=r"/content/drive/MyDrive/training"

def create_dataset(img_folder):
    """Load every training image under ``img_folder`` as a standardized feature vector.

    ``img_folder`` must contain one sub-directory per class; the sub-directory
    name becomes the label of every image inside it. Each image is read as
    grayscale, center-cropped to at most 100x100, resized to
    IMG_WIDTH x IMG_HEIGHT, flattened, and standardized to zero mean and unit
    variance.

    Entries of ``img_folder`` that are not directories are ignored, and files
    that OpenCV cannot decode are skipped with a printed notice.

    Args:
        img_folder: path of the directory holding the class sub-directories.

    Returns:
        (img_data_array, class_name): two parallel lists holding the flattened
        float vector and the class label of each loaded image.
    """
    img_data_array = []
    class_name = []

    # os.listdir returns entries in arbitrary order; sorting makes the sample
    # order identical from run to run.
    for dir1 in sorted(os.listdir(img_folder)):
        class_dir = os.path.join(img_folder, dir1)
        if not os.path.isdir(class_dir):
            continue  # stray file next to the class folders

        for file in sorted(os.listdir(class_dir)):
            image_path = os.path.join(class_dir, file)
            image = cv2.imread(image_path, 0)  # flag 0 = load as grayscale
            if image is None:
                # cv2.imread reports an unreadable / non-image file by returning None
                print("Skipping unreadable image: " + image_path)
                continue

            image = center_crop(image, (100, 100))
            # cv2.resize expects dsize as (width, height)
            image = cv2.resize(image, (IMG_WIDTH, IMG_HEIGHT), interpolation=cv2.INTER_AREA)
            image = np.array(image).flatten()

            # Standardize. A constant image has std == 0; only center it then,
            # instead of dividing by zero and filling the vector with NaNs.
            std = np.std(image)
            image = image - np.mean(image)
            if std > 0:
                image = image / std

            img_data_array.append(image)
            class_name.append(dir1)
    return img_data_array, class_name

  # extract the image array and class name
# img_folder (assigned at the top of this cell) already names the training
# directory, so use it rather than repeating the path literal.
img_data, class_name = create_dataset(img_folder)

In [None]:
# Stack the per-image feature vectors into one array, one row per training image.
x = np.array(img_data)

# Give every distinct class label an integer id; ids follow the sorted order
# in which np.unique returns the labels.
target_dict = dict((label, class_id) for class_id, label in enumerate(np.unique(class_name)))

# Integer target of each training sample, in the same order as class_name.
target_val = [target_dict[label] for label in class_name]

In [None]:
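#testing display image (disabled): img_data holds flattened vectors, so reshape a sample back to 2-D before passing it to imshow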
#from skimage import io
#import numpy as np
#io.imshow(img_data[443])

In [None]:
# Sanity check: show the first training sample (a flattened, standardized image vector).
img_data[0]

array([ 1.29657875,  0.91705096, -0.51973281, -0.60106019,  0.99837834,
       -0.08598676,  2.10985258,  1.350797  ,  0.15799538,  1.67610653,
        0.18510451,  0.21221364,  1.5405609 , -0.43840542,  1.59477915,
        0.75439619,  0.59174143,  1.48634264, -1.76675268,  0.91705096,
       -0.05887764,  1.02548747,  2.1369617 ,  1.26946962,  0.076668  ,
        0.59174143,  0.45619579,  0.04955887,  2.24539821, -0.32996891,
        1.24236049,  1.24236049,  1.29657875, -0.51973281,  1.0525966 ,
        1.62188828,  1.37790613,  0.88994183,  2.00141607,  0.42908666,
       -0.46551455,  0.8628327 ,  2.65203513, -0.27575066,  0.5646323 ,
       -0.19442327,  0.72728707,  1.16103311,  0.72728707, -0.00465938,
       -0.62816932, -0.54684193,  0.8628327 , -0.16731415,  0.18510451,
       -0.4112963 ,  0.2664319 ,  0.75439619,  2.19117996,  0.21221364,
        1.32368787,  2.16407083,  0.29354102,  1.51345177, -1.03480623,
       -0.2215324 , -0.65527845, -0.84504234, -0.32996891,  1.07

In [None]:
from sklearn.neighbors import KNeighborsClassifier

# Build the k-nearest-neighbours classifier and fit it on the training matrix
# `x` with the integer class ids in `target_val`.
# k follows the "sqrt(n) / 2" rule of thumb noted by the author.
# NOTE(review): with the 1500 training samples reported by the evaluation cell,
# sqrt(n) / 2 is about 19, not 12 -- confirm the intended k.
neigh = KNeighborsClassifier(n_neighbors=12).fit(x, target_val)
neigh  # fit() returns the estimator itself; echo it as the cell output

KNeighborsClassifier(n_neighbors=12)

In [None]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Predict on the same samples the classifier was fitted on. The figures below
# therefore describe training fit only, not accuracy on unseen images.
y_predictions = neigh.predict(x)

print("Training Accuracy")
# Accuracy, per-class report and confusion matrix, each called as (y_true, y_pred).
for metric in (accuracy_score, classification_report, confusion_matrix):
    print(metric(target_val, y_predictions))


Training Accuracy
0.25866666666666666
              precision    recall  f1-score   support

           0       0.16      0.65      0.26       100
           1       0.50      0.06      0.11       100
           2       0.23      0.56      0.33       100
           3       0.25      0.10      0.14       100
           4       0.27      0.20      0.23       100
           5       0.29      0.23      0.26       100
           6       0.22      0.28      0.25       100
           7       0.50      0.38      0.43       100
           8       0.30      0.37      0.33       100
           9       0.32      0.62      0.42       100
          10       0.36      0.17      0.23       100
          11       0.20      0.05      0.08       100
          12       0.23      0.03      0.05       100
          13       0.31      0.16      0.21       100
          14       0.40      0.02      0.04       100

    accuracy                           0.26      1500
   macro avg       0.30      0.26      0.2

In [None]:
#read test set
def create_testset(img_folder):
    """Load every test image under ``img_folder`` as a standardized feature vector.

    Images are looked up one directory level below ``img_folder``. Each image
    is read as grayscale, center-cropped to at most 100x100, resized to
    IMG_WIDTH x IMG_HEIGHT, flattened, and standardized to zero mean and unit
    variance.

    Directories are visited in sorted order. Within a directory, files whose
    name is a plain number sort by that number ("28.jpg" before "275.jpg");
    all other files follow, sorted by name. Entries of ``img_folder`` that are
    not directories are ignored, and files that OpenCV cannot decode are
    skipped with a printed notice.

    Args:
        img_folder: path of the directory holding the test sub-directories.

    Returns:
        (img_data_array, img_names): two parallel lists holding the flattened
        float vector and the file name of each loaded image.
    """
    img_data_array = []
    img_names = []

    # os.listdir returns entries in arbitrary order; sort explicitly so the
    # predictions (and the results file written from them) have a stable order.
    for dir1 in sorted(os.listdir(img_folder)):
        sub_dir = os.path.join(img_folder, dir1)
        if not os.path.isdir(sub_dir):
            continue  # stray file next to the image folders

        for file in sorted(os.listdir(sub_dir), key=_numeric_name_key):
            image_path = os.path.join(sub_dir, file)
            image = cv2.imread(image_path, 0)  # flag 0 = load as grayscale
            if image is None:
                # cv2.imread reports an unreadable / non-image file by returning None
                print("Skipping unreadable image: " + image_path)
                continue

            image = center_crop(image, (100, 100))
            # cv2.resize expects dsize as (width, height)
            image = cv2.resize(image, (IMG_WIDTH, IMG_HEIGHT), interpolation=cv2.INTER_AREA)
            image = np.array(image).flatten()

            # Standardize. A constant image has std == 0; only center it then,
            # instead of dividing by zero and filling the vector with NaNs.
            std = np.std(image)
            image = image - np.mean(image)
            if std > 0:
                image = image / std

            # Record the name only once the image is usable, so names and
            # vectors stay index-aligned.
            img_data_array.append(image)
            img_names.append(file)
    return img_data_array, img_names


def _numeric_name_key(filename):
    """Sort key placing purely numeric file stems first, ordered by numeric value."""
    stem = os.path.splitext(filename)[0]
    if stem.isdecimal():
        return (0, int(stem), filename)
    return (1, 0, filename)

In [None]:
# Load the test images: test_data holds the standardized feature vectors,
# img_names the file name belonging to each vector (same index).
test_data, img_names = create_testset(r'/content/drive/MyDrive/testing2')

In [None]:
# Stack the test feature vectors into one array (one row per test image) for predict().
test_data2=np.array(test_data)

In [None]:
# Shape check: expect (number of test images, IMG_WIDTH * IMG_HEIGHT).
test_data2.shape

(2985, 256)

In [None]:
# Predict a class id for every test image; ids are the integer values of target_dict
# that the classifier was fitted on.
test_predictions=neigh.predict(test_data2)

In [None]:
#get class names from predicted class values
# Invert target_dict once (class id -> class name) rather than rebuilding the key and
# value lists and scanning them for every single prediction.
id_to_class = {class_id: class_label for class_label, class_id in target_dict.items()}

# A comprehension keeps its loop variable out of the notebook namespace; a plain
# `for x in ...` loop here would overwrite the training matrix `x` and break any
# later re-run of the fit / evaluation cells.
test_predictions_class_names = [id_to_class[class_id] for class_id in test_predictions]

In [None]:
#save results to text file
# One "<file name> <predicted class>" line per test image; each line is also
# echoed to the cell output.
with open('/content/drive/MyDrive/Example55.txt', 'w') as out_file:
    for idx, img_name in enumerate(img_names):
        row = "{} {}".format(img_name, test_predictions_class_names[idx])
        print(row)
        out_file.write(row + "\n")


  

276.jpg Suburb
2753.jpg Highway
2795.jpg Coast
2754.jpg Highway
2792.jpg TallBuilding
2796.jpg industrial
2776.jpg Coast
2778.jpg Suburb
2766.jpg Suburb
2759.jpg Coast
2763.jpg bedroom
2791.jpg TallBuilding
2773.jpg Highway
2779.jpg bedroom
281.jpg Suburb
2756.jpg Coast
2749.jpg Suburb
2803.jpg Suburb
2772.jpg Suburb
275.jpg TallBuilding
2805.jpg Coast
28.jpg TallBuilding
2800.jpg Street
2783.jpg Coast
2790.jpg Coast
2798.jpg Street
2758.jpg Highway
2760.jpg Street
2844.jpg Highway
2824.jpg TallBuilding
2861.jpg Coast
2841.jpg Insidecity
287.jpg OpenCountry
2842.jpg TallBuilding
2839.jpg Highway
2820.jpg Street
2831.jpg Coast
283.jpg Highway
2849.jpg Coast
2862.jpg TallBuilding
2834.jpg Coast
2811.jpg Forest
282.jpg Office
2812.jpg Office
2838.jpg TallBuilding
2859.jpg Suburb
2867.jpg Mountain
2814.jpg Suburb
2828.jpg Coast
2813.jpg Suburb
2817.jpg Mountain
2854.jpg bedroom
2852.jpg Street
285.jpg Highway
2851.jpg Highway
2847.jpg Office
2869.jpg Highway
2853.jpg Coast
286.jpg TallBuil

In [None]:
# issues to sort
# (1) test set images are being read in an arbitrary order (os.listdir does not sort its result); this affects the txt file too
# (2) create a validation set

