#### Import Libraries

In [1]:
import numpy as np
from skimage import feature, color, util, io
import os
import matplotlib.pyplot as plt
import torch
from torch.utils.data import TensorDataset, DataLoader
from sklearn.model_selection import train_test_split
from tqdm import tqdm
from torch.nn.functional import one_hot

# Select the compute device: the default CUDA GPU when PyTorch can see one,
# otherwise the CPU.
# NOTE(review): no cell visible in this part of the notebook references
# `device` -- confirm it is still needed.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

#### Extract image features into the tensor `x` and image labels into the list `y`

In [3]:
# Build the feature matrix `x` (one normalised LBP histogram per image) and
# the label list `y` (class name parsed from each file name).
path = '../weatherconditiondataset/'

# os.listdir returns entries in arbitrary order; sorting makes the row order
# of `x` / `y` reproducible across runs and machines.
file_names = sorted(os.listdir(path))

# Size everything from the directory itself instead of a hard-coded count, so
# that `x` never overflows and never keeps unused all-zero rows.
n = len(file_names)

# LBP parameters (loop-invariant, so defined once up front).
radius = 1
n_points = 8 * radius
# The 'uniform' LBP variant produces n_points + 2 distinct codes (0..9 here),
# which is why the histogram has 10 bins.
n_bins = n_points + 2

x = torch.zeros((n, n_bins))  # features
y = []  # labels

for row, f in enumerate(file_names):
    # FEATURES
    img = io.imread(os.path.join(path, f))

    # Convert the image to grayscale. RGB and RGBA images are reduced to their
    # first three channels; anything else is passed through unchanged.
    if img.ndim == 3 and img.shape[-1] in (3, 4):
        img_gray = color.rgb2gray(img[..., :3])
    else:
        img_gray = img

    # Convert the image to an unsigned 8-bit integer type
    discretized_image = util.img_as_ubyte(img_gray)

    # Extract LBP features
    lbp = feature.local_binary_pattern(discretized_image, n_points, radius, method='uniform')

    # Create a histogram of LBP codes; density=True normalises it so images of
    # different sizes are comparable. `bins` (the bin edges) is reused by the
    # plotting cell further down.
    hist, bins = np.histogram(lbp, bins=n_bins, range=(0, n_bins), density=True)
    x[row] = torch.tensor(hist)

    # LABELS
    # The class name is the leading run of characters whose code point is
    # above ord('9'), i.e. everything before the first digit (or before any
    # other low-code character such as '.' or '-'). A separate cursor is used
    # so the row index is not clobbered, and the length check prevents an
    # IndexError on names that contain no such terminating character.
    prefix_len = 0
    while prefix_len < len(f) and ord(f[prefix_len]) > ord('9'):
        prefix_len += 1
    y.append(f[:prefix_len])

#### Get information on labels, transform them from string to one hot

In [None]:
# Summarise the string labels in `y` and encode them as one-hot vectors.
#
# Produces:
#   classNames  - set of distinct class names
#   numbClasses - number of distinct classes
#   classNumber - class name -> column index of the one-hot encoding
#   classCount  - class name -> number of samples with that label
#   y_OneHot    - float tensor of shape (len(y), numbClasses)
#
# NOTE: `classNames` is a set, so the name -> index assignment in
# `classNumber` follows the set's arbitrary iteration order. Anything that
# needs the class order must read it from `classNumber`, never assume it.
classNames = set(y)
numbClasses = len(classNames)

classNumber = {c: i for i, c in enumerate(classNames)}

classCount = {}
for c in y:
    classCount[c] = classCount.get(c, 0) + 1

# Encode all labels in one call. The matrix is sized from `y` itself, so it
# cannot contain all-zero rows that do not correspond to any sample.
class_ids = torch.tensor([classNumber[c] for c in y], dtype=torch.long)
y_OneHot = one_hot(class_ids, num_classes=numbClasses).float()  # labels

print(classCount)
print(classNumber)

#### Showcase the LBP histograms for different classes

In [None]:
# Plot the mean LBP histogram of every class, one subplot per class.

# Accumulate the per-class sum of histograms, then divide by the class size.
classMeanHist = torch.zeros(len(classNames), n_bins)

for sample_hist, label in zip(x, y):
    classMeanHist[classNumber[label]] += sample_hist

for c in classNames:
    classMeanHist[classNumber[c]] /= classCount[c]

# Left edge of every histogram bin; `bins` holds n_bins + 1 edges.
bin_starts = bins[:n_bins]
tick_labels = [int(edge) for edge in bin_starts]

# One column per class. squeeze=False keeps `axes` two-dimensional even when
# there is only a single class.
fig, axes = plt.subplots(nrows=1, ncols=len(classNames), figsize=(8, 4), squeeze=False)

# Display each class in its own subplot, positioned by its class index.
for c in classNames:
    ax = axes[0, classNumber[c]]
    ax.bar(bin_starts, classMeanHist[classNumber[c]].numpy())
    ax.set_title(c + " image")
    # Set the ticks per axis: plt.xticks would only affect the last subplot.
    ax.set_xticks(bin_starts)
    ax.set_xticklabels(tick_labels)

plt.show()

### MAIN: k-fold cross-validation of the kNN classifier

In [77]:
import sys
sys.path.insert(1, '/media/commlab/TenTB/home/dmytro/AI_HW#1/')
%run ../heatmap.py
from sklearn.model_selection import KFold
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix

# create a KFold object
kf = KFold(n_splits=k, shuffle=True)

# shuffle the data and split it in two
shuffled_indices = np.random.permutation(len(x))
x = x[shuffled_indices]
y_OneHot = y_OneHot[shuffled_indices]

x = x[:round(n/2), :]
y_OneHot = y_OneHot[:round(n/2), :]

k = 5  # number of folds
neighborN = 100 # Choose the value of k

cm_total = np.zeros((4, 4))
avg_test_acc = 0

# create a KFold object
kf = KFold(n_splits=k, shuffle=True)

# loop over the folds
i = 1
for train_index, test_index in kf.split(x):
    print("Fold #" + str(i) + ":")

    # split the data into training and test sets
    x_train, y_train = x[train_index], y_OneHot[train_index]
    x_test, y_test = x[test_index], y_OneHot[test_index]

    # Create a kNN classifier object
    knn = KNeighborsClassifier(n_neighbors=neighborN)

    # Train the classifier on your data
    knn.fit(x_train, y_train)

    y_pred = knn.predict(x_test)

    # Get the index of an array element with the highest probability
    y_pred = np.argmax(y_pred, axis = 1)
    y_test = np.argmax(y_test, axis = 1)
    # Count correct predictions
    y_test = y_test.detach().cpu().numpy()
    test_acc = np.equal(y_pred, y_test).sum()/y_pred.size

    cm = confusion_matrix(y_test, y_pred, labels = [0, 1, 2, 3])

    avg_test_acc += test_acc
    cm_total = cm_total + cm

    i = i + 1

print("Average test accuracy:", str(round(avg_test_acc/k*100, 2)) + "%")
grid_labels = ['cloudy', 'sunrise', 'shine', 'rain']
# compute the sum of each row
row_sums = cm_total.sum(axis=1)
cm_normalized = cm_total / row_sums[:, np.newaxis]
im, cbar = heatmap(cm_normalized, grid_labels, grid_labels)
annotate_heatmap(im)
plt.savefig("confusionMatrix_k_100_half_data.png")
plt.close()

Fold #1:
Fold #2:
Fold #3:
Fold #4:
Fold #5:
Average test accuracy: 41.46%


Full data (here k is the number of neighbours, i.e. `neighborN` in the code, not the number of folds) <br>
k = 3: Average test accuracy: 57.16% <br>
k = 5: Average test accuracy: 60.36% <br>
k = 10: Average test accuracy: 57.07% <br>
k = 20: Average test accuracy: 56.98% <br>
k = 100: Average test accuracy: 49.16% <br>

Half data (k = number of neighbours, i.e. `neighborN` in the code) <br>
k = 3: Average test accuracy: 57.14% <br>
k = 5: Average test accuracy: 59.43% <br>
k = 10: Average test accuracy: 57.28% <br>
k = 20: Average test accuracy: 55.52% <br>
k = 100: Average test accuracy: 41.65% <br>

In [48]:
# Report which CUDA devices PyTorch can see: count, then name, compute
# capability and total memory (in MB) of each device.
if not torch.cuda.is_available():
    print('CUDA is not available.')
else:
    gpu_count = torch.cuda.device_count()
    print(f'{gpu_count} GPU(s) are available!')
    for idx in range(gpu_count):
        gpu_name = torch.cuda.get_device_name(idx)
        print(f'GPU {idx}: {gpu_name}')
        capability = torch.cuda.get_device_capability(idx)
        print(f'\tCompute capability: {capability}')
        # total_memory is reported in bytes; convert to mebibytes for display
        total_mb = torch.cuda.get_device_properties(idx).total_memory / 1024 ** 2
        print(f'\tMemory: {total_mb:.2f} MB')


2 GPU(s) are available!
GPU 0: NVIDIA GeForce RTX 2080 Ti
	Compute capability: (7, 5)
	Memory: 11019.56 MB
GPU 1: NVIDIA GeForce RTX 2080 Ti
	Compute capability: (7, 5)
	Memory: 11018.25 MB
