In [None]:
import numpy as np
import torch
import torchvision
from torchvision import datasets
from torchvision import transforms
from tqdm import tqdm
from tqdm.contrib import itertools
import torch.nn as nn
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report, accuracy_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.decomposition import PCA

In [None]:
# Build the normalized training and test datasets and their data loaders.
# Only the train/ and test/ folders are loaded here; no separate validation
# split is created in this cell.

batch_size = 32

# Convert to tensor, resize to 256x256, then normalize each of the three
# channels with mean 0.5 and std 0.5 (imshow() below undoes this with x/2 + 0.5).
transform = transforms.Compose([transforms.ToTensor(),
                                transforms.Resize((256, 256)),
                                transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

train_path = "C:/Users/das-s/Downloads/Plants Classification/train"
train_ds = datasets.ImageFolder(root=train_path, transform=transform)
train_dataloader = DataLoader(train_ds, batch_size=batch_size, shuffle=True)

test_path = "C:/Users/das-s/Downloads/Plants Classification/test"
test_ds = datasets.ImageFolder(root=test_path, transform=transform)
# The test split is only ever read for evaluation, so it is not shuffled:
# every pass over the loader then yields the samples in the same order.
test_dataloader = DataLoader(test_ds, batch_size=batch_size, shuffle=False)

In [None]:
def imshow(img):
    """Display a normalized channels-first image tensor with matplotlib.

    Args:
        img: tensor laid out as (channels, height, width) whose values were
            normalized with mean 0.5 and std 0.5.
    """
    restored = img / 2 + 0.5  # undo the mean-0.5 / std-0.5 normalization
    # matplotlib expects channels last, so move axis 0 to the end.
    channels_last = np.transpose(restored.numpy(), (1, 2, 0))
    plt.imshow(channels_last)
    plt.show()

In [None]:
# Draw one batch from the training loader and preview its first four samples.
sample_batch = next(iter(train_dataloader))
train_features, train_labels = sample_batch
print(f"Feature batch shape: {train_features.size()}")
print(f"Labels batch shape: {train_labels.size()}")

images = [train_features[i] for i in range(4)]
imshow(torchvision.utils.make_grid(images)) # show images

# Class names of the same four samples, in the order they appear in the grid.
preview_names = [f'{train_ds.classes[train_labels[j]]}\t' for j in range(4)]
print(' '.join(preview_names)) # print labels


In [None]:
# Функция формирования набора данных (картинки и метки)
def extract_dataset_images(dataloader):
    """Collect every batch of a dataloader into flat NumPy arrays.

    Args:
        dataloader: iterable yielding ``(images, labels)`` tensor pairs.

    Returns:
        Tuple ``(images, labels)``. ``images`` holds one row per sample with
        all remaining dimensions flattened into a vector; ``labels`` is the
        concatenation of the per-batch label arrays.
    """
    flat_batches, label_batches = [], []
    for batch, batch_labels in tqdm(dataloader):
        # Turn each image of the batch into a single feature vector.
        as_vectors = batch.detach().numpy().reshape((len(batch), -1))
        flat_batches.append(as_vectors)
        label_batches.append(batch_labels.detach().numpy())
    return np.concatenate(flat_batches), np.concatenate(label_batches)

In [None]:
# Flatten both splits into (n_samples, n_pixels) matrices with matching labels.
x_train, y_train = extract_dataset_images(dataloader=train_dataloader)
x_test, y_test = extract_dataset_images(dataloader=test_dataloader)

# KNN

In [None]:
def train_and_test_KNN(x_train, y_train, x_test, y_test, n_neighbors, metric, weights):
    """Fit a k-NN classifier on the training data and score it on the test data.

    Args:
        x_train, y_train: training samples and their labels.
        x_test, y_test: samples and labels the accuracy is measured on.
        n_neighbors, metric, weights: forwarded to ``KNeighborsClassifier``.

    Returns:
        Accuracy of the fitted classifier on ``x_test`` / ``y_test``.
    """
    model = KNeighborsClassifier(n_neighbors=n_neighbors, metric=metric, weights=weights)
    model.fit(x_train, y_train)
    # Classify the test images and compare against the true labels.
    return accuracy_score(y_test, model.predict(x_test))

In [None]:
def inverse_squared_distance(dist):
    """Weight neighbors by the inverse of their squared distance.

    Intended for use as the ``weights`` callable of a k-NN estimator, which
    must return an array with the same shape as the distances it receives.

    Args:
        dist: array-like of distances; the last axis enumerates the
            neighbors of one query point.

    Returns:
        ``numpy.ndarray`` of the same shape as ``dist`` holding ``1 / dist**2``.
        If a query has one or more neighbors at distance zero, those neighbors
        get weight 1 and all its other neighbors get weight 0, so the result
        never contains ``inf``.
    """
    dist = np.asarray(dist, dtype=float)
    # Division by zero is expected for exact matches and is resolved below.
    with np.errstate(divide="ignore"):
        weights = 1.0 / np.square(dist)
    exact_match = np.isinf(weights)
    if exact_match.any():
        # Rows containing an exact match are decided by those matches alone.
        row_has_match = exact_match.any(axis=-1, keepdims=True)
        weights = np.where(row_has_match, exact_match.astype(float), weights)
    return weights

In [None]:
# Hyperparameter grid shared by the k-NN searches in this notebook.
# "minkowski" is deliberately not listed: with scikit-learn's default p=2 it
# is exactly the Euclidean distance, so it only repeated every "euclidean" fit.
metrics_array = ["euclidean", "cosine"]
# Two built-in weighting schemes plus the custom 1/d^2 weighting function.
weights_array = ["uniform", "distance", inverse_squared_distance]
n_neighbors = [3, 5, 10, 20]

In [None]:
# KNN accuracy: exhaustive search over every (metric, weights, n_neighbors) combination.
# NOTE(review): the accuracy used to pick the winner is measured on x_test / y_test.
parameters = 0, "", ""
accuracy = 0

for metric, weights, n in itertools.product(metrics_array, weights_array, n_neighbors):
    temp_accuracy = train_and_test_KNN(x_train, y_train, x_test, y_test, n, metric, weights)
    # Strict comparison: on ties the first combination encountered is kept.
    if temp_accuracy > accuracy:
        parameters, accuracy = (n, metric, weights), temp_accuracy

print("Best KNN accuracy:")
print(f"n_neighbors = {parameters[0]} \nmetric = {parameters[1]} \nweights = {parameters[2]} \naccuracy = {accuracy}")

# PCA+KNN

In [None]:
def train_and_test_PCA_KNN(x_train, y_train, x_test, y_test, n_components, knn_classifier):
    """Reduce the data with PCA, fit the given k-NN classifier and score it.

    Args:
        x_train, y_train: training samples and their labels.
        x_test, y_test: samples and labels the accuracy is measured on.
        n_components: number of principal components to keep.
        knn_classifier: unfitted classifier; it is fitted in place on the
            PCA-projected training data.

    Returns:
        Accuracy of the classifier on the PCA-projected test data.
    """
    reducer = PCA(n_components=n_components)
    knn_classifier.fit(reducer.fit_transform(x_train), y_train)

    # Testing: reuse the projection learned on the training data for the
    # test samples, then classify them with the fitted k-NN.
    predictions = knn_classifier.predict(reducer.transform(x_test))
    return accuracy_score(y_test, predictions)

In [None]:
# Numbers of principal components tried by the PCA-based grid searches.
n_components = [
    10,
    50,
    100,
    200,
    400,
]

In [None]:
# KNN + PCA accuracy: grid search over (metric, weights, n_neighbors, n_components)
# on the raw flattened pixels.
parameters = 0, "", "", 0
accuracy = 0

# The PCA projection depends only on n_components, so it is fitted once per
# value here instead of once per k-NN configuration inside the search loop.
raw_projections = {}
for n_comp in n_components:
    pca_model = PCA(n_components=n_comp)
    raw_projections[n_comp] = (pca_model.fit_transform(x_train), pca_model.transform(x_test))

for metric, weights, n_neigh, n_comp in itertools.product(metrics_array, weights_array, n_neighbors, n_components):
    train_projected, test_projected = raw_projections[n_comp]
    knn_classifier = KNeighborsClassifier(n_neighbors=n_neigh, metric = metric, weights = weights)
    knn_classifier.fit(train_projected, y_train)
    temp_accuracy = accuracy_score(y_test, knn_classifier.predict(test_projected))
    # Strict comparison: on ties the first combination encountered is kept.
    if accuracy < temp_accuracy:
        parameters = n_neigh, metric, weights, n_comp
        accuracy = temp_accuracy

# Label names the pipeline actually evaluated in this cell (was "Best KNN accuracy:").
print("Best PCA+KNN accuracy:")
print(f"n_neighbors = {parameters[0]} \nmetric = {parameters[1]} \nweights = {parameters[2]} \nn_components = {parameters[3]} \naccuracy = {accuracy}")

# CNN+PCA+KNN

In [None]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [None]:
from torchvision.models import vgg16

# Load VGG16 with torchvision's pretrained weights.
# NOTE(review): newer torchvision releases prefer the `weights=` argument over
# `pretrained=True` — confirm against the installed version.
model = vgg16(pretrained=True)
# Replace the classification head with a flatten layer, so calling the model
# returns a flat feature vector per image instead of class scores.
model.classifier = nn.Flatten(start_dim=1)
model.to(device)

In [None]:
def extract_features(dataset, model):
    """Run every batch through ``model`` and collect the outputs as NumPy arrays.

    Inference is done under ``torch.no_grad()`` so no autograd graph is built,
    and with the model temporarily switched to eval mode so layers such as
    dropout behave deterministically. The model's previous train/eval mode is
    restored before returning.

    Args:
        dataset: iterable yielding ``(images, labels)`` tensor batches.
        model: ``torch.nn.Module`` applied to each image batch after the batch
            is moved to the notebook-level ``device``.

    Returns:
        Tuple ``(features, labels)`` of NumPy arrays, each the concatenation
        of the per-batch results along the first axis.
    """
    features = []
    labels = []
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for batch_images, batch_labels in tqdm(dataset):
                batch_features = model(batch_images.to(device))
                labels.append(batch_labels.detach().cpu().numpy())
                # Outputs may live on the GPU; bring them back before converting.
                features.append(batch_features.cpu().numpy())
    finally:
        # Leave the caller's model in the mode it was handed over in.
        model.train(was_training)
    return np.concatenate(features), np.concatenate(labels)

In [None]:
# Feed both splits through the network to obtain feature matrices and labels.
x_train_features, y_train_labels = extract_features(dataset=train_dataloader, model=model)
x_test_features, y_test_labels = extract_features(dataset=test_dataloader, model=model)

In [None]:
# KNN + PCA + CNN accuracy: grid search over (metric, weights, n_neighbors, n_components)
# on the features extracted by the network.
parameters = 0, "", "", 0
accuracy = 0

# The PCA projection depends only on n_components, so it is fitted once per
# value here instead of once per k-NN configuration inside the search loop.
feature_projections = {}
for n_comp in n_components:
    pca_model = PCA(n_components=n_comp)
    feature_projections[n_comp] = (pca_model.fit_transform(x_train_features), pca_model.transform(x_test_features))

for metric, weights, n_neigh, n_comp in itertools.product(metrics_array, weights_array, n_neighbors, n_components):
    train_projected, test_projected = feature_projections[n_comp]
    knn_classifier = KNeighborsClassifier(n_neighbors=n_neigh, metric = metric, weights = weights)
    knn_classifier.fit(train_projected, y_train_labels)
    temp_accuracy = accuracy_score(y_test_labels, knn_classifier.predict(test_projected))
    # Strict comparison: on ties the first combination encountered is kept.
    if accuracy < temp_accuracy:
        parameters = n_neigh, metric, weights, n_comp
        accuracy = temp_accuracy

# Label names the pipeline actually evaluated in this cell (was "Best KNN accuracy:").
print("Best CNN+PCA+KNN accuracy:")
print(f"n_neighbors = {parameters[0]} \nmetric = {parameters[1]} \nweights = {parameters[2]} \nn_components = {parameters[3]} \naccuracy = {accuracy}")

In [None]:
# Refit the best configuration found by the preceding search and print a
# per-class report for it.
best_n_neighbors, best_metric, best_weights, best_n_components = parameters
pca = PCA(n_components=best_n_components)
knn_classifier = KNeighborsClassifier(n_neighbors=best_n_neighbors, metric=best_metric, weights=best_weights)

# Learn the projection on the training features, then apply it to the test features.
x_train_pca = pca.fit_transform(x_train_features)
x_test_pca = pca.transform(x_test_features)

knn_classifier.fit(x_train_pca, y_train_labels)
y_pred = knn_classifier.predict(x_test_pca)

print(classification_report(y_test_labels, y_pred, target_names=test_ds.classes))