**Code, um die Open-World-Klassifikation auszuführen und über mehrere Klassen hinweg zu testen**

In [None]:
# Mount Google Drive at /content/gdrive; later cells read the annotation
# files and feature vectors from paths below this mount point.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
# Fetch the COCO API sources (the notebook imports `pycocotools` further down).
!git clone https://github.com/cocodataset/cocoapi

# Run `make` inside the Python API directory of the cloned repository.
!cd /content/cocoapi/PythonAPI && make

In [None]:
 # Install the timm package (unpinned - NOTE(review): consider pinning a version for reproducibility).
 !pip install timm
 
 # Clone the project repository; a later cell changes the working directory to /content/BA.
 !git clone https://github.com/Moldazien/BA.git

In [None]:
# Work from inside the cloned repository (presumably so that its local modules
# such as `utils` and `vision_transformer` can be imported - TODO confirm).
import os
os.chdir('/content/BA')

In [None]:
#kopieren der featurevektoren aus google drive, um später längere ladezeiten beim öffnen der dateien aus googledrive zu vermeiden

!mkdir /content/dataset
!cp -R /content/gdrive/MyDrive/Datasets/iNaturalist2019/features /content/dataset/features

In [None]:
import os
import sys
import argparse
import cv2
import random
import colorsys
import requests
from io import BytesIO

import skimage.io
from skimage.measure import find_contours
import matplotlib.pyplot as plt
from matplotlib.patches import Polygon
import torch
import torch.nn as nn
import torchvision
from torchvision import transforms as pth_transforms
import numpy as np
from PIL import Image

import utils
import vision_transformer as vits

from pycocotools.coco import COCO

In [None]:
# Dataset class for loading the feature vectors and ground truth in a convenient way

import os
from torchvision.io import read_image
from torch.utils.data import Dataset, DataLoader

class Seg_Dataset(Dataset):
  """Dataset that yields ``(ground_truth, features)`` pairs for a COCO-style annotation file.

  The annotation file is indexed with ``pycocotools``' ``COCO`` helper. Each
  image's feature tensor is loaded from
  ``<dataset_dir>/features/<class>/<directory>/<image>_feature.pt``.

  Attributes:
    annotations_file: Path of the annotation JSON passed to the constructor.
    coco: ``COCO`` index built from ``annotations_file``.
    dataset: Root directory that contains the ``features`` folder.
    img_ids: Image ids as returned by ``coco.getImgIds()``.
    cat_mapping: Dict mapping a category id to its 1-based class label at the
      chosen taxonomy level.
  """

  def __init__(self, annotations_file, dataset_dir, taxonomy):  #taxonomy must be: kingdom phylum class order family genus name
    """Index the annotations and build the category-id -> label mapping.

    Args:
      annotations_file: Path to the COCO-style annotation JSON.
      dataset_dir: Directory that contains the ``features`` folder.
      taxonomy: Key of the category records used to group categories into
        classes (e.g. ``'order'``).
    """
    self.annotations_file = annotations_file
    self.coco = COCO(annotations_file)  #for fast reading

    self.dataset = dataset_dir
    self.img_ids = self.coco.getImgIds()

    self.cat_mapping = self._build_category_mapping(taxonomy)

  def _build_category_mapping(self, taxonomy):
    """Map every category id to the 1-based rank of its ``taxonomy`` value.

    The distinct taxonomy values are sorted; the first one becomes label 1,
    the second label 2, and so on. Label 0 is never produced here, so
    ``__getitem__`` can use it for images without annotations.
    """
    categories = self.coco.loadCats(self.coco.getCatIds())

    # One pass to rank the distinct values, one pass to label the categories.
    distinct_values = sorted({cat[taxonomy] for cat in categories})
    label_of = {value: rank for rank, value in enumerate(distinct_values, start=1)}

    return {cat['id']: label_of[cat[taxonomy]] for cat in categories}

  def __len__(self):
    """Return the number of images listed in the annotation file."""
    return len(self.img_ids)

  def __getitem__(self, idx):
    """Return ``(ground_truth, features)`` for the image at position ``idx``.

    ``ground_truth`` is the mapped label of the image's first annotation, or
    0 when the image has no annotations. ``features`` is whatever object was
    stored in the image's ``*_feature.pt`` file, loaded onto the CPU.
    """
    img = self.coco.loadImgs(self.img_ids[idx])[0]
    anns = self.coco.loadAnns(self.coco.getAnnIds(img['id']))

    # file_name is split on '/', and components 1..3 are used as
    # <class>/<directory>/<image>.<ext>; component 0 is ignored.
    parts = img['file_name'].split('/')
    classN = parts[1]
    direcN = parts[2]
    imgN = parts[3].split('.')[0]  # everything before the first '.'

    feature_path = os.path.join(self.dataset, 'features', classN, direcN, imgN + '_feature.pt')
    features = torch.load(feature_path, map_location=torch.device('cpu'))

    # Only the first annotation determines the label.
    ground_truth = self.cat_mapping[anns[0]['category_id']] if len(anns) > 0 else 0

    return ground_truth, features

In [None]:
# Create the training and test datasets.
# Both read their features from the local copy under `dataset_path`, while the
# annotation files are read directly from Google Drive.

dataset_path = '/content/dataset'

trainset = Seg_Dataset('/content/gdrive/MyDrive/Datasets/iNaturalist2019/annotations/meg_train.json', dataset_path, 'order') # taxonomy level; alternatives: class order family genus name
testset = Seg_Dataset('/content/gdrive/MyDrive/Datasets/iNaturalist2019/annotations/meg_test.json', dataset_path, 'order') 

In [None]:
import random
# Seed Python's `random` module so any use of it is reproducible.
random.seed(7)

# Consecutive sample indices 0..len-1 for the training and the test set.
train_ids = np.arange(len(trainset))
test_ids = np.arange(len(testset))

In [None]:
def getVectors(trainset, remove_index):
  """Flatten all samples of `trainset` and split off the held-out class.

  Args:
    trainset: Indexable dataset whose items are `(ground_truth, features)`;
      `features` must support `reshape(-1)`.
    remove_index: Ground-truth label that is treated as the unknown class.

  Returns:
    `(X, Y, X_other, Y_other)` as numpy arrays. `X`/`Y` hold the flattened
    features and labels of every sample whose label differs from
    `remove_index`; `X_other` holds the flattened features of the held-out
    samples and `Y_other` the placeholder label 999 for each of them.
  """
  known_features, known_labels = [], []
  held_out_features, held_out_labels = [], []

  for sample_idx in np.arange(0, len(trainset), 1):
    label, feature = trainset[sample_idx]
    flat = np.asarray(feature.reshape(-1))

    if label != remove_index:
      known_features.append(flat)
      known_labels.append(label)
    else:
      held_out_features.append(flat)
      held_out_labels.append(999)  # placeholder label for the held-out class

  return (
    np.asarray(known_features),
    np.asarray(known_labels),
    np.asarray(held_out_features),
    np.asarray(held_out_labels),
  )

In [None]:
from sklearn.neighbors import KNeighborsClassifier as kNN

def fitPredictor(X, Y, n_neighbors = 40):
  """Fit a k-nearest-neighbours classifier on `X` / `Y` and return it.

  Args:
    X: Training feature vectors, one row per sample.
    Y: Class label for every row of `X`.
    n_neighbors: Number of neighbours the classifier consults.

  Returns:
    The fitted classifier.
  """
  knn_classifier = kNN(n_neighbors=n_neighbors)
  knn_classifier.fit(X, Y)
  return knn_classifier

In [None]:
from sklearn.neighbors import LocalOutlierFactor

def fitOutlierPredictor(X, Y, n_neighbors = 40, contamination = 0.26):
  """Fit a Local Outlier Factor detector in novelty mode and return it.

  Args:
    X: Feature vectors of the samples that count as inliers.
    Y: Passed through to `fit`; scikit-learn documents the `y` argument of
      `LocalOutlierFactor.fit` as unused.
    n_neighbors: Number of neighbours used for the local density estimate.
    contamination: Expected proportion of outliers in `X`; it sets the score
      threshold applied by `predict`. Defaults to the previously hard-coded 0.26.

  Returns:
    The fitted detector. `novelty=True` is required so that `predict` can be
    called on unseen samples; it returns -1 for outliers and 1 for inliers.
  """
  detector = LocalOutlierFactor(n_neighbors=n_neighbors, novelty=True, contamination=contamination)
  detector.fit(X, Y)
  return detector

In [None]:
def predict(testset, remove_index, predictor, threshold = 0.5, unknown_label = 6):
  """Classify every test sample, rejecting low-confidence predictions as unknown.

  Args:
    testset: Indexable dataset whose items are `(ground_truth, features)`;
      `features` must support `reshape(1, -1)`.
    remove_index: Ground-truth label of the class that was withheld from
      training; its samples get `unknown_label` as ground truth.
    predictor: Fitted classifier exposing `predict`, `predict_proba` and
      `classes_` (scikit-learn classifier interface).
    threshold: Minimum probability of the predicted class; below it the
      sample is labelled `unknown_label`.
    unknown_label: Label used for the unknown class in both returned lists.

  Returns:
    `(Ygt, Ypred)`: two lists with one ground-truth and one predicted label
    per test sample.
  """
  Ygt = []
  Ypred = []

  # Column j of predict_proba belongs to predictor.classes_[j]. Deriving the
  # lookup from classes_ keeps it correct for any set of training labels
  # instead of assuming exactly four known classes numbered 1..5.
  proba_column = {label: column for column, label in enumerate(predictor.classes_)}

  for i in range(len(testset)):
    gt, features = testset[i]
    npfeatures = np.asarray(features.reshape(1, -1))  # single-sample batch

    Ygt.append(unknown_label if gt == remove_index else gt)

    pred = predictor.predict(npfeatures)[0]
    probs = predictor.predict_proba(npfeatures)

    if probs[0, proba_column[pred]] < threshold:
      Ypred.append(unknown_label)
    else:
      Ypred.append(pred)

  return Ygt, Ypred

In [None]:
def other_predict(testset, remove_index, predictor, outlier_predictor, unknown_label = 6):
  """Classify every test sample and reject it as unknown if its class detector flags it.

  Args:
    testset: Indexable dataset whose items are `(ground_truth, features)`;
      `features` must support `reshape(1, -1)`.
    remove_index: Ground-truth label of the class that was withheld from
      training; its samples get `unknown_label` as ground truth.
    predictor: Fitted classifier exposing `predict`.
    outlier_predictor: Sequence of fitted outlier detectors; the detector for
      class label `k` must be stored at position `k - 1`. Each detector's
      `predict` must return -1 for outliers.
    unknown_label: Label used for the unknown class in both returned lists.

  Returns:
    `(Ygt, Ypred)`: two lists with one ground-truth and one predicted label
    per test sample.
  """
  Ygt = []
  Ypred = []

  for i in range(len(testset)):
    gt, features = testset[i]
    npfeatures = np.asarray(features.reshape(1, -1))  # single-sample batch

    Ygt.append(unknown_label if gt == remove_index else gt)

    pred = predictor.predict(npfeatures)[0]

    # Ask the detector of the predicted class whether the sample fits that class.
    verdict = outlier_predictor[pred - 1].predict(npfeatures)

    if verdict[0] == -1:
      Ypred.append(unknown_label)
    else:
      Ypred.append(pred)

  return Ygt, Ypred

In [None]:
# Load every training sample once and flatten its features to a 1-D vector.
X = []
Y = []

for i in range(len(trainset)):
  gt, features = trainset[i]
  X.append(np.asarray(features.reshape(-1)))
  Y.append(gt)

print(len(X))
print(len(Y))

# Group the samples by class label. X_list[k] / Y_list[k] hold the samples of
# label k + 1; samples whose label lies outside 1..NUM_CLASSES (e.g. 0 for
# images without annotations) are skipped. One pass over the data replaces
# the former NUM_CLASSES full scans and keeps the per-class sample order.
NUM_CLASSES = 5

outlier_list = []
X_list = [[] for _ in range(NUM_CLASSES)]
Y_list = [[] for _ in range(NUM_CLASSES)]

for sample_features, sample_label in zip(X, Y):
  class_idx = int(sample_label) - 1
  if 0 <= class_idx < NUM_CLASSES:
    X_list[class_idx].append(sample_features)
    Y_list[class_idx].append(sample_label)


loading Complete
54782
54782
1
-----
2
-----
3
-----
4
-----
5
-----


In [None]:
# Fit one outlier detector per class; outlier_list[k] belongs to class label k + 1.
# The list is rebuilt from scratch so that re-running this cell does not
# append a second set of detectors to the existing one.
outlier_list = [
  fitOutlierPredictor(class_X, class_Y, 40)
  for class_X, class_Y in zip(X_list, Y_list)
]

OutlierPredictor created
OutlierPredictor created
OutlierPredictor created
OutlierPredictor created
OutlierPredictor created


In [None]:
# Open-world classification with the LOF-based rejection step.
# In each run one class (i + 1) is withheld from training and has to be
# recognised as "unknown" (label 6) at test time.

from sklearn.metrics import confusion_matrix as confusion

# Fixed label order for every confusion matrix: classes 1-5 plus 6 = unknown.
# Passing it explicitly gives all runs the same 6x6 shape without having to
# append an artificial sample for the withheld class.
ALL_LABELS = [1, 2, 3, 4, 5, 6]

pred_list = []
gt_list = []

# Must be created here; otherwise the cell fails on a fresh kernel.
matr_list = []
for i in range(5):
  print(i)
  X, Y, _, _ = getVectors(trainset, i+1)
  predictor = fitPredictor(X, Y, 40)
  Ygt, Ypred = other_predict(testset, i+1, predictor, outlier_list)

  pred_list = pred_list + Ypred
  gt_list = gt_list + Ygt

  matr = confusion(Ygt, Ypred, labels=ALL_LABELS)
  matr_list.append(matr)

0
loading Complete
Predictor created
{2: 0, 3: 1, 4: 2, 5: 3}
prediction completed
1
loading Complete
Predictor created
{1: 0, 3: 1, 4: 2, 5: 3}
prediction completed
2
loading Complete
Predictor created
{1: 0, 2: 1, 4: 2, 5: 3}
prediction completed
3
loading Complete
Predictor created
{1: 0, 2: 1, 3: 2, 5: 3}
prediction completed
4
loading Complete
Predictor created
{1: 0, 2: 1, 3: 2, 4: 3}
prediction completed


In [None]:
# Open-world classification with the simple kNN classifier: a prediction is
# rejected as "unknown" (label 6) when the probability of the predicted class
# is below the threshold. In each run one class (i + 1) is withheld from training.

from sklearn.metrics import confusion_matrix as confusion #for normal prediction

# Fixed label order for every confusion matrix: classes 1-5 plus 6 = unknown.
# Passing it explicitly gives all runs the same 6x6 shape without appending an
# artificial (i + 1, i + 1) sample, which previously also ended up in
# gt_list / pred_list and distorted the reported metrics.
ALL_LABELS = [1, 2, 3, 4, 5, 6]

pred_list = []
gt_list = []

matr_list = []
for i in range(5):
  print(i)
  X, Y, _, _ = getVectors(trainset, i+1)
  predictor = fitPredictor(X, Y, 250)
  Ygt, Ypred = predict(testset, i+1, predictor, threshold = 0.98)

  matr = confusion(Ygt, Ypred, labels=ALL_LABELS)
  matr_list.append(matr)

  pred_list = pred_list + Ypred
  gt_list = gt_list + Ygt

0
loading Complete
Predictor created
{2: 0, 3: 1, 4: 2, 5: 3}
prediction completed
[3, 3, 5, 4, 4, 4, 3, 3, 4, 3, 4, 6, 3, 3, 3, 5, 3, 5, 3, 3, 6, 3, 3, 6, 3, 4, 4, 6, 3, 3, 4, 4, 5, 4, 3, 3, 4, 6, 5, 3, 3, 3, 3, 4, 4, 3, 5, 4, 4, 3, 5, 5, 3, 3, 3, 4, 4, 3, 3, 6, 4, 4, 5, 4, 4, 5, 5, 3, 3, 3, 3, 4, 3, 4, 6, 3, 3, 5, 3, 5, 4, 3, 4, 4, 4, 6, 4, 3, 3, 4, 4, 3, 3, 4, 4, 4, 3, 3, 3, 3, 3, 5, 4, 4, 4, 5, 4, 4, 4, 3, 4, 5, 3, 4, 5, 3, 4, 5, 6, 5, 3, 4, 5, 4, 3, 3, 6, 3, 5, 4, 3, 2, 5, 5, 4, 4, 5, 4, 3, 4, 3, 5, 4, 4, 5, 3, 4, 3, 5, 6, 4, 4, 4, 3, 4, 5, 4, 4, 4, 4, 6, 4, 4, 4, 4, 4, 3, 5, 5, 3, 4, 3, 3, 4, 4, 5, 6, 3, 4, 3, 4, 4, 3, 5, 4, 2, 6, 3, 4, 4, 4, 5, 4, 3, 3, 4, 4, 4, 4, 5, 4, 3, 4, 5, 3, 4, 4, 3, 4, 3, 3, 4, 5, 5, 4, 4, 4, 3, 5, 3, 3, 4, 4, 4, 5, 4, 4, 4, 3, 6, 3, 4, 3, 2, 4, 3, 3, 5, 4, 4, 4, 3, 4, 3, 3, 3, 3, 3, 3, 4, 4, 3, 4, 4, 6, 4, 3, 4, 3, 4, 3, 3, 5, 4, 3, 3, 3, 3, 3, 3, 3, 4, 5, 3, 4, 6, 3, 3, 5, 4, 4, 4, 5, 3, 5, 4, 4, 6, 6, 3, 4, 3, 5, 4, 4, 5, 3, 4, 4, 4, 3, 4, 3, 4, 4, 2

In [None]:
from sklearn.metrics import confusion_matrix as confusion

# Confusion matrix of a single run: Ygt / Ypred are whatever the most recently
# executed loop iteration left behind. The aggregated result below is built
# from `matr_list`, not from this `matr`.
matr = confusion(Ygt, Ypred)

In [None]:
# Element-wise sum of the per-run confusion matrices (kept as a float array).
# Array addition replaces the manual triple index loop; matrices whose shape
# differs from the first one now raise instead of being silently truncated.
whole_matrix = np.zeros(matr_list[0].shape)

for run_matrix in matr_list:
  whole_matrix += run_matrix
    

In [None]:
# Row-normalise the summed confusion matrix: each row (true class) sums to 1.
# Rows without any sample stay all-zero instead of producing NaN through a
# division by zero.
row_totals = whole_matrix.sum(axis=1, keepdims=True)
norm_matrix = np.divide(
  whole_matrix,
  row_totals,
  out=np.zeros(whole_matrix.shape),
  where=row_totals != 0,
)


*Ausgabe der Konfusionsmatrix und der endgültigen Ergebnisse*

In [None]:
import numpy as np
from sklearn.metrics import ConfusionMatrixDisplay, confusion_matrix
import matplotlib.pyplot as plt

# The matrices are indexed by the sorted labels 1-5 plus 6 for "unknown".
# Without explicit display labels the axes would be numbered 0..5, one off
# from the real class labels. The labels are only applied when the matrix has
# the expected size.
class_labels = [1, 2, 3, 4, 5, 6]

for cm, title in (
  (whole_matrix, 'Confusion matrix (summed over all runs)'),
  (norm_matrix, 'Confusion matrix (row-normalised)'),
):
  tick_labels = class_labels if cm.shape[0] == len(class_labels) else None
  cmp = ConfusionMatrixDisplay(cm, display_labels=tick_labels)
  fig, ax = plt.subplots(figsize=(10,10))
  cmp.plot(ax=ax)
  ax.set_title(title)

In [None]:
from sklearn.metrics import accuracy_score, average_precision_score, f1_score, precision_score, recall_score

# Accuracy over the pooled ground-truth / prediction lists of all runs.
accuracy = accuracy_score(gt_list, pred_list)

# F1 averaged over classes ('macro') and the individual per-class F1 scores (average=None).
macrof1 = f1_score(gt_list, pred_list, average = 'macro')
uf1 = f1_score(gt_list, pred_list, average = None)

In [None]:
# Print accuracy, macro F1 and the per-class F1 scores, one per line.
print(accuracy, macrof1, uf1, sep='\n')
