In [None]:
# Mount Google Drive at /content/drive; the dataset paths used below live under it.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import BisectingKMeans
from sklearn.metrics import silhouette_score

from tensorflow.keras.preprocessing.image import load_img, img_to_array
from sklearn.cluster import AgglomerativeClustering, FeatureAgglomeration

from keras.models import Model
from keras.layers import GlobalAveragePooling2D
from tensorflow.keras.applications.resnet import ResNet101

from scipy.spatial.distance import cdist
from sklearn.cluster import KMeans

from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score



In [None]:
# Load the ResNet101 model (ImageNet weights) without the top dense layer.
# pooling='max' applies global max pooling, so the model outputs one flat
# feature vector per image instead of a spatial feature map.
model = ResNet101(include_top=False, weights='imagenet', pooling='max')
model.summary()

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet101_weights_tf_dim_ordering_tf_kernels_notop.h5
Model: "resnet101"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, None, None,  0           []                               
                                 3)]                                                              
                                                                                                  
 conv1_pad (ZeroPadding2D)      (None, None, None,   0           ['input_1[0][0]']                
                                3)                                                                
                                                                                                  
 conv1_conv (Conv2D)            (None, None, 

In [None]:

# Set the paths to the directories containing the training images
not_used_train_dir = "/content/drive/Shareddrives/GAN_CLEF_2023/GAN_CLEF_dataset/img_real/not_used"
used_train_dir = "/content/drive/Shareddrives/GAN_CLEF_2023/GAN_CLEF_dataset/img_real/used"
artificial_train_dir = "/content/drive/Shareddrives/GAN_CLEF_2023/GAN_CLEF_dataset/img_gen/generated"

# Expected number of images per directory and length of one feature vector.
N_NOT_USED = 80
N_USED = 80
N_ARTIFICIAL = 500
FEATURE_DIM = 2048

# Pre-allocate one row per image for every feature matrix.
# Zero-filled (not np.empty): if a directory holds fewer images than expected,
# the unfilled rows are deterministic zeros instead of uninitialised memory,
# and a later count_nonzero() check on these arrays is meaningful.
feature_vectors = np.zeros((N_NOT_USED + N_USED + N_ARTIFICIAL, FEATURE_DIM))
feature_vectors_real = np.zeros((N_NOT_USED + N_USED, FEATURE_DIM))
feature_vectors_used = np.zeros((N_USED, FEATURE_DIM))
feature_vectors_notused = np.zeros((N_NOT_USED, FEATURE_DIM))
feature_vectors_artificial = np.zeros((N_ARTIFICIAL, FEATURE_DIM))

In [None]:

# Loop over the "not used" real images, preprocess them, and store each image's
# feature vector in row i of the combined, real-only and not-used matrices.
# NOTE(review): pixels are only scaled to [0, 1]; ImageNet ResNet weights are
# normally paired with keras' resnet.preprocess_input - confirm this is intended.
i = 0
for subdir, dirs, files in os.walk(not_used_train_dir):
    for file in files:
        filepath = os.path.join(subdir, file)
        img = load_img(filepath, target_size=(224, 224))
        x = img_to_array(img)
        x = np.expand_dims(x, axis=0)  # add the batch axis expected by predict()
        x = x/255.0
        # verbose=0: a progress bar per image would flood the cell output.
        features = model.predict(x, verbose=0)
        # Index exactly one row. A `[i:]` slice would broadcast the vector over
        # every remaining row on each iteration and silently do nothing once i
        # runs past the end of the array.
        feature_vectors[i, :] = features
        feature_vectors_real[i, :] = features
        feature_vectors_notused[i, :] = features
        i += 1



In [None]:
# Extract features for the "used" real images. In feature_vectors and
# feature_vectors_real these rows come directly after the not-used rows, so the
# running index i starts at the number of not-used rows rather than at whatever
# value an earlier cell happened to leave in `i` (keeps this cell re-runnable).
i = feature_vectors_notused.shape[0]
j = 0  # row index into feature_vectors_used; set once, not once per walked directory
for subdir, dirs, files in os.walk(used_train_dir):
    for file in files:
        filepath = os.path.join(subdir, file)
        img = load_img(filepath, target_size=(224, 224))
        x = img_to_array(img)
        x = np.expand_dims(x, axis=0)  # add the batch axis expected by predict()
        x = x/255.0
        # verbose=0: a progress bar per image would flood the cell output.
        features = model.predict(x, verbose=0)
        # Index exactly one row; a `[i:]` / `[j:]` slice would overwrite every
        # remaining row and silently do nothing past the end of the array.
        feature_vectors[i, :] = features
        feature_vectors_real[i, :] = features
        feature_vectors_used[j, :] = features
        i += 1
        j += 1



In [None]:
# Extract features for the generated (artificial) images. In feature_vectors
# these rows come after all real rows, so the running index i starts at the
# number of real rows rather than at whatever value an earlier cell happened to
# leave in `i` (keeps this cell re-runnable).
i = feature_vectors_real.shape[0]
j = 0  # row index into feature_vectors_artificial; set once, not once per walked directory
for subdir, dirs, files in os.walk(artificial_train_dir):
    for file in files:
        filepath = os.path.join(subdir, file)
        img = load_img(filepath, target_size=(224, 224))
        x = img_to_array(img)
        x = np.expand_dims(x, axis=0)  # add the batch axis expected by predict()
        x = x/255.0
        # verbose=0: a progress bar per image would flood the cell output.
        features = model.predict(x, verbose=0)
        # Index exactly one row; a `[j:]` slice would overwrite every remaining
        # row and silently do nothing past the end of the array.
        feature_vectors[i, :] = features
        feature_vectors_artificial[j, :] = features
        i += 1
        j += 1



In [None]:
def _pair_features(real_features, artificial_features):
    """Concatenate every real feature vector with every artificial one.

    Row ``r * n_artificial + a`` of the result is ``real_features[r]`` followed
    by ``artificial_features[a]`` - the same ordering a nested loop over real
    (outer) and artificial (inner) rows produces.

    Args:
        real_features: 2-D array with one row per real image.
        artificial_features: 2-D array with one row per generated image.

    Returns:
        Array of shape ``(n_real * n_artificial, d_real + d_artificial)``.
    """
    n_real, d_real = real_features.shape
    n_art, d_art = artificial_features.shape
    pairs = np.empty((n_real, n_art, d_real + d_art))
    # Broadcasting fills both halves of every pair without a Python-level loop.
    pairs[:, :, :d_real] = real_features[:, np.newaxis, :]
    pairs[:, :, d_real:] = artificial_features[np.newaxis, :, :]
    return pairs.reshape(n_real * n_art, d_real + d_art)


# One training row per (real image, generated image) pair; sizes follow the
# input arrays instead of being hard-coded.
feature_vectors_train_used = _pair_features(feature_vectors_used, feature_vectors_artificial)
feature_vectors_train_notused = _pair_features(feature_vectors_notused, feature_vectors_artificial)

In [None]:
# Sanity check: number of non-zero entries in the not-used pair matrix, and the
# shape of the used pair matrix.
print(np.count_nonzero(feature_vectors_train_notused))
print(feature_vectors_train_used.shape)

38979180
(40000, 4096)


In [None]:
# Labels: 1 for every (used, artificial) pair, 0 for every (not used, artificial) pair.
y_used = np.ones(feature_vectors_train_used.shape[0])
y_notused = np.zeros(feature_vectors_train_notused.shape[0])

# Stack the positive samples first, then the negative ones, so that the rows of
# X stay aligned with the entries of Y.
X = np.vstack((feature_vectors_train_used, feature_vectors_train_notused))
Y = np.hstack((y_used, y_notused))

In [None]:
# Fit an RBF-kernel support vector classifier on the pair features X with
# labels Y (1 = pair built from a "used" image, 0 = from a "not used" image).
svm = SVC(kernel='rbf')
svm.fit(X,Y)

In [None]:
test_dir = "/content/drive/Shareddrives/GAN_CLEF_2023/GAN_CLEF_dataset/img_test/real_unknown_1"

test_dir_2 = "/content/drive/Shareddrives/GAN_CLEF_2023/GAN_CLEF_dataset/test/test_1/generated_1"
feature_vectors_test_real = np.empty((200, 2048))
feature_vectors_test_artificial = np.empty((10000,2048))
ids = []
results = []


def _extract_test_features(image_dir, out, ids):
    """Fill ``out`` with one model feature vector per image found under ``image_dir``.

    Images are visited in ``os.walk`` order. The path of every image is appended
    to ``ids`` in that same order, so ``ids`` and the filled rows stay aligned.

    Args:
        image_dir: directory that is walked recursively for image files.
        out: pre-allocated 2-D array; row ``r`` receives the r-th image's features.
        ids: list that collects the visited file paths (mutated in place).

    Returns:
        The number of images processed, i.e. the number of rows of ``out`` written.

    Raises:
        IndexError: if ``image_dir`` holds more images than ``out`` has rows.
    """
    row = 0
    for subdir, dirs, files in os.walk(image_dir):
        for file in files:
            filepath = os.path.join(subdir, file)
            img = load_img(filepath, target_size=(224, 224))
            ids.append(filepath)
            x = img_to_array(img)
            x = np.expand_dims(x, axis=0)  # add the batch axis expected by predict()
            x = x/255.0
            # verbose=0: a progress bar per image would flood the cell output.
            out[row, :] = model.predict(x, verbose=0)
            row += 1
    return row


# Loop over the images in both test directories, preprocess them and extract
# their feature vectors: real images first, then generated ones. `ids` holds
# the file paths in that same order.
n_test_real = _extract_test_features(test_dir, feature_vectors_test_real, ids)
n_test_artificial = _extract_test_features(test_dir_2, feature_vectors_test_artificial, ids)



[1;30;43mStreaming output truncated to the last 5000 lines.[0m


KeyboardInterrupt: ignored

# Classify every (real test image, generated test image) pair.
#
# The pairs are built and predicted one real image at a time: materialising all
# pairs at once would need a (2_000_000, 4096) float64 matrix (~65 GB). Pairs
# are written into a dedicated batch buffer, never into the training matrix.
# The concatenated labels keep the order "real index major, generated index
# minor", i.e. label[r * n_generated + g] belongs to pair (r, g).
n_test_real_rows, dim_real = feature_vectors_test_real.shape
n_test_art_rows, dim_art = feature_vectors_test_artificial.shape

pair_batch = np.empty((n_test_art_rows, dim_real + dim_art))
# The generated half is identical for every real image, so fill it only once.
pair_batch[:, dim_real:] = feature_vectors_test_artificial

batch_labels = []
for i in range(n_test_real_rows):
    # Broadcast the i-th real feature vector over all rows of the batch.
    pair_batch[:, :dim_real] = feature_vectors_test_real[i]
    batch_labels.append(svm.predict(pair_batch))

# np.concatenate rejects an empty list, so handle "no real test images" explicitly.
label = np.concatenate(batch_labels) if batch_labels else np.empty(0)
results.append(label)

import numpy as np
# Largest eigenvalue of outer(v, v) for every training pair vector v.
#
# outer(v, v) is a rank-1 symmetric matrix whose only non-zero eigenvalue is
# v . v (eigenvector v), so the value is computed directly as the squared norm
# of each row. This avoids one 4096 x 4096 eigendecomposition per row, does not
# rely on np.linalg.eig returning eigenvalues in any particular order, and
# actually stores the result (np.append returns a new array instead of writing
# into its argument).
eigen_used = np.einsum('ij,ij->i', feature_vectors_train_used, feature_vectors_train_used)

print(eigen_used)
