In [None]:
import numpy as np
import os
from sklearn.metrics import confusion_matrix
import seaborn as sn; sn.set(font_scale=1.4)
from sklearn.utils import shuffle           
import matplotlib.pyplot as plt             
import cv2                                 
import tensorflow as tf                
from tqdm import tqdm

from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1 import InteractiveSession

In [None]:
# Root folder of the dataset; load_data() treats each sub-directory as one class.
DATASET_DIR = "./dataset-lite-augumented"

# os.listdir() returns entries in arbitrary, filesystem-dependent order and also
# lists plain files (e.g. .DS_Store). Keep only directories and sort them so the
# class -> integer mapping is identical on every machine and every run.
class_names = sorted(
    entry
    for entry in os.listdir(DATASET_DIR)
    if os.path.isdir(os.path.join(DATASET_DIR, entry))
)

# Map each class name to its integer label (its index in class_names).
class_names_labels = {class_name: i for i, class_name in enumerate(class_names)}
num_classes = len(class_names)

# Target size passed to cv2.resize for every image.
IMAGE_SIZE = (150, 150)

In [None]:
from sklearn.model_selection import train_test_split

def load_data(dataset=None, test_size=0.2, random_state=42):
    """Load every image of the dataset and split it into train/validation sets.

    Each sub-directory of ``dataset`` is one class; its integer label is looked
    up in the module-level ``class_names_labels``. Images are read with OpenCV,
    converted from BGR to RGB and resized to ``IMAGE_SIZE``. Pixel values are
    not rescaled.

    Args:
        dataset: Root directory of the dataset. Defaults to ``DATASET_DIR``.
        test_size: Fraction of the samples held out for validation.
        random_state: Seed forwarded to ``train_test_split``.

    Returns:
        A two-element list ``[(train_images, train_labels),
        (val_images, val_labels)]``; images are ``float32`` arrays and labels
        are ``int32`` arrays.

    Raises:
        KeyError: If a class directory is missing from ``class_names_labels``.
        ValueError: If no readable image is found under ``dataset``.
    """
    if dataset is None:
        dataset = DATASET_DIR

    images = []
    labels = []
    unreadable = []

    print("Loading {}".format(dataset))

    # Sorted iteration keeps the sample order (and therefore the seeded split)
    # reproducible; os.listdir() order is filesystem-dependent.
    for folder in sorted(os.listdir(dataset)):
        folder_path = os.path.join(dataset, folder)

        # Ignore stray files sitting next to the class directories.
        if not os.path.isdir(folder_path):
            continue

        label = class_names_labels[folder]

        for file in tqdm(sorted(os.listdir(folder_path)), desc=folder):
            img_path = os.path.join(folder_path, file)

            # cv2.imread returns None (it does not raise) when the file cannot
            # be decoded; skip it instead of crashing inside cvtColor.
            image = cv2.imread(img_path)
            if image is None:
                unreadable.append(img_path)
                continue

            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            image = cv2.resize(image, IMAGE_SIZE)

            images.append(image)
            labels.append(label)

    if unreadable:
        print("Skipped {} unreadable file(s), e.g. {}".format(len(unreadable), unreadable[0]))

    if not images:
        raise ValueError("No readable images found under {}".format(dataset))

    images = np.array(images, dtype='float32')
    labels = np.array(labels, dtype='int32')

    # Hold out `test_size` of the samples for validation.
    train_images, val_images, train_labels, val_labels = train_test_split(
        images, labels, test_size=test_size, random_state=random_state)

    return [(train_images, train_labels), (val_images, val_labels)]

In [None]:
# Read every image from disk and split into train/validation sets
# (kept in its own cell because it touches the whole dataset).
loaded_datasets = load_data()

In [None]:
# load_data() returns [(train_images, train_labels), (val_images, val_labels)].
train_split, val_split = loaded_datasets
train_images, train_labels = train_split
val_images, val_labels = val_split

# NOTE: there is no separate held-out test set here; the "test" names are
# aliases of the validation arrays.
test_images, test_labels = val_images, val_labels

# Report how many samples ended up in each split.
print("Number of training examples: {}".format(len(train_images)))
print("Number of validation examples: {}".format(len(val_images)))
print("Number of testing examples: {}".format(len(test_images)))

In [None]:
import os
import numpy as np
from sklearn.decomposition import PCA
from keras.preprocessing import image
from keras.applications.vgg16 import VGG16, preprocess_input
from keras.models import Model
from keras.preprocessing.image import ImageDataGenerator

from keras.preprocessing import image
from keras.applications.vgg16 import preprocess_input

# ImageNet-pretrained VGG16 without its classifier head, used as a fixed
# convolutional feature extractor.
model_vgg = tf.keras.applications.VGG16(weights='imagenet', include_top=False)

# The ImageNet weights expect inputs that went through VGG16's own
# preprocess_input; feeding the images as loaded gives the network
# off-distribution input. preprocess_input may modify float arrays in place,
# so it is applied to copies to keep train_images / val_images untouched and
# this cell safe to re-run.
vgg_preprocess = tf.keras.applications.vgg16.preprocess_input

# Extract convolutional features for both splits
train_features = model_vgg.predict(vgg_preprocess(train_images.copy()))
val_features = model_vgg.predict(vgg_preprocess(val_images.copy()))

# Flatten each sample's feature map into a single 1D vector
train_features_1d = train_features.reshape(train_features.shape[0], -1)
val_features_1d = val_features.reshape(val_features.shape[0], -1)

# Fit PCA on the training features only, then apply the same projection to the
# validation features so no validation information leaks into the fit.
n_components = 2  # Choose the number of components you want
# random_state makes the projection reproducible if scikit-learn picks its randomized solver.
pca = PCA(n_components=n_components, random_state=42)

train_features_pca = pca.fit_transform(train_features_1d)
val_features_pca = pca.transform(val_features_1d)

print("Original training features shape:", train_features_1d.shape)
print("Reduced training features shape:", train_features_pca.shape)

print("Original validation features shape:", val_features_1d.shape)
print("Reduced validation features shape:", val_features_pca.shape)

In [None]:
# Bar chart of the fraction of variance explained by each fitted PCA component
features = range(pca.n_components_)

fig, ax = plt.subplots()
ax.bar(features, pca.explained_variance_ratio_)
ax.set_xlabel('PCA feature')
ax.set_ylabel('variance')
ax.set_xticks(features)
plt.show()

In [None]:
# Scatter plot of the training samples in the 2-D PCA space, coloured by class label
scatter = plt.scatter(train_features_pca[:, 0], train_features_pca[:, 1], c=train_labels, cmap='plasma')

# A single scatter() call creates no labelled artists, so a bare plt.legend()
# would be empty. Build one handle per label value present in the data
# (num=None -> every unique value, in sorted order) and name it after its class.
handles, _ = scatter.legend_elements(num=None)
present_labels = np.unique(train_labels)
plt.legend(handles, [class_names[int(label)] for label in present_labels])

plt.xlabel('First principal component')
plt.ylabel('Second principal component')
plt.show()

In [None]:
from sklearn import decomposition
# Shapes of the VGG16 feature tensors: (samples, x, y, z).
n_train, x, y, z = train_features.shape
n_test, x, y, z = val_features.shape
numFeatures = x * y * z

# Upper bound on the points drawn per class, to keep the figure readable.
MAX_POINTS_PER_CLASS = 1000

# random_state makes the projection reproducible if scikit-learn picks its randomized solver.
pca = decomposition.PCA(n_components=2, random_state=42)

# Flatten every training feature map to one row per sample and fit the PCA on it.
X = train_features.reshape((n_train, numFeatures))
pca.fit(X)

C = pca.transform(X)  # Coordinates of the samples along the new (principal) axes
C1 = C[:, 0]
C2 = C[:, 1]

### Figures
plt.subplots(figsize=(10, 10))

# One scatter call per class so each class gets its own colour and legend entry.
for i, class_name in enumerate(class_names):
    class_mask = train_labels == i
    plt.scatter(C1[class_mask][:MAX_POINTS_PER_CLASS],
                C2[class_mask][:MAX_POINTS_PER_CLASS],
                label=class_name, alpha=0.4)
plt.legend()
# The title used to append the leaked loop index `i`, producing e.g.
# "PCA Projection5"; the figure shows all classes, so use a plain title.
plt.title("PCA Projection")
plt.xlabel("First principal component")
plt.ylabel("Second principal component")
plt.show()