In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import pandas as pd

##Preporocess images

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.utils import Sequence
from PIL import UnidentifiedImageError

class DataGenerator(Sequence):
    def __init__(self, image_paths, labels, batch_size=32, img_size=(150, 150), n_classes=5, shuffle=True):
        self.image_paths = image_paths
        self.labels = labels
        self.batch_size = batch_size
        self.img_size = img_size
        self.n_classes = n_classes
        self.shuffle = shuffle
        self.on_epoch_end()

    def __len__(self):
        return int(np.floor(len(self.image_paths) / self.batch_size))

    def __getitem__(self, index):
        indexes = self.indexes[index * self.batch_size:(index + 1) * self.batch_size]
        image_paths_temp = [self.image_paths[k] for k in indexes]
        labels_temp = [self.labels[k] for k in indexes]

        X, y = self.__data_generation(image_paths_temp, labels_temp)

        return X, y

    def on_epoch_end(self):
        self.indexes = np.arange(len(self.image_paths))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __data_generation(self, image_paths_temp, labels_temp):
        X = np.empty((self.batch_size, *self.img_size, 3))
        y = np.empty((self.batch_size, self.n_classes), dtype=int)

        for i, (img_path, label) in enumerate(zip(image_paths_temp, labels_temp)):
            try:
                img = tf.keras.preprocessing.image.load_img(img_path, target_size=self.img_size)
                img = tf.keras.preprocessing.image.img_to_array(img)
                img = img / 255.0  # Normalize the image to [0, 1]
            except (UnidentifiedImageError, IOError) as e:
                print(f"Error loading image {img_path}: {e}")
                img = np.zeros((*self.img_size, 3))  # Placeholder image

            X[i,] = img
            y[i,] = label

        return X, y

In [None]:
import os
import numpy as np
import pandas as pd

# path = '/content/drive/Shareddrives/PFA_Dataset/data/dataset-classifier.csv'
# images_path = '/content/drive/Shareddrives/PFA_Dataset/images'

path = '/content/drive/MyDrive/PFA-Dataset/datas/final_data.csv'
images_path = '/content/drive/MyDrive/PFA-Dataset/Images'

# Load the labels CSV file
labels_df = pd.read_csv(path)

# Create lists of image paths and corresponding labels
image_paths = []
labels = []
not_found_users = []

for _, row in labels_df.iterrows():
    user_id = row['username']
    user_labels = row[['O', 'C', 'E', 'A', 'N']].values
    user_folder = f'{images_path}/{user_id}'

    if os.path.exists(user_folder):
        for image_name in os.listdir(user_folder):
            image_paths.append(os.path.join(user_folder, image_name))
            labels.append(user_labels)
    else:
        not_found_users.append(user_id)

labels = np.array(labels)

In [None]:
print(len(not_found_users))
print(image_paths[:10])
print(labels[:10])

In [None]:
from sklearn.model_selection import train_test_split

# Split data into training and validation sets
train_paths, val_paths, train_labels, val_labels = train_test_split(image_paths, labels, test_size=0.01, random_state=42)

# Create DataGenerator instances
train_generator = DataGenerator(train_paths, train_labels, batch_size=32, img_size=(224, 224), n_classes=5, shuffle=True)

val_generator = DataGenerator(val_paths, val_labels, batch_size=32, img_size=(224, 224), n_classes=5, shuffle=False)

##CNN simple

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

def create_model(input_shape=(224, 224, 3)):
    model = Sequential([
        Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
        MaxPooling2D((2, 2)),
        Conv2D(64, (3, 3), activation='relu'),
        MaxPooling2D((2, 2)),
        Flatten(),
        Dense(128, activation='relu'),
        Dropout(0.5),
        Dense(5, activation='sigmoid')
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

with strategy.scope():

    model = create_model()

    history = model.fit(
        train_generator,
        epochs=20,
        validation_data=val_generator
    )

In [None]:
loss, accuracy = model.evaluate(val_generator)
print(f'Validation Loss: {loss}')
print(f'Validation Accuracy: {accuracy}')

# model.save('/content/drive/My Drive/cnn.h5')

In [None]:
model.save('/content/drive/My Drive/cnn2.h5')

## Confusion matrix

In [None]:
import numpy as np
from tensorflow.keras.models import load_model
from sklearn.metrics import confusion_matrix, hamming_loss

# Load the trained model
model = load_model('/content/drive/MyDrive/PFA-Dataset/models/cnn.h5')

# Initialize lists to hold true labels and predictions
y_true = []
y_pred = []

# Collect true labels and predictions
for i in range(len(train_generator)):
    X, y = train_generator[i]
    predictions = model.predict(X)
    y_true.extend(y)
    y_pred.extend(predictions)

# Convert lists to numpy arrays
y_true = np.array(y_true)
y_pred = np.array(y_pred)

# Binarize the predictions
y_pred_binary = (y_pred > 0.5).astype(int)



# Compute the confusion matrix for each class and print them
confusion_matrices = [confusion_matrix(y_true[:, i], y_pred_binary[:, i]) for i in range(y_true.shape[1])]
print('Confusion Matrices')
for i, cm in enumerate(confusion_matrices):
    print(f'Class {["O", "C", "E", "A", "N"][i]}:')
    print(cm)

# Calculate Hamming Loss
hamming = hamming_loss(y_true, y_pred_binary)
print('Hamming Loss:', hamming)


Confusion Matrices
Class O:
[[  32  454]
 [ 120 1762]]
Class C:
[[595 917]
 [312 544]]
Class E:
[[1051  202]
 [ 878  237]]
Class A:
[[  14  382]
 [ 127 1845]]
Class N:
[[1868   12]
 [ 486    2]]
Hamming Loss: 0.3285472972972973


In [0]:
report = classification_report(y_true, y_pred_binary, target_names=['O', 'C', 'E', 'A', 'N'])
print('Classification Report')
print(report)

Classification Report
              precision    recall  f1-score   support

           O       0.80      0.94      0.86      1882
           C       0.37      0.64      0.47       856
           E       0.54      0.21      0.31      1115
           A       0.83      0.94      0.88      1972
           N       0.14      0.00      0.01       488

   micro avg       0.69      0.70      0.69      6313
   macro avg       0.54      0.54      0.50      6313
weighted avg       0.65      0.70      0.65      6313
 samples avg       0.70      0.68      0.67      6313


  _warn_prf(average, modifier, msg_start, len(result))


In [0]:
import numpy as np
from tensorflow.keras.models import load_model
from sklearn.metrics import confusion_matrix, hamming_loss

# Define a function to apply thresholds
def apply_thresholds(y_pred, thresholds):
    y_pred_binary = np.zeros_like(y_pred)
    for i in range(y_pred.shape[1]):
        y_pred_binary[:, i] = (y_pred[:, i] > thresholds[i]).astype(int)
    return y_pred_binary

# Thresholds for each class
thresholds = [0.7, 0.4, 0.4, 0.7, 0.3]
# Load the trained model
model = load_model('/content/drive/MyDrive/PFA-Dataset/models/cnn.h5')

# Initialize lists to hold true labels and predictions
y_true = []
y_pred = []

# Collect true labels and predictions
for i in range(len(train_generator)):
    X, y = train_generator[i]
    predictions = model.predict(X)
    y_true.extend(y)
    y_pred.extend(predictions)

# Convert lists to numpy arrays
y_true = np.array(y_true)
y_pred = np.array(y_pred)

# Binarize the predictions
y_pred_binary = apply_thresholds(y_pred, thresholds)

# Compute the confusion matrix for each class and print them
confusion_matrices = [confusion_matrix(y_true[:, i], y_pred_binary[:, i]) for i in range(y_true.shape[1])]
print('Confusion Matrices')
for i, cm in enumerate(confusion_matrices):
    print(f'Class {["O", "C", "E", "A", "N"][i]}:')
    print(cm)

# Calculate Hamming Loss
hamming = hamming_loss(y_true, y_pred_binary)
print('Hamming Loss:', hamming)

report = classification_report(y_true, y_pred_binary, target_names=['O', 'C', 'E', 'A', 'N'])
print('Classification Report')
print(report)


Confusion Matrices
Class O:
[[227 259]
 [892 990]]
Class C:
[[ 230 1282]
 [ 124  732]]
Class E:
[[711 542]
 [594 521]]
Class A:
[[ 283  113]
 [1199  773]]
Class N:
[[1757  123]
 [ 464   24]]
Hamming Loss: 0.4722972972972973
Classification Report
              precision    recall  f1-score   support

           O       0.79      0.53      0.63      1882
           C       0.36      0.86      0.51       856
           E       0.49      0.47      0.48      1115
           A       0.87      0.39      0.54      1972
           N       0.16      0.05      0.08       488

   micro avg       0.57      0.48      0.52      6313
   macro avg       0.54      0.46      0.45      6313
weighted avg       0.66      0.48      0.52      6313
 samples avg       0.53      0.46      0.47      6313


  _warn_prf(average, modifier, msg_start, len(result))


[0.7, 0.4, 0.4, 0.7, 0.3]

In [None]:
import numpy as np
from tensorflow.keras.models import load_model
from sklearn.metrics import confusion_matrix, hamming_loss

# Define a function to apply thresholds
def apply_thresholds(y_pred, thresholds):
    y_pred_binary = np.zeros_like(y_pred)
    for i in range(y_pred.shape[1]):
        y_pred_binary[:, i] = (y_pred[:, i] > thresholds[i]).astype(int)
    return y_pred_binary

# Thresholds for each class
thresholds = [0.7, 0.6, 0.6, 0.7, 0.2]
# Load the trained model
model = load_model('/content/drive/MyDrive/PFA-Dataset/models/cnn.h5')

# Initialize lists to hold true labels and predictions
y_true = []
y_pred = []

# Collect true labels and predictions
for i in range(len(train_generator)):
    X, y = train_generator[i]
    predictions = model.predict(X)
    y_true.extend(y)
    y_pred.extend(predictions)

# Convert lists to numpy arrays
y_true = np.array(y_true)
y_pred = np.array(y_pred)

# Binarize the predictions
y_pred_binary = apply_thresholds(y_pred, thresholds)

# Compute the confusion matrix for each class and print them
confusion_matrices = [confusion_matrix(y_true[:, i], y_pred_binary[:, i]) for i in range(y_true.shape[1])]
print('Confusion Matrices')
for i, cm in enumerate(confusion_matrices):
    print(f'Class {["O", "C", "E", "A", "N"][i]}:')
    print(cm)

# Calculate Hamming Loss
hamming = hamming_loss(y_true, y_pred_binary)
print('Hamming Loss:', hamming)

report = classification_report(y_true, y_pred_binary, target_names=['O', 'C', 'E', 'A', 'N'])
print('Classification Report')
print(report)


Confusion Matrices
Class O:
[[227 259]
 [892 990]]
Class C:
[[1175  337]
 [ 619  237]]
Class E:
[[1161   92]
 [1001  114]]
Class A:
[[ 283  113]
 [1199  773]]
Class N:
[[1468  412]
 [ 379  109]]
Hamming Loss: 0.4478885135135135
Classification Report
              precision    recall  f1-score   support

           O       0.79      0.53      0.63      1882
           C       0.41      0.28      0.33       856
           E       0.55      0.10      0.17      1115
           A       0.87      0.39      0.54      1972
           N       0.21      0.22      0.22       488

   micro avg       0.65      0.35      0.46      6313
   macro avg       0.57      0.30      0.38      6313
weighted avg       0.68      0.35      0.45      6313
 samples avg       0.56      0.35      0.41      6313


  _warn_prf(average, modifier, msg_start, len(result))


thresholds = [0.7, 0.6, 0.6, 0.7, 0.2]