In [2]:
import os
import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras import Input
from tensorflow.keras.layers import Dense, MaxPooling2D, Conv2D, Flatten
from tensorflow.keras.models import Sequential
from keras.models import load_model
from faceRecon import FaceExtractorMultithread, FaceExtractor

def loadData(baseDir):
    videos = []
    labels = []
    # Iterate over the folders of videos inside Celeb-DF-v2
    for folder in os.listdir(baseDir):
        folder_path = os.path.join(baseDir, folder)
        if not os.path.isdir(folder_path):
            continue
        for video in os.listdir(folder_path):
            video_path = os.path.join(folder_path, video)
            videos.append(video_path)
            if (folder.split('-')[1] == 'real'):
                labels.append(1)
            else:
                labels.append(0)

    dataFrame = pd.DataFrame({'video': videos, 'label': labels})

    # Reduce el tamaño del dataset para que sea más fácil de manejar
    dataFrame = dataFrame.sample(20, random_state=42)

    face_extractor = FaceExtractorMultithread(n=30, max_workers=5)
    print('Extracting faces from videos...')
    dataFrame= face_extractor.transform(dataFrame)
    return dataFrame


df = loadData(baseDir='Datasets\CelebDB\Celeb-DF-v2')

print('Dividing dataset into train and test...')
# Dividir el dataset en train y test
X = df.drop(['label'], axis = 1)
y = df['label']


#comprobar tamaños 
print(X.shape)
print(y.shape)


Extracting faces from videos...


  row = row[1]


TypeError: 'int' object is not subscriptable

In [29]:
def loadData(baseDir):
    videos = []
    labels = []
    # Iterate over the folders of videos inside Celeb-DF-v2
    for folder in os.listdir(baseDir):
        folder_path = os.path.join(baseDir, folder)
        if not os.path.isdir(folder_path):
            continue
        for video in os.listdir(folder_path):
            video_path = os.path.join(folder_path, video)
            videos.append(video_path)
            if (folder.split('-')[1] == 'real'):
                labels.append(1)
            else:
                labels.append(0)

    dataFrame = pd.DataFrame({'video': videos, 'label': labels})

    # Reduce el tamaño del dataset para que sea más fácil de manejar
    dataFrame = dataFrame.sample(10, random_state=42)

    face_extractor = FaceExtractor(n=30)
    print('Extracting faces from videos...')
    dataFrame= face_extractor.transform(dataFrame)
    return dataFrame


df = loadData(baseDir='Datasets\CelebDB\Celeb-DF-v2')

print('Dividing dataset into train and test...')
# Dividir el dataset en train y test
X = df.drop(['label'], axis = 1)
y = df['label']

#comprobar tamaños 
print(X.shape)
print(y.shape)


Extracting faces from videos...
Dividing dataset into train and test...
(153, 1)
(153,)


In [30]:
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.2, random_state=42, stratify = y)
X_train = np.stack(X_train['face'], axis=0)
X_test = np.stack(X_test['face'], axis=0)


In [8]:
X_train

array([[[[ 32,  53,  75],
         [ 31,  54,  75],
         [ 27,  50,  73],
         ...,
         [ 35,  58,  88],
         [ 33,  55,  84],
         [ 33,  53,  79]],

        [[ 32,  52,  74],
         [ 30,  53,  74],
         [ 27,  51,  74],
         ...,
         [ 38,  58,  86],
         [ 35,  53,  80],
         [ 32,  50,  72]],

        [[ 30,  52,  74],
         [ 28,  52,  75],
         [ 25,  49,  73],
         ...,
         [ 37,  56,  85],
         [ 34,  52,  79],
         [ 31,  49,  72]],

        ...,

        [[ 48,  82, 114],
         [ 49,  83, 115],
         [ 50,  84, 116],
         ...,
         [ 32,  60,  92],
         [ 30,  57,  87],
         [ 33,  58,  89]],

        [[ 48,  82, 114],
         [ 49,  83, 115],
         [ 50,  84, 116],
         ...,
         [ 34,  64,  97],
         [ 32,  59,  92],
         [ 34,  59,  92]],

        [[ 48,  82, 114],
         [ 49,  83, 115],
         [ 49,  83, 115],
         ...,
         [ 36,  67,  98],
        

In [31]:

model = Sequential()
model.add(Input(shape=(64, 64, 3)))
model.add(Conv2D(32, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(Flatten())
model.add(Dense(64, activation='relu'))
model.add(Dense(1, activation='softmax'))

model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

print('Started training...')

model.fit(X_train, y_train, epochs=10, validation_data=(X_test, y_test))

#evaluamos el modelo
loss, acc = model.evaluate(X_test, y_test, verbose=0)
print('Test Accuracy: %.3f' % acc)
print('Test Loss: %.3f' % loss)

#exportamos el modelo
model.save('model.h5')





Started training...
Epoch 1/10


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test Accuracy: 0.065
Test Loss: 0.034


  saving_api.save_model(


In [None]:
from tensorflow.python.client import device_lib
import numpy as np

print(device_lib.list_local_devices())

AttributeError: 'NoneType' object has no attribute 'message_types_by_name'

In [47]:
#importamos el modelo y lo evaluamos con varias metricas
model = load_model(filepath=r'C:\Users\pablo\Desktop\model_full_dataset.h5')



Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 62, 62, 10)        280       
                                                                 
 max_pooling2d (MaxPooling2  (None, 31, 31, 10)        0         
 D)                                                              
                                                                 
 conv2d_1 (Conv2D)           (None, 29, 29, 20)        1820      
                                                                 
 flatten (Flatten)           (None, 16820)             0         
                                                                 
 dense (Dense)               (None, 64)                1076544   
                                                                 
 dense_1 (Dense)             (None, 1)                 65        
                                                        

In [48]:
#evaluamos el modelo 
loss, acc = model.evaluate(X_test, y_test, verbose=0)
print('Test Accuracy: %.3f' % acc)
print('Test Loss: %.3f' % loss)
# evaluamos el modelo usando todo tipo de metricas
y_pred = model.predict(X_test)
y_pred = np.round(y_pred)
print(classification_report(y_test, y_pred))    
print(confusion_matrix(y_test, y_pred))


Test Accuracy: 0.065
Test Loss: 0.380
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        29
           1       0.06      1.00      0.12         2

    accuracy                           0.06        31
   macro avg       0.03      0.50      0.06        31
weighted avg       0.00      0.06      0.01        31

[[ 0 29]
 [ 0  2]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
