# Whole module

In [None]:
from sklearn.utils import shuffle
import os
import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras import Input
from tensorflow.keras.layers import Dense, MaxPooling2D, Conv2D,MaxPooling3D, Conv3D, Flatten,Lambda, Dropout
from tensorflow.keras.models import Sequential
from keras.models import load_model
from faceRecon import FaceExtractorMultithread, FaceExtractor

# Directory holding the pre-extracted face dataframes (one HDF5 file each).
route = '/home/pabloarga/Data'

print('Loading dataframes...')
# Each dataframe is stored in its own file under the key `df<i>`.
fragments = []
for i in range(5):
    if not i % 10:
        print(f'{i} dataframes loaded')
    fragments.append(
        pd.read_hdf(f'{route}/dataframe{i}_FaceForensics.h5', key=f'df{i}')
    )
# Alternative source kept for reference (CelebDB):
# fragments = [pd.read_hdf(f'dataframes/CelebDB/dataframe{i}_600videos.h5', key=f'df{i}') for i in range(2)]
df = pd.concat(fragments)

# Quick sanity checks: summary statistics, column dtypes and class balance.
for report in (df.describe(), df.dtypes, df['label'].value_counts()):
    print(report)

# Separate the target column from the features.
y = df['label']
X = df.drop(columns=['label'])

print('Dividing dataset into train and test...')
# Stratified 80/20 split with a fixed seed so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)
# Stack the per-row entries of the 'face' column along a new leading axis,
# producing one array per split.
X_train, X_test = (np.stack(split['face'], axis=0) for split in (X_train, X_test))


In [None]:
print('Creating model...')
# Baseline CNN for binary classification of 200x200 RGB face crops.
model = Sequential()
model.add(Input(shape=(200, 200, 3)))
# Divide pixel values by 255 inside the model so callers can feed raw images.
model.add(Lambda(lambda x: x/255.0))
# Four Conv(3x3, relu) + MaxPool(2x2) stages with growing filter counts.
model.add(Conv2D(32, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Flatten())
# Fully connected head with dropout for regularization.
model.add(Dropout(0.5))
model.add(Dense(1024, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1024, activation='relu'))
# A single output unit must use sigmoid: softmax over one unit always
# yields 1.0, so the loss gradient would vanish and nothing would be learned.
model.add(Dense(1, activation='sigmoid'))

model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])


In [None]:
from MetricsModule import TrainingMetrics
# Smoke test: check that training prints its statistics.
metrics = TrainingMetrics(
    model,
    "/home/pabloarga/Results",
)
metrics.train(
    X_train, y_train,
    X_test, y_test,
    epochs=10,
)

# Test de batch training

In [3]:
import os
from tensorflow.keras.layers import Dense, MaxPooling2D, Conv2D, Flatten,Lambda,  Input,Dropout
from tensorflow.keras.models import Sequential
from MetricsModule import TrainingMetrics

# Raw strings keep the Windows backslashes literal; the previous plain
# literals relied on invalid escape sequences (\T, \D, \d), which newer
# Python versions warn about and plan to reject.
# Linux alternatives: '/home/pabloarga/Data' and '/home/pabloarga/Results'.
route = r'P:\TFG\Datasets\dataframes_small'
resultsPath = r'P:\TFG\Datasets\dataframes_small\results'

def _build_binary_cnn(conv_filters, dense_head):
    """Build and compile a small CNN for binary classification of 200x200 RGB images.

    Args:
        conv_filters: Filter count of each Conv2D(3x3, relu) layer, in order.
            Every convolution is followed by a 2x2 max-pooling layer.
        dense_head: Sequence of (dropout_rate, units) pairs applied after
            flattening. A Dropout layer with `dropout_rate` is inserted before
            each Dense(units, relu) layer.

    Returns:
        A Sequential model compiled with Adam, binary cross-entropy and the
        accuracy metric, ending in a single sigmoid unit.
    """
    net = Sequential()
    net.add(Input(shape=(200, 200, 3)))
    # Divide pixel values by 255 inside the model so callers can feed raw images.
    net.add(Lambda(lambda x: x/255.0))
    for filters in conv_filters:
        net.add(Conv2D(filters, (3, 3), activation='relu'))
        net.add(MaxPooling2D((2, 2)))
    net.add(Flatten())
    for dropout_rate, units in dense_head:
        net.add(Dropout(dropout_rate))  # Dropout for regularization
        net.add(Dense(units, activation='relu'))
    # A single output unit must use sigmoid: softmax over one unit always
    # yields 1.0, so the network could never learn with binary cross-entropy.
    net.add(Dense(1, activation='sigmoid'))
    net.compile(optimizer='adam',
                loss='binary_crossentropy',
                metrics=['accuracy'])
    return net


# Deepest variant: four conv stages and a 512 -> 1024 dense head.
model = _build_binary_cnn(conv_filters=(32, 64, 128, 128),
                          dense_head=((0.2, 512), (0.3, 1024)))

# Lightest variant: three conv stages and a single 512-unit dense layer.
model2 = _build_binary_cnn(conv_filters=(32, 64, 128),
                           dense_head=((0.2, 512),))

# Wider first stage: three conv stages starting at 64 filters, 512 -> 512 head.
model3 = _build_binary_cnn(conv_filters=(64, 128, 128),
                           dense_head=((0.2, 512), (0.3, 512)))



In [8]:
from MetricsModule import TrainingMetrics
import tensorflow as tf
from tensorflow.keras import layers
# Pretrained VGG16 convolutional base (ImageNet weights, classifier head removed).
# NOTE(review): the Keras docs say VGG16 inputs should go through
# `tf.keras.applications.vgg16.preprocess_input`; confirm that
# `batches_train` (or the stored data) already applies it.
base_model = tf.keras.applications.VGG16(
    include_top=False,
    weights='imagenet',
    input_shape=(200, 200, 3),
)

# Freeze every layer of the base model so only the new head is trained.
for frozen_layer in base_model.layers:
    frozen_layer.trainable = False

# New classifier stacked on top of the frozen base.
model = tf.keras.models.Sequential([
    base_model,
    layers.Flatten(),
    layers.Dense(256, activation='relu'),
    layers.Dense(1, activation='sigmoid'),
])

model.compile(
    loss='binary_crossentropy',
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
    metrics=['accuracy'],
)

# Train in batches of dataframes read from `route`; results go to `resultsPath`.
metrics = TrainingMetrics(model, resultsPath)
metrics.batches_train(route, nBatches=1, epochs=2)



Training the model with batch: 1/1
Epoch 1/2
Epoch 2/2


# Importamos el modelo y probamos con un ejemplo

In [None]:
from keras.models import load_model
from sklearn.metrics import confusion_matrix
from FaceReconModule import FaceExtractorMultithread
import pandas as pd
import numpy as np
import os
modelPath = os.path.join(resultsPath, 'model.keras')

# Load one video and run it through the face extractor.
# Raw string keeps the Windows backslashes literal; the previous plain literal
# relied on invalid escape sequences (\T, \D, \F, \m, \c).
videoPath = r'P:\TFG\Datasets\FaceForensics\manipulated_sequences-fake\DeepFakeDetection\c23\videos\01_15__outside_talking_pan_laughing__02HILKYO.mp4'
# Single-row dataframe with the video path and label 0 (fake).
df = pd.DataFrame({'video': [videoPath], 'label': [0]})
# NOTE(review): the original comment said "every 2 frames" but the argument
# is 400 — confirm what this constructor parameter means.
faceExtractor = FaceExtractorMultithread(400)
imagesDataset = faceExtractor.transform(df)

print(imagesDataset['face'])

# Load the trained model. safe_mode=False permits deserializing Lambda
# layers, which can execute arbitrary code: only load files you trust.
model = load_model(modelPath, safe_mode=False)
# Raw model output for every extracted face.
y_pred = model.predict(np.stack(imagesDataset['face'], axis=0))
# confusion_matrix needs discrete class labels, not continuous scores, so
# threshold at 0.5 and flatten to 1-D.
# Assumes the model ends in a single sigmoid unit — TODO confirm.
y_pred_labels = (y_pred > 0.5).astype(int).ravel()
# Confusion matrix; labels=[0, 1] keeps it 2x2 even though this video only
# contributes samples of one class.
y_real = imagesDataset['label']
print(confusion_matrix(y_real, y_pred_labels, labels=[0, 1]))


# Tests on datasets

In [None]:
from sklearn.utils import shuffle
import os
import numpy as np
from scipy import ndimage
import pandas as pd
import tensorflow as tf
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras import Input
from tensorflow.keras.layers import Dense, MaxPooling2D, Conv2D,MaxPooling3D, Conv3D, Flatten,Lambda, Dropout
from tensorflow.keras.models import Sequential
from keras.models import load_model
from FaceReconModule import FaceExtractorMultithread, FaceExtractor




"""
TODO comentar
"""
def augment(row):
    """Return `row` plus a flipped and a rotated copy when its label is 1.

    Args:
        row: A dataframe row (pandas Series) whose 'face' entry holds a
            flattened 200x200x3 image and whose 'label' entry is the class.

    Returns:
        A DataFrame with three rows (original, left-right flipped, rotated by
        15 degrees) when ``row['label'] == 1``; otherwise a one-row DataFrame
        containing only `row`. Augmented faces are stored as flat lists with
        the same length as the original.

    Raises:
        ValueError: If the 'face' entry cannot be reshaped to (200, 200, 3).
    """
    image = np.array(row['face']).reshape((200, 200, 3))

    # Only rows with label 1 are augmented.
    # NOTE(review): the original comment called these "fake" images, but
    # another cell of this notebook labels a fake video with 0 — confirm
    # the label convention.
    if row['label'] != 1:
        return pd.DataFrame([row])

    flipped_image = np.fliplr(image)
    # reshape=False keeps the output at 200x200; the default (reshape=True)
    # enlarges the canvas to fit the rotated image, producing a face whose
    # flattened length no longer matches the other rows. Corners uncovered
    # by the rotation are filled with scipy's default constant value (0).
    rotated_image = ndimage.rotate(image, 15, reshape=False)

    # Add the augmented images as new examples that share the row's other fields.
    augmented_rows = [row]
    for augmented_image in (flipped_image, rotated_image):
        new_row = row.copy()
        new_row['face'] = augmented_image.flatten().tolist()
        new_row['label'] = 1
        augmented_rows.append(new_row)

    return pd.DataFrame(augmented_rows)





# Raw string keeps the Windows backslashes literal; the previous plain literal
# relied on invalid escape sequences (\T, \D, \d).
folderPath = r'E:\TFG\Datasets\dataframes\valid\dataframes_combined'

nBatches = 10

# Number of regular files in the folder.
# Assumes the folder contains only the dataframe files — TODO confirm.
numDataframes = sum(
    1 for name in os.listdir(folderPath)
    if os.path.isfile(os.path.join(folderPath, name))
)
# Number of dataframes per batch. Files left over when the count is not a
# multiple of nBatches are not loaded (same as before).
fragmentSize = numDataframes // nBatches

if fragmentSize == 0:
    # pd.concat raises ValueError on an empty list, so stop early with a
    # clear message when there are fewer files than batches.
    print(f'Only {numDataframes} dataframe files found for {nBatches} batches; nothing to load')
else:
    for i in range(nBatches):
        fragments = [
            pd.read_hdf(os.path.join(folderPath, f'dataframe{j}_FaceForensics.h5'), key=f'df{j}')
            for j in range(fragmentSize * i, fragmentSize * (i + 1))
        ]
        df = pd.concat(fragments)
        # Count how many rows of each label this batch contains.
        print(df['label'].value_counts())
        # Optional augmentation step, currently disabled:
        #print("After augmentation")
        #df = pd.concat(df.apply(augment, axis=1).tolist(), ignore_index=True)
        #print(df['label'].value_counts())
        print('-------')

In [4]:
from keras.models import load_model
import cv2

#cargamos el modelo del escritorio
# Saved model and the video used for the manual test.
modelPath = '/home/pabloarga/Results/2024-03-04 14.39.53/model2024-03-04 14.39.53.keras'
videoTest = '/home/pabloarga/testVideos/testVideo.mp4'

# Load the model without compiling it (inference only). safe_mode=False
# permits deserializing Lambda layers, which can execute arbitrary code:
# only load files you trust.
# NOTE(review): the recorded output of this cell shows load_model failing
# with "expected 2 variables, but received 0" — presumably the file was saved
# with a different Keras version or model definition; confirm.
model = load_model(modelPath, safe_mode=False, compile=False)

# Read the number of frames in the video, then release the capture so the
# file handle is not leaked.
cap = cv2.VideoCapture(videoTest)
try:
    if not cap.isOpened():
        raise IOError(f'Could not open video: {videoTest}')
    # OpenCV returns properties as floats; the extractor is given an int
    # elsewhere in this notebook, so convert explicitly.
    nFramesVideo = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
finally:
    cap.release()

# Extract the faces from the video.
# NOTE(review): the original comment said "every 2 frames" — confirm what the
# constructor argument and the second `process_video` argument mean.
faceExtractor = FaceExtractorMultithread(nFramesVideo)
frames = faceExtractor.process_video(videoTest, 1)[0]

# Run the model manually on the extracted faces.
y_pred = model.predict(np.stack(frames, axis=0))
print(y_pred)

ValueError: Layer 'conv2d_5' expected 2 variables, but received 0 variables during loading. Expected: ['conv2d_5/kernel:0', 'conv2d_5/bias:0']