# Whole module

In [None]:
from sklearn.utils import shuffle
import os
import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras import Input
from tensorflow.keras.layers import Dense, MaxPooling2D, Conv2D,MaxPooling3D, Conv3D, Flatten,Lambda, Dropout
from tensorflow.keras.models import Sequential
from keras.models import load_model
from faceRecon import FaceExtractorMultithread, FaceExtractor

route = '/home/pabloarga/Data'

print('Loading dataframes...')
fragments = []
for i in range(5):
    # Assuming each dataframe is stored as a separate table in the HDF5 file
    if i%10==0:
        print(f'{i} dataframes loaded')
    chunk = pd.read_hdf(f'{route}/dataframe{i}_FaceForensics.h5', key=f'df{i}')
    fragments.append(chunk)
#fragments = [pd.read_hdf(f'dataframes/CelebDB/dataframe{i}_600videos.h5', key=f'df{i}') for i in range(2)]#6
df = pd.concat(fragments)

print(df.describe())
print(df.dtypes)
print(df['label'].value_counts())

X = df.drop(['label'], axis = 1)
y = df['label']

print('Dividing dataset into train and test...')
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.2, random_state=42, stratify = y)
X_train = np.stack(X_train['face'], axis=0)
X_test = np.stack(X_test['face'], axis=0)


In [None]:
print('Creating model...')
model = Sequential()
model.add(Input(shape=(200, 200, 3)))
model.add(Lambda(lambda x: x/255.0)) #normalizamos los valores de los pixeles
model.add(Conv2D(32, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Flatten())
model.add(Dropout(0.5))  # Dropout for regularization
model.add(Dense(1024, activation='relu'))
model.add(Dropout(0.5))  # Dropout for regularization
model.add(Dense(1024, activation='relu'))
model.add(Dense(1, activation='softmax'))

model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])


In [None]:
from MetricsModule import TrainingMetrics
#prueba de que imprime las stats
metrics = TrainingMetrics(model,"/home/pabloarga/Results")
metrics.train(X_train, y_train, X_test, y_test, epochs=10)

# Test de bach training

In [3]:
import os
from tensorflow.keras.layers import Dense, MaxPooling2D, Conv2D, Flatten,Lambda,  Input,Dropout
from tensorflow.keras.models import Sequential
from MetricsModule import TrainingMetrics

route =  'P:\TFG\Datasets\dataframes_small' #'/home/pabloarga/Data' 
resultsPath = 'P:\TFG\Datasets\dataframes_small\\results' #'/home/pabloarga/Results'

model = Sequential()
model.add(Input(shape=(200, 200, 3)))
model.add(Lambda(lambda x: x/255.0)) #normalizamos los valores de los pixeles -> mejora la eficiencia
model.add(Conv2D(32, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Flatten())
model.add(Dropout(0.2))  # Dropout for regularization
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.3))  # Dropout for regularization
model.add(Dense(1024, activation='relu'))
model.add(Dense(1, activation='softmax'))

model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])







In [None]:
from MetricsModule import TrainingMetrics
metrics = TrainingMetrics(model, resultsPath)
metrics.batches_train(route,2,1)


#exportamos el modelo
model.save(os.path.join(resultsPath,'model.h5'))

# Importamos el modelo y probamos con un ejemplo

In [10]:
from keras.models import load_model
from sklearn.metrics import confusion_matrix
from FaceReconModule import FaceExtractorMultithread
import pandas as pd
import numpy as np
import os
modelPath = os.path.join(resultsPath,'model.keras')

#cargamos un video y lo procesamos frame por frame
videoPath = 'P:\TFG\Datasets\FaceForensics\manipulated_sequences-fake\DeepFakeDetection\c23\\videos\\01_15__outside_talking_pan_laughing__02HILKYO.mp4'
#creamos un dataframe con el path al video y con la label 0 de fake
df = pd.DataFrame({'video': [videoPath], 'label': [0]})
faceExtractor = FaceExtractorMultithread(400) #cada 2 frames
imagesDataset = faceExtractor.transform(df)

print(imagesDataset['face'])

#Cargamos el modelo
model = load_model(modelPath,safe_mode=False)
#probamos el modelo manualmente
y_pred = model.predict(np.stack(imagesDataset['face'], axis=0))
#matriz de confusión
y_real = imagesDataset['label']
print(confusion_matrix(y_real, y_pred))


Processing video 1/1 ----> 100%
0      [[[18, 15, 16], [20, 17, 18], [24, 21, 22], [2...
1      [[[25, 22, 21], [25, 22, 23], [24, 21, 22], [2...
2      [[[85, 82, 83], [86, 83, 84], [89, 86, 86], [9...
3      [[[112, 109, 108], [112, 110, 109], [113, 111,...
4      [[[29, 26, 27], [29, 26, 27], [31, 31, 31], [3...
                             ...                        
438    [[[26, 32, 36], [26, 32, 36], [23, 30, 36], [2...
439    [[[59, 57, 54], [58, 56, 52], [59, 57, 53], [6...
440    [[[30, 33, 33], [31, 33, 35], [33, 36, 37], [3...
441    [[[46, 41, 40], [46, 41, 40], [46, 41, 40], [4...
442    [[[24, 26, 33], [24, 26, 33], [24, 26, 36], [2...
Name: face, Length: 443, dtype: object
[[  0 443]
 [  0   0]]
