# *Imports*

In [5]:
import os
import cv2
import sys
import math
import random
import joblib
import numpy as np
import pandas as pd
import tensorflow as tf
import functools, operator
from matplotlib import pyplot as plt
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.applications.inception_v3 import InceptionV3

# *Face Extraction From The frames*

In [6]:
#apply this function on data after pre-processing and frames sampling here every 15th frame is considered
def extract_faces_from_preprocessed_data():
    rootdir = '/kaggle/input/processed-dataset'

    for subdir, dirs, files in os.walk(rootdir):
        for file in files:
            print(os.path.join(subdir, file))

            path = "/".join(subdir.strip("/").split('/')[2:])
            # read video from file
            cap = cv2.VideoCapture(os.path.join(subdir, file)) 
            ret, frame = cap.read()  #ret: A Boolean value indicating whether the frame was read successfully
            try : 
              os.makedirs(path)
            except :
                a = 1

    #         os.mkdir('Output')

            while ret :
                RGB_img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            #     plt.imshow(RGB_img)
                image = RGB_img
                gray = cv2.cvtColor(RGB_img, cv2.COLOR_BGR2GRAY)

                faceCascade = cv2.CascadeClassifier(cv2.data.haarcascades + "haarcascade_frontalface_default.xml")
                faces = faceCascade.detectMultiScale(
                    gray,
                    scaleFactor=1.3,  # This value determines how much the image is resized before applying the classifier.
                    minNeighbors=3,  #The minNeighbors parameter works by considering each detected rectangle and its surrounding rectangles at different scales. If the number of rectangles that overlap with a given rectangle exceeds the minNeighbors threshold, the rectangle is retained as a valid detection.
                    minSize=(30, 30) #The minimum size of the window to be searched for faces
                )


                if len(faces)>0:
                    for (x, y, w, h) in faces[:1]:
                        face = cv2.rectangle(RGB_img, (x, y), (x + w, y + h), (0, 255, 0), 2) #draws a rectangle on image
                        face = face[y:y + h, x:x + w]
                        face = cv2.cvtColor(face, cv2.COLOR_BGR2RGB)
                        cv2.imwrite(path+'/'+file, face)
                ret, frame = cap.read()


extract_faces_from_preprocessed_data()

# *Applying Genetic Algorithm For Frames Selection*

In [9]:


def pad_frames(frames, limit, jpegs=False):
        last_frame = frames[-1]
        if jpegs:
            frames_padded = frames + [last_frame]*(limit-len(frames))
        else:
            padding = np.asarray([last_frame * 1.]*(limit-len(frames)))
            frames_padded = np.concatenate([frames, padding], axis=0)
        return frames_padded

    
    
def resize_frames(frames):
  new_frames = []
  for frame in frames:
    new_frame = cv2.resize(frame,(160,160))
    new_frames.append(new_frame)
  
  return new_frames


#sends frames based on the string
def get_frames(frames,solution):
    final_frames = []
    for i in range(len(frames)):
        if(solution[i]=='1'):
           final_frames.append(frames[i])
    return final_frames
    
def calc_histscore(frames):
    hist_score = dict()
    for i in range(0,len(frames)):
        for j in range (i+1,len(frames)):
            hist_img1 = cv2.calcHist([frames[i]], [0, 1, 2], None, [256, 256, 256], [0, 256, 0, 256, 0, 256])  #frame,all channels include, mask, bin size of hist, range of each channel
            cv2.normalize(hist_img1, hist_img1, alpha=0, beta=1, norm_type=cv2.NORM_MINMAX)
            # dst: The output image or array that will contain the normalized values.
            # alpha: The lower end of the normalization range.
            # beta: The upper end of the normalization range


            hist_img2 = cv2.calcHist([frames[j]], [0, 1, 2], None, [256, 256, 256], [0, 256, 0, 256, 0, 256])
            cv2.normalize(hist_img2, hist_img2, alpha=0, beta=1, norm_type=cv2.NORM_MINMAX)
            metric_val = cv2.compareHist(hist_img1, hist_img2, cv2.HISTCMP_CHISQR)
            hist_score[(i,j)] = metric_val
    return hist_score
    

#main fitness function

def fitness_func(frames,delta,fit_idx,hist_score ):
#     print('HIST_SCORE : ',hist_score)
    hist_data = []
    score1 = 0
    score2 = []  #importance score
    final_score = 0
#     print('LEN = ',len(frames))

    for i in range(0,len(frames)):
        for j in range (i+1,len(frames)):
            metric_val = hist_score[(fit_idx[i],fit_idx[j])]
            hist_data.append(metric_val)
            
    mean = np.mean(hist_data)
    std = np.std(hist_data)
            
    for i in range(0,len(frames)):
        net_mean = 0
        c = 0
        for j in range (i+1,len(frames)):
            metric_val = hist_score[(fit_idx[i],fit_idx[j])]
#             print('Metric',metric_val)
            if metric_val < (mean+std):
                net_mean +=metric_val
                c+=1
        if c>0:
           net_mean = net_mean/(c)
        if mean>0:
           net_mean/=mean
        # score2.append(math.log(delta[i])*(math.log(1/(net_mean+1))))
        score2.append(1*(math.log(1/(net_mean+1))))   #Importance Function
        #Logarithmic scaling helps to reduce the impact of outliers or extreme values in the data. 
#         print('SCORE 2 : ',score2)
    
#     print('NET_MEAN : ',net_mean)
    
    for i in range(0,len(frames)):
        net_mean = 0
        c = 0
        for j in range (i+1,len(frames)):
            metric_val  = hist_score[(i,j)]
            final_score += (metric_val*score2[i]*score2[j])/((i-j)*(i-j))
            
    return final_score


def crossover(l, q):
#     print(len(l),len(q))
# generating the random number to perform crossover
    k = random.randint(0, len(l))
    print("Crossover point :", k)
# interchanging the genes
    for i in range(k, len(q)):
        l[i], q[i] = q[i], l[i]
    return l, q


def mutation(l):
    count0 = []
    count1 = []
    for i in range(len(l)):
        if(l[i]=='0'):
            count0.append(i)
        else:
            count1.append(i)
            
    mutation_points = []
    change = ''
    req = min(len(l),25)
    if len(count1) <req:
        mutation_points = random.sample(count0,req-len(count1))
        change = '1'
    elif len(count1) >req:
        mutation_points = random.sample(count1,len(count1)-req)
        change = '0'
        
    for i in mutation_points:
        l[i] = change
    
    return l
#     print(mutation_points)
def select_parents(population,fitness_values):
  parents = []
  total = sum(fitness_values)
  if total == 0:
    return population
  norm_fitness_values = [x/total for x in fitness_values]

  #find cumulative fitness values for roulette wheel selection
  cumulative_fitness = []
  start = 0
  for norm_value in norm_fitness_values:
    start+=norm_value
    cumulative_fitness.append(start)

  population_size = len(population)
  for count in range(population_size):
    random_number = random.uniform(0, 1)
    individual_number = 0
    for score in cumulative_fitness:
      if(random_number<=score):
        parents.append(population[individual_number])
        break
      individual_number+=1
      
  return parents


In [10]:
K = 25
rootdir = '/kaggle/input/celeb-df'
from PIL import Image as im

for subdir, dirs, files in os.walk(rootdir):
        files  = [i[6:-4] for i in files]
        files.sort(key = int)
        files = ['frame_'+i+'.jpg' for i in files]
#         print(subdir,'->',files)
        print(subdir)
        frames = []
        
        for file in files :
            img = plt.imread(os.path.join(subdir,file))
# #             img = im.open(os.path.join(subdir,file))
#             img = np.array(im.open(os.path.join(subdir,file)))
            
            frames.append(img)
#             plt.figure()
#             plt.imshow(img)
#             print(img)

        if len(frames) < K:
                frames = pad_frames(frames,K,True)
        
        print(len(frames))
        frames = resize_frames(frames)
      
        
        metric_values = []
        for i in range(1,len(frames)):
        #         hist_img1 = cv2.calcHist(frames[i], [0, 1, 2], None, [256, 256, 256], [0, 256, 0, 256, 0, 256])
                hist_img1 = cv2.calcHist([frames[i]], [0, 1, 2], None, [256, 256, 256], [0, 256, 0, 256, 0, 256])
                cv2.normalize(hist_img1, hist_img1, alpha=0, beta=1, norm_type=cv2.NORM_MINMAX)
        #         hist_img2 = cv2.calcHist(frames[i-1], [0, 1, 2], None, [256, 256, 256], [0, 256, 0, 256, 0, 256])
                hist_img2 = cv2.calcHist([frames[i-1]], [0, 1, 2], None, [256, 256, 256], [0, 256, 0, 256, 0, 256])
                cv2.normalize(hist_img2, hist_img2, alpha=0, beta=1, norm_type=cv2.NORM_MINMAX)
                metric_val1 = cv2.compareHist(hist_img1, hist_img2, cv2.HISTCMP_CHISQR)
                metric_values.append(metric_val1)
                
        delta = []
        
        mean = np.mean(metric_values)
        std_ = np.std(metric_values)
#         print('Mean =' + str(mean))
        processed_frames = []
    
        for i in range(0,len(metric_values)):
            if metric_values[i] >= std_ :  #put mean or std here
                processed_frames.append(frames[i])
                delta.append(i+1)
                
        print('Processed Frames Length ',len(processed_frames))
#         print(processed_frames)
        hist_score = dict()

            
        hist_score = calc_histscore(processed_frames)
#         print('HIST',hist_score)
        
        txt = ""
        chromosome = txt.zfill(len(processed_frames))
        # print(len(chromosome))
        # print(chromosome)

        p1 = list(chromosome)
        p2 = list(chromosome)
        p3 = list(chromosome)
        p4 = list(chromosome)

        c1 = min(25,len(processed_frames))

        for i in range(c1):
            p1[i]='1'
            p2[i]='1'
            p3[i]='1'
            p4[i]='1'

        #generating random population
        random.shuffle(p1)
        random.shuffle(p2)
        random.shuffle(p3)
        random.shuffle(p4)        
        
        iteration = 10
        threshold = 300
        solution = []


        iteration = 4#10 
        threshold = 300
        solution = []

        prev_generation = [p1,p2,p3,p4]
        curr_generation = []
        max_fitness_idx = 0

        for i in range(iteration):
                p1,p2,p3,p4 = prev_generation[0],prev_generation[1],prev_generation[2],prev_generation[3]
                fitness1 = [processed_frames[j] for j in range(len(p1)) if p1[j] == '1']
                fitness2 = [processed_frames[j] for j in range(len(p2)) if p2[j] == '1']
                fitness3 = [processed_frames[j] for j in range(len(p3)) if p3[j] == '1']
                fitness4 = [processed_frames[j] for j in range(len(p4)) if p4[j] == '1']


                fit1_idx = [j for j in range(len(p1)) if p1[j] == '1']
                fit2_idx = [j for j in range(len(p2)) if p2[j] == '1']
                fit3_idx = [j for j in range(len(p3)) if p3[j] == '1']
                fit4_idx = [j for j in range(len(p4)) if p4[j] == '1']

            #     print(fit1_idx)
            #     print(fit2_idx)

                fit1_score = fitness_func(fitness1,delta,fit1_idx,hist_score )
                fit2_score = fitness_func(fitness2,delta,fit2_idx,hist_score )
                fit3_score = fitness_func(fitness3,delta,fit3_idx,hist_score )
                fit4_score = fitness_func(fitness4,delta,fit4_idx,hist_score )

#                 print('NetFitnessScore : ',sum([fit1_score,fit2_score,fit3_score,fit4_score]))
                while len(curr_generation)<4:
                    parent1,parent2,parent3,parent4 = select_parents(prev_generation,[fit1_score,fit2_score,fit3_score,fit4_score])

            #     print(fit1_score)
                    parent1,parent2 = crossover(parent1,parent2)
                    parent1 = mutation(parent1)
                    parent2 = mutation(parent2)

                    curr_generation.append(parent1)
                    curr_generation.append(parent2)

                prev_generation = curr_generation
                curr_generation = []
#                 print('---------------------------------------------------')

        #     print('NET SCORE : ',abs(fit1_score-fit2_score))

        # if(abs(fit1_score-fit2_score)<threshold or i+1 == iteration):
        solution = prev_generation[0]
        print(solution)
        solution = get_frames(processed_frames,solution)
        solution = resize_frames(solution)
        if(len(solution)<25):
            solution =  pad_frames(solution,25)
        solution = np.array(solution)
        
 
        path = "/".join(subdir.strip("/").split('/')[2:])
        try : 
              os.makedirs(path)
        except :
                a = 1
        
        c = 1
        for frame in solution:
            data = im.fromarray((frame).astype(np.uint8),'RGB')
            data.save(path+'/'+str(c)+'.png')
            c+=1


/kaggle/input/celeb-df/processed-dataset/data/Celeb-real/id12_0001
27
Processed Frames Length  26
Crossover point : 6
Crossover point : 16
Crossover point : 24
Crossover point : 0
Crossover point : 19
Crossover point : 3
Crossover point : 0
Crossover point : 12
['1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '0', '1', '1', '1']
/kaggle/input/celeb-df/processed-dataset/data/Celeb-real/id0_0006
36
Processed Frames Length  35
Crossover point : 23
Crossover point : 28
Crossover point : 21
Crossover point : 6
Crossover point : 3
Crossover point : 1
Crossover point : 7
Crossover point : 28
['0', '0', '1', '1', '1', '1', '1', '1', '0', '1', '1', '1', '1', '1', '0', '0', '0', '1', '0', '1', '1', '1', '1', '0', '1', '1', '1', '1', '1', '0', '1', '1', '1', '1', '0']
/kaggle/input/celeb-df/processed-dataset/data/Celeb-real/id4_0006
25
Processed Frames Length  8
Crossover point : 1
Crossover point : 7
Crossover point : 7
Crossover poin

  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  keepdims=keepdims, where=where)
  subok=False)
  ret = ret.dtype.type(ret / rcount)


/kaggle/input/celeb-df/processed-dataset/data/Celeb-real/id4_0001
30
Processed Frames Length  27
Crossover point : 15
Crossover point : 24
Crossover point : 0
Crossover point : 3
Crossover point : 11
Crossover point : 27
Crossover point : 2
Crossover point : 11
['1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '0', '1', '1', '0', '1', '1']
/kaggle/input/celeb-df/processed-dataset/data/Celeb-real/id17_0009
25
Processed Frames Length  19
Crossover point : 4
Crossover point : 4
Crossover point : 19
Crossover point : 0
Crossover point : 13
Crossover point : 15
Crossover point : 7
Crossover point : 7
['1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1']
/kaggle/input/celeb-df/processed-dataset/data/Celeb-real/id17_0002
25
Processed Frames Length  22
Crossover point : 2
Crossover point : 18
Crossover point : 12
Crossover point : 22
Crossover point : 15
Crossover point : 13
Crossover point : 3
Cr