# Install Packages

In [None]:
# Install WhisperX
! pip install git+https://github.com/m-bain/whisperx.git


In [None]:
!pip install dlib

In [None]:
!pip install moviepy

In [4]:
import os
# Get the list of all files and directories
# NOTE(review): the original note here read "Grayscale - REAL 0 1 | FAKE" — it looks
# like a log of which dataset parts were already processed; confirm with the author.
# Directory whose entries are preprocessed in this run.
path = "/kaggle/input/deepfake-detection/REAL/REAL/dfdc_train_part_2"
# Names of every entry directly under `path` (os.listdir returns them in arbitrary order).
dataset = os.listdir(path)

In [5]:
# Number of entries found under `path`.
len(dataset)

230

In [6]:
import pandas as pd

# Metadata table; the preprocessing loop uses it to look up a video's "label" by its "file_name".
metadata = pd.read_csv("/kaggle/input/deepfake-detection/selected_video.csv")

# Preprocessing


## Import Packages

In [8]:
import librosa
import moviepy.editor as mp
import tempfile
import numpy as np

import cv2
import dlib

import whisperx
import gc

2024-07-07 06:03:22.812517: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-07-07 06:03:22.812635: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-07-07 06:03:22.983813: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


## Word Alignment Processing


In [None]:
# Runtime configuration shared by the transcription and alignment steps.
device = "cuda"  # device string handed to WhisperX when loading/running models
batch_size = 16  # batch size passed to model.transcribe
compute_type = "float16"  # compute type passed to whisperx.load_model

# Speech to text transcription model
# Loaded once at module level so every call to getWordAlignment reuses it.
model = whisperx.load_model("large-v2", device, compute_type=compute_type)

# Alignment models already loaded in this session, keyed by detected language code.
# Each value is the (model, metadata) pair returned by whisperx.load_align_model.
_ALIGN_MODEL_CACHE = {}


def getWordAlignment(audio_file):
    """Transcribe an audio file and align the transcript at word level.

    Uses the module-level `model`, `batch_size` and `device`.

    Args:
        audio_file: path of the audio file to transcribe.

    Returns:
        The dictionary returned by `whisperx.align`; its "segments" entry is
        printed before returning.
    """
    audio = whisperx.load_audio(audio_file)
    transcription = model.transcribe(audio, batch_size=batch_size)

    # Word alignment model.
    # Loading it is expensive, so it is done once per language instead of once
    # per call (this function runs for every video in the dataset).
    language = transcription["language"]
    if language not in _ALIGN_MODEL_CACHE:
        _ALIGN_MODEL_CACHE[language] = whisperx.load_align_model(language_code=language, device=device)
    # Named `align_metadata` so it cannot be confused with the notebook-level
    # `metadata` DataFrame.
    word_model, align_metadata = _ALIGN_MODEL_CACHE[language]

    result = whisperx.align(transcription["segments"],
                            word_model,
                            align_metadata,
                            audio,
                            device,
                            return_char_alignments=False)

    print(result["segments"])
    return result

In [10]:
def getTimestamp(result):
    """Get the start and end timestamp of each word in an alignment result.

    Args:
        result: dictionary with a "segments" list; each segment may carry a
            "words" list of dictionaries with "start" and "end" entries.

    Returns:
        A list of `[start, end]` pairs, one per word, in segment order. A word
        without a "start" or "end" entry contributes `None` in that position,
        so callers must check for `None` before using the pair.
    """
    word_timestamp = []

    for segment in result["segments"]:
        # A segment with no "words" entry (or an explicit None) contributes
        # nothing instead of raising TypeError when iterated.
        for word in segment.get("words") or []:
            word_timestamp.append([word.get("start"), word.get("end")])

    return word_timestamp

## Video Processing


In [11]:
# dlib frontal face detector; called on each grayscale frame to find faces.
face_detector = dlib.get_frontal_face_detector()

# Landmark predictor loaded from the 68-point shape-predictor file shipped with the dataset.
dlib_facelandmark = dlib.shape_predictor("/kaggle/input/deepfake-detection/shape_predictor_68_face_landmarks.dat")

# Define fixed size for the mouth ROI
# Every mouth crop is resized to (fixed_width, fixed_height) before it is returned.
fixed_width = 100
fixed_height = 50

def _crop_mouth(gray, mouth_x, mouth_y, mouth_w, mouth_h):
    """Slice the mouth rectangle out of `gray`, clamping coordinates at zero."""
    # The rectangle is padded by 15 px around the landmarks, so its origin can
    # become negative near the image border. A negative slice start would wrap
    # around and yield an empty or wrong crop, hence the clamping.
    top = max(mouth_y, 0)
    left = max(mouth_x, 0)
    bottom = max(mouth_y + mouth_h, 0)
    right = max(mouth_x + mouth_w, 0)
    return gray[top:bottom, left:right]


def mouth_extractor(cap, start_timestamp, end_timestamp, fps):
    """Extract the speaker's mouth crops for one time span of a video.

    Frames between the two timestamps are read from `cap`. Faces are detected
    in every frame and their mouth regions are tracked per person, using the
    mouth's x coordinate as the identity key. The person whose summed gap
    between top lip and bottom lip is largest is treated as the speaker.

    Args:
        cap: opened `cv2.VideoCapture`; its read position is moved by this call.
        start_timestamp: start of the span, in seconds.
        end_timestamp: end of the span, in seconds.
        fps: frame rate used to convert the timestamps into frame indices.

    Returns:
        A list of grayscale mouth crops of the speaker, each resized to
        (`fixed_width`, `fixed_height`) and divided by 255. An empty list is
        returned when a frame cannot be read, when too many frames contain no
        face, or when too few usable crops were collected.
    """
    # Get frame rate
    start_frame = int(start_timestamp * fps)
    end_frame = int(end_timestamp * fps)

    # Set the starting frame
    cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame)

    processed_frame = [] # final processed frame

    missing_frame_thres = (end_frame - start_frame) / 2 # threshold for termination based on missing frame

    missing_frame = 0 # number of missing frame

    first_frame = True # stays True until the first frame in which a face is found
    num_face_registered = 0 # number of face detected across frame

    all_face = {} # Save all extracted mouth region for each person in the video
    mouth_open_close_diff = {} # Save the difference between mouth top (y) and mouth bottom (y) to determine which person is speaking

    for _ in range(start_frame, end_frame+1):
        ret, frame = cap.read()

        if not ret:
            print("Failed to read frame from video.")
            return []

        if (missing_frame >= missing_frame_thres):
            print("Too many missing frame at this section")
            return []

        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        faces = face_detector(gray)

        num_face_detected = len(faces)

        # Check if no faces were detected
        if num_face_detected == 0:
            missing_frame = missing_frame + 1
            continue

        # detected new face in the following frame (after first frame), register new mouth x coordinate
        if (num_face_detected > num_face_registered) and not first_frame:

            # repeat the process by the number of new face detected
            for _new_face in range(num_face_detected - num_face_registered):
                mouth_distance = {}

                for face in faces:
                    # get face landmarks
                    landmarks = dlib_facelandmark(gray, face)

                    # extract mouth landmark at 48 as ID
                    mouth_x = landmarks.part(48).x - 15

                    # Find the nearest mouth in the registered face
                    nearest_distance = [0,100000]
                    for registered_mouth in all_face:
                        # Find the distance between two mouth in x coordinate
                        difference = abs(registered_mouth - mouth_x)

                        # Identify the nearest mouth
                        if (difference < nearest_distance[1]):
                            nearest_distance = [mouth_x, difference]

                    mouth_distance[nearest_distance[0]] = nearest_distance[1]

                farthest = [0,0]
                # Find the mouth with the largest difference in distance
                for key, distance in mouth_distance.items():
                    if distance > farthest[1]:
                        farthest[0] = key
                        farthest[1] = distance

                # register new mouth with empty list
                all_face[farthest[0]] = [] # mouth region
                # Both dictionaries must share the same keys; the lip-gap total
                # of the new face starts at zero.
                mouth_open_close_diff[farthest[0]] = 0 # difference between top lip (y) and bottom lip (y)

            # update number of face registered
            num_face_registered = num_face_detected


        for face in faces:
            # get face landmarks
            landmarks = dlib_facelandmark(gray, face)

            # extract mouth region
            mouth_x = landmarks.part(48).x - 15
            mouth_y = landmarks.part(51).y - 15
            mouth_w = landmarks.part(54).x - mouth_x + 15
            mouth_h = landmarks.part(57).y - mouth_y + 15

            mouth_roi = _crop_mouth(gray, mouth_x, mouth_y, mouth_w, mouth_h)

            # difference between top lip (y) and bottom lip (y)
            lip_gap = abs(landmarks.part(62).y - landmarks.part(66).y)

            # When first frame, register the mouth x coordinate to the dictionary
            if first_frame:
                num_face_registered = num_face_detected
                all_face[mouth_x] = [mouth_roi]
                mouth_open_close_diff[mouth_x] = lip_gap

            else:
                # Append to the nearest mouth_x list
                nearest_x = [99999,0]

                for mouth in all_face:
                    difference = abs(mouth - mouth_x)
                    if (difference < nearest_x[0]):
                        nearest_x[0] = difference # difference between two mouth x coordinate
                        nearest_x[1] = mouth # key

                # Append the frame to the nearest mouth (key)
                all_face[nearest_x[1]].append(mouth_roi)

                # add the difference between top lip (y) and bottom lip (y)
                mouth_open_close_diff[nearest_x[1]] = mouth_open_close_diff[nearest_x[1]] + lip_gap

                # update the mouth x coordinate (key) with the latest position
                all_face[mouth_x] = all_face.pop(nearest_x[1])
                mouth_open_close_diff[mouth_x] = mouth_open_close_diff.pop(nearest_x[1])

        first_frame = False

    # Nothing was registered (e.g. an empty frame range): there is no speaker to pick.
    if not mouth_open_close_diff:
        return []

    # Determine the current speaker by the difference between mouth top and bottom position across frame
    # The speaker will have the largest the difference across frame because they will open and close their mouth
    speaker_key = max(mouth_open_close_diff, key=mouth_open_close_diff.get)

    # Crops that ended up empty (mouth outside the image) cannot be resized, so drop them.
    speaker_rois = [roi for roi in all_face[speaker_key] if roi.size > 0]

    # check number of frame. Return empty list when too many missing frame
    if len(speaker_rois) <= missing_frame_thres:
        return []

    # Resize frame
    for mouth_roi in speaker_rois:
        if mouth_roi.shape[1] < fixed_width or mouth_roi.shape[0] < fixed_height:
            interpolation_set = cv2.INTER_CUBIC # For upsampling
        else:
            interpolation_set = cv2.INTER_AREA # For downsampling

        resized_mouth_roi = cv2.resize(mouth_roi, (fixed_width, fixed_height), interpolation=interpolation_set)

        # Normalize the resized frame by dividing it with 255 so that each pixel value then lies between 0 and 1
        normalized_frame = resized_mouth_roi / 255

        processed_frame.append(normalized_frame)

    return processed_frame



## Audio Processing


In [12]:
def generate_spectrogram(audio_signal, sampling_rate, start_time, end_time):
    """Compute the log-mel spectrogram of one time span of an audio signal.

    Args:
        audio_signal: 1-D array of audio samples.
        sampling_rate: sampling rate of `audio_signal`, in samples per second.
        start_time: start of the span, in seconds.
        end_time: end of the span, in seconds.

    Returns:
        The mel power spectrogram of the clip converted to decibels relative
        to its maximum value.
    """
    # Extract the desired clip
    first_sample = int(start_time * sampling_rate)
    last_sample = int(end_time * sampling_rate)
    clip = audio_signal[first_sample:last_sample]

    # Compute the spectrogram.
    # The sampling rate is passed explicitly: without it librosa assumes its
    # default of 22050 Hz, which is only correct when the audio happens to
    # have been loaded at that rate.
    mel_spectrogram = librosa.feature.melspectrogram(y=clip, sr=sampling_rate)

    log_spectrogram = librosa.power_to_db(mel_spectrogram, ref=np.max)

    return log_spectrogram


## Start preprocessing

In [13]:
# Create folder to output processed data
import os

# Root of the Kaggle working area; one output folder per class label is created in it.
folder_path = '/kaggle/working/'

for item in ("REAL", "FAKE"):
    temp_path = f"{folder_path}{item}"

    # A folder left over from an earlier run is kept as it is.
    if os.path.exists(temp_path):
        continue

    os.mkdir(temp_path)


In [None]:
# Main preprocessing loop: for every video, transcribe its audio, then save one
# (mouth frames, log-mel spectrogram) pair per spoken word under
# /kaggle/working/<label>/<video>_<start>_<end>/.
video_counter = 0
total_video = len(dataset)

for file in dataset:
    video_counter = video_counter + 1
    print("Processing File: ", file, f"  |   Progress---> {video_counter}/{total_video}")
    video_path = f"{path}/{file}"

    # Determine if the MP4 is real or fake.
    # Looked up once per file, before any expensive work, so a file that is
    # missing from the metadata is skipped instead of raising IndexError later.
    label_rows = metadata.loc[metadata["file_name"] == file, "label"]
    if label_rows.empty:
        print("No label found in metadata, skipping file")
        continue
    label = label_rows.values[0]

    # load the video file
    video = mp.VideoFileClip(video_path)
    try:
        audio = video.audio
        if audio is None:
            print("No audio track found, skipping file")
            continue

        # Save the audio as a temporary WAV file
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=True) as tmp_audio_file:
            audio.write_audiofile(tmp_audio_file.name)
            tmp_audio = tmp_audio_file.name

            # get word alignment result
            alignment_result = getWordAlignment(tmp_audio)

            # Load the audio using librosa
            y, sr = librosa.load(tmp_audio)
    finally:
        # Release the reader held by the clip, also when the file is skipped or fails.
        video.close()

    # Get start and end timestamp for each word in the audio file
    word_timestamp = getTimestamp(alignment_result)

    # obtain the filename
    filename = os.path.splitext(file)[0]

    # Read video MP4 File.
    # One capture per video is enough: mouth_extractor seeks to the start frame
    # of every word itself.
    cap = cv2.VideoCapture(video_path)
    try:
        fps = cap.get(cv2.CAP_PROP_FPS)

        for timestamp in word_timestamp:
            # ensure the timestamp is not empty
            if timestamp[0] is None or timestamp[1] is None:
                continue

            print(f"Timestamp -> {timestamp[0]} - {timestamp[1]}")
            print("FPS: ", fps)

            # timestamp[0] -> start time | timestamp[1] -> end time
            processed_video = mouth_extractor(cap, timestamp[0], timestamp[1], fps)

            if not processed_video:
                print("empty frame")
                continue

            # Process and convert audio data to log mel spectrogram
            # (only done once the video part is known to be usable)
            processed_audio = generate_spectrogram(y, sr, timestamp[0], timestamp[1])
            print('Shape: ', processed_audio.shape)

            # Create new folder for the data; exist_ok lets the cell be re-run.
            sample_dir = f"/kaggle/working/{label}/{filename}_{timestamp[0]}_{timestamp[1]}/"
            os.makedirs(sample_dir, exist_ok=True)

            np.save(sample_dir+"video", processed_video)
            np.save(sample_dir+"audio", processed_audio)

            print("="*100)
            print(f"Saved Data")
            print("="*100)
    finally:
        cap.release()


Processing File:  jjycletzyl.mp4   |   Progress---> 1/230
MoviePy - Writing audio in /tmp/tmpm3a06xx3.wav


                                                                   

MoviePy - Done.
Detected language: en (0.99) in first 30s of audio...


Downloading: "https://download.pytorch.org/torchaudio/models/wav2vec2_fairseq_base_ls960_asr_ls960.pth" to /root/.cache/torch/hub/checkpoints/wav2vec2_fairseq_base_ls960_asr_ls960.pth
100%|██████████| 360M/360M [00:01<00:00, 259MB/s]  


[{'start': 0.049, 'end': 7.041, 'text': ' So, the movie did a good job of showing what happened, why it happened, and his role in it and that they were freed and how they were freed.', 'words': [{'word': 'So,', 'start': 0.049, 'end': 0.089, 'score': 0.012}, {'word': 'the', 'start': 0.65, 'end': 0.75, 'score': 0.903}, {'word': 'movie', 'start': 0.77, 'end': 0.991, 'score': 0.565}, {'word': 'did', 'start': 1.011, 'end': 1.111, 'score': 0.705}, {'word': 'a', 'start': 1.131, 'end': 1.151, 'score': 0.582}, {'word': 'good', 'start': 1.191, 'end': 1.311, 'score': 0.916}, {'word': 'job', 'start': 1.331, 'end': 1.572, 'score': 0.872}, {'word': 'of', 'start': 1.612, 'end': 1.652, 'score': 0.759}, {'word': 'showing', 'start': 1.692, 'end': 1.932, 'score': 0.872}, {'word': 'what', 'start': 1.972, 'end': 2.113, 'score': 0.867}, {'word': 'happened,', 'start': 2.173, 'end': 2.573, 'score': 0.882}, {'word': 'why', 'start': 2.674, 'end': 2.914, 'score': 0.764}, {'word': 'it', 'start': 2.974, 'end': 3.0

                                                                    

MoviePy - Done.
Detected language: en (0.98) in first 30s of audio...
[{'start': 5.353, 'end': 9.323, 'text': ' something that I think we should dwell on but I do feel that', 'words': [{'word': 'something', 'start': 5.353, 'end': 5.634, 'score': 0.542}, {'word': 'that', 'start': 5.674, 'end': 5.834, 'score': 0.926}, {'word': 'I', 'start': 5.914, 'end': 5.975, 'score': 0.977}, {'word': 'think', 'start': 6.035, 'end': 6.255, 'score': 0.888}, {'word': 'we', 'start': 6.275, 'end': 6.336, 'score': 0.082}, {'word': 'should', 'start': 6.396, 'end': 6.636, 'score': 0.741}, {'word': 'dwell', 'start': 6.737, 'end': 7.097, 'score': 0.722}, {'word': 'on', 'start': 7.338, 'end': 7.458, 'score': 0.936}, {'word': 'but', 'start': 7.559, 'end': 7.779, 'score': 0.871}, {'word': 'I', 'start': 7.92, 'end': 8.02, 'score': 0.943}, {'word': 'do', 'start': 8.16, 'end': 8.381, 'score': 0.996}, {'word': 'feel', 'start': 8.501, 'end': 8.802, 'score': 0.992}, {'word': 'that', 'start': 8.822, 'end': 9.323, 'score'

                                                                    

MoviePy - Done.




Detected language: en (0.99) in first 30s of audio...
[{'start': 0.63, 'end': 8.216, 'text': " When you say what are the easiest, I mean cheapest, places to eat in town, I think what you really mean is where can I go out to eat and it's really cheap.", 'words': [{'word': 'When', 'start': 0.63, 'end': 0.75, 'score': 0.83}, {'word': 'you', 'start': 0.79, 'end': 0.87, 'score': 0.895}, {'word': 'say', 'start': 0.91, 'end': 1.03, 'score': 0.883}, {'word': 'what', 'start': 1.05, 'end': 1.19, 'score': 0.57}, {'word': 'are', 'start': 1.23, 'end': 1.31, 'score': 0.693}, {'word': 'the', 'start': 1.33, 'end': 1.41, 'score': 0.836}, {'word': 'easiest,', 'start': 1.55, 'end': 2.051, 'score': 0.771}, {'word': 'I', 'start': 2.171, 'end': 2.211, 'score': 0.787}, {'word': 'mean', 'start': 2.251, 'end': 2.431, 'score': 0.823}, {'word': 'cheapest,', 'start': 2.511, 'end': 3.032, 'score': 0.779}, {'word': 'places', 'start': 3.132, 'end': 3.452, 'score': 0.917}, {'word': 'to', 'start': 3.492, 'end': 3.572,

                                                                    

MoviePy - Done.




Detected language: en (0.91) in first 30s of audio...
[{'start': 0.214, 'end': 6.342, 'text': ' And I know the teenagers, they play more and they spend a lot of time.', 'words': [{'word': 'And', 'start': 0.214, 'end': 0.535, 'score': 0.631}, {'word': 'I', 'start': 0.975, 'end': 1.055, 'score': 0.765}, {'word': 'know', 'start': 1.095, 'end': 1.396, 'score': 0.763}, {'word': 'the', 'start': 1.476, 'end': 1.876, 'score': 0.928}, {'word': 'teenagers,', 'start': 2.076, 'end': 2.637, 'score': 0.52}, {'word': 'they', 'start': 2.657, 'end': 2.897, 'score': 0.644}, {'word': 'play', 'start': 2.957, 'end': 3.158, 'score': 0.348}, {'word': 'more', 'start': 3.218, 'end': 4.339, 'score': 0.821}, {'word': 'and', 'start': 4.359, 'end': 4.419, 'score': 0.003}, {'word': 'they', 'start': 5.0, 'end': 5.18, 'score': 0.354}, {'word': 'spend', 'start': 5.2, 'end': 5.46, 'score': 0.543}, {'word': 'a', 'start': 5.481, 'end': 5.521, 'score': 0.051}, {'word': 'lot', 'start': 5.541, 'end': 5.721, 'score': 0.629},

                                                                    

MoviePy - Done.




Detected language: en (0.98) in first 30s of audio...
[{'start': 0.189, 'end': 9.986, 'text': " you so cats cats are a lot easier I'm just saying that they're a lot easier I mean how many times has a cat come over and just said oh give me", 'words': [{'word': 'you', 'start': 0.189, 'end': 0.31, 'score': 0.841}, {'word': 'so', 'start': 0.67, 'end': 0.81, 'score': 0.876}, {'word': 'cats', 'start': 0.87, 'end': 1.231, 'score': 0.763}, {'word': 'cats', 'start': 1.732, 'end': 1.912, 'score': 0.98}, {'word': 'are', 'start': 1.972, 'end': 2.093, 'score': 0.396}, {'word': 'a', 'start': 2.113, 'end': 2.133, 'score': 0.707}, {'word': 'lot', 'start': 2.173, 'end': 2.353, 'score': 0.988}, {'word': 'easier', 'start': 2.413, 'end': 2.734, 'score': 0.834}, {'word': "I'm", 'start': 2.834, 'end': 2.934, 'score': 0.962}, {'word': 'just', 'start': 2.974, 'end': 3.134, 'score': 0.868}, {'word': 'saying', 'start': 3.154, 'end': 3.455, 'score': 0.808}, {'word': 'that', 'start': 3.595, 'end': 3.715, 'score':

                                                        

MoviePy - Done.
Detected language: en (0.97) in first 30s of audio...
[{'start': 6.014, 'end': 8.347, 'text': ' If you want a tattoo, express yourself with it.', 'words': [{'word': 'If', 'start': 6.014, 'end': 6.074, 'score': 0.492}, {'word': 'you', 'start': 6.335, 'end': 6.516, 'score': 0.81}, {'word': 'want', 'start': 6.537, 'end': 6.617, 'score': 0.01}, {'word': 'a', 'start': 6.718, 'end': 6.838, 'score': 0.505}, {'word': 'tattoo,', 'start': 6.979, 'end': 7.301, 'score': 0.619}, {'word': 'express', 'start': 7.401, 'end': 7.804, 'score': 0.81}, {'word': 'yourself', 'start': 7.844, 'end': 8.166, 'score': 0.939}, {'word': 'with', 'start': 8.206, 'end': 8.286, 'score': 0.21}, {'word': 'it.', 'start': 8.307, 'end': 8.347, 'score': 0.0}]}]
Timestamp -> 6.014 - 6.074
FPS:  29.97
Shape:  (128, 3)
Saved Data
Timestamp -> 6.335 - 6.516
FPS:  29.97
Shape:  (128, 8)
Saved Data
Timestamp -> 6.537 - 6.617
FPS:  29.97
Shape:  (128, 4)
Saved Data
Timestamp -> 6.718 - 6.838
FPS:  29.97
Shape:  (128,

                                                                    

MoviePy - Done.




Detected language: en (0.97) in first 30s of audio...
[{'start': 0.069, 'end': 7.141, 'text': " So I'm like, oh my God, and then as soon as we get to the point where I get the next gas station, I'm stuck in traffic.", 'words': [{'word': 'So', 'start': 0.069, 'end': 0.31, 'score': 0.772}, {'word': "I'm", 'start': 0.55, 'end': 0.991, 'score': 0.525}, {'word': 'like,', 'start': 1.031, 'end': 1.351, 'score': 0.372}, {'word': 'oh', 'start': 1.532, 'end': 1.652, 'score': 0.746}, {'word': 'my', 'start': 1.692, 'end': 1.872, 'score': 0.882}, {'word': 'God,', 'start': 1.912, 'end': 2.193, 'score': 0.944}, {'word': 'and', 'start': 2.293, 'end': 2.373, 'score': 0.885}, {'word': 'then', 'start': 2.413, 'end': 2.593, 'score': 0.696}, {'word': 'as', 'start': 2.694, 'end': 2.814, 'score': 0.372}, {'word': 'soon', 'start': 2.874, 'end': 3.174, 'score': 0.767}, {'word': 'as', 'start': 3.234, 'end': 3.295, 'score': 0.275}, {'word': 'we', 'start': 3.315, 'end': 3.415, 'score': 0.71}, {'word': 'get', 'sta

                                                                    

MoviePy - Done.




Detected language: en (0.95) in first 30s of audio...
[{'start': 0.069, 'end': 4.917, 'text': ' really a great idea at the current moment due to the fact that technology is still evolving.', 'words': [{'word': 'really', 'start': 0.069, 'end': 0.269, 'score': 0.539}, {'word': 'a', 'start': 0.289, 'end': 0.41, 'score': 0.646}, {'word': 'great', 'start': 0.45, 'end': 0.77, 'score': 0.7}, {'word': 'idea', 'start': 0.87, 'end': 1.231, 'score': 0.65}, {'word': 'at', 'start': 1.331, 'end': 1.391, 'score': 0.746}, {'word': 'the', 'start': 1.431, 'end': 1.512, 'score': 0.979}, {'word': 'current', 'start': 1.552, 'end': 1.812, 'score': 0.404}, {'word': 'moment', 'start': 1.872, 'end': 2.153, 'score': 0.877}, {'word': 'due', 'start': 2.173, 'end': 2.313, 'score': 0.298}, {'word': 'to', 'start': 2.333, 'end': 2.453, 'score': 0.633}, {'word': 'the', 'start': 2.473, 'end': 2.533, 'score': 0.03}, {'word': 'fact', 'start': 2.573, 'end': 2.834, 'score': 0.611}, {'word': 'that', 'start': 2.874, 'end': 3

                                                                    

MoviePy - Done.
Detected language: en (1.00) in first 30s of audio...
[{'start': 0.029, 'end': 7.742, 'text': ' Going back to eating healthy, being active, calorie deficit, if you want to lose weight', 'words': [{'word': 'Going', 'start': 0.029, 'end': 0.289, 'score': 0.902}, {'word': 'back', 'start': 0.33, 'end': 0.57, 'score': 0.985}, {'word': 'to', 'start': 0.67, 'end': 1.071, 'score': 0.756}, {'word': 'eating', 'start': 1.932, 'end': 2.173, 'score': 0.868}, {'word': 'healthy,', 'start': 2.233, 'end': 2.694, 'score': 0.858}, {'word': 'being', 'start': 5.198, 'end': 5.398, 'score': 0.839}, {'word': 'active,', 'start': 5.518, 'end': 5.899, 'score': 0.889}, {'word': 'calorie', 'start': 5.919, 'end': 6.58, 'score': 0.424}, {'word': 'deficit,', 'start': 6.6, 'end': 6.901, 'score': 0.397}, {'word': 'if', 'start': 7.001, 'end': 7.101, 'score': 0.727}, {'word': 'you', 'start': 7.121, 'end': 7.181, 'score': 0.001}, {'word': 'want', 'start': 7.201, 'end': 7.301, 'score': 0.352}, {'word': 'to'

                                                                    

MoviePy - Done.
Detected language: en (0.95) in first 30s of audio...
[{'start': 0.049, 'end': 5.939, 'text': " is fun, not only to kill time, but it's kind of a sport.", 'words': [{'word': 'is', 'start': 0.049, 'end': 0.169, 'score': 0.664}, {'word': 'fun,', 'start': 0.33, 'end': 0.69, 'score': 0.728}, {'word': 'not', 'start': 1.351, 'end': 1.672, 'score': 0.784}, {'word': 'only', 'start': 1.732, 'end': 1.972, 'score': 0.647}, {'word': 'to', 'start': 1.992, 'end': 2.273, 'score': 0.877}, {'word': 'kill', 'start': 2.413, 'end': 2.674, 'score': 0.974}, {'word': 'time,', 'start': 2.714, 'end': 3.094, 'score': 0.922}, {'word': 'but', 'start': 3.114, 'end': 3.174, 'score': 0.0}, {'word': "it's", 'start': 3.194, 'end': 5.058, 'score': 0.628}, {'word': 'kind', 'start': 5.138, 'end': 5.338, 'score': 0.607}, {'word': 'of', 'start': 5.358, 'end': 5.398, 'score': 0.28}, {'word': 'a', 'start': 5.478, 'end': 5.558, 'score': 0.678}, {'word': 'sport.', 'start': 5.578, 'end': 5.939, 'score': 0.812}]}

                                                        

MoviePy - Done.




Detected language: en (0.92) in first 30s of audio...
[{'start': 0.049, 'end': 5.418, 'text': " side of school, which is actually causing the struggle as to how they're going to eat, how they're going to receive.", 'words': [{'word': 'side', 'start': 0.049, 'end': 0.43, 'score': 0.882}, {'word': 'of', 'start': 0.83, 'end': 0.931, 'score': 0.871}, {'word': 'school,', 'start': 0.991, 'end': 1.391, 'score': 0.744}, {'word': 'which', 'start': 1.472, 'end': 1.712, 'score': 0.873}, {'word': 'is', 'start': 1.772, 'end': 1.852, 'score': 0.589}, {'word': 'actually', 'start': 1.972, 'end': 2.293, 'score': 0.552}, {'word': 'causing', 'start': 2.313, 'end': 2.634, 'score': 0.444}, {'word': 'the', 'start': 2.654, 'end': 2.714, 'score': 0.02}, {'word': 'struggle', 'start': 2.734, 'end': 3.034, 'score': 0.908}, {'word': 'as', 'start': 3.094, 'end': 3.255, 'score': 0.625}, {'word': 'to', 'start': 3.375, 'end': 3.615, 'score': 0.618}, {'word': 'how', 'start': 3.675, 'end': 3.836, 'score': 0.906}, {'wor

                                                                    

MoviePy - Done.




Detected language: en (0.95) in first 30s of audio...
[{'start': 0.31, 'end': 1.351, 'text': ' at the current moment.', 'words': [{'word': 'at', 'start': 0.31, 'end': 0.39, 'score': 0.667}, {'word': 'the', 'start': 0.41, 'end': 0.49, 'score': 0.846}, {'word': 'current', 'start': 0.53, 'end': 0.85, 'score': 0.928}, {'word': 'moment.', 'start': 0.891, 'end': 1.351, 'score': 0.81}]}, {'start': 1.431, 'end': 4.296, 'text': "There's a black car standing right in front of me.", 'words': [{'word': "There's", 'start': 1.431, 'end': 1.772, 'score': 0.823}, {'word': 'a', 'start': 1.792, 'end': 1.812, 'score': 0.001}, {'word': 'black', 'start': 2.313, 'end': 2.593, 'score': 0.948}, {'word': 'car', 'start': 2.633, 'end': 2.974, 'score': 0.942}, {'word': 'standing', 'start': 2.994, 'end': 3.375, 'score': 0.402}, {'word': 'right', 'start': 3.395, 'end': 3.595, 'score': 0.466}, {'word': 'in', 'start': 3.615, 'end': 3.655, 'score': 0.193}, {'word': 'front', 'start': 3.795, 'end': 3.976, 'score': 0.778

                                                                    

MoviePy - Done.
Detected language: en (0.99) in first 30s of audio...
[{'start': 1.993, 'end': 2.615, 'text': ' Natural.', 'words': [{'word': 'Natural.', 'start': 1.993, 'end': 2.615, 'score': 0.921}]}, {'start': 3.777, 'end': 4.759, 'text': 'Surprised.', 'words': [{'word': 'Surprised.', 'start': 3.777, 'end': 4.759, 'score': 0.912}]}, {'start': 7.105, 'end': 7.786, 'text': 'Natural.', 'words': [{'word': 'Natural.', 'start': 7.105, 'end': 7.786, 'score': 0.837}]}, {'start': 9.309, 'end': 9.911, 'text': 'Open mouth.', 'words': [{'word': 'Open', 'start': 9.309, 'end': 9.59, 'score': 0.942}, {'word': 'mouth.', 'start': 9.65, 'end': 9.911, 'score': 0.928}]}]
Timestamp -> 1.993 - 2.615
FPS:  29.97
Shape:  (128, 27)
Saved Data
Timestamp -> 3.777 - 4.759
FPS:  29.97
Shape:  (128, 43)
Saved Data
Timestamp -> 7.105 - 7.786
FPS:  29.97
Shape:  (128, 30)
Saved Data
Timestamp -> 9.309 - 9.59
FPS:  29.97
Shape:  (128, 13)
Saved Data
Timestamp -> 9.65 - 9.911
FPS:  29.97
Shape:  (128, 12)
Saved Data

                                                                    

MoviePy - Done.




Detected language: en (0.98) in first 30s of audio...
[{'start': 0.009, 'end': 10.026, 'text': ' Caucasian and African American so you do see on the things that they go through and the things that they experience in in America I do believe that we should shed light', 'words': [{'word': 'Caucasian', 'start': 0.009, 'end': 0.57, 'score': 0.728}, {'word': 'and', 'start': 0.59, 'end': 0.67, 'score': 0.166}, {'word': 'African', 'start': 0.79, 'end': 1.131, 'score': 0.593}, {'word': 'American', 'start': 1.151, 'end': 1.612, 'score': 0.912}, {'word': 'so', 'start': 1.652, 'end': 1.832, 'score': 0.94}, {'word': 'you', 'start': 1.872, 'end': 2.012, 'score': 0.806}, {'word': 'do', 'start': 2.032, 'end': 2.273, 'score': 0.845}, {'word': 'see', 'start': 2.393, 'end': 2.674, 'score': 0.782}, {'word': 'on', 'start': 3.054, 'end': 3.134, 'score': 0.983}, {'word': 'the', 'start': 3.174, 'end': 3.275, 'score': 0.986}, {'word': 'things', 'start': 3.335, 'end': 3.595, 'score': 0.929}, {'word': 'that', 's

                                                        

MoviePy - Done.




Detected language: en (0.98) in first 30s of audio...
[{'start': 1.572, 'end': 9.866, 'text': ' I just feel like bad diets have become a marketing gimmick because people nowadays with technology I feel that plays a huge part of it.', 'words': [{'word': 'I', 'start': 1.572, 'end': 1.732, 'score': 0.213}, {'word': 'just', 'start': 1.772, 'end': 1.972, 'score': 0.57}, {'word': 'feel', 'start': 1.992, 'end': 3.295, 'score': 0.428}, {'word': 'like', 'start': 3.335, 'end': 3.495, 'score': 0.88}, {'word': 'bad', 'start': 3.595, 'end': 3.795, 'score': 0.693}, {'word': 'diets', 'start': 3.835, 'end': 4.076, 'score': 0.49}, {'word': 'have', 'start': 4.096, 'end': 4.216, 'score': 0.271}, {'word': 'become', 'start': 4.256, 'end': 4.517, 'score': 0.826}, {'word': 'a', 'start': 4.537, 'end': 4.557, 'score': 0.971}, {'word': 'marketing', 'start': 4.617, 'end': 4.997, 'score': 0.837}, {'word': 'gimmick', 'start': 5.038, 'end': 5.438, 'score': 0.782}, {'word': 'because', 'start': 5.619, 'end': 5.959, '

## Zip folder to download

In [None]:
# !zip -r FAKE_02.zip /kaggle/working/FAKE

In [None]:
import shutil

def remove_directory_and_contents(directory_path):
    """Recursively delete `directory_path` and report the outcome on stdout.

    Any failure is reported in the printed message instead of being raised.
    """
    try:
        shutil.rmtree(directory_path)
        outcome = f"Successfully removed the directory and all its contents: {directory_path}"
    except Exception as error:
        outcome = f"Failed to remove the directory {directory_path}. Reason: {error}"
    print(outcome)

# Example usage
# WARNING: this deletes the whole Kaggle working directory, including the REAL/ and
# FAKE/ output folders written by the preprocessing loop. Run it only after the
# results have been zipped and downloaded; the zip cell that follows expects
# /kaggle/working/REAL to still exist.
directory_path = "/kaggle/working/"
remove_directory_and_contents(directory_path)

In [None]:
!zip -r REAL_GS_02.zip /kaggle/working/REAL