In [1]:
import os
import sys
import cv2
import librosa
import numpy as np
import tensorflow as tf
import tensorflow_hub as hub
from moviepy.editor import VideoFileClip
import imageio
import matplotlib.pyplot as plt
import wave
import resampy
import soundfile as sf
import sounddevice as sd

In [2]:
# Frame resolution (in pixels) used as the spatial input size of the image model.
img_height = 299
img_width = 299

In [3]:
# Load pre-trained models
# Image branch: InceptionV3 with ImageNet weights, classification head removed
# and global average pooling applied to the last convolutional output.
inception_model = tf.keras.applications.InceptionV3(
    weights='imagenet',
    include_top=False,
    pooling='avg',
    input_shape=(img_height, img_width, 3),
)
# Audio branch: VGGish fetched through TensorFlow Hub.
audiovgg_model = hub.load(
    'https://kaggle.com/models/google/vggish/frameworks/TensorFlow2/variations/vggish/versions/1'
)


In [11]:
def preprocess_frame(frame):
    """Convert one video frame into a batch-of-one input for the InceptionV3 model.

    Args:
        frame: Image array accepted by ``cv2.resize``.
            NOTE(review): presumably an H x W x 3 uint8 RGB frame as returned by
            moviepy's ``get_frame`` - confirm against the caller.

    Returns:
        A float32 array with a leading batch axis of size 1, resized to
        ``(img_height, img_width)``. For 8-bit input the values lie in [-1, 1].

    Note:
        Earlier revisions scaled pixels to [0, 1]. Feature files produced with
        that scaling are not comparable with the ones produced now and should
        be re-extracted.
    """
    # cv2.resize takes the target size as (width, height).
    resized_frame = cv2.resize(frame, (img_width, img_height))
    # Keras' InceptionV3 ImageNet weights expect pixels scaled to [-1, 1]
    # (what `inception_v3.preprocess_input` does), not [0, 1].
    processed_frame = resized_frame.astype("float32") / 127.5 - 1.0
    # Add the batch dimension expected by `model.predict`.
    return np.expand_dims(processed_frame, axis=0)

def preprocess_audio(audio, target_sr=16000):
    """Turn an audio clip into a mono float32 waveform ready for the VGGish model.

    The waveform is cleaned of NaN/Inf samples, down-mixed to mono,
    peak-normalised to [-1.0, +1.0], zero-padded to at least 0.96 seconds and
    resampled to ``target_sr``.

    Args:
        audio: Object exposing ``to_soundarray()`` and ``fps``.
            NOTE(review): presumably a moviepy audio clip - confirm against the caller.
        target_sr: Sample rate of the returned waveform. Defaults to 16000 Hz,
            the rate VGGish is documented to expect; the previous hard-coded
            44100 Hz made the model interpret the audio as slowed down.

    Returns:
        1-D ``np.float32`` array sampled at ``target_sr``. If anything fails,
        the error is printed and one second of silence (``target_sr`` zeros)
        is returned so the caller can keep going.
    """
    min_duration = 0.96  # seconds; shorter clips are zero-padded up to this length

    try:
        samples = np.asarray(audio.to_soundarray(), dtype=np.float64)

        # Replace NaN or infinite samples with silence.
        samples = np.nan_to_num(samples, nan=0.0, posinf=0.0, neginf=0.0)

        # Convert stereo (or multi-channel) to mono.
        if samples.ndim > 1:
            samples = samples.mean(axis=1)

        # Normalize audio to range [-1.0, +1.0]; an empty or all-zero clip is left untouched.
        peak = np.max(np.abs(samples)) if samples.size else 0.0
        if peak > 0:
            samples = samples / peak

        # Pad at the source rate so the resampler never receives an empty signal.
        shortfall = int(audio.fps * min_duration) - len(samples)
        if shortfall > 0:
            samples = np.pad(samples, (0, shortfall), mode='constant')

        resampled = librosa.resample(y=samples, orig_sr=audio.fps, target_sr=target_sr)

        # Rounding in the resampler must not leave the result below the minimum length.
        shortfall = int(np.ceil(target_sr * min_duration)) - len(resampled)
        if shortfall > 0:
            resampled = np.pad(resampled, (0, shortfall), mode='constant')

        return resampled.astype(np.float32)

    except Exception as e:
        # Deliberate best effort: report the problem and fall back to silence.
        print("Error preprocessing audio:", e)
        return np.zeros((target_sr,), dtype=np.float32)

In [12]:
def extract_frames_and_audio(video, output_dir, start_time, end_time, segment_index,clip_index):
    """Compute visual and audio features for one time segment of a video.

    The visual features come from the frame at the middle of the segment, run
    through ``inception_model``; the audio features are the first element of
    the ``audiovgg_model`` output for the segment's audio track.

    Args:
        video: Clip object exposing ``get_frame(t)`` and ``subclip(start, end)``.
        output_dir: Unused; kept so existing callers keep working.
        start_time: Segment start, in seconds.
        end_time: Segment end, in seconds.
        segment_index: Unused; kept so existing callers keep working.
        clip_index: Unused; kept so existing callers keep working.

    Returns:
        A ``(visual, audio)`` tuple. Either element is ``None`` when it is not
        available; both are ``None`` when the segment has no positive duration.
    """
    if end_time - start_time <= 0:
        return None, None  # Invalid segment duration: nothing to extract

    # Check the frame BEFORE using it, so a missing frame yields `None`
    # visual features instead of crashing inside the preprocessing step.
    frame = video.get_frame((start_time + end_time) / 2)
    if frame is None:
        print('NO FRAME HERE !!!!!!!!!!!')
        visual_features = None
    else:
        visual_features = inception_model.predict(preprocess_frame(frame), verbose=0)

    audio = video.subclip(start_time, end_time).audio
    if audio is None:
        return visual_features, None  # No audio track in this segment

    return visual_features, audiovgg_model(preprocess_audio(audio))[0]

In [13]:
def _fuse_segment_features(visual, audio, visual_dim=2048, audio_dim=128):
    """Concatenate one segment's visual and audio features, zero-filling whichever is missing.

    Returns the fused 1-D vector and a category key: 'VA' (both present),
    'VAn' (visual only), 'VnA' (audio only) or 'VnAn' (neither).
    """
    visual_flat = np.zeros((visual_dim,)) if visual is None else visual.flatten()
    audio_flat = np.zeros((audio_dim,)) if audio is None else audio.numpy().flatten()
    category = ('Vn' if visual is None else 'V') + ('An' if audio is None else 'A')
    return np.concatenate((visual_flat, audio_flat), axis=0), category


def extract_visual_and_audio_features(folder_path, output_dir, output_filename, segment_duration=1, num_segments=10, clip_duration=10):
    """Extract fused visual+audio features for every ``.mp4`` file in a folder.

    Each video is split into clips and each clip into ``num_segments`` segments
    of ``segment_duration`` seconds. Every segment is passed to
    ``extract_frames_and_audio``; the resulting visual and audio features are
    flattened, zero-filled when missing (2048 visual / 128 audio values) and
    concatenated. One array per video is saved to ``<output_dir>/<video name>.npy``
    and a summary of how many segments had which modality is printed at the end.

    Args:
        folder_path: Directory containing the video files.
        output_dir: Directory receiving the per-video ``.npy`` files (created if missing).
        output_filename: Unused; kept so existing callers keep working.
        segment_duration: Length of one segment, in seconds.
        num_segments: Number of segments taken from each clip.
        clip_duration: Nominal clip length, in seconds, used to decide how many
            clips a video is split into.

    Returns:
        None. Results are written to disk and progress is printed.
    """
    counts = {'VnAn': 0, 'VA': 0, 'VAn': 0, 'VnA': 0}

    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Sorted for a reproducible processing order; extension match is case-insensitive.
    for filename in sorted(os.listdir(folder_path)):
        if not filename.lower().endswith('.mp4'):
            continue

        all_visual_and_audio_data = []
        video = VideoFileClip(os.path.join(folder_path, filename))
        try:
            videoName = os.path.splitext(os.path.basename(video.filename))[0]
            print('Video : ',videoName)

            total_duration = video.duration or 0

            # One clip per started `clip_duration` block of the video.
            total_clips = int(total_duration // clip_duration)
            if total_duration % clip_duration > 0:
                total_clips += 1

            # Spread the clips evenly over the video, truncated to whole seconds.
            # NOTE(review): when this is shorter than num_segments * segment_duration,
            # the trailing segments have non-positive length and are zero-filled
            # (extract_frames_and_audio returns (None, None)) - confirm this is intended.
            rounded_clip_duration = int(total_duration / total_clips) if total_clips else 0

            for clip_index in range(total_clips):
                start_time = clip_index * rounded_clip_duration
                end_time = min(start_time + rounded_clip_duration, total_duration)

                clip_visual_and_audio = []
                for segment_index in range(num_segments):
                    segment_start_time = start_time + segment_index * segment_duration
                    segment_end_time = min(segment_start_time + segment_duration, end_time)

                    visual, audio = extract_frames_and_audio(video, output_dir, segment_start_time, segment_end_time, segment_index, clip_index)
                    visual_and_audio, category = _fuse_segment_features(visual, audio)
                    counts[category] += 1
                    clip_visual_and_audio.append(visual_and_audio)

                all_visual_and_audio_data.append(clip_visual_and_audio)
        finally:
            # Release the file handle even if a segment fails.
            video.close()

        all_visual_and_audio_data = np.array(all_visual_and_audio_data)

        array_path = os.path.join(output_dir, videoName+'.npy')
        np.save(array_path, all_visual_and_audio_data)
        print(f"Visual and audio data saved to {array_path}, shape is : {all_visual_and_audio_data.shape}")

    total = sum(counts.values())

    def _share(count):
        # Avoid dividing by zero when no segment was processed at all.
        return count / total if total else 0.0

    print('Total features traiter : ',str(total))
    print('no audio no visual     : ',str(counts['VnAn']) ,' Pourcentage : ',str(_share(counts['VnAn'])))
    print('audio and visual       : ',str(counts['VA']),' Pourcentage : ',str(_share(counts['VA'])))
    print('visual but no audio    : ',str(counts['VAn']),' Pourcentage : ',str(_share(counts['VAn'])))
    print('audio but no visual    : ',str(counts['VnA']),' Pourcentage : ',str(_share(counts['VnA'])))

In [None]:
# Input folder with the videos to process, and where the extracted features go.
# The extraction function writes one .npy file per video into `output_dir`.
output_filename = 'porn_features.npy'
output_dir = r'A:\AI DB\LSPD\Videos\Features_Porn'
folder_path = r'A:\AI DB\LSPD\Videos\porn_final'

extract_visual_and_audio_features(
    folder_path=folder_path,
    output_dir=output_dir,
    output_filename=output_filename,
)

In [14]:
# Input folder with the videos to process, and where the extracted features go.
# The extraction function writes one .npy file per video into `output_dir2`.
output_filename2 = 'normal_features.npy'
output_dir2 = r'A:\AI DB\LSPD\Videos\Features_Normal'
folder_path2 = r'A:\AI DB\LSPD\Videos\normal_final'

extract_visual_and_audio_features(
    folder_path=folder_path2,
    output_dir=output_dir2,
    output_filename=output_filename2,
)

Video :  nonporn_1
Visual and audio data saved to A:\AI DB\LSPD\Videos\Features_Normal\nonporn_1.npy, shape is : (2, 10, 2176)
Video :  nonporn_10
Visual and audio data saved to A:\AI DB\LSPD\Videos\Features_Normal\nonporn_10.npy, shape is : (36, 10, 2176)
Video :  nonporn_100
Visual and audio data saved to A:\AI DB\LSPD\Videos\Features_Normal\nonporn_100.npy, shape is : (6, 10, 2176)
Video :  nonporn_1000
Visual and audio data saved to A:\AI DB\LSPD\Videos\Features_Normal\nonporn_1000.npy, shape is : (10, 10, 2176)
Video :  nonporn_1001
Visual and audio data saved to A:\AI DB\LSPD\Videos\Features_Normal\nonporn_1001.npy, shape is : (15, 10, 2176)
Video :  nonporn_1002
Visual and audio data saved to A:\AI DB\LSPD\Videos\Features_Normal\nonporn_1002.npy, shape is : (19, 10, 2176)
Video :  nonporn_1003
Visual and audio data saved to A:\AI DB\LSPD\Videos\Features_Normal\nonporn_1003.npy, shape is : (13, 10, 2176)
Video :  nonporn_1004
Visual and audio data saved to A:\AI DB\LSPD\Videos\Fe



Visual and audio data saved to A:\AI DB\LSPD\Videos\Features_Normal\nonporn_11.npy, shape is : (118, 10, 2176)
Video :  nonporn_110
Visual and audio data saved to A:\AI DB\LSPD\Videos\Features_Normal\nonporn_110.npy, shape is : (3, 10, 2176)
Video :  nonporn_1100
Visual and audio data saved to A:\AI DB\LSPD\Videos\Features_Normal\nonporn_1100.npy, shape is : (12, 10, 2176)
Video :  nonporn_1101
Visual and audio data saved to A:\AI DB\LSPD\Videos\Features_Normal\nonporn_1101.npy, shape is : (7, 10, 2176)
Video :  nonporn_1102
Visual and audio data saved to A:\AI DB\LSPD\Videos\Features_Normal\nonporn_1102.npy, shape is : (19, 10, 2176)
Video :  nonporn_1103
Visual and audio data saved to A:\AI DB\LSPD\Videos\Features_Normal\nonporn_1103.npy, shape is : (2, 10, 2176)
Video :  nonporn_1104
Visual and audio data saved to A:\AI DB\LSPD\Videos\Features_Normal\nonporn_1104.npy, shape is : (4, 10, 2176)
Video :  nonporn_1105
Visual and audio data saved to A:\AI DB\LSPD\Videos\Features_Normal\n

KeyboardInterrupt: 

In [117]:
# `array_path` only exists as a local variable inside the extraction function,
# so this cell used to depend on leftover kernel state. Instead, gather every
# per-video feature file written to `output_dir` and stack them clip-wise.
# NOTE(review): assumes the intent is to load ALL videos of the class rather
# than a single file - confirm.
feature_files = sorted(
    os.path.join(output_dir, name)
    for name in os.listdir(output_dir)
    if name.endswith('.npy')
)
# The saved arrays are plain numeric data, so pickle support is not needed.
# Videos that produced no clips are stored as empty 1-D arrays and are skipped.
per_video_features = [np.load(path) for path in feature_files]
per_video_features = [features for features in per_video_features if features.ndim == 3]
if not per_video_features:
    raise FileNotFoundError(f"No feature arrays found in {output_dir}")

visual_and_audio_data = np.concatenate(per_video_features, axis=0)
num_samples = len(visual_and_audio_data)
labels = np.ones(num_samples)  # Create labels filled with 1 indicating "porno"

In [52]:
# Report the dimensions of the loaded feature array.
print(f" visual audio data shape :  {visual_and_audio_data.shape}")