In [4]:
# Import important libraries 
import os
import pickle

import gdown
import numpy as np

In [5]:
# Google Drive file IDs, one per pickled feature file
# (RGB and audio, for commercial and non-commercial videos).
commercial_Rgb_Features_file_id = "1SS1P4E_4i4B5ZPVF5TkoZKGE2hixJZeB"
commercial_Audio_Features_file_id = "14ENBMvm1EfW98ZgXpcfFE7IJ8dZe_-m1"
nonCommercial_rgb_features_file_id = "1SWvWM5YEDdBmfizxoNKVFEX7EU6hjqkx"
nonCommercial_Audio_features_file_id = "1zXyn55QkMPz0wDWoqM9bgTqDCbhPO_-f"

# Local file names (relative to the working directory) that each download is written to.
commercial_Rgb_Features_destination = "commercial_Rgb.pkl"
commercial_Audio_Features_destination = "commercial_Audio.pkl"
nonCommercial_Rgb_features_destination = "non_Commercial_Rgb.pkl"
nonCommercial_Audio_features_destination = "non_Commercial_Audio.pkl"

In [6]:
def download_file_from_google_drive(file_id, destination, overwrite=False):
    """
    Download a file from Google Drive unless a non-empty copy already exists locally.

    Parameters:
    - file_id (str): The unique file ID from Google Drive.
    - destination (str): The local path where the file should be saved.
    - overwrite (bool): If True, download even when `destination` already exists.
      Defaults to False so that re-running the notebook does not fetch the
      (large) feature files again.

    Raises:
    - RuntimeError: If `gdown.download` returns None, which some gdown versions
      do to signal a failed download instead of raising.
    """
    # Skip the network round-trip when the file is already on disk; an empty
    # file is treated as missing so a broken earlier attempt gets retried.
    already_present = os.path.isfile(destination) and os.path.getsize(destination) > 0
    if already_present and not overwrite:
        print(f"{destination} already exists, skipping download")
        return

    url = f"https://drive.google.com/uc?id={file_id}"
    saved_to = gdown.download(url, destination, quiet=False)
    if saved_to is None:
        raise RuntimeError(
            f"Download of Google Drive file {file_id!r} to {destination!r} failed"
        )

In [7]:
# Fetch the four feature files in order: commercial RGB, commercial audio,
# non-commercial RGB, non-commercial audio.
feature_downloads = (
    (commercial_Rgb_Features_file_id, commercial_Rgb_Features_destination),
    (commercial_Audio_Features_file_id, commercial_Audio_Features_destination),
    (nonCommercial_rgb_features_file_id, nonCommercial_Rgb_features_destination),
    (nonCommercial_Audio_features_file_id, nonCommercial_Audio_features_destination),
)
for drive_file_id, local_path in feature_downloads:
    download_file_from_google_drive(drive_file_id, local_path)

Downloading...
From (original): https://drive.google.com/uc?id=1SS1P4E_4i4B5ZPVF5TkoZKGE2hixJZeB
From (redirected): https://drive.google.com/uc?id=1SS1P4E_4i4B5ZPVF5TkoZKGE2hixJZeB&confirm=t&uuid=cdbad2a6-7c8c-4121-a16d-453587b57c6e
To: /Users/Aryan/Documents/Projects/2024_GSoC@BeagleBoard.org/gsoc_2024-enhanced_media_experience_with_ai-powered_commercial_detection_and_replacement/Model/datasetPreProcessing/commercial_Rgb.pkl
100%|██████████████████████████████████████████████████████████████████████████████████████████| 980M/980M [01:11<00:00, 13.8MB/s]
Downloading...
From (original): https://drive.google.com/uc?id=14ENBMvm1EfW98ZgXpcfFE7IJ8dZe_-m1
From (redirected): https://drive.google.com/uc?id=14ENBMvm1EfW98ZgXpcfFE7IJ8dZe_-m1&confirm=t&uuid=5e7d2645-8777-4ff1-9b98-32a9e43546f3
To: /Users/Aryan/Documents/Projects/2024_GSoC@BeagleBoard.org/gsoc_2024-enhanced_media_experience_with_ai-powered_commercial_detection_and_replacement/Model/datasetPreProcessing/commercial_Audio.pkl
100%|██

In [8]:
# Function for loading Audio-Visual Features of Commercial and Non-Commercial Videos
def _load_pickle(path):
    """Unpickle and return the object stored in the file at `path`."""
    # NOTE: pickle.load can execute arbitrary code; only use it on trusted files.
    with open(path, 'rb') as f:
        return pickle.load(f)


def load_features(
    commercial_rgb_path='commercial_Rgb.pkl',
    commercial_audio_path='commercial_Audio.pkl',
    non_commercial_rgb_path='non_Commercial_Rgb.pkl',
    non_commercial_audio_path='non_Commercial_Audio.pkl',
):
    """
    Load the pickled RGB and audio features of commercial and non-commercial videos.

    Parameters:
    - commercial_rgb_path (str): Pickle file holding the commercial RGB features.
    - commercial_audio_path (str): Pickle file holding the commercial audio features.
    - non_commercial_rgb_path (str): Pickle file holding the non-commercial RGB features.
    - non_commercial_audio_path (str): Pickle file holding the non-commercial audio features.
      All four default to the file names written by the download step.

    Returns:
    - tuple: (commercial_rgb, commercial_audio, non_commercial_rgb, non_commercial_audio),
      each being whatever object was stored in the corresponding pickle file.

    Raises:
    - FileNotFoundError: If any of the four files does not exist (there is no
      empty-list fallback).
    """
    commercial_loaded_rgb = _load_pickle(commercial_rgb_path)
    commercial_loaded_audio = _load_pickle(commercial_audio_path)
    non_commercial_loaded_rgb = _load_pickle(non_commercial_rgb_path)
    non_commercial_loaded_audio = _load_pickle(non_commercial_audio_path)

    # The first two counts are for commercial videos, the last two for non-commercial ones.
    print(f'Number of rgb features: {len(commercial_loaded_rgb)}')
    print(f'Number of audio features: {len(commercial_loaded_audio)}')
    print(f'Number of rgb features: {len(non_commercial_loaded_rgb)}')
    print(f'Number of audio features: {len(non_commercial_loaded_audio)}')
    print("Audio-Visual Features Loaded Successfully")

    return commercial_loaded_rgb, commercial_loaded_audio, non_commercial_loaded_rgb, non_commercial_loaded_audio

In [9]:
# Unpack the four collections returned by load_features():
# commercial RGB/audio first, then non-commercial RGB/audio.
commercialRgb, commercialAudio, nonCommercialRgb, nonCommercialAudio = load_features()

Number of rgb features: 4364
Number of audio features: 4364
Number of rgb features: 4600
Number of audio features: 4600
Audio-Visual Features Loaded Successfully


In [5]:
# Function for merging audio-visual features
def merge_visual_audio_features(visual_features, audio_features):
    """
    Merge the visual and audio features of each video frame by frame.

    Parameters:
    - visual_features: sequence of length num_videos; element i holds the per-frame
      visual vectors of video i, shape (num_frames[i], visual_dim).
    - audio_features: sequence of length num_videos; element i holds the per-frame
      audio vectors of video i, shape (num_frames[i], audio_dim).
      Both sequences must have the same length, and each video must have the
      same number of frames in both.

    Returns:
    - merged_features: list of length num_videos, where element i is a numpy array
      of shape (num_frames[i], visual_dim + audio_dim). No padding is applied, so
      arrays of different videos may have different numbers of rows.

    Raises:
    - ValueError: If the two sequences differ in length, or if the visual and audio
      arrays of a video cannot be concatenated along the feature axis.

    Note:
    - In this notebook visual_dim is 1024 and audio_dim is 128, giving 1152 values per frame.
    """
    visual_features = list(visual_features)
    audio_features = list(audio_features)

    # zip() would silently drop the surplus videos of the longer input.
    if len(visual_features) != len(audio_features):
        raise ValueError(
            f"Number of videos differs: {len(visual_features)} visual vs "
            f"{len(audio_features)} audio"
        )

    merged_features = []

    for index, (visual, audio) in enumerate(zip(visual_features, audio_features)):
        # Convert each per-video collection to a numpy array
        visual_array = np.array(visual)
        audio_array = np.array(audio)

        # Concatenate along the feature axis; re-raise with the video index so a
        # malformed entry can be located.
        try:
            merged_video_features = np.concatenate((visual_array, audio_array), axis=1)
        except ValueError as exc:
            raise ValueError(
                f"Cannot merge video {index}: visual shape {visual_array.shape}, "
                f"audio shape {audio_array.shape}"
            ) from exc
        merged_features.append(merged_video_features)

    return merged_features

In [5]:
# Merge the RGB and audio features of the commercial videos.
commercialFeatures = merge_visual_audio_features(commercialRgb, commercialAudio)

In [6]:
# Merge the RGB and audio features of the non-commercial videos.
nonCommercialFeatures = merge_visual_audio_features(nonCommercialRgb, nonCommercialAudio)

In [7]:
# 1024(Visual frame features) + 128(Audio Frame features) = 1152(Visual + Audio Frame features)
# Index [0][0] selects the first frame of the first video; len() of the whole
# collection would report the number of videos instead of the per-frame length.
print(f'Length of each commercialFeatures video-frame is {len(commercialFeatures[0][0])}')
print(f'Length of each nonCommercialFeatures video-frame is {len(nonCommercialFeatures[0][0])}')

Length of each commercialFeatures video-frame is 1152
Length of each nonCommercialFeatures video-frame is 1152


In [12]:
# Saving the merged Commercial features 
# Write the merged commercial feature list to 'commercialFeatures.pkl'.
with open('commercialFeatures.pkl', 'wb') as out_file:
    pickle.dump(commercialFeatures, out_file)

In [8]:
# Saving the merged nonCommercial features 
# Write the merged non-commercial feature list to 'nonCommercialFeatures.pkl'.
with open('nonCommercialFeatures.pkl', 'wb') as out_file:
    pickle.dump(nonCommercialFeatures, out_file)

# Verifying the merging

### Commercial Features

In [6]:
# len() of the merged list is the number of videos, so label it as such.
print(f'Number of commercialFeatures videos is {len(commercialFeatures)}')

Length of each commercialFeatures video-frame is 4364


In [8]:
# Merged vector at index [20][15]; its leading values should match the RGB vector
# and its trailing values the audio vector printed in the next two cells.
print(commercialFeatures[20][15])

[115  73  89 ... 165 249 255]


In [9]:
# RGB-only vector at the same index [20][15], for comparison with the start of the merged vector.
print(commercialRgb[20][15])

tf.Tensor([115  73  89 ... 109 116 169], shape=(1024,), dtype=uint8)


In [10]:
# Audio values from position 100 onward at index [20][15], for comparison with the end of the merged vector.
print(commercialAudio[20][15][100:])

tf.Tensor(
[118  69  70   0  58 100  98 152 103 139 122  83 146 148   0 147 154 102
  93 129 255  73  32 247 233 165 249 255], shape=(28,), dtype=uint8)


In [19]:
# Number of entries in commercialRgb[19]; the merged entry at the same index should have the same length.
print(len(commercialRgb[19]))

300


In [20]:
# Number of rows in the merged entry at index 19, to compare with len(commercialRgb[19]).
print(len(commercialFeatures[19]))

300


### Non-Commercial Features

In [15]:
# len() of the merged list is the number of videos, so label it as such.
print(f'Number of nonCommercialFeatures videos is {len(nonCommercialFeatures)}')

Length of each nonCommercialFeatures video-frame is 4600


In [9]:
# Merged vector at index [20][15]; its leading values should match the RGB vector
# and its trailing values the audio vector printed in the next two cells.
print(nonCommercialFeatures[20][15])

[167 160  89 ...  83 120 255]


In [10]:
# RGB-only vector at the same index [20][15], for comparison with the start of the merged vector.
print(nonCommercialRgb[20][15])

tf.Tensor([167 160  89 ... 191  15 146], shape=(1024,), dtype=uint8)


In [11]:
# Audio values from position 100 onward at index [20][15], for comparison with the end of the merged vector.
print(nonCommercialAudio[20][15][100:])

tf.Tensor(
[ 83 170 139 194  97 181 124 194 219 165 255   0   0 140 126 137 228  47
 156  45 100 164 133 193 226  83 120 255], shape=(28,), dtype=uint8)


In [12]:
# Number of entries in nonCommercialRgb[19]; the merged entry at the same index should have the same length.
print(len(nonCommercialRgb[19]))

148


In [13]:
# Number of rows in the merged entry at index 19, to compare with len(nonCommercialRgb[19]).
print(len(nonCommercialFeatures[19]))

148
