In [1]:
pip install tensorflow torch transformers pandas opencv-python-headless google-cloud-storage matplotlib


Collecting torch
  Using cached torch-2.4.1-cp310-cp310-manylinux1_x86_64.whl.metadata (26 kB)
Collecting transformers
  Using cached transformers-4.45.2-py3-none-any.whl.metadata (44 kB)
Collecting opencv-python-headless
  Using cached opencv_python_headless-4.10.0.84-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (20 kB)
Collecting sympy (from torch)
  Using cached sympy-1.13.3-py3-none-any.whl.metadata (12 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Using cached nvidia_cudnn_cu12-9.1.0.70-py3-non

In [2]:
import tensorflow as tf

gpus = tf.config.list_physical_devices('GPU')
if gpus:
    print("GPUs Available:", len(gpus), "-", [gpu.name for gpu in gpus])
else:
    print("No GPU detected.")


2024-10-14 02:01:50.540660: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


GPUs Available: 1 - ['/physical_device:GPU:0']


2024-10-14 02:02:39.956966: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-10-14 02:02:45.467838: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-10-14 02:02:45.473300: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysf

In [3]:
import tensorflow as tf
import torch
from transformers import T5Tokenizer
import pandas as pd
import cv2
from google.cloud import storage
import matplotlib


In [4]:
import pandas as pd
from google.cloud import storage

# Set up the Google Cloud Storage client
client = storage.Client()
bucket = client.bucket('whathegifbucket')
blob = bucket.blob('tgif-v2.0.tsv')
blob.download_to_filename('tgif-v2.0.tsv')

# Load the dataset
df = pd.read_csv('tgif-v2.0.tsv', sep='\t')

# Show the first few rows of the dataset for verification
print(df.head())


   Unnamed: 0                                              links  \
0           1  https://38.media.tumblr.com/9f6c25cc350f12aa74...   
1           2  https://38.media.tumblr.com/9ead028ef62004ef6a...   
2           3  https://38.media.tumblr.com/9f43dc410be85b1159...   
3           4  https://38.media.tumblr.com/9f659499c8754e40cf...   
4           5  https://38.media.tumblr.com/9ed1c99afa7d714118...   

                                         description  
0  a man is glaring, and someone with sunglasses ...  
1           a cat tries to catch a mouse on a tablet  
2                   a man dressed in red is dancing.  
3     an animal comes close to another in the jungle  
4  a man in a hat adjusts his tie and makes a wei...  


In [5]:
# Select the first 100 GIFs for initial processing
sample_df = df.head(100)


In [6]:
import cv2
import numpy as np

# Example function to extract frames from a GIF URL and convert to a tensor
def url_to_tensor(url, num_frames=32, frame_size=(224, 224)):
    cap = cv2.VideoCapture(url)
    frames = []

    while True:
        ret, frame = cap.read()
        if not ret or len(frames) >= num_frames:
            break
        frame_resized = cv2.resize(frame, frame_size)
        frames.append(frame_resized)

    cap.release()
    
    # Pad with empty frames if fewer than num_frames extracted
    while len(frames) < num_frames:
        frames.append(np.zeros((frame_size[0], frame_size[1], 3), dtype=np.uint8))

    return np.array(frames, dtype=np.float32)

# Testing with one URL from the subset
url_example = sample_df['links'].iloc[0]
tensor_example = url_to_tensor(url_example)

# Print tensor shape to verify
print("Tensor shape:", tensor_example.shape)


Tensor shape: (32, 224, 224, 3)


In [7]:
import json
import os

# Create local directory to store temporary tensor files
os.makedirs('processed_data', exist_ok=True)

# Create a dictionary to store metadata
metadata = {}

# Process each GIF and save tensor
for idx, row in sample_df.iterrows():
    gif_id = row['Unnamed: 0']
    url = row['links']
    description = row['description']

    # Extract frames and convert to tensor
    tensor = url_to_tensor(url)

    # Save the tensor locally
    tensor_filename = f'processed_data/gif_{gif_id}_tensor.npy'
    np.save(tensor_filename, tensor)

    # Upload the tensor to GCS
    blob = bucket.blob(f'processed_data/gif_{gif_id}_tensor.npy')
    blob.upload_from_filename(tensor_filename)

    # Save metadata
    metadata[gif_id] = {
        'tensor_path': f'gs://whathegifbucket/processed_data/gif_{gif_id}_tensor.npy',
        'description': description,
        'url': url
    }

    # Print progress
    print(f'Processed and uploaded tensor for GIF ID: {gif_id}')

# Save metadata to JSON file
metadata_filename = 'processed_data/metadata.json'
with open(metadata_filename, 'w') as f:
    json.dump(metadata, f)

# Upload metadata JSON to GCS
blob = bucket.blob('processed_data/metadata.json')
blob.upload_from_filename(metadata_filename)

print("All tensors processed and metadata saved.")


Processed and uploaded tensor for GIF ID: 1
Processed and uploaded tensor for GIF ID: 2
Processed and uploaded tensor for GIF ID: 3
Processed and uploaded tensor for GIF ID: 4
Processed and uploaded tensor for GIF ID: 5
Processed and uploaded tensor for GIF ID: 6
Processed and uploaded tensor for GIF ID: 7
Processed and uploaded tensor for GIF ID: 8
Processed and uploaded tensor for GIF ID: 9
Processed and uploaded tensor for GIF ID: 10
Processed and uploaded tensor for GIF ID: 11
Processed and uploaded tensor for GIF ID: 12
Processed and uploaded tensor for GIF ID: 13
Processed and uploaded tensor for GIF ID: 14
Processed and uploaded tensor for GIF ID: 15
Processed and uploaded tensor for GIF ID: 16
Processed and uploaded tensor for GIF ID: 17
Processed and uploaded tensor for GIF ID: 18
Processed and uploaded tensor for GIF ID: 19
Processed and uploaded tensor for GIF ID: 20
Processed and uploaded tensor for GIF ID: 21
Processed and uploaded tensor for GIF ID: 22
Processed and uploa

In [8]:
# List files in the 'processed_data/' folder of the GCS bucket
blobs = bucket.list_blobs(prefix='processed_data/')
print("Files in 'processed_data/' folder:")
for blob in blobs:
    print(blob.name)


Files in 'processed_data/' folder:
processed_data/gif_100_tensor.npy
processed_data/gif_10_tensor.npy
processed_data/gif_11_tensor.npy
processed_data/gif_12_tensor.npy
processed_data/gif_13_tensor.npy
processed_data/gif_14_tensor.npy
processed_data/gif_15_tensor.npy
processed_data/gif_16_tensor.npy
processed_data/gif_17_tensor.npy
processed_data/gif_18_tensor.npy
processed_data/gif_19_tensor.npy
processed_data/gif_1_tensor.npy
processed_data/gif_20_tensor.npy
processed_data/gif_21_tensor.npy
processed_data/gif_22_tensor.npy
processed_data/gif_23_tensor.npy
processed_data/gif_24_tensor.npy
processed_data/gif_25_tensor.npy
processed_data/gif_26_tensor.npy
processed_data/gif_27_tensor.npy
processed_data/gif_28_tensor.npy
processed_data/gif_29_tensor.npy
processed_data/gif_2_tensor.npy
processed_data/gif_30_tensor.npy
processed_data/gif_31_tensor.npy
processed_data/gif_32_tensor.npy
processed_data/gif_33_tensor.npy
processed_data/gif_34_tensor.npy
processed_data/gif_35_tensor.npy
processed

In [12]:
# Load metadata from the local JSON file we just downloaded
with open('processed_data/metadata.json', 'r') as f:
    metadata = json.load(f)

# Display part of the metadata for verification
print("Metadata for the first 5 GIFs:")
for gif_id, gif_data in list(metadata.items())[:5]:
    print(f"GIF ID: {gif_id}, Data: {gif_data}")


Metadata for the first 5 GIFs:
GIF ID: 1, Data: {'tensor_path': 'gs://whathegifbucket/processed_data/gif_1_tensor.npy', 'description': 'a man is glaring, and someone with sunglasses appears.', 'url': 'https://38.media.tumblr.com/9f6c25cc350f12aa74a7dc386a5c4985/tumblr_mevmyaKtDf1rgvhr8o1_500.gif'}
GIF ID: 2, Data: {'tensor_path': 'gs://whathegifbucket/processed_data/gif_2_tensor.npy', 'description': 'a cat tries to catch a mouse on a tablet', 'url': 'https://38.media.tumblr.com/9ead028ef62004ef6ac2b92e52edd210/tumblr_nok4eeONTv1s2yegdo1_400.gif'}
GIF ID: 3, Data: {'tensor_path': 'gs://whathegifbucket/processed_data/gif_3_tensor.npy', 'description': 'a man dressed in red is dancing.', 'url': 'https://38.media.tumblr.com/9f43dc410be85b1159d1f42663d811d7/tumblr_mllh01J96X1s9npefo1_250.gif'}
GIF ID: 4, Data: {'tensor_path': 'gs://whathegifbucket/processed_data/gif_4_tensor.npy', 'description': 'an animal comes close to another in the jungle', 'url': 'https://38.media.tumblr.com/9f659499c87

In [29]:
metadata

{'1': {'tensor_path': 'gs://whathegifbucket/processed_data/gif_1_tensor.npy',
  'description': 'a man is glaring, and someone with sunglasses appears.',
  'url': 'https://38.media.tumblr.com/9f6c25cc350f12aa74a7dc386a5c4985/tumblr_mevmyaKtDf1rgvhr8o1_500.gif'},
 '2': {'tensor_path': 'gs://whathegifbucket/processed_data/gif_2_tensor.npy',
  'description': 'a cat tries to catch a mouse on a tablet',
  'url': 'https://38.media.tumblr.com/9ead028ef62004ef6ac2b92e52edd210/tumblr_nok4eeONTv1s2yegdo1_400.gif'},
 '3': {'tensor_path': 'gs://whathegifbucket/processed_data/gif_3_tensor.npy',
  'description': 'a man dressed in red is dancing.',
  'url': 'https://38.media.tumblr.com/9f43dc410be85b1159d1f42663d811d7/tumblr_mllh01J96X1s9npefo1_250.gif'},
 '4': {'tensor_path': 'gs://whathegifbucket/processed_data/gif_4_tensor.npy',
  'description': 'an animal comes close to another in the jungle',
  'url': 'https://38.media.tumblr.com/9f659499c8754e40cf3f7ac21d08dae6/tumblr_nqlr0rn8ox1r2r0koo1_400.gif

In [13]:
import tensorflow as tf

# Set up multi-GPU training strategy
strategy = tf.distribute.MirroredStrategy()

# Print the number of GPUs being used
print('Number of devices: {}'.format(strategy.num_replicas_in_sync))


2024-10-14 02:20:06.862742: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-10-14 02:20:06.867710: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-10-14 02:20:06.870747: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysf

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0',)
Number of devices: 1


# 3D CNN

In [14]:
from tensorflow.keras.layers import Input, TimeDistributed, GlobalAveragePooling3D
from tensorflow.keras.models import Model
from tensorflow.keras.applications import InceptionV3

with strategy.scope():
    # Input layer for video tensors
    video_input = Input(shape=(32, 224, 224, 3))  # (num_frames, height, width, channels)

    # Load the base pretrained model (InceptionV3 is used as a stand-in for I3D)
    base_model = InceptionV3(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

    # Wrap the base model in TimeDistributed to apply it across frames
    x = TimeDistributed(base_model)(video_input)

    # Use Global Average Pooling to reduce the feature dimensions
    x = GlobalAveragePooling3D()(x)

    # Create the model
    model_3d_cnn = Model(inputs=video_input, outputs=x)

    # Print model summary to verify the structure
    model_3d_cnn.summary()


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Redu

In [25]:
from transformers import GPT2Tokenizer, GPT2LMHeadModel

# Load the GPT-2 model and tokenizer
gpt2_tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
gpt2_model = GPT2LMHeadModel.from_pretrained("gpt2")

# Example function to generate a description from features
def generate_description_gpt2(features):
    input_prompt = "GIF description: "
    inputs = gpt2_tokenizer(input_prompt, return_tensors="pt")
    outputs = gpt2_model.generate(inputs.input_ids, max_length=50, num_beams=4, early_stopping=True)

    description = gpt2_tokenizer.decode(outputs[0], skip_special_tokens=True)
    return description




In [26]:
# Redefine the tensor for the example GIF
tensor_example = np.load('processed_data/gif_1_tensor.npy')  # Load tensor for GIF ID 1

# Predict features using the 3D CNN model
example_features = model_3d_cnn.predict(tf.convert_to_tensor([tensor_example]))

# Test GPT-2 for generating description based on the extracted features
example_description_gpt2 = generate_description_gpt2(example_features[0])
print("Generated Description (GPT-2):", example_description_gpt2)




2024-10-14 02:29:08.341867: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_10' with dtype float and shape [1,32,224,224,3]
	 [[{{node Placeholder/_10}}]]
2024-10-14 02:29:08.342283: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_10' with dtype float and shape [1,32,224,224,3]
	 [[{{node Placeholder/_10}}]]
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Generated Description (GPT-2): GIF description: __________________

I'm not sure if it's because I don't like it, or if it's because I don't like it at all. I'm not sure if it's because I don't like it,


In [30]:
import json
from google.cloud import storage

# Define the metadata dictionary (assuming it's already available)
metadata_file_path = 'processed_data/metadata.json'

# Load existing metadata
with open(metadata_file_path, 'r') as f:
    metadata = json.load(f)

# Extend the function to assign even more detailed classes based on the description
def assign_extended_class(description):
    description = description.lower()
    if 'cat' in description or 'dog' in description or 'tiger' in description or 'animal' in description or 'pet' in description:
        return 'animal_interaction'
    elif 'dancing' in description or 'dancer' in description or 'performing' in description or 'singing' in description or 'cheering' in description:
        return 'performance'
    elif 'hugging' in description or 'kissing' in description or 'love' in description or 'couple' in description or 'embrace' in description or 'affectionate' in description:
        return 'affection'
    elif 'fighting' in description or 'attacking' in description or 'destroy' in description or 'exploding' in description or 'violence' in description or 'shooting' in description or 'punching' in description:
        return 'violence'
    elif 'jumping' in description or 'acrobat' in description or 'biking' in description or 'skateboard' in description or 'running' in description or 'exercise' in description or 'sports' in description or 'gymnastics' in description:
        return 'sport'
    elif 'laughing' in description or 'smiling' in description or 'talking' in description or 'conversation' in description or 'winks' in description or 'speaking' in description or 'discussion' in description:
        return 'conversation'
    elif 'mirror' in description or 'reflection' in description or 'looking at self' in description:
        return 'reflection'
    elif 'vehicle' in description or 'car' in description or 'driving' in description or 'racing' in description or 'bike' in description or 'skateboard' in description or 'truck' in description or 'bus' in description:
        return 'vehicle_related'
    elif 'magic' in description or 'supernatural' in description or 'ghost' in description or 'floating' in description or 'wizard' in description or 'witch' in description or 'spells' in description:
        return 'supernatural'
    elif 'food' in description or 'eating' in description or 'drink' in description or 'coffee' in description or 'kitchen' in description or 'restaurant' in description:
        return 'food_related'
    elif 'performer' in description or 'stage' in description or 'concert' in description or 'band' in description or 'orchestra' in description or 'musician' in description:
        return 'stage_performance'
    elif 'water' in description or 'swimming' in description or 'pool' in description or 'sea' in description or 'ocean' in description or 'lake' in description or 'diving' in description:
        return 'water_related'
    elif 'baby' in description or 'child' in description or 'kid' in description or 'young boy' in description or 'young girl' in description or 'infant' in description or 'toddler' in description:
        return 'children_related'
    elif 'drunk' in description or 'intoxicated' in description or 'beer' in description or 'alcohol' in description or 'bar' in description:
        return 'intoxication'
    elif 'fire' in description or 'burning' in description or 'smoke' in description or 'flames' in description or 'explosion' in description:
        return 'fire_related'
    elif 'computer' in description or 'laptop' in description or 'tablet' in description or 'phone' in description or 'technology' in description or 'device' in description:
        return 'technology_related'
    elif 'crying' in description or 'sad' in description or 'tears' in description or 'emotional' in description or 'upset' in description:
        return 'emotional'
    elif 'celebration' in description or 'party' in description or 'birthday' in description or 'wedding' in description or 'festival' in description or 'cheering' in description:
        return 'celebration'
    elif 'snow' in description or 'ice' in description or 'skiing' in description or 'snowboarding' in description or 'winter' in description:
        return 'winter_related'
    elif 'sunset' in description or 'sunrise' in description or 'nature' in description or 'landscape' in description or 'forest' in description or 'mountain' in description or 'scenery' in description:
        return 'nature_related'
    else:
        return 'miscellaneous'

# Add the extended class labels to the metadata
for gif_id, gif_data in metadata.items():
    description = gif_data['description']
    gif_data['class'] = assign_extended_class(description)

# Save the updated metadata back to JSON
updated_metadata_file_path = 'processed_data/metadata_class.json'
with open(updated_metadata_file_path, 'w') as f:
    json.dump(metadata, f, indent=4)

print(f"Updated metadata has been saved to '{updated_metadata_file_path}' with extended class labels.")

# Upload the updated metadata to Google Cloud Storage
def upload_to_gcs(bucket_name, source_file_name, destination_blob_name):
    """Uploads a file to the bucket."""
    storage_client = storage.Client()
    bucket = storage_client.bucket(bucket_name)
    blob = bucket.blob(destination_blob_name)

    blob.upload_from_filename(source_file_name)
    print(f"File {source_file_name} uploaded to {destination_blob_name}.")

# Set bucket details
bucket_name = 'whathegifbucket'
destination_blob_name = 'processed_data/metadata_class.json'

# Upload the updated metadata file to GCS
upload_to_gcs(bucket_name, updated_metadata_file_path, destination_blob_name)

Updated metadata has been saved to 'processed_data/metadata_class.json' with extended class labels.
File processed_data/metadata_class.json uploaded to processed_data/metadata_class.json.


In [31]:
# Load metadata from the local JSON file we just downloaded
with open('processed_data/metadata_class.json', 'r') as f:
    metadata = json.load(f)

# Display part of the metadata for verification
print("Metadata Class for the first 5 GIFs:")
for gif_id, gif_data in list(metadata.items())[:5]:
    print(f"GIF ID: {gif_id}, Data: {gif_data}")

Metadata Class for the first 5 GIFs:
GIF ID: 1, Data: {'tensor_path': 'gs://whathegifbucket/processed_data/gif_1_tensor.npy', 'description': 'a man is glaring, and someone with sunglasses appears.', 'url': 'https://38.media.tumblr.com/9f6c25cc350f12aa74a7dc386a5c4985/tumblr_mevmyaKtDf1rgvhr8o1_500.gif', 'class': 'miscellaneous'}
GIF ID: 2, Data: {'tensor_path': 'gs://whathegifbucket/processed_data/gif_2_tensor.npy', 'description': 'a cat tries to catch a mouse on a tablet', 'url': 'https://38.media.tumblr.com/9ead028ef62004ef6ac2b92e52edd210/tumblr_nok4eeONTv1s2yegdo1_400.gif', 'class': 'animal_interaction'}
GIF ID: 3, Data: {'tensor_path': 'gs://whathegifbucket/processed_data/gif_3_tensor.npy', 'description': 'a man dressed in red is dancing.', 'url': 'https://38.media.tumblr.com/9f43dc410be85b1159d1f42663d811d7/tumblr_mllh01J96X1s9npefo1_250.gif', 'class': 'performance'}
GIF ID: 4, Data: {'tensor_path': 'gs://whathegifbucket/processed_data/gif_4_tensor.npy', 'description': 'an animal

In [36]:
metadata

{'1': {'tensor_path': 'gs://whathegifbucket/processed_data/gif_1_tensor.npy',
  'description': 'a man is glaring, and someone with sunglasses appears.',
  'url': 'https://38.media.tumblr.com/9f6c25cc350f12aa74a7dc386a5c4985/tumblr_mevmyaKtDf1rgvhr8o1_500.gif',
  'class': 'miscellaneous'},
 '2': {'tensor_path': 'gs://whathegifbucket/processed_data/gif_2_tensor.npy',
  'description': 'a cat tries to catch a mouse on a tablet',
  'url': 'https://38.media.tumblr.com/9ead028ef62004ef6ac2b92e52edd210/tumblr_nok4eeONTv1s2yegdo1_400.gif',
  'class': 'animal_interaction'},
 '3': {'tensor_path': 'gs://whathegifbucket/processed_data/gif_3_tensor.npy',
  'description': 'a man dressed in red is dancing.',
  'url': 'https://38.media.tumblr.com/9f43dc410be85b1159d1f42663d811d7/tumblr_mllh01J96X1s9npefo1_250.gif',
  'class': 'performance'},
 '4': {'tensor_path': 'gs://whathegifbucket/processed_data/gif_4_tensor.npy',
  'description': 'an animal comes close to another in the jungle',
  'url': 'https://

In [41]:
blob = bucket.blob('processed_data/gif_1_tensor.npy')


In [44]:
!pip install google-cloud-storage




In [45]:
from google.cloud import storage
import numpy as np
import io


In [46]:
# Initialize the client
client = storage.Client()


In [47]:
# Your GCS bucket name
bucket_name = 'whathegifbucket'  # Replace with your actual bucket name if different

# Path to your tensor file inside the bucket
blob_name = 'processed_data/gif_1_tensor.npy'

In [48]:
# Get the bucket
bucket = client.bucket(bucket_name)

# Get the blob (your tensor file)
blob = bucket.blob(blob_name)

# Download the blob's content as bytes
data = blob.download_as_bytes()

# Load the numpy array from bytes
sample_tensor = np.load(io.BytesIO(data))

# Print the shape of the sample tensor
print("Sample tensor shape:", sample_tensor.shape)


Sample tensor shape: (32, 224, 224, 3)


In [49]:
# Assuming 'metadata' is your dictionary containing all GIF data
classes = []

for gif_id, gif_data in metadata.items():
    classes.append(gif_data['class'])

# Get the unique classes
unique_classes = list(set(classes))
num_classes = len(unique_classes)

print("Unique classes:", unique_classes)
print("Number of classes:", num_classes)


Unique classes: ['miscellaneous', 'sport', 'vehicle_related', 'technology_related', 'reflection', 'fire_related', 'supernatural', 'violence', 'affection', 'food_related', 'stage_performance', 'water_related', 'animal_interaction', 'conversation', 'performance']
Number of classes: 15


In [50]:
num_classes = 15


In [54]:
tensors = []
labels = []


In [55]:
import tensorflow as tf
import numpy as np
import cv2

def load_tensor_from_gcs(gcs_path, target_frames=32, target_size=(224, 224)):
    # Load the tensor from GCS
    with tf.io.gfile.GFile(gcs_path, 'rb') as f:
        tensor = np.load(f)
    
    # Ensure tensor has shape (frames, height, width, channels)
    if tensor.ndim == 4:
        # Check if channels are in the last dimension
        if tensor.shape[-1] != 3:
            # Adjust if channels are not in the last dimension
            tensor = np.transpose(tensor, (0, 2, 3, 1))
    else:
        print(f"Unexpected tensor shape: {tensor.shape}")
        return None

    # Adjust the number of frames
    total_frames = tensor.shape[0]
    if total_frames > target_frames:
        # Sample frames uniformly
        indices = np.linspace(0, total_frames - 1, target_frames).astype(int)
        tensor = tensor[indices]
    elif total_frames < target_frames:
        # Repeat last frame to reach the target number of frames
        last_frame = tensor[-1]
        padding = np.repeat(last_frame[np.newaxis, ...], target_frames - total_frames, axis=0)
        tensor = np.concatenate((tensor, padding), axis=0)

    # Resize frames to the target size
    resized_frames = []
    for frame in tensor:
        resized_frame = cv2.resize(frame, target_size)
        resized_frames.append(resized_frame)
    tensor = np.array(resized_frames)

    # Normalize pixel values to [0, 1]
    tensor = tensor.astype(np.float32) / 255.0

    return tensor


In [56]:
print("Loading tensors and labels...")
for gif_id, gif_data in metadata.items():
    tensor_gcs_path = gif_data['tensor_path']
    label = gif_data['class']
    
    # Load the tensor
    try:
        tensor = load_tensor_from_gcs(tensor_gcs_path)
        if tensor is not None:
            tensors.append(tensor)
            labels.append(label)
        else:
            print(f"Skipping GIF ID {gif_id} due to unexpected tensor shape.")
    except Exception as e:
        print(f"Error loading tensor for GIF ID {gif_id}: {e}")
        continue
print("Finished loading tensors and labels.")


Loading tensors and labels...
Finished loading tensors and labels.


In [57]:
X = np.array(tensors)
y = np.array(labels)


In [58]:
print("Shape of X:", X.shape)
print("Shape of y:", y.shape)


Shape of X: (100, 32, 224, 224, 3)
Shape of y: (100,)


The shapes of the data look correct.

Shape of X: (100, 32, 224, 224, 3)
100 samples.
Each sample has 32 frames.
Each frame is of size 224x224 pixels with 3 color channels (RGB).
Shape of y: (100,)
100 labels corresponding to the samples.

In [61]:
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical

# Encode labels
label_encoder = LabelEncoder()
integer_encoded = label_encoder.fit_transform(y)
num_classes = len(label_encoder.classes_)
print("Number of classes:", num_classes)
print("Classes:", label_encoder.classes_)

# Convert labels to one-hot encoding
y_encoded = to_categorical(integer_encoded, num_classes=num_classes)


Number of classes: 15
Classes: ['affection' 'animal_interaction' 'conversation' 'fire_related'
 'food_related' 'miscellaneous' 'performance' 'reflection' 'sport'
 'stage_performance' 'supernatural' 'technology_related' 'vehicle_related'
 'violence' 'water_related']


# Data Split

In [62]:
from sklearn.model_selection import train_test_split

# Split the data
X_train, X_val, y_train, y_val = train_test_split(
    X, y_encoded, test_size=0.2, random_state=42, stratify=integer_encoded
)


ValueError: The least populated class in y has only 1 member, which is too few. The minimum number of groups for any class cannot be less than 2.

In [63]:
import numpy as np
import pandas as pd

# Get the integer-encoded labels
integer_encoded = label_encoder.transform(y)

# Create a DataFrame for easier analysis
label_df = pd.DataFrame({'label': y, 'encoded_label': integer_encoded})

# Count the number of samples per class
class_counts = label_df['label'].value_counts()

print("Class distribution:")
print(class_counts)


Class distribution:
label
miscellaneous         43
conversation          11
sport                  8
animal_interaction     7
performance            7
affection              6
vehicle_related        3
food_related           3
technology_related     3
violence               2
reflection             2
fire_related           2
supernatural           1
water_related          1
stage_performance      1
Name: count, dtype: int64


In [64]:
from sklearn.model_selection import train_test_split

X_train, X_val, y_train, y_val = train_test_split(
    X, y_encoded, test_size=0.2, random_state=42
)


In [65]:
# For training set
train_classes = np.argmax(y_train, axis=1)
unique, counts = np.unique(train_classes, return_counts=True)
print("Training set class distribution:", dict(zip(unique, counts)))

# For validation set
val_classes = np.argmax(y_val, axis=1)
unique, counts = np.unique(val_classes, return_counts=True)
print("Validation set class distribution:", dict(zip(unique, counts)))


Training set class distribution: {0: 5, 1: 6, 2: 10, 3: 2, 4: 3, 5: 32, 6: 6, 7: 1, 8: 6, 9: 1, 11: 2, 12: 3, 13: 2, 14: 1}
Validation set class distribution: {0: 1, 1: 1, 2: 1, 5: 11, 6: 1, 7: 1, 8: 2, 10: 1, 11: 1}


In [66]:
input_shape = X_train.shape[1:]  # Exclude batch size
print("Input shape:", input_shape)


Input shape: (32, 224, 224, 3)


# Build Custom 3D CNN

In [67]:
num_classes = 15  # Update this if the actual number of classes is different


In [68]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv3D, MaxPooling3D, Flatten, Dense, Dropout, BatchNormalization

def build_3d_cnn_model(input_shape, num_classes):
    model = Sequential()
    
    # First Conv3D layer
    model.add(Conv3D(filters=32, kernel_size=(3, 3, 3), activation='relu', input_shape=input_shape))
    model.add(BatchNormalization())
    model.add(MaxPooling3D(pool_size=(1, 2, 2)))  # Reduces height and width, keeps time dimension

    # Second Conv3D layer
    model.add(Conv3D(filters=64, kernel_size=(3, 3, 3), activation='relu'))
    model.add(BatchNormalization())
    model.add(MaxPooling3D(pool_size=(2, 2, 2)))  # Reduces time, height, and width

    # Third Conv3D layer
    model.add(Conv3D(filters=128, kernel_size=(3, 3, 3), activation='relu'))
    model.add(BatchNormalization())
    model.add(MaxPooling3D(pool_size=(2, 2, 2)))

    # Flatten and Dense layers
    model.add(Flatten())
    model.add(Dense(units=256, activation='relu'))
    model.add(Dropout(rate=0.5))
    model.add(Dense(units=num_classes, activation='softmax'))

    return model

# Build the model
model = build_3d_cnn_model(input_shape, num_classes)


In [69]:
model

<keras.engine.sequential.Sequential at 0x7ff5eb5a4460>

In [70]:
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Display the model's architecture
model.summary()


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv3d (Conv3D)             (None, 30, 222, 222, 32)  2624      
                                                                 
 batch_normalization_94 (Bat  (None, 30, 222, 222, 32)  128      
 chNormalization)                                                
                                                                 
 max_pooling3d (MaxPooling3D  (None, 30, 111, 111, 32)  0        
 )                                                               
                                                                 
 conv3d_1 (Conv3D)           (None, 28, 109, 109, 64)  55360     
                                                                 
 batch_normalization_95 (Bat  (None, 28, 109, 109, 64)  256      
 chNormalization)                                                
                                                        

# Train Model

In [71]:
from sklearn.utils import class_weight
import numpy as np

# Get the integer labels for the training data
train_classes = np.argmax(y_train, axis=1)

# Compute class weights
class_weights = class_weight.compute_class_weight(
    class_weight='balanced',
    classes=np.unique(train_classes),
    y=train_classes
)

# Create a dictionary mapping class indices to weights
class_weights_dict = dict(enumerate(class_weights))

print("Class weights:", class_weights_dict)


Class weights: {0: 1.1428571428571428, 1: 0.9523809523809523, 2: 0.5714285714285714, 3: 2.857142857142857, 4: 1.9047619047619047, 5: 0.17857142857142858, 6: 0.9523809523809523, 7: 5.714285714285714, 8: 0.9523809523809523, 9: 5.714285714285714, 10: 2.857142857142857, 11: 1.9047619047619047, 12: 2.857142857142857, 13: 5.714285714285714}


In [73]:
num_classes = 15  # Since labels range from 0 to 14
print("Number of classes:", num_classes)


Number of classes: 15


In [78]:
from sklearn.model_selection import train_test_split

# Split the data without stratification
X_train, X_val, y_train, y_val = train_test_split(
    X, y_encoded, test_size=0.2, random_state=42
)


In [79]:
import numpy as np

# For training set
train_classes = np.argmax(y_train, axis=1)
unique_train_classes, train_counts = np.unique(train_classes, return_counts=True)
print("Training set class distribution:", dict(zip(unique_train_classes, train_counts)))

# For validation set
val_classes = np.argmax(y_val, axis=1)
unique_val_classes, val_counts = np.unique(val_classes, return_counts=True)
print("Validation set class distribution:", dict(zip(unique_val_classes, val_counts)))


Training set class distribution: {0: 5, 1: 6, 2: 10, 3: 2, 4: 3, 5: 32, 6: 6, 7: 1, 8: 6, 9: 1, 11: 2, 12: 3, 13: 2, 14: 1}
Validation set class distribution: {0: 1, 1: 1, 2: 1, 5: 11, 6: 1, 7: 1, 8: 2, 10: 1, 11: 1}


In [80]:
missing_classes_in_val = set(np.unique(train_classes)) - set(np.unique(val_classes))
print("Classes present in training but missing in validation:", missing_classes_in_val)


Classes present in training but missing in validation: {3, 4, 9, 12, 13, 14}


In [81]:
import numpy as np

# Convert one-hot encoded labels back to integer labels
integer_encoded = np.argmax(y_encoded, axis=1)

# Create a dictionary mapping class labels to indices
class_indices = {}
for idx, label in enumerate(integer_encoded):
    if label in class_indices:
        class_indices[label].append(idx)
    else:
        class_indices[label] = [idx]

# Print the number of samples per class
for label, indices in class_indices.items():
    print(f"Class {label}: {len(indices)} samples")



Class 5: 43 samples
Class 1: 7 samples
Class 6: 7 samples
Class 8: 8 samples
Class 2: 11 samples
Class 10: 1 samples
Class 12: 3 samples
Class 0: 6 samples
Class 4: 3 samples
Class 13: 2 samples
Class 11: 3 samples
Class 7: 2 samples
Class 3: 2 samples
Class 14: 1 samples
Class 9: 1 samples


Step 2: Split Each Class Individually
We'll split the indices of each class into training and validation sets, ensuring at least one sample per class in each set.

In [72]:
model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=10,        # Adjust as needed
    batch_size=4,     # Adjust based on your system's memory
    class_weight=class_weights_dict,
    verbose=1
)


2024-10-14 03:28:51.753544: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 1541406720 exceeds 10% of free system memory.
2024-10-14 03:28:52.758128: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 1541406720 exceeds 10% of free system memory.


Epoch 1/10


2024-10-14 03:29:01.147096: I tensorflow/compiler/xla/service/service.cc:169] XLA service 0x7ff5e7bbdc40 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2024-10-14 03:29:01.147159: I tensorflow/compiler/xla/service/service.cc:177]   StreamExecutor device (0): Tesla T4, Compute Capability 7.5
2024-10-14 03:29:03.057041: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2024-10-14 03:29:09.515290: I ./tensorflow/compiler/jit/device_compiler.h:180] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.




2024-10-14 03:29:20.258465: I tensorflow/core/common_runtime/executor.cc:1197] [/job:localhost/replica:0/task:0/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: indices[1] = 14 is not in [0, 14)
	 [[{{node GatherV2}}]]
2024-10-14 03:29:20.259672: I tensorflow/core/common_runtime/executor.cc:1197] [/job:localhost/replica:0/task:0/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: indices[1] = 14 is not in [0, 14)
	 [[{{node GatherV2}}]]
	 [[IteratorGetNext]]
2024-10-14 03:29:20.259777: I tensorflow/core/common_runtime/executor.cc:1197] [/job:localhost/replica:0/task:0/device:GPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: indices[1] = 14 is not in [0, 14)
	 [[{{node GatherV2}}]]
	 [[IteratorGetNext]]
	 [[IteratorGetNext/_6]]


InvalidArgumentError: Graph execution error:

2 root error(s) found.
  (0) INVALID_ARGUMENT:  indices[1] = 14 is not in [0, 14)
	 [[{{node GatherV2}}]]
	 [[IteratorGetNext]]
	 [[IteratorGetNext/_6]]
  (1) INVALID_ARGUMENT:  indices[1] = 14 is not in [0, 14)
	 [[{{node GatherV2}}]]
	 [[IteratorGetNext]]
0 successful operations.
0 derived errors ignored. [Op:__inference_train_function_19137]