In [6]:
from keras.layers import TextVectorization 
from tensorflow.data import Dataset, AUTOTUNE  
from keras.applications import InceptionResNetV2
from tensorflow.keras.backend import clear_session
from pickle import dump
from numpy.random import choice 

from functions.dataset_loading import load_flicker8k_captions, load_coco_captions
from functions.text_processing import custom_standardization, vectorize_captions, create_vocab_mappings
from functions.dataset_loading import create_embedding_matrix
from functions.image_processing import batch_extract_features
from functions.model_evaluation import decode_caption

from keras.mixed_precision import set_global_policy
# Use the "mixed_float16" dtype policy globally; set here, before any model is instantiated below.
set_global_policy("mixed_float16")

In [2]:
# Caption length setting; the text vectorizer is configured to emit SEQ_LENGTH + 1 token ids per caption.
SEQ_LENGTH = 20
# Embedding dimensionality handed to create_embedding_matrix.
EMBED_DIM = 300
# Vocabulary size cap passed to TextVectorization(max_tokens=...).
MAX_TOKENS = 4096
# Output directory prefixes for the pickled artefacts. File names are appended by plain
# string concatenation, so the trailing slash is required.
FLICKER = 'preprocessed_data/flicker8k/'
COCO = 'preprocessed_data/coco/'

In [3]:
# Load the Flicker8k captions: a per-image caption mapping plus the collection of all captions.
flicker_caption_map, flicker_total_captions = load_flicker8k_captions() 
# The mapping's keys double as the image list used for feature extraction.
flicker_images = list(flicker_caption_map.keys())

# Sanity check on the number of images and captions loaded.
print('Total flicker images: ', len(flicker_images))
print('Total flicker captions:', len(flicker_total_captions))

Total flicker images:  8091
Total flicker captions: 40455


In [4]:
# Load the COCO captions: separate train / validation caption mappings plus the
# collection of all COCO captions.
coco_train_caption_map, coco_val_caption_map, coco_total_captions = load_coco_captions() 

# The mappings' keys double as the image lists used for feature extraction.
coco_train_images = list(coco_train_caption_map.keys())
coco_val_images = list(coco_val_caption_map.keys())

# Sanity check on the number of images per split and the total caption count.
print('Training COCO images: ', len(coco_train_images))
print('Validation COCO images: ', len(coco_val_images))
print('Total COCO captions: ', len(coco_total_captions))

Training COCO images:  82783
Validation COCO images:  40504
Total COCO captions:  616767


# Extracting Features

In [5]:
# ImageNet-pretrained InceptionResNetV2 without its classification head, for 224x224 RGB input.
# pooling='avg' applies global average pooling to the final feature map.
encoder = InceptionResNetV2(weights= 'imagenet', include_top= False, pooling = 'avg', input_shape=(224,224,3))
# Freeze the weights: the model is only used as a fixed feature extractor in this notebook.
encoder.trainable = False

In [6]:
# Extract Flicker8K image features with the average-pooled `encoder` and pickle them.
# NOTE(review): this image directory has no trailing slash while the COCO directories do;
# confirm that batch_extract_features joins directory and file name safely.
flicker_feature_map = batch_extract_features(flicker_images, 'datasets/flicker8k/Flicker8k_images', encoder)

with open(FLICKER + 'avg_feature_map.pkl', 'wb') as file: 
    dump(flicker_feature_map, file)
# Drop the in-memory copy once it has been written to disk.
del flicker_feature_map



In [7]:
# Extract COCO training image features with the average-pooled `encoder` and pickle them.
coco_train_feature_map = batch_extract_features(coco_train_images, 'datasets/coco/train2014/', encoder)

with open(COCO + 'train_avg_feature_map.pkl', 'wb') as file: 
    dump(coco_train_feature_map, file)
    
# Drop the in-memory copy once it has been written to disk.
del coco_train_feature_map



In [8]:
# Extract COCO validation image features with the average-pooled `encoder` and pickle them.
coco_val_feature_map = batch_extract_features(coco_val_images, 'datasets/coco/val2014/', encoder)

with open(COCO + 'val_avg_feature_map.pkl', 'wb') as file: 
    dump(coco_val_feature_map, file)
    
# Drop the in-memory copy once it has been written to disk.
del coco_val_feature_map



In [None]:
# Reset Keras' global state before building the second encoder.
clear_session()

# Same backbone as `encoder` but without the pooling argument, so the un-pooled
# output of the network is kept.
encoder_2 = InceptionResNetV2(weights= 'imagenet', include_top= False, input_shape=(224,224,3))
encoder_2.trainable = False
# Last expression of the cell: displays the encoder's output tensor for inspection.
encoder_2.output

In [9]:
# Extract Flicker8K image features with the un-pooled `encoder_2` and pickle them
# (written to 'feature_map.pkl', separate from the 'avg_feature_map.pkl' file).
# NOTE(review): this image directory has no trailing slash while the COCO directories do;
# confirm that batch_extract_features joins directory and file name safely.
flicker_feature_map = batch_extract_features(flicker_images, 'datasets/flicker8k/Flicker8k_images', encoder_2)

with open(FLICKER + 'feature_map.pkl', 'wb') as file: 
    dump(flicker_feature_map, file)
# Drop the in-memory copy once it has been written to disk.
del flicker_feature_map




In [10]:
# Extract COCO training image features with the un-pooled `encoder_2` and pickle them
# (written to 'train_feature_map.pkl', separate from the 'train_avg_feature_map.pkl' file).
coco_train_feature_map = batch_extract_features(coco_train_images, 'datasets/coco/train2014/', encoder_2)

with open(COCO + 'train_feature_map.pkl', 'wb') as file: 
    dump(coco_train_feature_map, file)
    
# Drop the in-memory copy once it has been written to disk.
del coco_train_feature_map



In [11]:
# Extract COCO validation image features with the un-pooled `encoder_2` and pickle them
# (written to 'val_feature_map.pkl', separate from the 'val_avg_feature_map.pkl' file).
coco_val_feature_map = batch_extract_features(coco_val_images, 'datasets/coco/val2014/', encoder_2)

with open(COCO + 'val_feature_map.pkl', 'wb') as file: 
    dump(coco_val_feature_map, file)
    
# Drop the in-memory copy once it has been written to disk.
del coco_val_feature_map



# Text Vectorization

In [8]:
# Integer-encoding vectorizer: vocabulary capped at MAX_TOKENS, every caption padded or
# truncated to SEQ_LENGTH + 1 ids, text cleaned by the project's custom_standardization.
# NOTE(review): the "+ 1" presumably leaves room for shifting inputs vs. targets at
# training time; confirm against the training code.
text_vectorizer = TextVectorization(
    max_tokens=MAX_TOKENS,
    output_mode='int',  
    output_sequence_length=SEQ_LENGTH + 1,  
    standardize=custom_standardization,  
)

# Build one shared vocabulary from the Flicker8k and COCO captions combined.
total_captions = flicker_total_captions + coco_total_captions

# Feed the captions to adapt() in batches of 2048 with prefetching.
captions_dataset = Dataset.from_tensor_slices(total_captions)
captions_dataset = captions_dataset.batch(2048).prefetch(AUTOTUNE)

text_vectorizer.adapt(captions_dataset)

In [9]:
# Persist the learned vocabulary.
vocab = text_vectorizer.get_vocabulary()

with open('preprocessed_data/vocab.pkl', 'wb') as file: 
    dump(vocab, file)
    
print('Vocab size: ', len(vocab))

# Token <-> index lookup tables derived from the vocabulary.
word_to_idx, idx_to_word = create_vocab_mappings(vocab)

# Embedding matrix of width EMBED_DIM for the vocabulary, built by the project helper.
# NOTE(review): the helper presumably fills it from pretrained word vectors; see
# functions.dataset_loading.create_embedding_matrix to confirm the source.
embedding_matrix = create_embedding_matrix(EMBED_DIM, len(vocab), word_to_idx)

with open('preprocessed_data/embedding_matrix.pkl', 'wb') as file: 
    dump(embedding_matrix, file)

Vocab size:  4096
Found 400000 word vectors
Converted 4047 words (49 misses)


In [10]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE

# Exploratory plotting helpers for a vectorized caption map
# (a mapping whose values are stacked row-wise with np.vstack).

def analyze_vectors(vectorized_caption_map):
    """Collect every value of the mapping into one row-stacked NumPy array.

    Args:
        vectorized_caption_map: Mapping whose values are array-likes that
            ``np.vstack`` can stack row-wise. The keys are ignored.

    Returns:
        The ``np.vstack`` of all values, in the mapping's iteration order.
    """
    per_key_vectors = [vectors for vectors in vectorized_caption_map.values()]
    return np.vstack(per_key_vectors)

def plot_vector_analysis(vectorized_caption_map):
    """Show a 2x2 summary figure for the stacked vectors of a caption map.

    Panels, in reading order: histogram of per-row L2 norms, per-dimension mean,
    per-dimension variance, and a 2-component PCA scatter plot.

    Args:
        vectorized_caption_map: Mapping accepted by ``analyze_vectors``.
    """
    vectors = analyze_vectors(vectorized_caption_map)

    fig, axes = plt.subplots(2, 2, figsize=(20, 15))
    (ax_norm, ax_mean), (ax_var, ax_pca) = axes

    # Top-left: distribution of the row-wise Euclidean norms.
    row_norms = np.linalg.norm(vectors, axis=1)
    sns.histplot(row_norms, ax=ax_norm)
    ax_norm.set_title('Distribution of Vector Magnitudes')
    ax_norm.set_xlabel('Magnitude')
    ax_norm.set_ylabel('Count')

    # Top-right: mean of every column.
    ax_mean.plot(np.mean(vectors, axis=0))
    ax_mean.set_title('Mean Values Across Dimensions')
    ax_mean.set_xlabel('Dimension')
    ax_mean.set_ylabel('Mean Value')

    # Bottom-left: variance of every column.
    column_var = np.var(vectors, axis=0)
    ax_var.bar(range(len(column_var)), column_var)
    ax_var.set_title('Variance Across Dimensions')
    ax_var.set_xlabel('Dimension')
    ax_var.set_ylabel('Variance')

    # Bottom-right: projection onto the first two principal components.
    projected = PCA(n_components=2).fit_transform(vectors)
    ax_pca.scatter(projected[:, 0], projected[:, 1], alpha=0.5)
    ax_pca.set_title('PCA Visualization (2D)')
    ax_pca.set_xlabel('First Principal Component')
    ax_pca.set_ylabel('Second Principal Component')

    fig.tight_layout()
    plt.show()

# Interactive visualizations with Plotly
def create_interactive_plots(vectorized_caption_map):
    """Show an interactive Plotly scatter of a 2-D t-SNE embedding of the vectors.

    Each point is one stacked vector, coloured by its L2 norm and labelled
    "Vector <row index>" on hover.

    Args:
        vectorized_caption_map: Mapping accepted by ``analyze_vectors``.

    Raises:
        ValueError: If fewer than two vectors are available to embed.
    """
    all_vectors = analyze_vectors(vectorized_caption_map)
    n_vectors = len(all_vectors)
    if n_vectors < 2:
        raise ValueError(
            f"t-SNE needs at least 2 vectors to embed, got {n_vectors}"
        )

    # scikit-learn requires perplexity < n_samples. Keep its default of 30 whenever
    # the data allows it and shrink it only for small inputs, which would otherwise
    # be rejected.
    perplexity = min(30.0, float(n_vectors - 1))

    # t-SNE visualization (fixed random_state keeps the layout reproducible)
    tsne = TSNE(n_components=2, random_state=42, perplexity=perplexity)
    tsne_results = tsne.fit_transform(all_vectors)

    magnitudes = np.linalg.norm(all_vectors, axis=1)

    fig = go.Figure()
    fig.add_trace(go.Scatter(
        x=tsne_results[:, 0],
        y=tsne_results[:, 1],
        mode='markers',
        marker=dict(
            size=8,
            color=magnitudes,
            colorscale='Viridis',
            showscale=True
        ),
        text=[f"Vector {i}" for i in range(n_vectors)]
    ))

    fig.update_layout(
        title='t-SNE Visualization of Vectors',
        width=800,
        height=600
    )
    fig.show()

# Usage example:
# plot_vector_analysis(your_vectorized_caption_map)
# create_interactive_plots(your_vectorized_caption_map)

# Heatmap of vector correlations
def plot_vector_correlations(vectorized_caption_map):
    """Show a heatmap of the correlation coefficients between vector dimensions.

    Args:
        vectorized_caption_map: Mapping accepted by ``analyze_vectors``.
    """
    vectors = analyze_vectors(vectorized_caption_map)
    # rowvar=False treats every column (dimension) as one variable, which is the
    # same matrix as np.corrcoef(vectors.T).
    dimension_corr = np.corrcoef(vectors, rowvar=False)

    fig, ax = plt.subplots(figsize=(12, 10))
    sns.heatmap(dimension_corr, cmap='coolwarm', center=0, ax=ax)
    ax.set_title('Vector Dimension Correlations')
    plt.show()

# Histogram of caption lengths (len() of every caption in the map)
def plot_sequence_lengths(caption_map):
    """Show a 30-bin histogram of ``len(caption)`` over every caption in the map.

    Args:
        caption_map: Mapping whose values are iterables of captions. Each caption
            only needs to support ``len()``.
    """
    caption_lengths = []
    for captions in caption_map.values():
        caption_lengths.extend(len(caption) for caption in captions)

    fig, ax = plt.subplots(figsize=(10, 6))
    sns.histplot(caption_lengths, bins=30, ax=ax)
    ax.set_title('Distribution of Caption Lengths')
    ax.set_xlabel('Length')
    ax.set_ylabel('Count')
    plt.show()

# Vectorizing captions

In [11]:
# Pick one random image and keep its first caption to spot-check the round trip
# raw text -> token ids -> decoded words.
test_image = choice(flicker_images)
test_caption = flicker_caption_map[test_image][0]
print('Original caption: ', test_caption)

# NOTE: rebinds flicker_caption_map from raw captions to vectorized captions, so this
# cell is not safe to run twice without re-running the caption-loading cell first.
flicker_caption_map = vectorize_captions(flicker_caption_map, text_vectorizer)

test_caption = flicker_caption_map[test_image][0]
print('Vectorized caption: ', test_caption)

print('Decoded caption: ', decode_caption(test_caption, idx_to_word))

# Persist the vectorized Flicker8k captions.
with open(FLICKER + 'caption_map.pkl', 'wb') as file: 
    dump(flicker_caption_map, file)

# # Basic static visualizations
# plot_vector_analysis(flicker_caption_map)

# Drop the in-memory copy once it has been written to disk.
del flicker_caption_map

Original caption:  A man sits on a step with two duffel bags and a plastic bag by his sides .
Vectorized caption:  [   3    2   12   98    5    2 1804    9   15    1  642   10    2  493
  392   51   39  922    4    0    0]
Decoded caption:  ['a', 'man', 'sits', 'on', 'a', 'step', 'with', 'two', '[UNK]', 'bags', 'and', 'a', 'plastic', 'bag', 'by', 'his', 'sides']


In [12]:
# # Basic static visualizations
# plot_vector_analysis(flicker_caption_map)

In [13]:
# Vectorize, pickle and release the COCO training captions first, then do the same for
# the validation captions. Each map is deleted as soon as it is on disk.
# NOTE: both names are rebound from raw to vectorized captions and then deleted, so this
# cell cannot be re-run without re-running the COCO caption-loading cell first.
coco_train_caption_map = vectorize_captions(coco_train_caption_map, text_vectorizer) 

print('Finished vectorizing coco training captions')

with open(COCO + 'train_caption_map.pkl', 'wb') as file: 
    dump(coco_train_caption_map, file)

del coco_train_caption_map

print('Completed writting vectorized training captions to file')
    
coco_val_caption_map = vectorize_captions(coco_val_caption_map, text_vectorizer)

print('Finished vectorizing coco validation captions')

with open(COCO + 'val_caption_map.pkl', 'wb') as file: 
    dump(coco_val_caption_map, file)
    
del coco_val_caption_map

print('Completed writting vectorized validation captions to file')


Finished vectorizing coco training captions
Completed writting vectorized training captions to file
Finished vectorizing coco validation captions
Completed writting vectorized validation captions to file
