# Animal classes

## Importing modules

In [1]:
import os
import pickle
import numpy as np
from tqdm.notebook import tqdm

from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.models import Model
from tensorflow.keras.utils import to_categorical, plot_model
from tensorflow.keras.layers import Input, Dense, Dropout

In [2]:
# Root of the Kaggle input datasets; the dataset paths in this notebook are built from it.
BASE_DIR = '/kaggle/input/'
# Output directory: the pickled feature files are written to and read back from here.
WORKING_DIR = '/kaggle/working'

## Extracting image features (visual feature vectors)

In [3]:
# Load the ImageNet-pretrained VGG16 including its top layers, then build the
# feature extractor `model` whose output is the second-to-last layer of that
# network instead of its final layer.
_vgg16_full = VGG16(include_top=True, weights="imagenet")
model = Model(
    inputs=_vgg16_full.inputs,
    outputs=_vgg16_full.layers[-2].output,
)
model.summary()

2022-05-20 15:03:56.083366: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-05-20 15:03:56.231083: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-05-20 15:03:56.232129: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-05-20 15:03:56.233681: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compil

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels.h5
Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 224, 224, 3)]     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     14758

## Extracting features

In [None]:
features = dict()
directory = os.path.join(BASE_DIR, 'raw-img')

for img_name


In [4]:
import pandas as pd

# Location of the per-class embedding table inside the input directory.
embedding_csv_path = os.path.join(BASE_DIR, 'animals-embedding/animal_embedding.csv')

# NOTE(review): read_csv is used with its default header handling, so the first
# line of the file becomes the column labels. The file looks header-less (the
# first label is a class name) — confirm that downstream code accounts for this.
animal_embedding = pd.read_csv(embedding_csv_path)

# Preview the first ten rows.
animal_embedding.head(10)

Unnamed: 0,dog,1.2658627033233643,-4.176978588104248,-4.9388017654418945,-2.2473573684692383,-2.843980550765991,1.946314811706543,0.11756449937820435,0.2842515707015991,2.7562973499298096,...,-1.9296479225158691,4.560481071472168,-3.062861204147339,2.8291776180267334,-2.377556324005127,2.325477123260498,-2.9993598461151123,-2.520197629928589,-3.83392333984375,3.496582508087158
0,cat,-0.381995,-1.766797,-4.787522,-2.558725,-1.339253,0.59239,1.112785,-0.448487,2.54552,...,0.419664,2.502401,-5.534971,3.39323,-2.916208,0.997427,0.015585,-3.182588,-5.565441,4.002025
1,horse,-0.310629,-1.608654,-6.294101,-1.746291,0.241648,1.082419,1.045648,0.58633,1.99226,...,-1.74144,4.98984,-3.777325,1.467986,-1.259178,0.1908,-1.146512,0.782675,-7.548736,4.409935
2,spider,-0.153984,-0.921221,-2.211644,-1.882236,-0.366823,1.705931,-1.306779,-0.889177,2.667622,...,-2.327637,2.149812,-0.65622,1.882225,-1.771466,0.120196,4.925205,-0.714396,-8.256945,2.651939
3,butterfly,-1.743653,-1.830749,-1.705997,-2.083525,1.55409,0.162393,-2.695845,0.061042,6.692966,...,-1.987755,3.786486,-2.81995,2.089549,-1.229188,2.952096,3.885926,-2.103109,-6.863734,3.08154
4,chicken,0.905259,-1.523104,-3.55002,-1.398244,0.582835,0.300335,-1.103551,1.649764,2.314934,...,-1.755018,2.376158,-2.386566,1.619205,-1.167002,-0.535578,0.524562,-4.101033,-5.639382,3.184503
5,sheep,3.230775,-1.12327,-5.874129,-1.157936,-0.381808,0.167193,0.865875,-0.177649,1.986301,...,-0.828826,2.623193,-2.910724,2.78048,-2.682538,-1.863513,0.674739,-2.314202,-5.71128,3.753953
6,cow,-1.415986,-2.460523,-5.304926,-3.270924,-0.951144,0.772819,-2.170794,2.144072,3.37339,...,-0.273733,1.311115,-3.833792,2.949827,-1.561491,2.246352,-1.094555,-2.902785,-6.87894,2.775446
7,squirrel,-0.798317,-1.077563,-3.863101,-1.820429,-2.552133,1.427552,-0.508762,0.314221,1.89527,...,0.155655,1.989301,-3.232816,2.229646,-0.935127,1.374805,0.034162,-1.591616,-5.631794,2.468991
8,elephant,-2.977109,-1.60553,-6.026855,-2.209567,-0.086298,-0.136468,1.158288,1.916751,3.337121,...,-1.640409,1.578179,-4.394119,2.875115,-2.020316,3.60597,-0.788909,-1.108602,-4.782419,3.162038


In [5]:
# Print a summary of the embedding frame (index range, column count, dtypes, memory usage).
animal_embedding.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9 entries, 0 to 8
Columns: 1025 entries, dog to 3.496582508087158
dtypes: float64(1024), object(1)
memory usage: 72.2+ KB


## Generating an embedding for each class

In [6]:
def clean_embeddings(embeddings):
    """Turn the raw embedding table into a ``{class name: vector}`` dictionary.

    The frame is expected to carry one record in its column labels (as happens
    when a header-less CSV is read with the default ``header`` setting):
    ``columns[0]`` is that record's class name and ``columns[1:]`` are its
    vector components. Every row holds a class name in its first cell followed
    by the vector components.

    Args:
        embeddings: pandas DataFrame laid out as described above.

    Returns:
        dict mapping each class name to its vector as a list of floats. The
        record stored in the column labels is parsed to floats as well, so all
        classes share the same element type.

    Raises:
        ValueError: if a column label cannot be interpreted as a number.
    """
    def _label_to_float(label):
        # Column labels read from a CSV are strings; parse them so the header
        # record has the same numeric type as the row records.
        try:
            return float(label)
        except ValueError:
            # pandas renames repeated labels to "<label>.<n>"; drop that
            # suffix and retry. A label that still fails raises ValueError.
            return float(str(label).rsplit('.', 1)[0])

    info = dict()
    info[embeddings.columns[0]] = [_label_to_float(c) for c in embeddings.columns[1:]]

    # Explicit positional access (.iloc) is used throughout: plain `[0]` / `[1:]`
    # on a label-indexed row relies on deprecated positional fallback.
    for i in range(embeddings.shape[0]):
        name = embeddings.iloc[i, 0]
        info[name] = embeddings.iloc[i, 1:].tolist()

    return info

In [7]:
# Build the {class name: embedding vector} lookup from the loaded embedding table.
info = clean_embeddings(animal_embedding)

In [8]:
features = dict()  # image id -> array returned by model.predict for that image
img_emb = dict()   # image id -> embedding vector of the image's class (from `info`)
classes = ["butterfly", "cat", "chicken", "cow", "dog", "elephant", "horse", "sheep", "spider", "squirrel"]

# Two-way Italian <-> English class-name lookup. The loop below uses the
# English -> Italian direction to find each class's image directory.
translate = {
    # Italian -> English
    "cane": "dog", "cavallo": "horse", "elefante": "elephant",
    "farfalla": "butterfly", "gallina": "chicken", "gatto": "cat",
    "mucca": "cow", "pecora": "sheep", "ragno": "spider",
    "scoiattolo": "squirrel",
    # English -> Italian
    "dog": "cane", "horse": "cavallo", "elephant": "elefante",
    "butterfly": "farfalla", "chicken": "gallina", "cat": "gatto",
    "cow": "mucca", "sheep": "pecora", "spider": "ragno",
    "squirrel": "scoiattolo",
}

# Root directory of the images; it does not change between classes, so build it once.
images = os.path.join(BASE_DIR, 'animals10/raw-img')

for animal in classes:
    transl_animal = translate[animal]
    image_dir = os.path.join(images, transl_animal)
    for dirname, _, filenames in os.walk(image_dir):
        for file in tqdm(filenames):
            img_path = os.path.join(dirname, file)
            # Resize on load to 224x224, the input size shown in the model summary.
            image = load_img(img_path, target_size=(224, 224))
            image = img_to_array(image)
            # Add a leading batch axis: (h, w, c) -> (1, h, w, c).
            image = image.reshape((1, image.shape[0], image.shape[1], image.shape[2]))
            image = preprocess_input(image)
            # Extract the visual feature vector.
            feature = model.predict(image, verbose=0)
            # Strip only the final extension: splitting on the first '.' would
            # truncate names that contain extra dots and let distinct files
            # collide on the same id.
            # NOTE(review): ids are bare file names, so two classes containing
            # the same file name would still overwrite each other — confirm the
            # dataset has unique names.
            image_id = os.path.splitext(file)[0]
            features[image_id] = feature
            img_emb[image_id] = info[animal]


  0%|          | 0/2112 [00:00<?, ?it/s]

2022-05-20 15:04:21.602939: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)
2022-05-20 15:04:22.551081: I tensorflow/stream_executor/cuda/cuda_dnn.cc:369] Loaded cuDNN version 8005


  0%|          | 0/1668 [00:00<?, ?it/s]

  0%|          | 0/3098 [00:00<?, ?it/s]

  0%|          | 0/1866 [00:00<?, ?it/s]

  0%|          | 0/4863 [00:00<?, ?it/s]

  0%|          | 0/1446 [00:00<?, ?it/s]

  0%|          | 0/2623 [00:00<?, ?it/s]

  0%|          | 0/1820 [00:00<?, ?it/s]

  0%|          | 0/4821 [00:00<?, ?it/s]

  0%|          | 0/1862 [00:00<?, ?it/s]

## Storing the visual and textual features

In [15]:
# Persist both lookups to the working directory. The `with` blocks close each
# file as soon as its dump finishes, so the data is flushed to disk before the
# files are read back.
with open(os.path.join(WORKING_DIR, 'visual_features.pkl'), 'wb') as f:
    pickle.dump(features, f)

with open(os.path.join(WORKING_DIR, 'textual_features.pkl'), 'wb') as f:
    pickle.dump(img_emb, f)

## Loading the features

In [16]:
# Read the two pickled lookups back from the working directory.
# pickle.load executes whatever the file encodes, so only point these paths at
# files written by this notebook.
textual_pkl_path = os.path.join(WORKING_DIR, 'textual_features.pkl')
with open(textual_pkl_path, 'rb') as textual_file:
    txt_features = pickle.load(textual_file)

visual_pkl_path = os.path.join(WORKING_DIR, 'visual_features.pkl')
with open(visual_pkl_path, 'rb') as visual_file:
    vs_features = pickle.load(visual_file)

In [22]:
# Replace this single image's embedding with a NumPy array, in place in the dict;
# all other entries of txt_features are left exactly as loaded.
txt_features['OIF-e2bexWrojgtQnAPPcUfOWQ'] = np.array(txt_features['OIF-e2bexWrojgtQnAPPcUfOWQ'])

In [23]:
# Display the shape of the stored visual feature for one sample image.
vs_features['OIF-e2bexWrojgtQnAPPcUfOWQ'].shape

(1, 4096)

In [26]:
#txt_features['OIF-e2bexWrojgtQnAPPcUfOWQ'].shape

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the input directory tree and print the full path of every file found.
input_file_paths = (
    os.path.join(folder, file_name)
    for folder, _, file_names in os.walk('/kaggle/input')
    for file_name in file_names
)
for input_file_path in input_file_paths:
    print(input_file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session