# Animal classes :

## Importing modules :

In [None]:
import os
import pickle
import numpy as np
from tqdm.notebook import tqdm

from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.models import Model
from tensorflow.keras.utils import to_categorical, plot_model
from tensorflow.keras.layers import Input, Dense, Dropout

In [None]:
# Root of the read-only Kaggle input datasets. The trailing slash is kept on
# purpose: path-building code may append to this value by plain string
# concatenation.
BASE_DIR = '/kaggle/input/'
# Writable directory that receives the artifacts generated by this notebook.
WORKING_DIR = '/kaggle/working'

## Extracting image features (visual feature vectors) :

In [None]:
# Load VGG16 with its ImageNet weights and its classifier head, then expose
# the output of the layer just before the final one as the feature vector.
# (For Keras' VGG16 that is presumably the 4096-d `fc2` layer -- check the
# summary printed below.)
vgg16_full = VGG16(include_top=True, weights="imagenet")
penultimate_output = vgg16_full.layers[-2].output
model = Model(inputs=vgg16_full.inputs, outputs=penultimate_output)
model.summary()

## Extracting features :
### Textual feature vectors :

In [None]:
import pandas as pd

animal_embedding = pd.read_csv(os.path.join(BASE_DIR, 'animals-embedding/animal_embedding.csv'))
animal_embedding.head(10)

In [None]:
animal_embedding.info()

## Generate embedding for each class :

In [None]:
def clean_embeddings(embeddings):
    """Convert an embedding table into a ``{name: vector}`` dictionary.

    The first column of ``embeddings`` is treated as the row name and the
    remaining columns as that row's vector components.

    Args:
        embeddings: pandas DataFrame whose first column holds the names.

    Returns:
        dict mapping each first-column value to the list of the remaining
        values of its row. One extra entry maps the first column's header to
        the list of the remaining column headers.
    """
    # Header entry: first column label -> labels of the vector columns.
    info = {embeddings.columns[0]: list(embeddings.columns[1:])}
    for i in range(embeddings.shape[0]):
        row = embeddings.iloc[i]
        # Use explicit positional access. A bare ``row[0]`` is a *label*
        # lookup on a string-labelled Series; pandas only falls back to the
        # position with a deprecation warning and newer versions raise.
        info[row.iloc[0]] = list(row.iloc[1:])

    return info

In [None]:
# Lookup table (name -> embedding vector) consumed by the feature-extraction loop.
info = clean_embeddings(animal_embedding)

## Store visual and textual vectors in dictionaries with image ids:

In [None]:
# image_id -> visual feature returned by `model.predict`
features = dict()
# image_id -> textual embedding of the image's class
img_emb = dict()
classes = ["butterfly","cat","chicken","cow","dog","elephant","horse","sheep","spider","squirrel"]
# Two-way English <-> Italian class-name table. The loop below only needs the
# English -> Italian direction, because the image folders carry the Italian
# names. Each key appears exactly once.
translate = {
    # Italian -> English
    "cane": "dog", "cavallo": "horse", "elefante": "elephant",
    "farfalla": "butterfly", "gallina": "chicken", "gatto": "cat",
    "mucca": "cow", "pecora": "sheep", "ragno": "spider",
    "scoiattolo": "squirrel",
    # English -> Italian
    "dog": "cane", "elephant": "elefante", "butterfly": "farfalla",
    "chicken": "gallina", "cat": "gatto", "cow": "mucca",
    "spider": "ragno", "squirrel": "scoiattolo", "horse": "cavallo",
    "sheep": "pecora",
}

# Root of the per-class image folders; it does not depend on the class, so it
# is built once instead of on every iteration.
images = os.path.join(BASE_DIR, 'animals10', 'raw-img')

for animal in classes:
    transl_animal = translate[animal]
    image_dir = os.path.join(images, transl_animal)
    # Every image of this class shares the same textual embedding.
    class_embedding = info[animal]
    for dirname, _, filenames in os.walk(image_dir):
        for file in tqdm(filenames):
            img_path = os.path.join(dirname, file)
            image = load_img(img_path, target_size=(224, 224))
            image = img_to_array(image)
            # Add a leading batch axis: the network is fed one image at a time.
            image = image.reshape((1, image.shape[0], image.shape[1], image.shape[2]))
            image = preprocess_input(image)
            # Extract the visual feature for this single-image batch.
            feature = model.predict(image, verbose=0)
            # Strip only the final extension, so names that contain further
            # dots are not truncated at the first one.
            # NOTE(review): ids are bare file stems; identical file names in
            # two class folders would overwrite each other -- confirm that
            # names are unique across classes.
            image_id = os.path.splitext(file)[0]
            features[image_id] = feature
            img_emb[image_id] = class_embedding


## Storing the visual and textual features :

In [None]:
# Persist both dictionaries. The context managers guarantee that each file is
# flushed and closed, even if pickling raises part-way through.
with open(os.path.join(WORKING_DIR, 'visual_features.pkl'), 'wb') as f:
    pickle.dump(features, f)
with open(os.path.join(WORKING_DIR, 'textual_features.pkl'), 'wb') as f:
    pickle.dump(img_emb, f)

In [None]:
# NOTE(review): `txt_features` and `vs_features` are assigned by the
# pickle-loading cell further down in this notebook, so these inspection cells
# only work once that cell has been executed.
# Turn one sample's textual vector into a NumPy array so its shape can be inspected.
txt_features['OIF-e2bexWrojgtQnAPPcUfOWQ'] = np.array(txt_features['OIF-e2bexWrojgtQnAPPcUfOWQ'])

In [None]:
# Shape of the same sample's visual feature.
vs_features['OIF-e2bexWrojgtQnAPPcUfOWQ'].shape

In [None]:
# Disabled check of the textual vector's shape for the same sample.
#txt_features['OIF-e2bexWrojgtQnAPPcUfOWQ'].shape

## Mapping :

### CCA between Visual and Textual feature vectors :
#### Loading the vectors :

In [None]:
# Read the two pickled dictionaries back from the working directory.
textual_pkl = os.path.join(WORKING_DIR, 'textual_features.pkl')
visual_pkl = os.path.join(WORKING_DIR, 'visual_features.pkl')

with open(textual_pkl, 'rb') as f:
    txt_features = pickle.load(f)

with open(visual_pkl, 'rb') as f:
    vs_features = pickle.load(f)

## Info about features (shape):

In [None]:
print("visual features shape example:", vs_features['OIF-e2bexWrojgtQnAPPcUfOWQ'].shape)
print("textual features shape example:", np.array(txt_features['OIF-e2bexWrojgtQnAPPcUfOWQ']).shape)



In [None]:
for key, value in txt_features.items():
    txt_features[key] = np.array(txt_features[key], dtype=np.float32)
    #txt_features[key] = txt_features[key].reshape(1, txt_features[key].shape[0])

In [None]:
print("textual features shape example:", txt_features['OIF-e2bexWrojgtQnAPPcUfOWQ'].shape)

## Splitting the data :

In [None]:
import pandas as pd
# Look both features up per image id rather than relying on the two
# dictionaries iterating in the same order, so a row can never pair one
# image's visual feature with another image's textual feature. An id that is
# missing from `txt_features` raises KeyError instead of shifting the rows.
image_ids = list(vs_features.keys())
data = {
    'image_id': image_ids,
    'visual feature': [vs_features[image_id] for image_id in image_ids],
    'textual feature': [txt_features[image_id] for image_id in image_ids],
}
df = pd.DataFrame(data=data)

In [None]:
df.head()

In [None]:
df.to_csv(os.path.join(WORKING_DIR, 'vectors.csv'), index=False)

In [None]:
df = df.drop(['image_id'], axis=1)

In [None]:
df.head()

In [None]:
df.shape

In [None]:
def convert_to_list(df):
    """Replace the array cells of both feature columns with plain lists.

    Args:
        df: pandas DataFrame with the columns ``'textual feature'`` and
            ``'visual feature'``, whose cells are array-likes.

    Returns:
        The same DataFrame object, with both columns rewritten in place so
        that every cell is a (possibly nested) Python list.
    """
    for column in ('textual feature', 'visual feature'):
        # Assign the whole column at once. Element-wise chained assignment
        # (``df[col][i] = ...``) writes to a temporary under pandas
        # Copy-on-Write and looks rows up by label, so it breaks on any
        # non-default index.
        # ``np.asarray`` lets cells that are already lists pass through.
        df[column] = df[column].map(lambda value: np.asarray(value).tolist())
    return df

In [None]:
df = convert_to_list(df)

In [None]:
df['visual feature'].values.tolist()

## Train test split :

In [None]:
from sklearn.model_selection import train_test_split

# Visual features are the inputs, textual features the targets.
feature_column, target_column = 'visual feature', 'textual feature'
X = df[feature_column].values
y = df[target_column].values

# Hold out 20% of the rows; the fixed seed keeps the split reproducible.
split = train_test_split(X, y, test_size=0.2, random_state=0)
X_train, X_test, y_train, y_test = split

In [None]:
# Report the size of each split. The second label previously read
# "y Train test", which did not describe the printed value.
print("X Train shape : ", X_train.shape)
print("y Train shape : ", y_train.shape)
print("X Test shape : ", X_test.shape)
print("y Test shape : ", y_test.shape)

## CCA :

In [None]:
# Convert the training inputs from a NumPy array to a Python list before fitting.
X_train = X_train.tolist()

In [None]:
# Same conversion for the training targets.
y_train = y_train.tolist()

In [None]:
from sklearn.cross_decomposition import CCA

# Stack the per-sample vectors into 2-D (n_samples, n_features) matrices, which
# is the layout the estimator expects. reshape(n, -1) also flattens any extra
# axis a sample carries: the visual features were stored straight from
# `model.predict` on a one-image batch, so each presumably has a leading batch
# axis.
X_train_matrix = np.array(list(X_train), dtype=np.float64).reshape(len(X_train), -1)
y_train_matrix = np.array(list(y_train), dtype=np.float64).reshape(len(y_train), -1)

# Cap the requested 500 components at what the data can support: no more than
# the number of samples, input features or target features.
max_components = min(
    X_train_matrix.shape[0], X_train_matrix.shape[1], y_train_matrix.shape[1]
)

# Bind the estimator to its own name so that `CCA` keeps referring to the
# class and this cell can be re-run without a "not callable" error.
cca = CCA(n_components=min(500, max_components))
cca.fit_transform(X_train_matrix, y_train_matrix)

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found under the Kaggle input directory.
input_file_paths = (
    os.path.join(root, name)
    for root, _, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_file_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session