## ⛏️ Rajasegaran - Embedding

Using a pretrained VGG model, embed the style and content of app icons into feature vectors.

#### Imports

In [None]:
# IMPORT
from keras.applications                         import VGG19
from tensorflow.keras.preprocessing.image       import load_img, img_to_array
from keras.applications.vgg19                   import preprocess_input
from keras.applications.vgg19                   import decode_predictions
from tensorflow.keras.models                    import Model
from sklearn.random_projection                  import SparseRandomProjection

from   PIL          import Image
from   tqdm         import tqdm
import pandas       as pd
import numpy        as np
import os

In [None]:
# Initialize TQDM library for Pandas: registers the `progress_apply` method
# that the cells below call on pandas Series to show a progress bar
tqdm.pandas()

In [None]:
# Banner printed at the very start of the run
print("⚡ START ⚡")

#### Parameters

In [None]:
# Ground-truth dataset (the commented line switches to the full set)
#INPUT_PATH  = "../../../../0_Data/CSV/0_AndroCatSet.csv"
INPUT_PATH  = "../../../../0_Data/CSV/1_AndroCatSet_MiniTEST.csv"

# Output path of the CSV holding the extracted embeddings
OUTPUT_PATH = "../TMP/2c_RajasegaranFeatures.csv"

# Working folder: report whether it was already there, create it otherwise
TMP_PATH = "../TMP"
if os.path.exists(TMP_PATH):
    print("📁✅ Folder already exists:", TMP_PATH)
else:
    os.makedirs(TMP_PATH)
    print("📁🆕 Folder created       :", TMP_PATH)

In [None]:
# Fixed seed, passed as `random_state` to the sparse random projection of the style embedding
RANDOM_SEED = 151836

### 1. Load Data

In [None]:
# Read the ground-truth CSV and keep only the app hash and its class label
appsDF = pd.read_csv(INPUT_PATH, index_col=False)[['sha256', 'classID']]
print("#️⃣ Apps: {}".format(len(appsDF.index)))

# Notebook preview of the first rows
appsDF.head(3)

### 2. Load Pretrained Model

In [None]:
# Pretrained VGG19 (ImageNet weights), classifier head included
vggModel = VGG19(include_top=True, weights='imagenet')

# Print every layer together with its position in the model
for layerIndex, vggLayer in enumerate(vggModel.layers):
    print("Layer {}: {}".format(layerIndex, vggLayer.name))

In [None]:
# Build a truncated copy of a model that stops at the requested layer
def getModifiedModel(model,desiredOutputLayer):
    """Return a multi-output model covering layers 0..desiredOutputLayer.

    The returned model shares the input of `model` and exposes the output of
    every layer up to and including index `desiredOutputLayer`, in layer
    order, so the requested layer's activation is the last output.
    """
    keptLayers = model.layers[:desiredOutputLayer + 1]
    layerOutputs = [keptLayer.output for keptLayer in keptLayers]
    return Model(inputs=model.input, outputs=layerOutputs)

# Run the model and keep only its final output
def getVggEmbedding(X, model):
    """Return the last element of what `model.predict(X)` produces.

    `model` is expected to be a multi-output model (see getModifiedModel),
    so the last element is the activation of the deepest retained layer.
    Prediction runs silently (verbose=0).
    """
    layerActivations = model.predict(X, verbose=0)
    return layerActivations[-1]

### 3. Preprocess Images

In [None]:
# Folder holding the app icons; preprocessImg reads one "<sha256>.png" file from it per app
ICONS_PATH = "../TMP/appIcons/"

In [None]:
# Preprocess images for VGG Input
def preprocessImg(sha256, iconsPath):
    """Load one app icon and turn it into a VGG19-ready input batch.

    Parameters
    ----------
    sha256 : str
        App hash; the icon is read from ``<iconsPath>/<sha256>.png``.
    iconsPath : str
        Folder containing the icons, with or without a trailing separator.

    Returns
    -------
    numpy.ndarray
        The preprocessed icon with a leading batch axis of size 1
        (shape (1, 224, 224, 3) with Keras' default channels-last format).
    """
    # os.path.join tolerates a missing trailing slash, unlike plain concatenation
    iconPath = os.path.join(iconsPath, sha256 + ".png")

    # The context manager releases the file handle as soon as the pixels are copied
    with Image.open(iconPath) as icon:
        img = icon.convert("RGB")  # Force 3 channels: VGG19 takes RGB input, no alpha

    img = img.resize((224, 224))  # VGG19 input resolution
    x = img_to_array(img)
    x = preprocess_input(x)       # Same normalisation the ImageNet weights were trained with
    x = np.expand_dims(x, axis=0) # Add the batch axis expected by model.predict

    return x

In [None]:
# Turn every icon into a VGG19 input tensor, stored per app in 'imageFeatures'
print("\n⛏️ 1) Preprocess Images")
appsDF['imageFeatures'] = appsDF['sha256'].progress_apply(lambda appHash: preprocessImg(appHash, ICONS_PATH))

# Sanity check on the first app
print("\n📐 1) Tensor Shape: {}".format(appsDF.at[0, 'imageFeatures'].shape))

### 4. Content Embedding - VGG19 (fc_2 layer)

In [None]:
# I want the output from the second fully connected layer (Keras layer name "fc2",
# index 24 in VGG19 with include_top=True). The index is resolved from the layer
# name so a different layer listing fails loudly instead of picking a wrong layer.
fc2LayerIndex = [vggLayer.name for vggLayer in vggModel.layers].index("fc2")
contentVggModel = getModifiedModel(vggModel, fc2LayerIndex)

# Get all the layers with their names
#for i, layer in enumerate(contentVggModel.layers):
#    print("Layer {}: {}".format(i,layer.name))

In [None]:
# Content embedding: activation of the last layer of contentVggModel for every icon
print("\n⛏️ 2) VGG19 fc_2 layer Extraction")
appsDF['contentEmbedding'] = appsDF['imageFeatures'].progress_apply(lambda imgTensor: getVggEmbedding(imgTensor, contentVggModel))

# Flatten each activation into a plain 4096-long vector
print("\n⛏️ 2b) Reshape")
appsDF['contentEmbedding'] = appsDF['contentEmbedding'].progress_apply(lambda activation: activation.reshape(4096))

# Sanity check on the first app
print("\n📐 2) Tensor Shape: {}".format(appsDF.at[0, 'contentEmbedding'].shape))

### 5. Style Embedding - VGG19 (block5_conv1 layer) + Gram Matrix + Very Sparse Random Projection

In [None]:
# I want the output from the block5_conv1 layer (index 17 in VGG19). The index is
# resolved from the layer name so a different layer listing fails loudly instead
# of picking a wrong layer.
block5Conv1Index = [vggLayer.name for vggLayer in vggModel.layers].index("block5_conv1")
styleVggModel = getModifiedModel(vggModel, block5Conv1Index)

# Get all the layers with their names
#for i, layer in enumerate(styleVggModel.layers):
#    print("Layer {}: {}".format(i,layer.name))

In [None]:
def applyGramMatrix(tensor):
    """Return the flattened upper triangle of the normalised Gram matrix.

    Parameters
    ----------
    tensor : array-like
        Feature map whose LAST axis is the channel axis, e.g. a
        (1, 14, 14, 512) VGG19 activation. All leading axes are flattened
        into one axis of spatial positions, so any (1, H, W, C) map works.

    Returns
    -------
    numpy.ndarray
        1-D vector of length C * (C + 1) / 2 holding the upper triangle
        (diagonal included, row-major order) of ``F.T @ F / (positions * C)``,
        where F is the (positions, C) flattened feature map.
    """
    features = np.asarray(tensor)
    channels = features.shape[-1]

    # One row per spatial position, one column per channel
    flatFeatures = features.reshape(-1, channels)

    # Channel-by-channel correlations, normalised by the number of activations
    gramMatrix = np.matmul(flatFeatures.T, flatFeatures)
    normalizedGramMatrix = gramMatrix / flatFeatures.size

    # The Gram matrix is symmetric, so the upper triangle carries all the
    # information; indexing it directly avoids building a zero-filled copy
    return normalizedGramMatrix[np.triu_indices(channels)]

def applySparseRandomProjection(matrix, nComponents=4096):
    """Reduce the rows of `matrix` with a seeded sparse random projection.

    Parameters
    ----------
    matrix : array-like of shape (n_samples, n_features)
        One feature vector per row.
    nComponents : int, default 4096
        Dimensionality of the projected vectors.

    Returns
    -------
    Projected matrix of shape (n_samples, nComponents). The result is always
    2-D, even for a single sample, so it maps one row per sample when
    assigned back to a DataFrame column.
    """
    # density='auto' lets scikit-learn choose the sparsity of the projection
    # matrix; the fixed seed makes the projection reproducible across runs
    projection = SparseRandomProjection(n_components=nComponents, density='auto', random_state=RANDOM_SEED)

    # No squeeze here: it would collapse a one-sample result to a 1-D vector
    projectedMatrix = projection.fit_transform(matrix)

    return projectedMatrix

In [None]:
# Style features start from the activation of the last layer of styleVggModel
print("\n⛏️ 3) VGG19 bloc5_conv1 Extraction")
appsDF['styleEmbedding'] = appsDF['imageFeatures'].progress_apply(lambda imgTensor: getVggEmbedding(imgTensor, styleVggModel))
print("\n📐 3) Tensor Shape: {}".format(appsDF.at[0, 'styleEmbedding'].shape))

In [None]:
# Replace every activation by the flattened upper triangle of its Gram matrix
print("\n⛏️ 4) GramMatrix")
appsDF['styleEmbedding'] = appsDF['styleEmbedding'].progress_apply(applyGramMatrix)
print("\n📐 4) Tensor Shape: {}".format(appsDF.at[0, 'styleEmbedding'].shape))

In [None]:
print("\n⛏️ 5) Sparse Random Projection")

# Stack the per-app Gram vectors into one matrix (one row per app), project
# all rows together, then write the projected rows back as plain lists
styleMatrix = applySparseRandomProjection(np.array(appsDF['styleEmbedding'].to_list()))
appsDF['styleEmbedding'] = styleMatrix.tolist()

print("📐 5) Tensor Shape: {}".format(len(appsDF.at[0, 'styleEmbedding'])))

### 6. Save everything

In [None]:
# The raw image tensors are not part of the saved features
appsDF = appsDF.drop(columns='imageFeatures')

# Content embeddings are still numpy arrays; store them as plain lists
print("⛏️ 6) Reorganizing features as lists")
appsDF['contentEmbedding'] = appsDF['contentEmbedding'].progress_apply(lambda embedding: embedding.tolist())

# Write the final table (sha256, classID, contentEmbedding, styleEmbedding)
appsDF.to_csv(OUTPUT_PATH, index=False)
appsDF.head(3)

In [None]:
# Banner printed once the whole notebook has finished running
print("\n🔚 END \n")