## ⛏️ App Libraries - Embedding

Generate feature vectors from the list of libraries used by each app.

### Imports

In [None]:
# IMPORT
from sklearn.feature_extraction.text    import TfidfVectorizer
from   tqdm                             import tqdm
import pandas                           as pd
import numpy                            as np
import ast

In [None]:
# Register tqdm's pandas integration so Series/DataFrames gain `progress_apply`,
# which the "Reading data as lists" cell relies on to show progress bars.
tqdm.pandas()

In [None]:
# Visual marker in the cell output showing that the notebook run has begun
print("⚡ START ⚡")

#### Parameters

In [None]:
# Where to temporarily store APK files.
# NOTE(review): not referenced by any other cell visible in this notebook — confirm it is still needed.
APK_PATH    = "../../../0_Data/APKS/"

# Input CSV loaded into `appsDF` by the "1. Load Data" cell
# (presumably the output of the app-libraries preprocessing step — verify).
INPUT_PATH  = "../TMP/4f_AppLibrariesDataPreprocessed.csv"

# Output CSV written by the "3. Save Everything" cell with the generated feature vectors
OUTPUT_PATH = "../TMP/4f_AppLibrariesFeatures.csv"

### 1. Load Data

In [None]:
# Load the input table; index_col=False keeps pandas from using the first column as the index
appsDF = pd.read_csv(INPUT_PATH, index_col=False)

# Report how many rows (apps) were loaded
print("#️⃣ Apps: {}".format(len(appsDF.index)))

# Preview the first rows as the cell output
appsDF.head(3)

In [None]:
# Both library columns hold Python list literals stored as text;
# ast.literal_eval turns each cell back into an actual list.
print("\n🔨 Reading data as lists")
for listColumn in ('appLibraries', 'appSystemLibraries'):
    appsDF[listColumn] = appsDF[listColumn].progress_apply(ast.literal_eval)

### 2. Generate Feature Vectors

In [None]:
# Separator used to flatten each library list into a single string for the
# vectorizer and to split that string back into one token per library.
# It was referenced below without ever being defined in this notebook, which
# raised a NameError. A newline is used because it cannot plausibly occur
# inside a library name, so the join/split round trip is lossless.
DELIMITER = "\n"

# Create an instance of Vectorizer to transform the library lists into feature vectors.
# The custom tokenizer makes every library name exactly one token; max_features
# caps each fitted vocabulary at 1536 entries.
vectorizer = TfidfVectorizer(tokenizer = lambda text: text.split(DELIMITER), max_features = 1536)

# Vectorize the libraries. fit_transform re-fits the vectorizer on every call,
# so each column gets its own vocabulary (and its own vector length).
appsDF['appLibrariesFeatures']       = vectorizer.fit_transform([DELIMITER.join(lst) for lst in appsDF['appLibraries'].values]).toarray().tolist()
appsDF['appSystemLibrariesFeatures'] = vectorizer.fit_transform([DELIMITER.join(lst) for lst in appsDF['appSystemLibraries'].values]).toarray().tolist()

# Report the resulting vector length of each feature column (taken from the row labelled 0)
print("📐 FV App Libraries Len        : {}".format(len(appsDF.loc[0,'appLibrariesFeatures'])))
print("📐 FV App System LIbraries Len : {}".format(len(appsDF.loc[0,'appSystemLibrariesFeatures'])))

### 3. Save Everything

In [None]:
# Keep only sha256, classID and the two feature columns, then write them to OUTPUT_PATH
appsDF = appsDF.loc[:, ['sha256', 'classID', 'appLibrariesFeatures', 'appSystemLibrariesFeatures']]
appsDF.to_csv(path_or_buf=OUTPUT_PATH, index=False)

# Preview what was saved as the cell output
appsDF.head(5)

In [None]:
# Visual marker in the cell output showing that the notebook ran to completion
print("\n🔚 END \n")
