## ⛏️  App Restricted APIs - Embedding

#### Imports

In [None]:
#IMPORT
from   sklearn.feature_extraction.text  import TfidfVectorizer
from   tqdm                             import tqdm
import pandas                           as pd
import ast

In [None]:
# Register tqdm with pandas so that Series/DataFrame objects expose
# `progress_apply`, which the list-parsing cell below relies on.
tqdm.pandas()

In [None]:
# Banner marking the beginning of the notebook run
print("⚡ START ⚡")

#### Parameters

In [None]:
# Where to temporarily store APK Files
# NOTE(review): not referenced by any cell visible in this notebook — confirm it is still needed.
APK_PATH    = "../../../0_Data/APKS/"

# Ground-Truth Dataset: input CSV loaded into `appsDF` in the "Load Data" step
INPUT_PATH  = "../TMP/4c_AppRestrictedApiData.csv"

# Output Path: CSV that receives the sha256 / classID / restrictedApiFeatures columns
OUTPUT_PATH = "../TMP/4c_AppRestrictedApiFeatures.csv"

In [None]:
# Separator used to join each app's API list into a single string and to split
# that string back into tokens inside the vectorizer's tokenizer.
# Presumably chosen because it never occurs inside an API name — TODO confirm.
DELIMITER = "&&&"

### 1. Load Data

In [None]:
# Load the input table; index_col=False keeps the first CSV column as
# regular data instead of promoting it to the index.
appsDF = pd.read_csv(filepath_or_buffer=INPUT_PATH, index_col=False)

# One row per app
print("#️⃣ Apps: {}".format(len(appsDF)))

# Preview the first rows (rendered as the cell output)
appsDF.head(n=3)

In [None]:
# Step 1: each 'apisList' cell is parsed from its textual form back into a
# Python object with ast.literal_eval (expected to yield a list).
print("\n🔨 Reading data as lists")
appsDF['apisList'] = appsDF['apisList'].progress_apply(ast.literal_eval)

# Step 2: replace every list with a sorted copy of itself.
print("\n🔨 Order the lists")
appsDF['apisList'] = appsDF['apisList'].progress_apply(sorted)

Details

In [None]:
def getAvgLen(appsDF, column):
    """Return the average length of the values stored in a DataFrame column.

    Missing values (None / NaN) are skipped, so the numerator and the divisor
    are computed over the same set of rows.

    Args:
        appsDF: DataFrame holding the column to measure.
        column: Name of a column whose non-missing values support ``len()``.

    Returns:
        The mean of ``len(value)`` over the non-missing values, or NaN when
        the column has no non-missing values.
    """
    # Drop missing entries first: len() is undefined for None/NaN.
    lengths = appsDF[column].dropna().map(len)

    # Explicit guard instead of relying on a 0 / 0 division.
    if lengths.empty:
        return float("nan")

    return lengths.sum() / len(lengths)

In [None]:
# Report the average number of entries per app in 'apisList'
print("📐 AVG Api List Len: {}".format(getAvgLen(appsDF, column='apisList')))

### 2. Generate Feature Vectors for API Calls using TFIDF Vectorizer

In [None]:
# Create a TfidfVectorizer to transform the API lists into feature vectors.
# Each app's list is joined into one DELIMITER-separated document and the
# custom tokenizer splits it back, so every API name is a single token.
# max_features keeps only the 1024 terms with the highest corpus frequency.
# token_pattern=None: the default pattern is ignored whenever a tokenizer is
# supplied, and leaving it set makes scikit-learn emit a UserWarning.
# NOTE(review): the default lowercase=True folds API names that differ only in
# case into the same feature — confirm this is intended.
vectorizer = TfidfVectorizer(
    tokenizer     = lambda text: text.split(DELIMITER),
    token_pattern = None,
    max_features  = 1024,
)

# Generate the feature vectors and save them into the DataFrame
# (sparse matrix -> dense array -> one Python list per app)
appsDF['restrictedApiFeatures'] = vectorizer.fit_transform(
    [DELIMITER.join(lst) for lst in appsDF['apisList'].values]
).toarray().tolist()

# Positional access, so this does not depend on the index holding the label 0
print("📐 FV Len: {}".format(len(appsDF['restrictedApiFeatures'].iloc[0])))

# Example of feature names
featureNames = vectorizer.get_feature_names_out()
print("\n✏️ Example of Features Names:")
for f in featureNames[:5]:
    print(f)

### 3. Save everything to CSV

In [None]:
# Keep only the app hash, its class label and the generated feature vector
appsDF = appsDF.loc[:, ['sha256', 'classID', 'restrictedApiFeatures']]

# Write the result to disk without the index column
appsDF.to_csv(path_or_buf=OUTPUT_PATH, index=False)

# Preview the saved rows (rendered as the cell output)
appsDF.head(n=5)

In [None]:
# Banner marking the end of the notebook run
print("\n🔚 END \n")