## 🧪 Application Phase: Anomaly Detection

In [None]:
# Imports
from   dotenv   import load_dotenv
import pandas   as pd
import datetime
import sys
import os

# Add the upper folder to sys.path
sys.path.insert(0, "../")
from   Testing     import TestingManager, AnomalyDetectionResults
from   RedisClient import RedisClient
from   App         import App

#### Parameters

In [None]:
# TMP folder: scratch directory that the initialization step creates when it is missing.
# NOTE(review): the path is relative, so it presumably resolves against the
# notebook's working directory — confirm before running from elsewhere.
TMP_PATH    = "../../../0_Data/TMP/"

#### Initialization

In [None]:
# Take a single timestamp so that the start time shown to the user is exactly
# the one used at the end of the notebook to compute the elapsed time.
startTime = datetime.datetime.now()
print("⚡ Start - {} ⚡\n".format(startTime))

In [None]:
# Make sure the TMP folder is available, reporting whether it had to be created
if os.path.exists(TMP_PATH):
	print("📁✅ Folder already exists: {}\n".format(TMP_PATH))
else:
	os.makedirs(TMP_PATH)
	print("📁🆕 Folder created       : {}\n".format(TMP_PATH))

#### 📥 1) Load Data 

In [None]:
# Prefix shared by the Redis project keys built in this notebook:
# both the extraction key and the ".embeddings" key start with it.
REDIS_PREFIX = "test"

In [None]:
# Components that, together with the prefix, identify the Redis project to read from
DATASET          = "androcatset"
DIRECTION        = "backward"
SOURCES_APPROACH = "nosources"

# Key layout: <prefix>.<dataset>.<direction>.<sources approach>
redisProjectKey = ".".join((REDIS_PREFIX, DATASET, DIRECTION, SOURCES_APPROACH))
print("--- 🔑 Redis Key: ", redisProjectKey)

📡 Redis Connection

In [None]:
# Populate the environment from the .env file, then read the Redis settings from it
load_dotenv()
_extractionSettings = {
	"host"       : os.getenv("REDIS_SERVER"),
	"port"       : os.getenv("REDIS_PORT"),
	"db"         : os.getenv("REDIS_DB"),
	"password"   : os.getenv("REDIS_PSW"),
	"projectKey" : redisProjectKey,
}
# Client bound to the project key of the extracted data flows
redisClientExtraction = RedisClient(**_extractionSettings)

In [None]:
# Input CSV listing the apps to analyse. Keep exactly one INPUT_PATH uncommented;
# previously both were assigned and the first value was silently overwritten.
#INPUT_PATH   = "../../../0_Data/2_AndroCatSet_TrainingSet.csv"
INPUT_PATH   = "../../../0_Data/1_AndroCatSet_Mini.csv"

# Read the data: one row per app. The columns used later in this notebook
# are 'sha256', 'pkgName' and 'classID'.
appsDF = pd.read_csv(INPUT_PATH)
print("--- #️⃣ Apps: {} ".format(appsDF.shape[0]))

# Preview the first rows (displayed as the cell output)
appsDF.head(5)

#### 🏷️ 2) Load Data-Flow Embeddings and Test the Model

In [None]:
# Redis client for the project key under which the embeddings are stored
redisClientEmbedding = RedisClient(
	host       = os.getenv("REDIS_SERVER"),
	port       = os.getenv("REDIS_PORT"),
	db         = os.getenv("REDIS_DB"),
	password   = os.getenv("REDIS_PSW"),
	projectKey = f"{REDIS_PREFIX}.embeddings",
)

In [None]:
# Embedding model under test. The name selects the per-category model file
# ("<MODELS_PATH><categoryID>/<name>.joblib") and the results file ("<name>.json"),
# and it is passed along when the pairs embeddings are downloaded from Redis.
# Keep exactly one of the assignments below uncommented.
EMBEDDING_MODEL = "gpt"
#EMBEDDING_MODEL = "codebert"
#EMBEDDING_MODEL = "sfr"

In [None]:
# Root folder of the saved models (one sub-folder per category ID)
MODELS_PATH = "../../../0_Data/MODELS/"

# Results live in a per-dataset folder, in one JSON file per embedding model
RESULTS_PATH = f"../../../0_Data/RESULTS/{DATASET}/"
resultsPath = f"{RESULTS_PATH}{EMBEDDING_MODEL}.json"

# Start from a clean slate: drop the results file left over from a previous run
if os.path.isfile(resultsPath):
	os.remove(resultsPath)
	print(f"\n--- 🗑️ Results File Already Exist \n--- 🗑️ Deleting: {resultsPath}")

In [None]:
# Group the apps by category: a separate model file is loaded for each category ID
groupedDF = appsDF.groupby('classID')

for categoryID, categoryDF in groupedDF:
	print("\n🏷️ --- Category ID: {} --- 🏷️".format(categoryID))
	print("--- #️⃣ Num. of apps: {}".format(categoryDF.shape[0]))

	# Testing Manager for this category's model; every category writes to the same results file
	modelPath      = MODELS_PATH + "{}/{}.joblib".format(categoryID, EMBEDDING_MODEL)
	testingManager = TestingManager(modelPath, resultsPath, EMBEDDING_MODEL)
	print(testingManager)

	def processRow(row):
		"""Test a single app against the current category's model.

		Args:
			row: one row of the apps DataFrame; the 'sha256', 'pkgName'
				and 'classID' fields are read from it.

		Side effects:
			Downloads the app's data flows (and, when they are available and
			non-empty, its pairs embeddings) from Redis, runs the anomaly
			detection test, saves the results through ``testingManager`` and
			finally resets ``testingManager.results``.
		"""
		# Print message
		print("\n--- 🔑 Analyzing APK: {} 🔑 ---".format(row['sha256']))

		# Create App instance
		app = App(row['sha256'], row['pkgName'], row['classID'])

		# Get Data Flows From Redis
		app.downloadDataFlowsFromRedis(redisClient = redisClientExtraction, forTraining = False, forTesting=True)

		# Only apps whose data flows were extracted and are not empty can be tested
		if app.dataFlows is not None and not app.dataFlows.isEmpty():

			print("--- ⚙️ Embedding Model  : {}".format(EMBEDDING_MODEL))
			app.downloadPairsEmbeddingsFromRedis(redisClientEmbedding, EMBEDDING_MODEL)

			# Test the app. Best effort: a failure on one app must not stop
			# the whole run, but report what failed (type and message).
			try:
				testingManager.testingAnomalyDetectionModel(app)
			except Exception as e:
				print("--- ❌ Testing failed ({}): {}".format(type(e).__name__, e))

			# Print the results
			if not testingManager.results.isNone():
				print(testingManager.results)
			else:
				print("--- ❌ Results Unavailaible")

		# Save the results (also reached when the app had no usable data flows)
		testingManager.saveResults(app)

		# Reset Results so the next app starts from an empty state
		testingManager.results = AnomalyDetectionResults(None, None)

	# processRow is executed only for its side effects, so iterate explicitly
	# instead of using DataFrame.apply: apply builds a throwaway result and,
	# on pandas < 1.1, may invoke the function twice on the first row.
	for _, appRow in categoryDF.iterrows():
		processRow(appRow)

	print("\n\n" + "++++"*40 + "\n\n")

##### 🔚 End

In [None]:
endTime = datetime.datetime.now()
print("\n🔚 --- End - {} --- 🔚".format(endTime))

# endTime and startTime are datetime objects, so their difference is a timedelta;
# split its total number of seconds into whole minutes and remaining seconds.
totalTime = endTime - startTime
minutes, seconds = divmod(totalTime.total_seconds(), 60)
print("⏱️ --- Time: {:02d} minutes and {:02d} seconds --- ⏱️".format(int(minutes), int(seconds)))