## 🔢 Description Crawler

In [None]:
# Imports
import numpy    as np
import pandas   as pd
import google_play_scraper 
import langdetect
import datetime
import requests
import os

#### Initialization

In [None]:
# Capture the start timestamp once so that the value printed here is exactly
# the value later used to compute the elapsed time of the run.
startTime = datetime.datetime.now()
print("⚡ Start - {} ⚡\n".format(startTime))

#### 📥 1) Load Data 

In [None]:
# Path of the input CSV, relative to the directory the notebook is run from
DATA_PATH = "../../0_Data/3_MalCatSet.csv"

# Load the CSV into a DataFrame
appsDF = pd.read_csv(filepath_or_buffer=DATA_PATH)

# TEST: uncomment to work on the first 10 rows only
#appsDF = appsDF.head(10)

# Report how many rows were loaded
print("--- #️⃣ Apps: {} ".format(len(appsDF)))

In [None]:
# Preview the first five rows of the loaded data
appsDF.head(n=5)

In [None]:
def getGoogleEnglishDescription(pkgName):
    """Return the English Google Play description of a package, or NaN.

    The description is requested through google_play_scraper (lang='en',
    country='us') and its language is checked with langdetect. Line feeds
    are replaced by a space and carriage returns are removed.

    Args:
        pkgName: Package name passed to google_play_scraper.app.

    Returns:
        A one-element pandas Series indexed by 'description'. The value is
        the cleaned description when it is detected as English, and NaN in
        every other case (no result, no description, another language,
        language detection failure, or any other exception).
    """
    # Single fallback value shared by every "no usable description" path
    missing = pd.Series([np.nan], index=['description'])

    try:
        # Query the Play Store through the google_play_scraper library
        appInfo = google_play_scraper.app(pkgName, lang='en', country='us')

        if appInfo is None or appInfo['description'] is None:
            return missing
        rawText = appInfo['description']

        try:
            detectedLang = langdetect.detect(rawText)
        except langdetect.LangDetectException:
            # langdetect could not determine a language for this text
            return missing

        if detectedLang != "en":
            return missing

        # Flatten the description onto a single line
        cleaned = rawText.replace('\n', ' ').replace('\r', '')
        return pd.Series([cleaned], index=['description'])
    except Exception:
        # Best effort: any failure is reported as a missing description
        return missing

In [None]:
# Look up the description of every package listed in the 'pkgName' column
appsDF['description'] = appsDF['pkgName'].apply(getGoogleEnglishDescription)

# Discard the rows for which no description could be obtained,
# reporting the row count before and after
print(f"Initial size of the DataFrame: {len(appsDF)}")
hasDescription = appsDF['description'].notna()
appsDF = appsDF[hasDescription]
print(f"Size after removing NaN values: {len(appsDF)}")

# Preview the first five remaining rows
appsDF.head(n=5)

In [None]:
# curl -G -d apikey=${APIKEY} 'https://androzoo.uni.lu/api/get_gp_metadata/occam.hammer.drone'

In [None]:
# Function to get metadata for a package name from AZ
def getMetadata(pkgName, timeout=30):
    """Fetch the Google Play metadata of a package from the AndroZoo API.

    Args:
        pkgName: Package name, interpolated into the request URL.
        timeout: Seconds to wait for the server before giving up. Passed
            straight to requests.get; defaults to 30.

    Returns:
        The decoded JSON body when the server answers with HTTP 200,
        otherwise None.

    Raises:
        requests.exceptions.RequestException: If the request cannot be
            completed, including when the timeout expires.
    """
    url = 'https://androzoo.uni.lu/api/get_gp_metadata/{}'.format(pkgName)
    # The API key is taken from the ANDROZOO_API_KEY environment variable
    params = {'apikey': os.getenv('ANDROZOO_API_KEY')}

    # requests applies no timeout by default, so without an explicit one a
    # stalled connection would block the caller indefinitely
    response = requests.get(url, params=params, timeout=timeout)
    if response.status_code != 200:
        return None
    return response.json()

##### 🔚 End

In [None]:
endTime = datetime.datetime.now()
print("\n🔚 --- End - {} --- 🔚".format(endTime))

# endTime and startTime are datetime objects, so their difference is a
# timedelta; split its total number of seconds into whole minutes and the
# remaining seconds
totalTime = endTime - startTime
minutes, seconds = divmod(totalTime.total_seconds(), 60)
print("⏱️ --- Time: {:02d} minutes and {:02d} seconds --- ⏱️".format(int(minutes), int(seconds)))