In [1]:
import os
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
import time

In [2]:
def extract_data(driver):
    """Collect the fields of every app row on the page loaded in ``driver``.

    Waits up to 10 seconds for ``div`` elements whose class is exactly
    ``appRow`` to be present, then reads the title, detail link, developer,
    upload date and download link from each row.

    A row for which any lookup raises is reported with ``print`` and skipped
    as a whole, so the five returned lists always have the same length.

    Args:
        driver: WebDriver instance with the listing page already loaded.

    Returns:
        A tuple of five lists, in this order: app titles, developers,
        upload dates, app links, APK links. Five empty lists are returned
        when waiting for the rows raises.
    """
    row_locator = (By.XPATH, "//div[@class='appRow']")
    try:
        rows = WebDriverWait(driver, 10).until(
            EC.presence_of_all_elements_located(row_locator)
        )
    except Exception as e:
        print(f"Error finding app rows: {e}")
        return [], [], [], [], []

    # One tuple per successfully parsed row, already in return order.
    records = []
    for row in rows:
        try:
            title = row.find_element(By.CLASS_NAME, 'appRowTitle').text
            title_anchor = row.find_element(By.CLASS_NAME, 'appRowTitle').find_element(By.TAG_NAME, 'a')
            detail_link = title_anchor.get_attribute('href')
            author = row.find_element(By.CLASS_NAME, 'byDeveloper').text
            uploaded = row.find_element(By.CLASS_NAME, 'dateyear_utc').text
            download_link = row.find_element(By.CLASS_NAME, 'downloadLink').get_attribute('href')
        except Exception as e:
            print(f"Error extracting data for an app: {e}")
            continue
        # Only reached when every field was read, which keeps columns aligned.
        records.append((title, author, uploaded, detail_link, download_link))

    # Transpose the records into five parallel lists (works for zero records too).
    return tuple([record[column] for record in records] for column in range(5))

In [3]:
url = 'https://www.apkmirror.com/?post_type=app_release&searchtype=apk&s=famapp'

app_titles, developers, upload_dates, app_links, apk_links = [], [], [], [], []

# `driver` starts as None so the `finally` below can tell whether a browser
# was actually started and needs to be shut down.
driver = None
try:
    # Configure the WebDriver and load the URL
    try:
        driver = webdriver.Chrome()
        driver.get(url)
        print("URL loaded")
    except Exception as e:
        # Report and re-raise: continuing without a loaded page would only
        # fail later with a NameError or produce an empty DataFrame.
        print(f"Error loading URL: {e}")
        raise

    # wait for the page to load
    time.sleep(5)

    while True:
        # Extract data from the current page
        titles, devs, dates, links, apk_links_page = extract_data(driver)
        app_titles.extend(titles)
        developers.extend(devs)
        upload_dates.extend(dates)
        app_links.extend(links)
        apk_links.extend(apk_links_page)

        # find_elements returns an empty list when there is no "next" link,
        # so a missing link ends the loop while genuine WebDriver errors
        # propagate instead of being reported as "No more pages found".
        next_links = driver.find_elements(By.CLASS_NAME, "nextpostslink")
        next_page_url = next_links[0].get_attribute("href") if next_links else None
        if not next_page_url:
            print("No more pages found")
            break
        driver.get(next_page_url)
finally:
    # Always close the browser, even when scraping fails part-way through.
    if driver is not None:
        driver.quit()

# Save data to a dataframe
data = {'App Title': app_titles,
        'Developer': developers,
        'Upload Date': upload_dates,
        'App Link': app_links,
        'APK Link': apk_links}

df = pd.DataFrame(data)

URL loaded
Error extracting data for an app: Message: no such element: Unable to locate element: {"method":"css selector","selector":".byDeveloper"}
  (Session info: chrome=113.0.5672.127)
Stacktrace:
Backtrace:
	GetHandleVerifier [0x00A08893+48451]
	(No symbol) [0x0099B8A1]
	(No symbol) [0x008A5058]
	(No symbol) [0x008D0467]
	(No symbol) [0x008D069B]
	(No symbol) [0x008C9631]
	(No symbol) [0x008EA304]
	(No symbol) [0x008C9586]
	(No symbol) [0x008EA614]
	(No symbol) [0x008FC482]
	(No symbol) [0x008EA0B6]
	(No symbol) [0x008C7E08]
	(No symbol) [0x008C8F2D]
	GetHandleVerifier [0x00C68E3A+2540266]
	GetHandleVerifier [0x00CA8959+2801161]
	GetHandleVerifier [0x00CA295C+2776588]
	GetHandleVerifier [0x00A92280+612144]
	(No symbol) [0x009A4F6C]
	(No symbol) [0x009A11D8]
	(No symbol) [0x009A12BB]
	(No symbol) [0x00994857]
	BaseThreadInitThunk [0x75E77EC9+25]
	RtlInitializeExceptionChain [0x77B0D22B+107]
	RtlGetAppContainerNamedObjectPath [0x77B0D1AD+237]

Error extracting data for an app: Messa

In [4]:
# Write the DataFrame to 'apkmirror_fam.csv' without the index column.
df.to_csv(path_or_buf='apkmirror_fam.csv', index=False)

In [10]:
# Display the "App Link" column.
df.loc[:, "App Link"]

0     https://www.apkmirror.com/apk/fampay/fampay-pr...
1     https://www.apkmirror.com/apk/fampay/fampay-pr...
2     https://www.apkmirror.com/apk/fampay/fampay-pr...
3     https://www.apkmirror.com/apk/fampay/fampay-pr...
4     https://www.apkmirror.com/apk/fampay/fampay-pr...
                            ...                        
66    https://www.apkmirror.com/apk/fampay/fampay-pr...
67    https://www.apkmirror.com/apk/fampay/fampay-pr...
68    https://www.apkmirror.com/apk/fampay/fampay-pr...
69    https://www.apkmirror.com/apk/fampay/fampay-pr...
70    https://www.apkmirror.com/apk/fampay/fampay-pr...
Name: App Link, Length: 71, dtype: object


In [5]:
import re

def convert_url(input_url):
    """Build the APK download-page URL for the version found in ``input_url``.

    The first run of three dash-separated numbers in ``input_url``
    (for example ``3-1-2``) is taken as the version and inserted into a
    fixed FamPay URL template.

    Args:
        input_url: URL string containing a version such as ``3-1-2``.

    Returns:
        The constructed download-page URL as a string.

    Raises:
        TypeError: if ``input_url`` is not a string (for example a NaN read
            back from a CSV file).
        ValueError: if no version number is found in ``input_url``.
    """
    # define the pattern to match the version number
    pattern = r'(\d+-){2}\d+'

    # define the base URL and the version string
    base_url = 'https://www.apkmirror.com/apk/fampay/fampay-prepaid-card-payments-for-teenagers/'
    version_string = 'fampay-teen-upi-payment-app-'

    if not isinstance(input_url, str):
        raise TypeError(
            f"convert_url expects a URL string, got {type(input_url).__name__}: {input_url!r}"
        )

    # re.search returns None when nothing matches; check explicitly instead
    # of failing with an AttributeError on `.group`.
    match = re.search(pattern, input_url)
    if match is None:
        raise ValueError(f"No version number found in URL: {input_url!r}")
    version_number = match.group(0)

    # construct and return the output URL
    return (
        base_url + version_number + '-release/'
        + version_string + version_number + '-android-apk-download/'
    )


In [7]:
# Reload the saved listing from 'apkmirror_fam.csv', pass every value of the
# 'APK Link' column through convert_url, and write the result to
# 'apkmirror_fam_more.csv' without the index column.
df = pd.read_csv('apkmirror_fam.csv')
df['APK Link'] = df['APK Link'].map(convert_url)
df.to_csv(path_or_buf='apkmirror_fam_more.csv', index=False)