In [44]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from tqdm import tqdm
import time
import certifi
import urllib3

In [41]:
# Headers for request
headers = ({'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36', 'Accept-Language':'en-US, en;q=0.5'})

In [42]:
BASE_URL = "https://www.mobiledokan.com"

In [36]:
LISTING_URL = BASE_URL + "/mobile-price-list?page={}"

In [46]:
# Disable SSL warnings
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

In [52]:
def get_product_links(page=1):
    url = f'https://www.mobiledokan.com/mobile-price-list?page={page}'
    res = requests.get(url, headers=headers, verify=False)
    soup = BeautifulSoup(res.text, 'html.parser')
    
    product_links = []

    for a in soup.select('.product-box a'):
        href = a.get('href')
        if href and href.startswith('https://www.mobiledokan.com/mobile/'):
            product_links.append(href)

    return list(set(product_links)) 

In [53]:
# Function to get individual product details (name, price, and all specs)
def get_product_specs(url):
    res = requests.get(url, headers=headers, verify=False)
    soup = BeautifulSoup(res.text, 'html.parser')

    # Get Model Name from the specs section
    name_tag = soup.select_one('#product-specs h2')
    name = None
    if name_tag:
        name_text = name_tag.get_text(strip=True)
        name = name_text.replace(' Full Specifications', '').strip()

    # Get Price
    price_tag = soup.select_one('.price span.h3')
    price = None
    if price_tag:
        price_raw = price_tag.get_text(strip=True)
        price = price_raw.replace('৳.', '').split('(')[0].strip()

    # Get Specs
    specs = {}
    for row in soup.select('#product-specs table tr'):
        tds = row.find_all('td')
        if len(tds) == 2:
            key = tds[0].text.strip()
            value = tds[1].text.strip()
            specs[key] = value

    return {
        'name': name,
        'price': price,
        'specs': specs,
        'url': url
    }



In [54]:
# Scrape all product links from all pages
all_product_links = []
for page in tqdm(range(1, 253)):  # 252 pages
    links = get_product_links(page)
    all_product_links.extend(links)
    time.sleep(1)

100%|████████████████████████████████████████████████████████████████████████████████| 252/252 [10:23<00:00,  2.47s/it]


In [55]:
# Scrape data for each product
all_data = []
for link in tqdm(all_product_links):  # Loop through each product
    data = get_product_specs(link)
    if data['name']:  # Skip products without name
        all_data.append(data)
    time.sleep(1)

 72%|██████████████████████████████████████████████████████▍                     | 3600/5021 [1:56:28<45:58,  1.94s/it]


TooManyRedirects: Exceeded 30 redirects.

In [56]:
import pandas as pd

df = pd.DataFrame(all_data)
df.to_csv("mobiledokan_scraped_partial.csv", index=False)
print("Saved 72% data to CSV successfully.")

Saved 72% data to CSV successfully.


In [57]:
# Step 1: Load saved data
df_existing = pd.read_csv("mobiledokan_scraped_partial.csv")
scraped_urls = set(df_existing['url']) 

# Step 2: Filter remaining product URLs
remaining_links = [link for link in all_product_links if link not in scraped_urls]

# Step 3: Resume scraping
all_data = []

for link in tqdm(remaining_links):
    try:
        data = get_product_specs(link)
        if data['name']:
            all_data.append(data)
            # Save each successful one to avoid data loss
            temp_df = pd.DataFrame([data])
            df_existing = pd.concat([df_existing, temp_df], ignore_index=True)
            df_existing.to_csv("mobiledokan_scraped_partial.csv", index=False)
    except requests.exceptions.TooManyRedirects:
        print(f"Too many redirects on {link}, skipping.")
    except Exception as e:
        print(f"Error on {link}: {e}")
    time.sleep(1)

  0%|                                                                                         | 0/1421 [00:00<?, ?it/s]

Too many redirects on https://www.mobiledokan.com/mobile/apple-iphone-se-64gb, skipping.


100%|████████████████████████████████████████████████████████████████████████████| 1421/1421 [1:10:07<00:00,  2.96s/it]
