<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Import-packages" data-toc-modified-id="Import-packages-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Import packages</a></span></li><li><span><a href="#Custom-functions" data-toc-modified-id="Custom-functions-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Custom functions</a></span></li><li><span><a href="#Set-request-parameters" data-toc-modified-id="Set-request-parameters-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Set request parameters</a></span></li><li><span><a href="#Crawl-main-page" data-toc-modified-id="Crawl-main-page-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>Crawl main page</a></span></li><li><span><a href="#Crawl-each-sub-page" data-toc-modified-id="Crawl-each-sub-page-5"><span class="toc-item-num">5&nbsp;&nbsp;</span>Crawl each sub-page</a></span></li></ul></div>

### Import packages

In [1]:
import time, re, json, requests, pandas as pd
from bs4 import BeautifulSoup
from datetime import datetime, timedelta

### Custom functions

In [2]:
def process_datetime(soup, class_):
    """Return the date found in a listing element as a POSIX timestamp.

    Looks up the first ``<li>`` element carrying ``class_`` and parses the
    first whitespace-separated token of its first child's text as a
    ``dd/mm/yy`` date.

    Parameters
    ----------
    soup : parsed page exposing ``find(name, class_=...)``.
    class_ : CSS class of the ``<li>`` element that holds the date.

    Returns
    -------
    float or None
        Timestamp of the parsed date (time of day 00:00, naive datetime, so
        it is interpreted in the local timezone). ``None`` when the element
        is missing or has no children.

    Raises
    ------
    ValueError
        If the first token is not a ``dd/mm/yy`` date.
    IndexError
        If the first child's text contains no tokens at all.
    """
    date_item = soup.find('li', class_=class_)
    if date_item is None:
        # No such element on the page: report "unknown" instead of failing
        # with a TypeError while iterating over None.
        return None

    # Only the first child is inspected: the loop returns on its first pass.
    for dt in date_item:
        dt_str = dt.text
        if 'at' in dt_str:
            # Remove the literal "at" so that it can never be the first token.
            dt_str = dt_str.replace('at', '')
        dt_str = dt_str.split()
        return datetime.timestamp(datetime.strptime(dt_str[0], '%d/%m/%y'))
    return None
    
def get_thread_id(soup):
    """Derive the thread id from the page's first ``<link rel="canonical">``.

    The id is whatever follows the last ``.`` in the link's ``href``, with
    every ``/`` removed. Returns ``None`` when the page has no canonical link.
    """
    canonical = next(iter(soup.find_all('link', rel='canonical')), None)
    if canonical is None:
        return None
    last_segment = canonical.get('href').split('.')[-1]
    return last_segment.replace('/', '')
    
def get_seller_info(soup):
    """Collect the seller header of a listing page into a list.

    The children of the ``div.threadview-header--seller`` element are copied
    into a list, and the entries at the odd positions 1, 3, 5 and 7 are
    replaced by extracted values:

    * ``[1]`` - text of the child's ``span``
    * ``[3]`` - timestamp parsed from the ``span`` text (``dd/mm/yy``) when a
      ``span`` is present, otherwise the raw ``data-time`` attribute of the
      child's ``abbr``
    * ``[5]`` and ``[7]`` - text of the child's ``dd``

    All other positions keep the original child nodes.
    """
    seller_box = soup.find('div', class_='threadview-header--seller')
    fields = [child for child in seller_box]

    fields[1] = fields[1].span.text

    joined_node = fields[3]
    if joined_node.span:
        joined_on = datetime.strptime(joined_node.span.text, '%d/%m/%y')
        fields[3] = datetime.timestamp(joined_on)
    else:
        fields[3] = joined_node.abbr['data-time']

    for position in (5, 7):
        fields[position] = fields[position].dd.text
    return fields

### Set request parameters

In [3]:
# Browser-like User-Agent sent with every request.
# The value is assembled with implicit string concatenation: a backslash
# continuation inside the quotes would embed the next line's indentation
# (a long run of spaces) in the header value.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) '
        'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36'
    )
}

### Crawl main page

In [4]:
# Marketplace page
url = 'https://nhattao.com/f/can-mua-trao-doi.589/'

# Create request session (the same session object is used for every request in this notebook)
r = requests.Session()

# Retrieve webpage.
# timeout: without one, a stalled connection would block the cell indefinitely.
# raise_for_status: stop here on an HTTP error instead of parsing an error page.
main_response = r.get(url, headers=headers, timeout=30)
main_response.raise_for_status()

# Parse webpage contents
pages = BeautifulSoup(main_response.text, 'html.parser')

In [5]:
# Determine maximum number of webpages from the pagination links whose text is a number
page_links = pages.find_all('a', attrs={'class': '',
                                        'href': re.compile(r'.*?type=recent&search.*')})
no_pages = [int(link.text) for link in page_links if link.text.isdigit()]

# default=1: when no numeric pagination link matches (presumably a single-page
# listing - confirm), max() of an empty list would raise ValueError.
max_pages = max(no_pages, default=1)
print('Number of pages on website: {}'.format(max_pages))

Number of pages on website: 34


### Crawl each sub-page

In [6]:
# For each page, retrieve the list of postings
combined = []
for i in range(1, max_pages+1):
    print('Processing page {} of {}'.format(i, max_pages))
    # NOTE(review): search_id is hard-coded in this URL; presumably it belongs to one
    # search session on the site - confirm it is still valid before re-running.
    url = 'https://nhattao.com/f/can-mua-trao-doi.589/page-{}?type=recent&search_id=122532862&order=up_time&direction=desc'.format(i)
    page_listings = r.get(url, headers=headers)
    listings = BeautifulSoup(page_listings.text, 'html.parser')

    # Grab all listing links from each page
    listing_links = [
        'https://nhattao.com/' + link.get('href')
        for link in listings.find_all('a', attrs={'href': re.compile(r'threads/.*'),
                                                  'class': 'Nhattao-CardItem--image'})
    ]

    # Retrieve information about each listing
    for j, listing_url in enumerate(listing_links, start=1):
        # Argument order follows the placeholders:
        # current page, page count, link number, links on this page.
        print('Processing page {} of {}, link {} of {}'.format(i, max_pages, j,
                                                               len(listing_links)))
        print(listing_url)
        print('\n')

        # Pause between requests (original intent: prevent timeouts)
        time.sleep(5)
        info = r.get(listing_url, headers=headers)
        data = BeautifulSoup(info.text, 'html.parser')
        seller_info = get_seller_info(data)

        # Optional fields: look each element up once and fall back to 'N/A' when absent
        status_tag = data.find('li', class_='threadview-header--classifiedStatus')
        condition = status_tag.text if status_tag else 'N/A'

        location_tag = data.find('li', class_='threadview-header--classifiedLoc')
        location = location_tag.text.strip() if location_tag else 'N/A'

        address_tag = data.find('span', class_='address')
        addr = address_tag.text.strip() if address_tag else 'N/A'

        phone_tag = data.find('a', class_='threadview-header--contactPhone')
        contact = phone_tag.text.strip().replace(' ', '') if phone_tag else 'N/A'

        # Mandatory fields below raise if the element is missing from the page
        results = {
            'Thread Link': listing_url,
            'Thread ID': get_thread_id(data),
            'Title': data.find('h2').text,
            'Condition': condition,
            'Location': location,
            'Posted Date': process_datetime(data, 'threadview-header--postDate'),
            # Second whitespace-separated token, with '.' separators removed
            'Seen': int(data.find('li', class_='threadview-header--viewCount').text.split()[1].replace('.','')),
            # Price text with the ' đ' suffix and '.' separators removed
            'Price': float(data.find('p', class_='threadview-header--classifiedPrice').text.strip().replace(' đ', '').replace('.','')),
            'Contact': contact,
            'Address': addr,
            'Seller': seller_info[1],
            'Date Joined': seller_info[3],
            'No Products': seller_info[5],
            'Likes': seller_info[7]
        }
        combined.append(results)

# Persist every collected record in one JSON file once the crawl has finished
print('Writing json file')
with open('./nhattao_marketplace.json', 'w') as f:
    json.dump(combined, f)

Processing page 1 of 34
Processing page 1, link 1 of 30
https://nhattao.com/threads/shop-chuyen-thu-mua-may-choi-game-ds-2ds-3ds-psp-vita-ps3-ps4-wiiu-xbox-077-707-1528.7220310/


Processing page 1, link 2 of 30
https://nhattao.com/threads/thu-mua-may-anh-cu-gia-cao-tan-noi-tai-ha-noi.8490455/


Processing page 1, link 3 of 30
https://nhattao.com/threads/can-mua-mot-so-linh-kien-may-tinh-bi-hu-hoac-loi.7296829/


Processing page 1, link 4 of 30
https://nhattao.com/threads/thu-mua-may-game-gia-cao-ps4-ps3-xbox-nintendo-switch-0947700818.6430441/


Processing page 1, link 5 of 30
https://nhattao.com/threads/ban-ten-mien-thegioiruouvang-com.8393484/


Processing page 1, link 6 of 30
https://nhattao.com/threads/ong-kinh-sigma-35mm-f-1-4-dg-hsm-art-for-canon.8524643/


Processing page 1, link 7 of 30
https://nhattao.com/threads/vi-nam.8525203/


Processing page 1, link 8 of 30
https://nhattao.com/threads/giay-nike-womens-air-zoom-vomero-13-running-shoe.8455852/


Processing page 1, link 9 o

Processing page 3, link 9 of 30
https://nhattao.com/threads/in-bao-bi-nilon-dung-gao.8525386/


Processing page 3, link 10 of 30
https://nhattao.com/threads/ban-tai-khoan-google-drive-unlimited-google-drive-khong-gioi-han.8366250/


Processing page 3, link 11 of 30
https://nhattao.com/threads/thanh-long-binh-thuan-gia-si.8495763/


Processing page 3, link 12 of 30
https://nhattao.com/threads/thu-mua-laptop-cu-gia-cao-tphcm-0977419856-nghia.2597794/


Processing page 3, link 13 of 30
https://nhattao.com/threads/dich-vu-thu-mua-may-tinh-laptop-macbook-tai-ha-noi.8490457/


Processing page 3, link 14 of 30
https://nhattao.com/threads/oto-cu-thu-mua-oto-cu-gia-cao-tai-tp-hcm-mua-oto-da-qua-su-dung.3264194/


Processing page 3, link 15 of 30
https://nhattao.com/threads/mua-xe-dap-dien-cu-gia-cao.4403397/


Processing page 3, link 16 of 30
https://nhattao.com/threads/can-mua-con-ps-vita-tv.8423449/


Processing page 3, link 17 of 30
https://nhattao.com/threads/do-choi-xep-hinh-tu-chau-au-an-

Processing page 5, link 17 of 30
https://nhattao.com/threads/do-choi-xep-hinh-tu-chau-au-an-toan-thong-minh-kich-thich-phat-trien-tu-duy-cho-be.8466682/


Processing page 5, link 18 of 30
https://nhattao.com/threads/nhung-mau-thiep-sinh-nhat-vintage-vo-cung-doc-dao-va-la-mat.8524838/


Processing page 5, link 19 of 30
https://nhattao.com/threads/chuyen-thu-mua-laptop-cu-gia-cao-khu-vuc-tphcm.8494482/


Processing page 5, link 20 of 30
https://nhattao.com/threads/the-cao-data-3g-mobifone-tron-goi-1-nam.8517668/


Processing page 5, link 21 of 30
https://nhattao.com/threads/dich-vu-thu-mua-may-tinh-laptop-macbook-tai-ha-noi.8524751/


Processing page 5, link 22 of 30
https://nhattao.com/threads/thu-mua-may-anh-cu-gia-cao-tan-noi-tai-ha-noi.8524749/


Processing page 5, link 23 of 30
https://nhattao.com/threads/co-so-san-xuat-moc-khoa-mica-moc-khoa-nhua-keo-mem-moc-khoa-qua-tang.8524438/


Processing page 5, link 24 of 30
https://nhattao.com/threads/chuyen-mua-tat-ca-cac-loai-xe-may-cu-ta

Processing page 7, link 24 of 30
https://nhattao.com/threads/chuyen-mua-tat-ca-cac-loai-xe-may-cu-tai-nha-gia-cao-nhat-sai-gon.6122465/


Processing page 7, link 25 of 30
https://nhattao.com/threads/muon-giao-luu-ip6-32g-voi-android.8430407/


Processing page 7, link 26 of 30
https://nhattao.com/threads/phan-mem-quan-ly-cong-no-tu-dong.8505203/


Processing page 7, link 27 of 30
https://nhattao.com/threads/tuyen-dung-viec-lam-tai-xe-b2-c-viec-lam-lo-xe-tai-giao-hang-tim-viec-lam-them-phu-kho-xep-hang-t.8491766/


Processing page 7, link 28 of 30
https://nhattao.com/threads/tai-sao-nen-mua-nick-cf-tai-shopnickre24h.8524110/


Processing page 7, link 29 of 30
https://nhattao.com/threads/nha-phan-phoi-quat-thong-gio-am-tran-nanyoo.8524109/


Processing page 7, link 30 of 30
https://nhattao.com/threads/trung-tam-thu-mua-xe-may-cu-gia-ca-canh-tranh.6122496/


Processing page 8 of 34
Processing page 8, link 1 of 30
https://nhattao.com/threads/shop-chuyen-thu-mua-may-choi-game-ds-2ds-3ds-psp-

Processing page 10, link 2 of 30
https://nhattao.com/threads/thu-mua-may-anh-cu-gia-cao-tan-noi-tai-ha-noi.8490455/


Processing page 10, link 3 of 30
https://nhattao.com/threads/can-mua-mot-so-linh-kien-may-tinh-bi-hu-hoac-loi.7296829/


Processing page 10, link 4 of 30
https://nhattao.com/threads/thu-mua-may-game-gia-cao-ps4-ps3-xbox-nintendo-switch-0947700818.6430441/


Processing page 10, link 5 of 30
https://nhattao.com/threads/ban-ten-mien-thegioiruouvang-com.8393484/


Processing page 10, link 6 of 30
https://nhattao.com/threads/ong-kinh-sigma-35mm-f-1-4-dg-hsm-art-for-canon.8524643/


Processing page 10, link 7 of 30
https://nhattao.com/threads/vi-nam.8525203/


Processing page 10, link 8 of 30
https://nhattao.com/threads/giay-nike-womens-air-zoom-vomero-13-running-shoe.8455852/


Processing page 10, link 9 of 30
https://nhattao.com/threads/in-bao-bi-nilon-dung-gao.8525386/


Processing page 10, link 10 of 30
https://nhattao.com/threads/ban-tai-khoan-google-drive-unlimited-goog

Processing page 12, link 10 of 30
https://nhattao.com/threads/ban-tai-khoan-google-drive-unlimited-google-drive-khong-gioi-han.8366250/


Processing page 12, link 11 of 30
https://nhattao.com/threads/thanh-long-binh-thuan-gia-si.8495763/


Processing page 12, link 12 of 30
https://nhattao.com/threads/thu-mua-laptop-cu-gia-cao-tphcm-0977419856-nghia.2597794/


Processing page 12, link 13 of 30
https://nhattao.com/threads/dich-vu-thu-mua-may-tinh-laptop-macbook-tai-ha-noi.8490457/


Processing page 12, link 14 of 30
https://nhattao.com/threads/oto-cu-thu-mua-oto-cu-gia-cao-tai-tp-hcm-mua-oto-da-qua-su-dung.3264194/


Processing page 12, link 15 of 30
https://nhattao.com/threads/mua-xe-dap-dien-cu-gia-cao.4403397/


Processing page 12, link 16 of 30
https://nhattao.com/threads/can-mua-con-ps-vita-tv.8423449/


Processing page 12, link 17 of 30
https://nhattao.com/threads/do-choi-xep-hinh-tu-chau-au-an-toan-thong-minh-kich-thich-phat-trien-tu-duy-cho-be.8466682/


Processing page 12, link 

Processing page 14, link 18 of 30
https://nhattao.com/threads/nhung-mau-thiep-sinh-nhat-vintage-vo-cung-doc-dao-va-la-mat.8524838/


Processing page 14, link 19 of 30
https://nhattao.com/threads/chuyen-thu-mua-laptop-cu-gia-cao-khu-vuc-tphcm.8494482/


Processing page 14, link 20 of 30
https://nhattao.com/threads/the-cao-data-3g-mobifone-tron-goi-1-nam.8517668/


Processing page 14, link 21 of 30
https://nhattao.com/threads/dich-vu-thu-mua-may-tinh-laptop-macbook-tai-ha-noi.8524751/


Processing page 14, link 22 of 30
https://nhattao.com/threads/thu-mua-may-anh-cu-gia-cao-tan-noi-tai-ha-noi.8524749/


Processing page 14, link 23 of 30
https://nhattao.com/threads/co-so-san-xuat-moc-khoa-mica-moc-khoa-nhua-keo-mem-moc-khoa-qua-tang.8524438/


Processing page 14, link 24 of 30
https://nhattao.com/threads/chuyen-mua-tat-ca-cac-loai-xe-may-cu-tai-nha-gia-cao-nhat-sai-gon.6122465/


Processing page 14, link 25 of 30
https://nhattao.com/threads/muon-giao-luu-ip6-32g-voi-android.8430407/


Pro

Processing page 16, link 25 of 30
https://nhattao.com/threads/muon-giao-luu-ip6-32g-voi-android.8430407/


Processing page 16, link 26 of 30
https://nhattao.com/threads/phan-mem-quan-ly-cong-no-tu-dong.8505203/


Processing page 16, link 27 of 30
https://nhattao.com/threads/tuyen-dung-viec-lam-tai-xe-b2-c-viec-lam-lo-xe-tai-giao-hang-tim-viec-lam-them-phu-kho-xep-hang-t.8491766/


Processing page 16, link 28 of 30
https://nhattao.com/threads/tai-sao-nen-mua-nick-cf-tai-shopnickre24h.8524110/


Processing page 16, link 29 of 30
https://nhattao.com/threads/nha-phan-phoi-quat-thong-gio-am-tran-nanyoo.8524109/


Processing page 16, link 30 of 30
https://nhattao.com/threads/trung-tam-thu-mua-xe-may-cu-gia-ca-canh-tranh.6122496/


Processing page 17 of 34
Processing page 17, link 1 of 30
https://nhattao.com/threads/thu-mua-laptop-cu-gia-cao-tphcm-0977419856-nghia.2597794/


Processing page 17, link 2 of 30
https://nhattao.com/threads/shop-chuyen-thu-mua-may-choi-game-ds-2ds-3ds-psp-vita-ps3-

Processing page 19, link 2 of 30
https://nhattao.com/threads/shop-chuyen-thu-mua-may-choi-game-ds-2ds-3ds-psp-vita-ps3-ps4-wiiu-xbox-077-707-1528.7220310/


Processing page 19, link 3 of 30
https://nhattao.com/threads/thu-mua-may-anh-cu-gia-cao-tan-noi-tai-ha-noi.8490455/


Processing page 19, link 4 of 30
https://nhattao.com/threads/can-mua-mot-so-linh-kien-may-tinh-bi-hu-hoac-loi.7296829/


Processing page 19, link 5 of 30
https://nhattao.com/threads/thu-mua-may-game-gia-cao-ps4-ps3-xbox-nintendo-switch-0947700818.6430441/


Processing page 19, link 6 of 30
https://nhattao.com/threads/ban-ten-mien-thegioiruouvang-com.8393484/


Processing page 19, link 7 of 30
https://nhattao.com/threads/ong-kinh-sigma-35mm-f-1-4-dg-hsm-art-for-canon.8524643/


Processing page 19, link 8 of 30
https://nhattao.com/threads/vi-nam.8525203/


Processing page 19, link 9 of 30
https://nhattao.com/threads/giay-nike-womens-air-zoom-vomero-13-running-shoe.8455852/


Processing page 19, link 10 of 30
https://n

Processing page 21, link 9 of 30
https://nhattao.com/threads/giay-nike-womens-air-zoom-vomero-13-running-shoe.8455852/


Processing page 21, link 10 of 30
https://nhattao.com/threads/in-bao-bi-nilon-dung-gao.8525386/


Processing page 21, link 11 of 30
https://nhattao.com/threads/ban-tai-khoan-google-drive-unlimited-google-drive-khong-gioi-han.8366250/


Processing page 21, link 12 of 30
https://nhattao.com/threads/thanh-long-binh-thuan-gia-si.8495763/


Processing page 21, link 13 of 30
https://nhattao.com/threads/dich-vu-thu-mua-may-tinh-laptop-macbook-tai-ha-noi.8490457/


Processing page 21, link 14 of 30
https://nhattao.com/threads/oto-cu-thu-mua-oto-cu-gia-cao-tai-tp-hcm-mua-oto-da-qua-su-dung.3264194/


Processing page 21, link 15 of 30
https://nhattao.com/threads/mua-xe-dap-dien-cu-gia-cao.4403397/


Processing page 21, link 16 of 30
https://nhattao.com/threads/can-mua-con-ps-vita-tv.8423449/


Processing page 21, link 17 of 30
https://nhattao.com/threads/do-choi-xep-hinh-tu-ch

Processing page 23, link 17 of 30
https://nhattao.com/threads/do-choi-xep-hinh-tu-chau-au-an-toan-thong-minh-kich-thich-phat-trien-tu-duy-cho-be.8466682/


Processing page 23, link 18 of 30
https://nhattao.com/threads/nhung-mau-thiep-sinh-nhat-vintage-vo-cung-doc-dao-va-la-mat.8524838/


Processing page 23, link 19 of 30
https://nhattao.com/threads/chuyen-thu-mua-laptop-cu-gia-cao-khu-vuc-tphcm.8494482/


Processing page 23, link 20 of 30
https://nhattao.com/threads/the-cao-data-3g-mobifone-tron-goi-1-nam.8517668/


Processing page 23, link 21 of 30
https://nhattao.com/threads/dich-vu-thu-mua-may-tinh-laptop-macbook-tai-ha-noi.8524751/


Processing page 23, link 22 of 30
https://nhattao.com/threads/thu-mua-may-anh-cu-gia-cao-tan-noi-tai-ha-noi.8524749/


Processing page 23, link 23 of 30
https://nhattao.com/threads/co-so-san-xuat-moc-khoa-mica-moc-khoa-nhua-keo-mem-moc-khoa-qua-tang.8524438/


Processing page 23, link 24 of 30
https://nhattao.com/threads/chuyen-mua-tat-ca-cac-loai-xe-m

Processing page 25, link 24 of 30
https://nhattao.com/threads/chuyen-mua-tat-ca-cac-loai-xe-may-cu-tai-nha-gia-cao-nhat-sai-gon.6122465/


Processing page 25, link 25 of 30
https://nhattao.com/threads/muon-giao-luu-ip6-32g-voi-android.8430407/


Processing page 25, link 26 of 30
https://nhattao.com/threads/phan-mem-quan-ly-cong-no-tu-dong.8505203/


Processing page 25, link 27 of 30
https://nhattao.com/threads/tuyen-dung-viec-lam-tai-xe-b2-c-viec-lam-lo-xe-tai-giao-hang-tim-viec-lam-them-phu-kho-xep-hang-t.8491766/


Processing page 25, link 28 of 30
https://nhattao.com/threads/tai-sao-nen-mua-nick-cf-tai-shopnickre24h.8524110/


Processing page 25, link 29 of 30
https://nhattao.com/threads/nha-phan-phoi-quat-thong-gio-am-tran-nanyoo.8524109/


Processing page 25, link 30 of 30
https://nhattao.com/threads/trung-tam-thu-mua-xe-may-cu-gia-ca-canh-tranh.6122496/


Processing page 26 of 34
Processing page 26, link 1 of 30
https://nhattao.com/threads/thu-mua-laptop-cu-gia-cao-tphcm-09774198

Processing page 28 of 34
Processing page 28, link 1 of 30
https://nhattao.com/threads/thu-mua-laptop-cu-gia-cao-tphcm-0977419856-nghia.2597794/


Processing page 28, link 2 of 30
https://nhattao.com/threads/shop-chuyen-thu-mua-may-choi-game-ds-2ds-3ds-psp-vita-ps3-ps4-wiiu-xbox-077-707-1528.7220310/


Processing page 28, link 3 of 30
https://nhattao.com/threads/thu-mua-may-anh-cu-gia-cao-tan-noi-tai-ha-noi.8490455/


Processing page 28, link 4 of 30
https://nhattao.com/threads/can-mua-mot-so-linh-kien-may-tinh-bi-hu-hoac-loi.7296829/


Processing page 28, link 5 of 30
https://nhattao.com/threads/thu-mua-may-game-gia-cao-ps4-ps3-xbox-nintendo-switch-0947700818.6430441/


Processing page 28, link 6 of 30
https://nhattao.com/threads/ban-ten-mien-thegioiruouvang-com.8393484/


Processing page 28, link 7 of 30
https://nhattao.com/threads/ong-kinh-sigma-35mm-f-1-4-dg-hsm-art-for-canon.8524643/


Processing page 28, link 8 of 30
https://nhattao.com/threads/vi-nam.8525203/


Processing page 28

Processing page 30, link 8 of 30
https://nhattao.com/threads/vi-nam.8525203/


Processing page 30, link 9 of 30
https://nhattao.com/threads/giay-nike-womens-air-zoom-vomero-13-running-shoe.8455852/


Processing page 30, link 10 of 30
https://nhattao.com/threads/in-bao-bi-nilon-dung-gao.8525386/


Processing page 30, link 11 of 30
https://nhattao.com/threads/ban-tai-khoan-google-drive-unlimited-google-drive-khong-gioi-han.8366250/


Processing page 30, link 12 of 30
https://nhattao.com/threads/thanh-long-binh-thuan-gia-si.8495763/


Processing page 30, link 13 of 30
https://nhattao.com/threads/dich-vu-thu-mua-may-tinh-laptop-macbook-tai-ha-noi.8490457/


Processing page 30, link 14 of 30
https://nhattao.com/threads/oto-cu-thu-mua-oto-cu-gia-cao-tai-tp-hcm-mua-oto-da-qua-su-dung.3264194/


Processing page 30, link 15 of 30
https://nhattao.com/threads/mua-xe-dap-dien-cu-gia-cao.4403397/


Processing page 30, link 16 of 30
https://nhattao.com/threads/can-mua-con-ps-vita-tv.8423449/


Proce

Processing page 32, link 16 of 30
https://nhattao.com/threads/can-mua-con-ps-vita-tv.8423449/


Processing page 32, link 17 of 30
https://nhattao.com/threads/do-choi-xep-hinh-tu-chau-au-an-toan-thong-minh-kich-thich-phat-trien-tu-duy-cho-be.8466682/


Processing page 32, link 18 of 30
https://nhattao.com/threads/nhung-mau-thiep-sinh-nhat-vintage-vo-cung-doc-dao-va-la-mat.8524838/


Processing page 32, link 19 of 30
https://nhattao.com/threads/chuyen-thu-mua-laptop-cu-gia-cao-khu-vuc-tphcm.8494482/


Processing page 32, link 20 of 30
https://nhattao.com/threads/the-cao-data-3g-mobifone-tron-goi-1-nam.8517668/


Processing page 32, link 21 of 30
https://nhattao.com/threads/dich-vu-thu-mua-may-tinh-laptop-macbook-tai-ha-noi.8524751/


Processing page 32, link 22 of 30
https://nhattao.com/threads/thu-mua-may-anh-cu-gia-cao-tan-noi-tai-ha-noi.8524749/


Processing page 32, link 23 of 30
https://nhattao.com/threads/co-so-san-xuat-moc-khoa-mica-moc-khoa-nhua-keo-mem-moc-khoa-qua-tang.8524438/

Processing page 34, link 23 of 30
https://nhattao.com/threads/co-so-san-xuat-moc-khoa-mica-moc-khoa-nhua-keo-mem-moc-khoa-qua-tang.8524438/


Processing page 34, link 24 of 30
https://nhattao.com/threads/chuyen-mua-tat-ca-cac-loai-xe-may-cu-tai-nha-gia-cao-nhat-sai-gon.6122465/


Processing page 34, link 25 of 30
https://nhattao.com/threads/muon-giao-luu-ip6-32g-voi-android.8430407/


Processing page 34, link 26 of 30
https://nhattao.com/threads/phan-mem-quan-ly-cong-no-tu-dong.8505203/


Processing page 34, link 27 of 30
https://nhattao.com/threads/tuyen-dung-viec-lam-tai-xe-b2-c-viec-lam-lo-xe-tai-giao-hang-tim-viec-lam-them-phu-kho-xep-hang-t.8491766/


Processing page 34, link 28 of 30
https://nhattao.com/threads/tai-sao-nen-mua-nick-cf-tai-shopnickre24h.8524110/


Processing page 34, link 29 of 30
https://nhattao.com/threads/nha-phan-phoi-quat-thong-gio-am-tran-nanyoo.8524109/


Processing page 34, link 30 of 30
https://nhattao.com/threads/trung-tam-thu-mua-xe-may-cu-gia-ca-canh-t

In [8]:
# Read the crawl results back from the JSON file into a DataFrame
test_pd = pd.read_json(path_or_buf='./nhattao_marketplace.json')

In [9]:
# Export the same table as an Excel workbook, without the DataFrame index column
test_pd.to_excel(excel_writer='./nhattao_marketplace.xlsx', index=False)

In [None]:
##################### Second version: the same crawl driven by the Nhattao_crawler class

In [1]:
import time, json, pandas as pd
from nhattao_crawler import Nhattao_crawler

# Crawler instance for the marketplace forum page
marketplace_url = 'https://nhattao.com/f/can-mua-trao-doi.589/'
crawler = Nhattao_crawler(marketplace_url)

In [2]:
# Crawl every page reported by the crawler and collect one record per listing
no_pages = crawler.get_no_pages()
combined = []

for i in range(1, no_pages+1):
    print('Processing page {} of {}'.format(i, no_pages))

    page_listings = crawler.get_listings_per_page(i)
    for j, listing in enumerate(page_listings, start=1):
        print('Processing page {} of {}, link {} of {}'.format(i, no_pages,
                                                               j, len(page_listings)))
        print(listing)
        print('\n')

        # Pause between listings
        time.sleep(5)
        # process_data is called once per listing; the earlier version called it
        # twice and discarded the first result.
        # NOTE(review): assumes process_data has no side effect that needed the
        # extra call - confirm against nhattao_crawler.
        combined.append(crawler.process_data(listing))

Processing page 1 of 34
Processing page 1 of 34, link 1 of 30
https://nhattao.com/threads/ban-tai-khoan-google-drive-unlimited-google-drive-khong-gioi-han.8366250/


Processing page 1 of 34, link 2 of 30
https://nhattao.com/threads/thu-mua-laptop-cu-gia-cao-tphcm-0977419856-nghia.2597794/


Processing page 1 of 34, link 3 of 30
https://nhattao.com/threads/ban-ten-mien-thegioiruouvang-com.8393484/


Processing page 1 of 34, link 4 of 30
https://nhattao.com/threads/can-iphone-ipad-cu-vo-nat-icloud-chet-gia-soc.8277677/


Processing page 1 of 34, link 5 of 30
https://nhattao.com/threads/thu-mua-may-anh-cu-gia-cao-tan-noi-tai-ha-noi.8490455/


Processing page 1 of 34, link 6 of 30
https://nhattao.com/threads/shop-chuyen-thu-mua-may-choi-game-ds-2ds-3ds-psp-vita-ps3-ps4-wiiu-xbox-077-707-1528.7220310/


Processing page 1 of 34, link 7 of 30
https://nhattao.com/threads/giay-nike-womens-air-zoom-vomero-13-running-shoe.8455852/


Processing page 1 of 34, link 8 of 30
https://nhattao.com/thread

Processing page 3 of 34, link 6 of 30
https://nhattao.com/threads/shop-chuyen-thu-mua-may-choi-game-ds-2ds-3ds-psp-vita-ps3-ps4-wiiu-xbox-077-707-1528.7220310/


Processing page 3 of 34, link 7 of 30
https://nhattao.com/threads/giay-nike-womens-air-zoom-vomero-13-running-shoe.8455852/


Processing page 3 of 34, link 8 of 30
https://nhattao.com/threads/dich-vu-thu-mua-may-tinh-laptop-macbook-tai-ha-noi.8490457/


Processing page 3 of 34, link 9 of 30
https://nhattao.com/threads/do-choi-xep-hinh-tu-chau-au-an-toan-thong-minh-kich-thich-phat-trien-tu-duy-cho-be.8466682/


Processing page 3 of 34, link 10 of 30
https://nhattao.com/threads/vi-nam.8525203/


Processing page 3 of 34, link 11 of 30
https://nhattao.com/threads/can-mua-mot-so-linh-kien-may-tinh-bi-hu-hoac-loi.7296829/


Processing page 3 of 34, link 12 of 30
https://nhattao.com/threads/thu-mua-may-game-gia-cao-ps4-ps3-xbox-nintendo-switch-0947700818.6430441/


Processing page 3 of 34, link 13 of 30
https://nhattao.com/threads/on

Processing page 5 of 34, link 11 of 30
https://nhattao.com/threads/vi-nam.8525203/


Processing page 5 of 34, link 12 of 30
https://nhattao.com/threads/can-mua-mot-so-linh-kien-may-tinh-bi-hu-hoac-loi.7296829/


Processing page 5 of 34, link 13 of 30
https://nhattao.com/threads/thu-mua-may-game-gia-cao-ps4-ps3-xbox-nintendo-switch-0947700818.6430441/


Processing page 5 of 34, link 14 of 30
https://nhattao.com/threads/ong-kinh-sigma-35mm-f-1-4-dg-hsm-art-for-canon.8524643/


Processing page 5 of 34, link 15 of 30
https://nhattao.com/threads/in-bao-bi-nilon-dung-gao.8525386/


Processing page 5 of 34, link 16 of 30
https://nhattao.com/threads/thanh-long-binh-thuan-gia-si.8495763/


Processing page 5 of 34, link 17 of 30
https://nhattao.com/threads/oto-cu-thu-mua-oto-cu-gia-cao-tai-tp-hcm-mua-oto-da-qua-su-dung.3264194/


Processing page 5 of 34, link 18 of 30
https://nhattao.com/threads/mua-xe-dap-dien-cu-gia-cao.4403397/


Processing page 5 of 34, link 19 of 30
https://nhattao.com/thre

Processing page 7 of 34, link 16 of 30
https://nhattao.com/threads/thanh-long-binh-thuan-gia-si.8495763/


Processing page 7 of 34, link 17 of 30
https://nhattao.com/threads/oto-cu-thu-mua-oto-cu-gia-cao-tai-tp-hcm-mua-oto-da-qua-su-dung.3264194/


Processing page 7 of 34, link 18 of 30
https://nhattao.com/threads/mua-xe-dap-dien-cu-gia-cao.4403397/


Processing page 7 of 34, link 19 of 30
https://nhattao.com/threads/can-mua-con-ps-vita-tv.8423449/


Processing page 7 of 34, link 20 of 30
https://nhattao.com/threads/nhung-mau-thiep-sinh-nhat-vintage-vo-cung-doc-dao-va-la-mat.8524838/


Processing page 7 of 34, link 21 of 30
https://nhattao.com/threads/chuyen-thu-mua-laptop-cu-gia-cao-khu-vuc-tphcm.8494482/


Processing page 7 of 34, link 22 of 30
https://nhattao.com/threads/the-cao-data-3g-mobifone-tron-goi-1-nam.8517668/


Processing page 7 of 34, link 23 of 30
https://nhattao.com/threads/dich-vu-thu-mua-may-tinh-laptop-macbook-tai-ha-noi.8524751/


Processing page 7 of 34, link 24 of

Processing page 9 of 34, link 21 of 30
https://nhattao.com/threads/chuyen-thu-mua-laptop-cu-gia-cao-khu-vuc-tphcm.8494482/


Processing page 9 of 34, link 22 of 30
https://nhattao.com/threads/the-cao-data-3g-mobifone-tron-goi-1-nam.8517668/


Processing page 9 of 34, link 23 of 30
https://nhattao.com/threads/dich-vu-thu-mua-may-tinh-laptop-macbook-tai-ha-noi.8524751/


Processing page 9 of 34, link 24 of 30
https://nhattao.com/threads/thu-mua-may-anh-cu-gia-cao-tan-noi-tai-ha-noi.8524749/


Processing page 9 of 34, link 25 of 30
https://nhattao.com/threads/co-so-san-xuat-moc-khoa-mica-moc-khoa-nhua-keo-mem-moc-khoa-qua-tang.8524438/


Processing page 9 of 34, link 26 of 30
https://nhattao.com/threads/chuyen-mua-tat-ca-cac-loai-xe-may-cu-tai-nha-gia-cao-nhat-sai-gon.6122465/


Processing page 9 of 34, link 27 of 30
https://nhattao.com/threads/muon-giao-luu-ip6-32g-voi-android.8430407/


Processing page 9 of 34, link 28 of 30
https://nhattao.com/threads/phan-mem-quan-ly-cong-no-tu-dong.8

Processing page 11 of 34, link 26 of 30
https://nhattao.com/threads/chuyen-mua-tat-ca-cac-loai-xe-may-cu-tai-nha-gia-cao-nhat-sai-gon.6122465/


Processing page 11 of 34, link 27 of 30
https://nhattao.com/threads/muon-giao-luu-ip6-32g-voi-android.8430407/


Processing page 11 of 34, link 28 of 30
https://nhattao.com/threads/phan-mem-quan-ly-cong-no-tu-dong.8505203/


Processing page 11 of 34, link 29 of 30
https://nhattao.com/threads/tuyen-dung-viec-lam-tai-xe-b2-c-viec-lam-lo-xe-tai-giao-hang-tim-viec-lam-them-phu-kho-xep-hang-t.8491766/


Processing page 11 of 34, link 30 of 30
https://nhattao.com/threads/tai-sao-nen-mua-nick-cf-tai-shopnickre24h.8524110/


Processing page 12 of 34
Processing page 12 of 34, link 1 of 30
https://nhattao.com/threads/shop-chuyen-thu-mua-may-choi-game-ds-2ds-3ds-psp-vita-ps3-ps4-wiiu-xbox-077-707-1528.7220310/


Processing page 12 of 34, link 2 of 30
https://nhattao.com/threads/may-trong-rau-tu-dong-ero-farm.8525962/


Processing page 12 of 34, link 3 of

Processing page 14 of 34, link 30 of 30
https://nhattao.com/threads/tuyen-dung-viec-lam-tai-xe-b2-c-viec-lam-lo-xe-tai-giao-hang-tim-viec-lam-them-phu-kho-xep-hang-t.8491766/


Processing page 15 of 34
Processing page 15 of 34, link 1 of 30
https://nhattao.com/threads/dich-vu-thu-mua-may-tinh-laptop-macbook-tai-ha-noi.8490457/


Processing page 15 of 34, link 2 of 30
https://nhattao.com/threads/trung-tam-thu-mua-xe-may-cu-gia-ca-canh-tranh.6122496/


Processing page 15 of 34, link 3 of 30
https://nhattao.com/threads/shop-chuyen-thu-mua-may-choi-game-ds-2ds-3ds-psp-vita-ps3-ps4-wiiu-xbox-077-707-1528.7220310/


Processing page 15 of 34, link 4 of 30
https://nhattao.com/threads/may-trong-rau-tu-dong-ero-farm.8525962/


Processing page 15 of 34, link 5 of 30
https://nhattao.com/threads/ban-tai-khoan-google-drive-unlimited-google-drive-khong-gioi-han.8366250/


Processing page 15 of 34, link 6 of 30
https://nhattao.com/threads/thu-mua-laptop-cu-gia-cao-tphcm-0977419856-nghia.2597794/


Pro

Processing page 17 of 34, link 4 of 30
https://nhattao.com/threads/shop-chuyen-thu-mua-may-choi-game-ds-2ds-3ds-psp-vita-ps3-ps4-wiiu-xbox-077-707-1528.7220310/


Processing page 17 of 34, link 5 of 30
https://nhattao.com/threads/may-trong-rau-tu-dong-ero-farm.8525962/


Processing page 17 of 34, link 6 of 30
https://nhattao.com/threads/ban-tai-khoan-google-drive-unlimited-google-drive-khong-gioi-han.8366250/


Processing page 17 of 34, link 7 of 30
https://nhattao.com/threads/thu-mua-laptop-cu-gia-cao-tphcm-0977419856-nghia.2597794/


Processing page 17 of 34, link 8 of 30
https://nhattao.com/threads/ban-ten-mien-thegioiruouvang-com.8393484/


Processing page 17 of 34, link 9 of 30
https://nhattao.com/threads/can-iphone-ipad-cu-vo-nat-icloud-chet-gia-soc.8277677/


Processing page 17 of 34, link 10 of 30
https://nhattao.com/threads/thu-mua-may-anh-cu-gia-cao-tan-noi-tai-ha-noi.8490455/


Processing page 17 of 34, link 11 of 30
https://nhattao.com/threads/giay-nike-womens-air-zoom-vome

Processing page 19 of 34, link 8 of 30
https://nhattao.com/threads/ban-ten-mien-thegioiruouvang-com.8393484/


Processing page 19 of 34, link 9 of 30
https://nhattao.com/threads/can-iphone-ipad-cu-vo-nat-icloud-chet-gia-soc.8277677/


Processing page 19 of 34, link 10 of 30
https://nhattao.com/threads/thu-mua-may-anh-cu-gia-cao-tan-noi-tai-ha-noi.8490455/


Processing page 19 of 34, link 11 of 30
https://nhattao.com/threads/giay-nike-womens-air-zoom-vomero-13-running-shoe.8455852/


Processing page 19 of 34, link 12 of 30
https://nhattao.com/threads/do-choi-xep-hinh-tu-chau-au-an-toan-thong-minh-kich-thich-phat-trien-tu-duy-cho-be.8466682/


Processing page 19 of 34, link 13 of 30
https://nhattao.com/threads/vi-nam.8525203/


Processing page 19 of 34, link 14 of 30
https://nhattao.com/threads/can-mua-mot-so-linh-kien-may-tinh-bi-hu-hoac-loi.7296829/


Processing page 19 of 34, link 15 of 30
https://nhattao.com/threads/thu-mua-may-game-gia-cao-ps4-ps3-xbox-nintendo-switch-0947700818.643

Processing page 21 of 34, link 13 of 30
https://nhattao.com/threads/vi-nam.8525203/


Processing page 21 of 34, link 14 of 30
https://nhattao.com/threads/can-mua-mot-so-linh-kien-may-tinh-bi-hu-hoac-loi.7296829/


Processing page 21 of 34, link 15 of 30
https://nhattao.com/threads/thu-mua-may-game-gia-cao-ps4-ps3-xbox-nintendo-switch-0947700818.6430441/


Processing page 21 of 34, link 16 of 30
https://nhattao.com/threads/ong-kinh-sigma-35mm-f-1-4-dg-hsm-art-for-canon.8524643/


Processing page 21 of 34, link 17 of 30
https://nhattao.com/threads/in-bao-bi-nilon-dung-gao.8525386/


Processing page 21 of 34, link 18 of 30
https://nhattao.com/threads/thanh-long-binh-thuan-gia-si.8495763/


Processing page 21 of 34, link 19 of 30
https://nhattao.com/threads/oto-cu-thu-mua-oto-cu-gia-cao-tai-tp-hcm-mua-oto-da-qua-su-dung.3264194/


Processing page 21 of 34, link 20 of 30
https://nhattao.com/threads/mua-xe-dap-dien-cu-gia-cao.4403397/


Processing page 21 of 34, link 21 of 30
https://nhattao

Processing page 23 of 34, link 18 of 30
https://nhattao.com/threads/in-bao-bi-nilon-dung-gao.8525386/


Processing page 23 of 34, link 19 of 30
https://nhattao.com/threads/thanh-long-binh-thuan-gia-si.8495763/


Processing page 23 of 34, link 20 of 30
https://nhattao.com/threads/oto-cu-thu-mua-oto-cu-gia-cao-tai-tp-hcm-mua-oto-da-qua-su-dung.3264194/


Processing page 23 of 34, link 21 of 30
https://nhattao.com/threads/mua-xe-dap-dien-cu-gia-cao.4403397/


Processing page 23 of 34, link 22 of 30
https://nhattao.com/threads/can-mua-con-ps-vita-tv.8423449/


Processing page 23 of 34, link 23 of 30
https://nhattao.com/threads/nhung-mau-thiep-sinh-nhat-vintage-vo-cung-doc-dao-va-la-mat.8524838/


Processing page 23 of 34, link 24 of 30
https://nhattao.com/threads/chuyen-thu-mua-laptop-cu-gia-cao-khu-vuc-tphcm.8494482/


Processing page 23 of 34, link 25 of 30
https://nhattao.com/threads/the-cao-data-3g-mobifone-tron-goi-1-nam.8517668/


Processing page 23 of 34, link 26 of 30
https://nhatt

Processing page 25 of 34, link 24 of 30
https://nhattao.com/threads/chuyen-thu-mua-laptop-cu-gia-cao-khu-vuc-tphcm.8494482/


Processing page 25 of 34, link 25 of 30
https://nhattao.com/threads/the-cao-data-3g-mobifone-tron-goi-1-nam.8517668/


Processing page 25 of 34, link 26 of 30
https://nhattao.com/threads/dich-vu-thu-mua-may-tinh-laptop-macbook-tai-ha-noi.8524751/


Processing page 25 of 34, link 27 of 30
https://nhattao.com/threads/thu-mua-may-anh-cu-gia-cao-tan-noi-tai-ha-noi.8524749/


Processing page 25 of 34, link 28 of 30
https://nhattao.com/threads/co-so-san-xuat-moc-khoa-mica-moc-khoa-nhua-keo-mem-moc-khoa-qua-tang.8524438/


Processing page 25 of 34, link 29 of 30
https://nhattao.com/threads/muon-giao-luu-ip6-32g-voi-android.8430407/


Processing page 25 of 34, link 30 of 30
https://nhattao.com/threads/phan-mem-quan-ly-cong-no-tu-dong.8505203/


Processing page 26 of 34
Processing page 26 of 34, link 1 of 30
https://nhattao.com/threads/ban-hoac-doi-do-cong-nghe.8505249/


Processing page 27 of 34, link 29 of 30
https://nhattao.com/threads/muon-giao-luu-ip6-32g-voi-android.8430407/


Processing page 27 of 34, link 30 of 30
https://nhattao.com/threads/phan-mem-quan-ly-cong-no-tu-dong.8505203/


Processing page 28 of 34
Processing page 28 of 34, link 1 of 30
https://nhattao.com/threads/ban-hoac-doi-do-cong-nghe.8505249/


Processing page 28 of 34, link 2 of 30
https://nhattao.com/threads/chuyen-mua-tat-ca-cac-loai-xe-may-cu-tai-nha-gia-cao-nhat-sai-gon.6122465/


Processing page 28 of 34, link 3 of 30
https://nhattao.com/threads/dich-vu-thu-mua-may-tinh-laptop-macbook-tai-ha-noi.8490457/


Processing page 28 of 34, link 4 of 30
https://nhattao.com/threads/trung-tam-thu-mua-xe-may-cu-gia-ca-canh-tranh.6122496/


Processing page 28 of 34, link 5 of 30
https://nhattao.com/threads/shop-chuyen-thu-mua-may-choi-game-ds-2ds-3ds-psp-vita-ps3-ps4-wiiu-xbox-077-707-1528.7220310/


Processing page 28 of 34, link 6 of 30
https://nhattao.com/threads/may-trong-rau-tu-do

Processing page 30 of 34, link 5 of 30
https://nhattao.com/threads/shop-chuyen-thu-mua-may-choi-game-ds-2ds-3ds-psp-vita-ps3-ps4-wiiu-xbox-077-707-1528.7220310/


Processing page 30 of 34, link 6 of 30
https://nhattao.com/threads/may-trong-rau-tu-dong-ero-farm.8525962/


Processing page 30 of 34, link 7 of 30
https://nhattao.com/threads/ban-tai-khoan-google-drive-unlimited-google-drive-khong-gioi-han.8366250/


Processing page 30 of 34, link 8 of 30
https://nhattao.com/threads/thu-mua-laptop-cu-gia-cao-tphcm-0977419856-nghia.2597794/


Processing page 30 of 34, link 9 of 30
https://nhattao.com/threads/ban-ten-mien-thegioiruouvang-com.8393484/


Processing page 30 of 34, link 10 of 30
https://nhattao.com/threads/can-iphone-ipad-cu-vo-nat-icloud-chet-gia-soc.8277677/


Processing page 30 of 34, link 11 of 30
https://nhattao.com/threads/thu-mua-may-anh-cu-gia-cao-tan-noi-tai-ha-noi.8490455/


Processing page 30 of 34, link 12 of 30
https://nhattao.com/threads/giay-nike-womens-air-zoom-vom

Processing page 32 of 34, link 10 of 30
https://nhattao.com/threads/can-iphone-ipad-cu-vo-nat-icloud-chet-gia-soc.8277677/


Processing page 32 of 34, link 11 of 30
https://nhattao.com/threads/thu-mua-may-anh-cu-gia-cao-tan-noi-tai-ha-noi.8490455/


Processing page 32 of 34, link 12 of 30
https://nhattao.com/threads/giay-nike-womens-air-zoom-vomero-13-running-shoe.8455852/


Processing page 32 of 34, link 13 of 30
https://nhattao.com/threads/do-choi-xep-hinh-tu-chau-au-an-toan-thong-minh-kich-thich-phat-trien-tu-duy-cho-be.8466682/


Processing page 32 of 34, link 14 of 30
https://nhattao.com/threads/vi-nam.8525203/


Processing page 32 of 34, link 15 of 30
https://nhattao.com/threads/can-mua-mot-so-linh-kien-may-tinh-bi-hu-hoac-loi.7296829/


Processing page 32 of 34, link 16 of 30
https://nhattao.com/threads/thu-mua-may-game-gia-cao-ps4-ps3-xbox-nintendo-switch-0947700818.6430441/


Processing page 32 of 34, link 17 of 30
https://nhattao.com/threads/ong-kinh-sigma-35mm-f-1-4-dg-hsm-a

TypeError: 'NoneType' object is not iterable

In [3]:
# Persist the scraped records to disk as JSON.
# The message has no placeholder, so the previous `.format(i)` call never
# changed the text; it only made this cell fail with NameError whenever the
# loop variable `i` was not left behind in the kernel. It is dropped here.
print('Writing json file')
# NOTE(review): `combined` is presumably the list of per-thread records built
# by the crawl cells above — confirm it is JSON-serialisable.
with open('./nhattao_marketplace.json', 'w', encoding='utf-8') as f:
    json.dump(combined, f)

Writing json file


In [4]:
# Load the JSON dump written above into a DataFrame, then export it to an
# Excel workbook without the index column.
test_pd = pd.read_json(path_or_buf='./nhattao_marketplace.json')
test_pd.to_excel(excel_writer='./nhattao_marketplace.xlsx', index=False)