# Install selenium in Colab

In [1]:
%%shell
# Ubuntu no longer distributes chromium-browser outside of snap, so chromium
# and its driver are installed from the Debian Buster repositories instead.
# Proposed solution: https://askubuntu.com/questions/1204571/how-to-install-chromium-without-snap

# Add Debian Buster repository
cat > /etc/apt/sources.list.d/debian.list << "EOF"
deb [arch=amd64 signed-by=/usr/share/keyrings/debian-buster.gpg] http://deb.debian.org/debian buster main
deb [arch=amd64 signed-by=/usr/share/keyrings/debian-buster-updates.gpg] http://deb.debian.org/debian buster-updates main
deb [arch=amd64 signed-by=/usr/share/keyrings/debian-security-buster.gpg] http://deb.debian.org/debian-security buster/updates main
EOF

# Add keys
apt-key adv --keyserver keyserver.ubuntu.com --recv-keys DCC9EFBF77E11517
apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 648ACFD622F3D138
apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 112695A0E562B32A

# Export each key into the keyring file referenced by the matching
# "signed-by=" entry above. --yes makes gpg overwrite a keyring left behind by
# a previous run of this cell instead of stopping to ask for confirmation.
apt-key export 77E11517 | gpg --yes --dearmour -o /usr/share/keyrings/debian-buster.gpg
apt-key export 22F3D138 | gpg --yes --dearmour -o /usr/share/keyrings/debian-buster-updates.gpg
apt-key export E562B32A | gpg --yes --dearmour -o /usr/share/keyrings/debian-security-buster.gpg

# Prefer Debian repo for chromium* packages only: Debian packages get a low
# priority (300) in general and a high one (700) when the name matches chromium*.
# NOTE(review): the first stanza pins release "eoan" - confirm this still
# matches the Ubuntu release of the runtime this notebook is executed on.
cat > /etc/apt/preferences.d/chromium.pref << "EOF"
Package: *
Pin: release a=eoan
Pin-Priority: 500

Package: *
Pin: origin "deb.debian.org"
Pin-Priority: 300

Package: chromium*
Pin: origin "deb.debian.org"
Pin-Priority: 700
EOF

# Install chromium and chromium-driver
# -y answers the confirmation prompt so the cell also works under "Run all".
apt-get update
apt-get install -y chromium chromium-driver

# Install selenium
pip install selenium

Executing: /tmp/apt-key-gpghome.V2tovXVK7n/gpg.1.sh --keyserver keyserver.ubuntu.com --recv-keys DCC9EFBF77E11517
gpg: key DCC9EFBF77E11517: public key "Debian Stable Release Key (10/buster) <debian-release@lists.debian.org>" imported
gpg: Total number processed: 1
gpg:               imported: 1
Executing: /tmp/apt-key-gpghome.QS0pjB4LFd/gpg.1.sh --keyserver keyserver.ubuntu.com --recv-keys 648ACFD622F3D138
gpg: key DC30D7C23CBBABEE: public key "Debian Archive Automatic Signing Key (10/buster) <ftpmaster@debian.org>" imported
gpg: Total number processed: 1
gpg:               imported: 1
Executing: /tmp/apt-key-gpghome.DmwchDOpWF/gpg.1.sh --keyserver keyserver.ubuntu.com --recv-keys 112695A0E562B32A
gpg: key 4DFAB270CAA96DFA: public key "Debian Security Archive Automatic Signing Key (10/buster) <ftpmaster@debian.org>" imported
gpg: Total number processed: 1
gpg:               imported: 1
Get:1 http://deb.debian.org/debian buster InRelease [122 kB]
Hit:2 http://archive.ubuntu.com/ubuntu 



# Import libraries

In [2]:
import pandas as pd
import os
import requests
import time
import random

from bs4 import BeautifulSoup
from tqdm import tqdm
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Create driver

In [3]:
# Timeout, in seconds, handed to the explicit WebDriverWait below.
WEBDRIVER_DELAY_TIME_INT = 10

# Point Selenium at the chromedriver binary explicitly. The keyword must be
# spelled `executable_path`; a misspelled keyword is either rejected or
# silently ignored depending on the Selenium version.
service = Service(executable_path=r'/usr/bin/chromedriver')

chrome_options = webdriver.ChromeOptions()
# Headless mode is requested through the command-line switch only; the
# `options.headless` property is deprecated and would be redundant here.
chrome_options.add_argument('--headless')
# NOTE(review): --no-sandbox is presumably needed because the notebook runtime
# runs Chrome as root - confirm before reusing this outside Colab.
chrome_options.add_argument('--no-sandbox')

driver = webdriver.Chrome(service=service, options=chrome_options)
# Implicit wait applied to every find_element(s) call made through `driver`.
driver.implicitly_wait(5)
# Explicit wait helper used by crawl_data for the poem body.
wait = WebDriverWait(driver, WEBDRIVER_DELAY_TIME_INT)

# Crawl data

In [4]:
def crawl_data(url, start_page, end_page):
    """Crawl poems from a paginated search-result listing.

    For each result page from ``start_page`` to ``end_page`` (both inclusive),
    every poem link on the page is collected first; each poem page is then
    opened and the text of the ``<p>`` children of its ``poem-content``
    element is extracted.

    Uses the notebook-level ``driver`` (Selenium WebDriver) and ``wait``
    (WebDriverWait) objects.

    Args:
        url: Search URL without the page parameter; ``&Page=<n>`` is appended.
        start_page: First result page to crawl.
        end_page: Last result page to crawl.

    Returns:
        A list of dicts with the keys ``'content'`` (poem text, one verse per
        line) and ``'url'`` (the poem page address). Poems that fail to load
        or have no ``<p>`` content are skipped.
    """
    datasets = []

    for page_idx in tqdm(range(start_page, end_page + 1)):
        main_url = f'{url}&Page={page_idx}'
        driver.get(main_url)

        # Collect every link before leaving the listing page: once the driver
        # navigates to a poem, element handles from this page become stale.
        poem_urls = []
        for content_tag in driver.find_elements(By.CLASS_NAME, 'page-content-main'):
            for item in content_tag.find_elements(By.CLASS_NAME, 'list-item'):
                a_tag = item.find_element(By.CSS_SELECTOR, 'h4.list-item-header a')
                poem_urls.append(a_tag.get_attribute('href'))

        print('\n', poem_urls)
        for poem_url in poem_urls:
            print(f"Truy cập {poem_url}")
            try:
                driver.get(poem_url)

                poem_content_tag = wait.until(
                    EC.presence_of_element_located((By.CLASS_NAME, 'poem-content')))

                p_tags = poem_content_tag.find_elements(By.XPATH, './p')
                if not p_tags:
                    continue

                poem_lines = []
                for p_tag in p_tags:
                    soup = BeautifulSoup(p_tag.get_attribute('outerHTML'), 'html.parser')

                    for br in soup.find_all("br"):
                        br.replace_with("\n")

                    # get_text(strip=True) drops whitespace-only strings, which
                    # would throw away the "\n" nodes inserted above and merge
                    # all verses into one line. Strip per line instead.
                    verses = (line.strip() for line in soup.get_text().splitlines())
                    poem_lines.append("\n".join(verse for verse in verses if verse))

                poem_content = "\n".join(poem_lines)

                datasets.append({
                    'content': poem_content,
                    'url': poem_url,
                })
                print(f"Lấy dữ liệu thành công {poem_url}")

            except Exception as e:
                # Best-effort crawl: keep going, but report what went wrong.
                print(f"Lỗi truy cập link {poem_url} ({type(e).__name__})")

    return datasets

In [5]:
# Batch 1: crawl result pages 1 through 3 of the search filtered by Age[]=55.
that_ngon_tu_tuyet_1 = crawl_data('https://www.thivien.net/searchpoem.php?PoemType=6&ViewType=1&Country=2&Age[]=55', 1, 3)

  0%|          | 0/3 [00:00<?, ?it/s]


 ['https://www.thivien.net/Nguy%E1%BB%85n-B%E1%BB%89nh-Khi%C3%AAm/B%C3%A0i-98/poem-mE-LYXC5BY8fjsmlyDfP6g', 'https://www.thivien.net/Tr%E1%BB%8Bnh-Doanh/Ban-cung-nh%C3%A2n-b%C3%A0i-7/poem-MwxPhw4C75HQsgfR8V0--g', 'https://www.thivien.net/Nguy%E1%BB%85n-Qu%E1%BB%B3nh/B%E1%BB%A1n-quan-tr%C6%B0%E1%BB%9Dng-ch%E1%BA%A5m-thi/poem-xi6SHf8oI0xwzRyJgBD-6g', 'https://www.thivien.net/Nguy%E1%BB%85n-Qu%E1%BB%B3nh/Ch%C6%A1i-Ph%E1%BB%91-Hi%E1%BA%BFn/poem-Yg6Mr8Wg9A6n-qpLItq_PQ', 'https://www.thivien.net/Khuy%E1%BA%BFt-danh-Vi%E1%BB%87t-Nam/Chung-t%C3%ACnh-I-b%C3%A0i-5-Quan-nh%E1%BA%ADm-ch%C3%A2u-kh%C3%A1ch-Hoan-ch%C3%A2u/poem-6XE8_CHUbvt8Fbn9fjZhiQ', 'https://www.thivien.net/Khuy%E1%BA%BFt-danh-Vi%E1%BB%87t-Nam/Chung-t%C3%ACnh-II-b%C3%A0i-1-T%E1%BB%AB-gh%C3%A9-non-B%E1%BB%93ng-di%E1%BB%85n-b%E1%BA%A1n-ti%C3%AAn/poem-PPwwQdxPpTajZvNfSNVANA', 'https://www.thivien.net/L%C3%AA-Th%C3%A1nh-T%C3%B4ng/D%E1%BB%87t-c%E1%BB%ADi/poem-1EWpuZh-pyaCZrrqX0o68g', 'https://www.thivien.net/Tr%E1%BB%8Bnh-Doanh/D%E1%BB

 33%|███▎      | 1/3 [00:23<00:46, 23.29s/it]

Lấy dữ liệu thành công https://www.thivien.net/Nguy%E1%BB%85n-Tr%C3%A3i/%C4%90%C3%A0o-hoa-thi-b%C3%A0i-2/poem-T77YPUFr1r-w2GT5GOPcQw

 ['https://www.thivien.net/Nguy%E1%BB%85n-Tr%C3%A3i/%C4%90%C3%A0o-hoa-thi-b%C3%A0i-3/poem-Eo8O_bjQV9CSK1xQ2MCw5g', 'https://www.thivien.net/Nguy%E1%BB%85n-Tr%C3%A3i/%C4%90%C3%A0o-hoa-thi-b%C3%A0i-4/poem-mniVeONHoNctrLh3GD5K_Q', 'https://www.thivien.net/Nguy%E1%BB%85n-Tr%C3%A3i/%C4%90%C3%A0o-hoa-thi-b%C3%A0i-5/poem-oTUWo08PVWVgk1Z73UFsZg', 'https://www.thivien.net/Nguy%E1%BB%85n-Tr%C3%A3i/%C4%90%C3%A0o-hoa-thi-b%C3%A0i-6/poem-XRqcrciH6L9671B2HFBcfw', 'https://www.thivien.net/Nguy%E1%BB%85n-Gia-Ch%C3%A2u/%C4%90%E1%BB%81-%E1%BB%9F-c%E1%BB%ADa-Sinh-T%E1%BB%AD/poem-0m57jrjDO0rvGhET6T3zDQ', 'https://www.thivien.net/Nguy%E1%BB%85n-Tr%C3%A3i/Gh%E1%BA%B9o-c%C3%B4-h%C3%A0ng-chi%E1%BA%BFu/poem-mhKCYVUAIeTrxJ6Ewog4eg', 'https://www.thivien.net/Nguy%E1%BB%85n-Tr%C3%A3i/Hoa-xu%C3%A2n/poem-MdBXw3KkXM368dBLHlrMjQ', 'https://www.thivien.net/Khuy%E1%BA%BFt-danh-Vi%E1%BB%8

 67%|██████▋   | 2/3 [00:45<00:22, 22.89s/it]

Lấy dữ liệu thành công https://www.thivien.net/Nguy%E1%BB%85n-Qu%E1%BB%B3nh/Ph%E1%BA%ADt-say-Thu%E1%BB%B5-Ch%C6%B0%C6%A1ng/poem-BCWjU4gquzYx4rCamUlRKQ

 ['https://www.thivien.net/Tr%E1%BB%8Bnh-S%C3%A2m/Ph%E1%BB%A5ng-canh-ng%E1%BB%B1-ch%E1%BA%BF-th%C6%B0%E1%BB%9Fng-nghinh-xu%C3%A2n-vi%E1%BB%87n-thi/poem-6KT2iT_lEHwFt5NM3tJgSA', 'https://www.thivien.net/%C4%90%E1%BB%97-Th%E1%BA%BF-Giai/Ph%E1%BB%A5ng-ho%E1%BA%A1-ng%E1%BB%B1-ch%E1%BA%BF-t%C3%A2n-xu%C3%A2n/poem-BGSN8YqLn1M2SoufQXK3cQ', 'https://www.thivien.net/Nguy%E1%BB%85n-Qu%E1%BB%B3nh/Th%C6%A1-x%E1%BB%8F-g%C3%A1i-chua-ngoa/poem-KJSJBK9wl_7o3Hh-D0KebQ', 'https://www.thivien.net/%C4%90%E1%BB%97-Th%E1%BA%BF-Giai/Ti%E1%BB%83u-lu%E1%BA%ADt/poem-MW4gjbxV3dzYNIAEVqQmvA', 'https://www.thivien.net/%C4%90%E1%BB%97-Th%E1%BA%BF-Giai/Ti%E1%BB%83u-lu%E1%BA%ADt/poem-CA9_8kPzZb80aWE8Y5iZhw', 'https://www.thivien.net/L%C3%AA-Th%C3%A1nh-T%C3%B4ng/V%E1%BB%8Bnh-con-c%C3%B3c/poem-7fiNjkICzfeK5Ji6ZlZ56g']
Truy cập https://www.thivien.net/Tr%E1%BB%8Bnh-S%C3%A

100%|██████████| 3/3 [00:57<00:00, 19.27s/it]

Lấy dữ liệu thành công https://www.thivien.net/L%C3%AA-Th%C3%A1nh-T%C3%B4ng/V%E1%BB%8Bnh-con-c%C3%B3c/poem-7fiNjkICzfeK5Ji6ZlZ56g





In [5]:
# Batch 2: crawl result pages 1 through 4 of the search filtered by Age[]=57.
that_ngon_tu_tuyet_2 = crawl_data('https://www.thivien.net/searchpoem.php?PoemType=6&ViewType=1&Country=2&Age[]=57', 1, 4)

  0%|          | 0/4 [00:00<?, ?it/s]


 ['https://www.thivien.net/Cao-B%C3%A1-Qu%C3%A1t/Quan-ng%C6%A1i/poem-fvGX2ZMxCaMEjcQpSRjL6A', 'https://www.thivien.net/H%E1%BB%93-Xu%C3%A2n-H%C6%B0%C6%A1ng/B%C3%A1nh-tr%C3%B4i-n%C6%B0%E1%BB%9Bc/poem-uWq3KGCd3SUUse06kE6PYA', 'https://www.thivien.net/Phan-Th%C3%BAc-Tr%E1%BB%B1c/Bu%E1%BB%95i-s%C3%A1ng-%C4%91i-thuy%E1%BB%81n-tr%C3%AAn-s%C3%B4ng-M%C3%A3/poem-OERgyTfhZTMrH8mc_a2lYg', 'https://www.thivien.net/H%E1%BB%93-Xu%C3%A2n-H%C6%B0%C6%A1ng/C%C3%A1i-qu%E1%BA%A1t/poem-scROveDf2WzzNJM9eAvSlQ', 'https://www.thivien.net/T%E1%BA%A1-Hi%E1%BB%87n/C%E1%BA%A3m-%C4%91%E1%BB%81/poem-IEhO9vdaYKxBKUMMZ8kMCQ', 'https://www.thivien.net/Nguy%E1%BB%85n-C%C3%B4ng-Tr%E1%BB%A9/C%C3%A1m-%C6%A1n-hai-c%C3%B4-%C4%91%C3%A0o/poem-eikFbHcDEWNk7ziidPzSyg', 'https://www.thivien.net/H%E1%BB%93-Xu%C3%A2n-H%C6%B0%C6%A1ng/Ch%C6%A1i-hoa/poem-jSzm65JZ1S2GD6Vw9RK_6w', 'https://www.thivien.net/H%E1%BB%93-Xu%C3%A2n-H%C6%B0%C6%A1ng/Con-cua/poem-desawxihH_FjwviJgfWqRw', 'https://www.thivien.net/H%E1%BB%93-Xu%C3%A2n-H%C6%B0%C6

 25%|██▌       | 1/4 [00:23<01:11, 23.74s/it]

Lấy dữ liệu thành công https://www.thivien.net/Nguy%E1%BB%85n-C%C3%B4ng-Tr%E1%BB%A9/%C4%90%C3%A1nh-t%E1%BB%95-t%C3%B4m/poem-o1Dgj5mZFHmPIMkvJm0okw

 ['https://www.thivien.net/Cao-B%C3%A1-Qu%C3%A1t/%C4%90%E1%BA%AFp-voi/poem-LNX-B6YEnTxrHdNh6Rwzhg', 'https://www.thivien.net/H%E1%BB%93-Xu%C3%A2n-H%C6%B0%C6%A1ng/%C4%90%E1%BB%8Dc-cho-Chi%C3%AAu-H%E1%BB%95-ho%E1%BA%A1/poem-YXI6cVrwCfbMi8IDswKjhQ', 'https://www.thivien.net/H%E1%BB%93-Xu%C3%A2n-H%C6%B0%C6%A1ng/%C4%90%E1%BB%93ng-ti%E1%BB%81n-ho%E1%BA%BBn/poem-UZ4PJ0gdsEsSl_pypy8vGw', 'https://www.thivien.net/Nguy%E1%BB%85n-C%C3%B4ng-Tr%E1%BB%A9/%C4%90%E1%BB%9Di-ng%C6%B0%E1%BB%9Di-th%E1%BA%A5m-tho%E1%BA%AFt/poem-dpoG8f6cGqypGv740YMB1g', 'https://www.thivien.net/H%E1%BB%93-Xu%C3%A2n-H%C6%B0%C6%A1ng/Kh%C3%B3c-T%E1%BB%95ng-C%C3%B3c/poem-6pamu7FlLkIevS5S6C9yfw', 'https://www.thivien.net/H%E1%BB%93-Xu%C3%A2n-H%C6%B0%C6%A1ng/L%E1%BB%A1m-h%E1%BB%8Dc-tr%C3%B2/poem-0rhDlwzvK5OxEIc98gs0mw', 'https://www.thivien.net/H%E1%BB%93-Xu%C3%A2n-H%C6%B0%C6%A1ng/Mi%

 50%|█████     | 2/4 [00:42<00:41, 20.75s/it]

Lấy dữ liệu thành công https://www.thivien.net/H%E1%BB%93-Xu%C3%A2n-H%C6%B0%C6%A1ng/N%C3%BAi-K%E1%BA%BDm-Tr%E1%BB%91ng/poem-feCLUxz53X41QOuf4SwK2A

 ['https://www.thivien.net/H%E1%BB%93-Xu%C3%A2n-H%C6%B0%C6%A1ng/N%C6%B0%E1%BB%9Bc-%C4%90%E1%BA%B1ng/poem--fDjA9dpven1ybYhX9MM0w', 'https://www.thivien.net/H%E1%BB%93-Xu%C3%A2n-H%C6%B0%C6%A1ng/%E1%BB%90c-nh%E1%BB%93i/poem-gDMeCDBVvtYDv6KKqbVyBw', 'https://www.thivien.net/H%E1%BB%93-Xu%C3%A2n-H%C6%B0%C6%A1ng/Qu%E1%BA%A3-m%C3%ADt/poem-LieX1fygdE0jurQxV_C4rQ', 'https://www.thivien.net/Phan-Thanh-Gi%E1%BA%A3n/Qua-r%E1%BB%ABng-v%E1%BA%AFng/poem-PqYCTCphR7mB4hU6Y-hEjw', 'https://www.thivien.net/H%E1%BB%93-Xu%C3%A2n-H%C6%B0%C6%A1ng/S%C6%B0-ho%E1%BA%A1nh-d%C3%A2m/poem-FW5cbM0YaktMhTajssd9GQ', 'https://www.thivien.net/Nguy%E1%BB%85n-C%C3%B4ng-Tr%E1%BB%A9/Th%C6%A1-%C4%91%E1%BB%81-mo-cau/poem-8SHK80U9f93nzUXM1iZDVA', 'https://www.thivien.net/Ho%C3%A0ng-Phan-Th%C3%A1i/Th%C6%A1-tuy%E1%BB%87t-m%E1%BB%87nh/poem-SSg0YSXZGCzypeIC1UN9Xg', 'https://www.thivien

 75%|███████▌  | 3/4 [01:01<00:19, 19.96s/it]

Lấy dữ liệu thành công https://www.thivien.net/H%E1%BB%93-Xu%C3%A2n-H%C6%B0%C6%A1ng/Tr%C3%A1ch-Chi%C3%AAu-H%E1%BB%95/poem-PevYJ4ZQTa8nQsmeM0lQIQ

 ['https://www.thivien.net/H%E1%BB%93-Xu%C3%A2n-H%C6%B0%C6%A1ng/Tr%C3%A0o-t%C4%83ng/poem-HGACRiPNptGsfdAvpuEO5A', 'https://www.thivien.net/Phan-Thanh-Gi%E1%BA%A3n/T%E1%BB%AB-bi%E1%BB%87t-phu-nh%C3%A2n/poem-cpdmgK3J0elLYWFe0UDQ-g', 'https://www.thivien.net/Phan-Thanh-Gi%E1%BA%A3n/T%E1%BB%B1-th%C3%A1n/poem-Jpb59qOl31NUFMyM4vMX7w', 'https://www.thivien.net/H%E1%BB%93-Xu%C3%A2n-H%C6%B0%C6%A1ng/T%E1%BB%A9c-s%E1%BB%B1/poem-O1uqLz4V2A1a4Bo4nvqG9A', 'https://www.thivien.net/Nguy%E1%BB%85n-Qu%C3%BD-T%C3%A2n/V%E1%BB%8Bnh-chim-b%E1%BB%93-c%C3%A2u/poem-VSl8M4IOaOF7L79JsbX-kA', 'https://www.thivien.net/Nguy%E1%BB%85n-Qu%C3%BD-T%C3%A2n/V%E1%BB%8Bnh-%C4%91%C3%A8n-k%C3%A9o-qu%C3%A2n/poem-EFXZg0ro2J6o4zvRSteB1w', 'https://www.thivien.net/Chu-M%E1%BA%A1nh-Trinh/V%E1%BB%8Bnh-hoa-thu%E1%BB%B7-ti%C3%AAn/poem-j3Y-sGVhlTnyEqUhDTO8rA', 'https://www.thivien.net/Nguy%

100%|██████████| 4/4 [01:27<00:00, 21.83s/it]

Lấy dữ liệu thành công https://www.thivien.net/Nguy%E1%BB%85n-C%C3%B4ng-Tr%E1%BB%A9/V%E1%BB%8Bnh-tr%C3%B2-leo-d%C3%A2y/poem-CkazKy4rzMMmBT5w5tQSVw





In [5]:
# Batch 3: crawl only result page 10 of the search filtered by Age[]=2.
that_ngon_tu_tuyet_3 = crawl_data('https://www.thivien.net/searchpoem.php?PoemType=6&ViewType=1&Country=2&Age[]=2', 10, 10)

  0%|          | 0/1 [00:00<?, ?it/s]


 ['https://www.thivien.net/Tr%E1%BA%A7n-T%E1%BA%BF-X%C6%B0%C6%A1ng/O%C3%A1n-Ki%E1%BB%81u/poem-na-5iB1T5lQNFvXSNTxOIQ', 'https://www.thivien.net/Tr%E1%BA%A7n-T%E1%BA%BF-X%C6%B0%C6%A1ng/%C3%94ng-H%C3%A0n/poem-Eeo4cvNjsEAcaCN1pv7UAQ', 'https://www.thivien.net/Nguy%E1%BB%85n-Khuy%E1%BA%BFn/%C3%94ng-ph%E1%BB%97ng-%C4%91%C3%A1-b%C3%A0i-1/poem-wKJv1CZbWt0UhuCagXrD5w', 'https://www.thivien.net/Tr%E1%BA%A7n-T%E1%BA%BF-X%C6%B0%C6%A1ng/%C3%94ng-ti%E1%BA%BFn-s%C4%A9-m%E1%BB%9Bi/poem-1_NWurOu4ZLVJr7MENUZMA', 'https://www.thivien.net/Tr%E1%BA%A7n-T%E1%BA%BF-X%C6%B0%C6%A1ng/Ph%E1%BB%91-H%C3%A0ng-Song/poem-Q-z2Y0x2ZSv3u8XUD6QP6Q', 'https://www.thivien.net/Tr%E1%BA%A7n-T%E1%BA%BF-X%C6%B0%C6%A1ng/Ph%C6%B0%E1%BB%9Dng-h%C3%A1t-tu%E1%BB%93ng/poem-i1kzARIBOU7YlwpIVn6LQw', 'https://www.thivien.net/Hu%E1%BB%B3nh-Th%C3%BAc-Kh%C3%A1ng/Quanh-kho%C3%A1-b%E1%BA%A7u-c%E1%BB%AD-d%C3%A2n-bi%E1%BB%83u/poem-UPTrXiwS68U1GbPdh86fUw', 'https://www.thivien.net/Tr%E1%BA%A7n-T%E1%BA%BF-X%C6%B0%C6%A1ng/S%C6%B0-%E1%BB%9F-t%C3

100%|██████████| 1/1 [00:39<00:00, 39.22s/it]

Lấy dữ liệu thành công https://www.thivien.net/T%E1%BA%A3n-%C4%90%C3%A0/T%E1%BA%BFt-t%E1%BB%B1-thu%E1%BA%ADt/poem-ZLpgO8yEcxByY08XKfYmfA





In [5]:
# Batch 4: crawl result pages 7 through 10 of the search filtered by Age[]=3.
that_ngon_tu_tuyet_4 = crawl_data('https://www.thivien.net/searchpoem.php?PoemType=6&ViewType=1&Country=2&Age[]=3', 7, 10)

  0%|          | 0/4 [00:00<?, ?it/s]


 ['https://www.thivien.net/H%E1%BB%93-Ch%C3%AD-Minh/B%E1%BA%A5y-l%C3%A2u-m%C6%A1-ng%E1%BB%A7-m%C3%A3i-ch%C6%B0a-th%C3%B4i/poem-6SyVQPu-Ye7BMDLIQ8Izuw', 'https://www.thivien.net/H%C3%A0n-Qu%E1%BB%91c-V%C5%A9/B%C3%A9n-r%E1%BB%85/poem-LuwlmC0F-qI06fZeHjRq8A', 'https://www.thivien.net/M%E1%BB%99ng-Tuy%E1%BA%BFt/B%C3%A8o-n%C6%B0%E1%BB%9Bc-t%C6%B0%C6%A1ng-t%C6%B0/poem-CbTBwxAdOr0ApCHRt77ACA', 'https://www.thivien.net/Qu%C3%A1ch-T%E1%BA%A5n/B%E1%BA%BFn-c%C5%A9/poem-y_VkGyKMEf3m9TdFogEVww', 'https://www.thivien.net/Qu%C3%A1ch-T%E1%BA%A5n/B%C3%AAn-g%C3%A1c-m%E1%BB%99ng/poem--ljRyPbqGTPiPV9K1IsANA', 'https://www.thivien.net/J-Leiba/B%E1%BA%BFn-gi%C3%A1c/poem-OktUaBmCXzBLGYk5lSyHbA', 'https://www.thivien.net/Qu%C3%A1ch-T%E1%BA%A5n/B%E1%BA%BFn-l%E1%BA%A1-thu-bay/poem-OEp3Yh3XaMcHo2SQq4BA2A', 'https://www.thivien.net/Qu%C3%A1ch-T%E1%BA%A5n/B%C3%AAn-s%C3%B4ng/poem-1eG-0ZCFQN6lh3SOMh59xA', 'https://www.thivien.net/Tr%E1%BA%A7n-%C4%90%C3%B4ng-Phong/B%E1%BA%BFn-x%C6%B0a/poem-gYG8-S0-Aj2t_huz74aroA', '

 25%|██▌       | 1/4 [00:24<01:14, 24.90s/it]

Lấy dữ liệu thành công https://www.thivien.net/H%C3%A0n-Qu%E1%BB%91c-V%C5%A9/B%E1%BB%81nh-b%E1%BB%93ng/poem--sOAaXRpFVe3vK95MGlaXw

 ['https://www.thivien.net/H%C3%A0n-Qu%E1%BB%91c-V%C5%A9/Bi%E1%BA%BFn-m%E1%BA%A5t/poem-ldo2rdV5ixBouMq0N4ms2Q', 'https://www.thivien.net/H%C3%A0n-Qu%E1%BB%91c-V%C5%A9/Bi%E1%BB%83n-m%C3%B4i-trinh/poem-UXjWIIOXoJB47BSdUvgisQ', 'https://www.thivien.net/H%C3%A0n-Qu%E1%BB%91c-V%C5%A9/Bi%E1%BB%83n-s%E1%BA%A7u-%C4%91au/poem--hEGankx4zxIoXY4jzeBeA', 'https://www.thivien.net/Ho%C3%A0ng-T%E1%BA%A1o/B%C3%B3c-l%E1%BB%8Bch-c%E1%BA%A3m-t%C3%A1c/poem-h4Fb-NQfwz8stpODRFaUNA', 'https://www.thivien.net/V%C5%A9-Ho%C3%A0ng-Ch%C6%B0%C6%A1ng/B%C3%B3ng-%C4%91%C3%B3-h%C3%ACnh-%C4%91%C3%A2u/poem-61lyUEYFtKwuFPPc2DVstA', 'https://www.thivien.net/H%C3%A0n-Qu%E1%BB%91c-V%C5%A9/B%C3%B3ng-gi%E1%BA%B7c/poem-VdLqJp8RjxJqjEEipUTn0g', 'https://www.thivien.net/T%E1%BB%A5-Vinh/B%C3%B3ng-l%E1%BA%BB/poem-fPDscgPPOlJmVwDnty_tig', 'https://www.thivien.net/H%C3%A0n-Qu%E1%BB%91c-V%C5%A9/B%C3%B3ng-

 50%|█████     | 2/4 [00:47<00:47, 23.71s/it]

Lấy dữ liệu thành công https://www.thivien.net/Qu%C3%A1ch-T%E1%BA%A5n/B%C3%B3ng-xu%C3%A2n/poem-mn4SFv3HXyTgOOg0_Ti3nQ

 ['https://www.thivien.net/Qu%C3%A1ch-T%E1%BA%A5n/B%E1%BB%93i-h%E1%BB%93i-ng%E1%BB%8Dn-b%E1%BA%A5c/poem-1AAXDo30Z2a4OxX1tZO5sQ', 'https://www.thivien.net/Tr%E1%BA%A7n-%C4%90%C3%B4ng-Phong/B%E1%BB%91n-c%E1%BA%A3nh-%C4%91%E1%BA%B9p/poem-U3_jk6ctYWm4rAxb9KZZiQ', 'https://www.thivien.net/Tr%E1%BA%A7n-%C4%90%C3%B4ng-Phong/B%E1%BB%91n-%C4%91i%E1%BB%83m-hay-%E1%BB%9F-%C4%90%C3%A0i-B%E1%BA%AFc/poem-XzX0TPcqKl_ZXmP_Ks4mqw', 'https://www.thivien.net/M%C6%B0%E1%BB%9Dng-M%C3%A1n/B%C3%B4ng-c%C6%A1m-ngu%E1%BB%99i/poem-lmkiKEV-sWNldC2aDxLnuA', 'https://www.thivien.net/Ph%E1%BA%A1m-Tr%C6%B0%E1%BB%9Dng-Giang/B%C3%B4ng-h%E1%BB%93ng-t%C3%ACnh-y%C3%AAu/poem-HG-D7EKkFmD-iRyCS3NdrA', 'https://www.thivien.net/Th%C3%ADch-Vi%C3%AAn-Th%C3%A0nh/Bu%E1%BB%95i-%C4%91%E1%BA%A7u-%C4%91%E1%BA%BFn-ch%C3%B9a-H%C6%B0%C6%A1ng/poem-JkTNe0WMXVtHHJHbl-8CBA', 'https://www.thivien.net/Tr%E1%BA%A7n-%C4%90%C3%B4

 75%|███████▌  | 3/4 [01:08<00:22, 22.32s/it]

Lấy dữ liệu thành công https://www.thivien.net/Nguy%E1%BB%85n-L%C3%A3m-Th%E1%BA%AFng/Bu%E1%BB%93n-kh%C3%B4ng-h%E1%BB%9F-n%C3%BAi/poem-o5LwGoBfW3nmpI1H-GeJpg

 ['https://www.thivien.net/Qu%C3%A1ch-T%E1%BA%A5n/Bu%E1%BB%93n-th%C6%B0%C6%A1ng/poem-phTI_-K01_hrtHfBfk1Pcw', 'https://www.thivien.net/M%E1%BB%99ng-Tuy%E1%BA%BFt/B%E1%BB%A9c-th%C6%B0-t%C6%B0%C6%A1ng-t%C6%B0/poem-cyb6NlDva_Z3wPH-d_pEtw', 'https://www.thivien.net/Y%E1%BA%BFn-Lan/C%C3%A1i-c%C3%B2n-l%E1%BA%A1i/poem-NH15Pe1wMSKOCGc4lrGaHA', 'https://www.thivien.net/Huy-C%E1%BA%ADn/C%C3%A1i-d%E1%BA%A1-y%C3%AAu-%C4%91%E1%BB%9Di/poem-oRilsH6WKPb4FiOlhqERuQ', 'https://www.thivien.net/H%C3%A0n-Qu%E1%BB%91c-V%C5%A9/C%C3%A0i-gi%E1%BB%8Dt-nh%E1%BB%9B/poem-qv8weKWfNaeGDFHAwMzpWg', 'https://www.thivien.net/Nguy%E1%BB%85n-L%C3%A3m-Th%E1%BA%AFng/C%C3%A1i-nh%E1%BB%9B/poem-mm0OFiA3-GEuEt5E-uA0Gg', 'https://www.thivien.net/V%C5%A9-Ho%C3%A0ng-Ch%C6%B0%C6%A1ng/C%C3%A1i-nh%E1%BB%A5c-l%C3%A0m-ng%C6%B0%E1%BB%9Di/poem-x-NYrmTNP0MQ9S6RwHJHgQ', 'https://www.

100%|██████████| 4/4 [01:38<00:00, 24.57s/it]

Lấy dữ liệu thành công https://www.thivien.net/B%C3%ADch-Kh%C3%AA/C%E1%BA%A3m-h%E1%BB%A9ng/poem-1Lv93jpix-pcD4QsZqB7-g





# Save datasets to CSV

In [6]:
def save_dataset(dataset, name):
  """Write a collection of records to a CSV file.

  Args:
    dataset: Data accepted by ``pd.DataFrame`` (e.g. a list of dicts).
    name: Destination path of the CSV file.

  The file is written without the index column and encoded as UTF-8 with a
  byte-order mark ('utf-8-sig').
  """
  pd.DataFrame(dataset).to_csv(name, encoding='utf-8-sig', index=False)

In [9]:
# Persist batch 1 to its own CSV file (read back by the final merge cell).
save_dataset(that_ngon_tu_tuyet_1, 'that_ngon_tu_tuyet_1.csv')

In [8]:
# Persist batch 2 to its own CSV file (read back by the final merge cell).
save_dataset(that_ngon_tu_tuyet_2, 'that_ngon_tu_tuyet_2.csv')

In [8]:
# Persist batch 3 to its own CSV file (read back by the final merge cell).
save_dataset(that_ngon_tu_tuyet_3, 'that_ngon_tu_tuyet_3.csv')

In [7]:
# Persist batch 4 to its own CSV file (read back by the final merge cell).
save_dataset(that_ngon_tu_tuyet_4, 'that_ngon_tu_tuyet_4.csv')

# Save final dataset

In [9]:
# Per-batch CSV files, merged in this order.
file_list = [
    "that_ngon_tu_tuyet_1.csv",
    "that_ngon_tu_tuyet_2.csv",
    "that_ngon_tu_tuyet_3.csv",
    "that_ngon_tu_tuyet_4.csv"
]

# Load every batch and stack them into one frame with a fresh 0..n-1 index.
df_final = pd.concat(map(pd.read_csv, file_list), ignore_index=True)

# Write the merged dataset without the index, as UTF-8 with a BOM.
df_final.to_csv("that_ngon_tu_tuyet_final.csv", encoding='utf-8-sig', index=False)