In [1]:
import json
from bs4 import Tag
from tqdm import tqdm
import os
from concurrent.futures import ThreadPoolExecutor, as_completed


In [2]:
import requests
from bs4 import BeautifulSoup
import re
import time
import random
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry
from datetime import datetime, timedelta

def requests_retry_session(
    retries=3,
    backoff_factor=0.3,
    status_forcelist=(500, 502, 504),
    session=None,
):
    """Return a ``requests`` session whose HTTP(S) traffic retries automatically.

    Parameters
    ----------
    retries : int
        Used as the total, read and connect retry budget of the ``Retry`` policy.
    backoff_factor : float
        Forwarded unchanged to ``Retry``.
    status_forcelist : tuple of int
        Forwarded unchanged to ``Retry``.
    session : requests.Session, optional
        Existing session to configure; a new one is created when omitted.

    Returns
    -------
    requests.Session
        The session passed in (or the new one) with a retrying adapter
        mounted on both ``http://`` and ``https://``.
    """
    http = session or requests.Session()
    retrying_adapter = HTTPAdapter(
        max_retries=Retry(
            total=retries,
            read=retries,
            connect=retries,
            backoff_factor=backoff_factor,
            status_forcelist=status_forcelist,
        )
    )
    # One adapter instance serves both schemes, in the same order as before.
    for scheme in ('http://', 'https://'):
        http.mount(scheme, retrying_adapter)
    return http


In [3]:
def each_link_content(url, timeout=(10, 30)):
    """Download one wiki article page and extract its title and text lines.

    The request goes through ``requests_retry_session()`` after a random
    1-3 second pause. Redirects are not followed; they are reported in the
    result instead. Every failure is caught and reported through the
    ``"Message"`` / ``"Response"`` fields, so this function does not raise.

    Parameters
    ----------
    url : str
        Address of the page to fetch.
    timeout : float or tuple, optional
        Forwarded to ``Session.get`` as the request timeout in seconds
        (``(connect, read)`` when a tuple). Without a timeout a stalled
        connection would block the calling worker indefinitely.

    Returns
    -------
    dict
        ``{"data": {"title": str, "body": {"Text": list}, "meta_data":
        {"URL": url}}, "Message": str, "Response": int}``.
        ``"Response"`` is 200 on success, the response status for a
        redirect, 408 on timeout, the HTTP status of a failed request when
        one is available, and 500 otherwise.
    """
    # Browser-like headers. 'accept-encoding' is deliberately NOT set here:
    # the session then advertises only the encodings the installed
    # requests/urllib3 can actually decode, instead of promising br/zstd.
    # NOTE(review): the cookie below is a captured browser cookie string —
    # confirm it is still needed; it looks stale.
    headers = {
        'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
        'accept-language': 'en-US,en;q=0.9,en-IN;q=0.8',
        'cache-control': 'max-age=0',
        'cookie': 'VEE=wikitext; bowikimwclientpreferences=vector-feature-limited-width-clientpref-0; GeoIP=IN:::22.00:79.00:v4; bowikimwuser-sessionId=57993a543a8ba6c65a70; WMF-Last-Access=25-Sep-2024; WMF-Last-Access-Global=25-Sep-2024; bowikiPreventProbe=1; NetworkProbeLimit=0.001',
        'priority': 'u=0, i',
        'referer': 'https://bo.wikipedia.org/wiki/Special:AllPages',
        'sec-ch-ua': '"Microsoft Edge";v="129", "Not=A?Brand";v="8", "Chromium";v="129"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
        'sec-fetch-dest': 'document',
        'sec-fetch-mode': 'navigate',
        'sec-fetch-site': 'same-origin',
        'sec-fetch-user': '?1',
        'upgrade-insecure-requests': '1',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36 Edg/129.0.0.0'
    }

    # Pre-filled with the "success" shape; failure paths overwrite
    # "Message" and "Response" only.
    final_response = {
        "data": {
            'title': "",
            'body': {"Text": []},
            'meta_data': {'URL': url,}
        },
        "Message": "Success",
        "Response": 200
    }

    try:
        # Add a random delay before making the request
        time.sleep(random.uniform(1, 3))

        # The session is closed as soon as the response has been read, so
        # repeated calls do not accumulate open connection pools.
        with requests_retry_session() as session:
            response = session.get(
                url,
                headers=headers,
                allow_redirects=False,
                timeout=timeout,
            )
        response.raise_for_status()

        # Redirects are reported to the caller rather than followed.
        if response.is_redirect:
            final_response["Message"] = f"Redirected to: {response.headers['Location']}"
            final_response["Response"] = response.status_code
            return final_response

        # Parse the page content with BeautifulSoup
        soup = BeautifulSoup(response.content, 'html.parser')
        # Remove script and style elements so their source is not returned as text
        for script in soup(["script", "style"]):
            script.decompose()

        # Extract title (empty string when the title span is absent)
        title = soup.find('span', class_="mw-page-title-main")
        final_response['data']["title"] = title.get_text(strip=True) if title else ""

        full_body = soup.find('div', id="bodyContent")
        if full_body:
            try:
                # NOTE(review): a multi-word class_ string matches that exact
                # class attribute value only — confirm the page markup keeps
                # this order.
                body = full_body.find("div", class_="mw-content-ltr mw-parser-output")
                if body:
                    text = body.get_text(separator='\n', strip=True)
                    lines = [line for line in text.split('\n') if line.strip()]
                    # All non-blank text lines of the content div; a single
                    # empty string marks "div found but no text".
                    final_response['data']['body']["Text"] = lines if lines else [""]
            except AttributeError as e:
                final_response['data']['body']["Text"] = [f"Error fetching body content{str(e)}"]

        return final_response
    except requests.Timeout:
        final_response["Message"] = "Request timed out"
        final_response["Response"] = 408  # Request Timeout
        return final_response
    except requests.RequestException as e:
        final_response["Message"] = f"An error occurred while fetching the article: {str(e)}"
        # e.response may be None (e.g. connection errors); fall back to 500.
        final_response["Response"] = getattr(e.response, 'status_code', 500)
        return final_response
    except Exception as e:
        final_response["Message"] = f"An unexpected error occurred: {e}"
        final_response["Response"] = 500
        return final_response

In [4]:
# Smoke test: run the extractor on a single article; the returned dict is
# this cell's output.
# Alternative sample page (previously assigned and immediately overwritten):
# url = "https://bo.wikipedia.org/wiki/%E0%BD%A8%E0%BC%8B%E0%BD%A3%E0%BC%8B%E0%BD%94%E0%BC%8B%E0%BD%98%E0%BC%8D"
url = "https://bo.wikipedia.org/wiki/%E0%BD%9A%E0%BC%8B%E0%BD%A2%E0%BD%BC%E0%BD%84%E0%BC%8B%E0%BD%9F%E0%BE%B3%E0%BC%8B%E0%BD%96%E0%BD%9F%E0%BD%84%E0%BC%8B%E0%BD%91%E0%BD%82%E0%BE%B2%E0%BC%8B%E0%BD%A0%E0%BD%91%E0%BD%B4%E0%BD%A3%E0%BC%8B"
each_link_content(url)


{'data': {'title': 'ཚ་རོང་ཟླ་བཟང་དགྲ་འདུལ་',
  'body': {'Text': ['ཚ་རོང་ཟླ་བཟང་དགྲ་འདུལ',
    'ཚ་རོང་ཟླ་བཟང་དགྲ་འདུལ་ནི་འཕན་པོ་དཔལ་གླིང་ཤང་ཁ་སྐོར་གྲོང་ཚོའི་མདའ་བཟོ་ཚང་ཞེས་སེ་ར་ཕུར་ལྕོག་བླ་བྲང་གི་མི་སེར་ཞིག་གི་ཁྱིམ་ནས་ཡིན་ཞིང་། ཁྱིམ་ཚང་ཁྲལ་བུན་ཆེ་བས་ཁོང་དང་གཅུང་པོ་བསོད་ནམས་དབང་འདུས་གཉིས་ལྷ་སའི་བྱང་ཕྱོགས་དོག་བདེ་ལྷོ་མོས་གཞིས(དེ་སྔ་ཕུར་ལྕོག་བླ་བྲང་གི་གཞིས་ཀ་ཡིན)སུ་ལུག་རྫི་ར་སྡོད་སྐབས་སྤྱང་ཀིས་ར་ལུག་བསད་རྐྱེན་བྲོས་བྱོལ་གྱིས་ལྷ་སའི་ནུབ་ཏུ་ཡོད་པའི་མཁར་ནག་གླིང་ཁར་བོད་ས་གནས་སྲིད་གཞུང་གི་རྔ་མོང་བདག་སྐྱོང་འགོ་པ་ཞིག་གི་སར་བསྡད་པ་བརྒྱུད་དེའི་ངོ་ཤེས་རྩེ་དྲུང་རྩེ་གཉེར་ཚང་བ་ཞིག་གི་གཡོག་པོར་སྡོད་སྐབས་བོད་རབ་བྱུང་བཅོ་ལྔ་པའི་ཤིང་འབྲུག(༡༩༠༤)ལོར་དབྱིན་ཇིས་བོད་དུ་བཙན་བཛུལ་བྱེད་དུས་ཏཱ་ལའི་བླ་མ་སྐུ་ཕྲེང་བཅུ་གསུམ་པ་རྒྱ་སོག་ཡུལ་གྲུར་ཕེབས་སྐབས་རྩེ་དྲུང་དེ་ཆིབས་ཞབས་སུ་བསྐྱོད་ནས་ཟླ་བཟང་དགྲ་འདུལ་གཡོག་ཏུ་ཡོད་པ་དེའི་གྲུང་ཤ་དོད་པ་སོགས་ཀྱི་རྣམ་འགྱུར་ལ་ཏཱ་ལའི་བླ་མའི་མཉེས་པོ་བྱུང་ནས་ཆིབས་ཞབས་སུ་ཚུད། ལྕགས་ཁྱི་(༡༩༡༠)ལོར་ཏཱ་ལའི་བླ་མ་རྒྱ་གར་དུ་ཕེབས་པའི་ཆིབས་ཞབས་སུ་བསྐྱོད་དེ་ལྕགས་ཟམ་གྲུ་ཁར་ལུའུ་ཅུན་དམག་གི་རྗེས་འདེད་ལ་འགོག་རྒ

In [5]:


# def read_json(path, file_name):
#     try:
#         with open(path+file_name, 'r', encoding='utf-8') as openfile:
#             Loaded_file = json.load(openfile)
#             print(f"Successfully loaded: {file_name}")
#         return Loaded_file
#     except Exception as e:
#         print(f"Error loading {file_name}: {str(e)}")
#         return None

# def get_content(All_links_data, Total_lenght, page_key_code, page_key_list):
#     all_article = {}
    
#     with ThreadPoolExecutor(max_workers=10) as executor:
#         future_to_url = {}
#         start = 1
#         for i in range(start, Total_lenght+1):
#             page_key = page_key_code + str(i)
#             all_link_page = All_links_data["Links"]
            
#             for url in all_link_page:
#                 future = executor.submit(scrape_article, url, page_key_code)
#                 future_to_url[future] = (page_key, url)
        
#         for future in tqdm(as_completed(future_to_url), total=len(future_to_url)):
#             page_key, url = future_to_url[future]
#             try:
#                 article_content = future.result()
#                 if article_content:
#                     article_key = f"{page_key}_scrape_{file_name_code}_Article_{len([k for k in all_article if k.startswith(page_key)]) + 1}"
#                     all_article[article_key] = article_content
#             except Exception as e:
#                 print(f"Error processing {url}: {str(e)}")
    
#     Failure_count = sum(1 for article in all_article.values() if article["Response"] != 200)
#     print(f"Total Failure in the {page_key_list[1]} article: {Failure_count}")
    
#     save_file_name = f"scrape_{page_key_code}_ALL_content.json"
#     print(save_file_name)
#     path = "./data/parallel_content/"
#     save_json(path, save_file_name, all_article)







In [9]:

class CustomJSONEncoder(json.JSONEncoder):
    """JSON encoder with a fallback for values ``json`` cannot serialise.

    BeautifulSoup ``Tag`` objects are written as their text content; any
    other unsupported object is written as ``str(obj)``.
    """

    def default(self, obj):
        """Return a JSON-serialisable stand-in for ``obj``."""
        return obj.get_text() if isinstance(obj, Tag) else str(obj)


def save_json(path, file_name, data):
    """Write ``data`` as indented UTF-8 JSON to ``path + file_name``.

    The destination directory is created when it does not exist yet, so a
    finished scrape is not lost merely because the output folder is missing.
    Failures are reported with ``print`` and never raised (best-effort save).

    Parameters
    ----------
    path : str
        Directory prefix; it is concatenated with ``file_name`` as-is, so it
        must already end with a path separator (or be empty).
    file_name : str
        Name of the file to write; also used in the status messages.
    data : object
        Value to serialise. Objects ``json`` cannot handle are converted by
        ``CustomJSONEncoder``.

    Returns
    -------
    None
    """
    try:
        target = path + file_name
        parent_dir = os.path.dirname(target)
        if parent_dir:
            # exist_ok avoids a race when several worker threads save at once.
            os.makedirs(parent_dir, exist_ok=True)
        with open(target, "w", encoding='utf-8') as outfile:
            json.dump(data, outfile, indent=4, ensure_ascii=False, cls=CustomJSONEncoder)
        print(f"Successfully saved: {file_name}")
    except Exception as e:
        print(f"Error saving {file_name}: {str(e)}")


def scrape_article(url):
    """Run ``each_link_content`` on ``url`` without letting errors escape.

    Returns whatever ``each_link_content`` returns; if it raises, the error
    is printed and ``None`` is returned instead.
    """
    try:
        return each_link_content(url)
    except Exception as err:
        print(f"Error scraping {url}: {str(err)}")
    return None


def get_content(All_links_data, Total_lenght, page_key_code):
    """Scrape the first ``Total_lenght`` URLs of a links file, in order.

    Parameters
    ----------
    All_links_data : dict
        Parsed links file; only its ``"Links"`` entry is read and it is
        indexed by position.
    Total_lenght : int
        Number of URLs to process. It is capped at the number of links
        actually present, so an over-estimate no longer raises
        ``IndexError`` and throws away everything scraped so far.
    page_key_code : str
        Prefix of the result keys: article ``i`` is stored under
        ``f"{page_key_code}_{i}"``.

    Returns
    -------
    dict or None
        Mapping of article key to the value returned by ``scrape_article``
        for that URL (which may itself be ``None``). ``None`` is returned
        when an unexpected error occurs; the error is printed.
    """
    try:
        all_link_page = All_links_data["Links"]
        article_count = min(Total_lenght, len(all_link_page))

        all_article = {}
        for i in tqdm(range(article_count)):
            all_article[f"{page_key_code}_{i}"] = scrape_article(all_link_page[i])
        return all_article
    except Exception as e:
        # Nothing in this function decodes JSON, so one generic handler
        # covers every failure (missing "Links" key, wrong argument types...).
        print(f"Error processing url of file {page_key_code}: {str(e)}")
        return None




def process_json_file(file_path, output_dir="./data/parallel_content/"):
    """Scrape every link listed in one links file and save the results.

    The links file is read and closed before scraping starts, so no file
    handle is held open while the (slow) downloads run.

    Parameters
    ----------
    file_path : str
        Path of a JSON file containing a ``"Links"`` list of URLs.
    output_dir : str, optional
        Directory prefix handed to ``save_json``; it is concatenated with
        the file name there, so it must end with a path separator.

    Returns
    -------
    None
        Progress and errors are reported with ``print``; results are written
        to ``scrape_<name>_ALL_content.json`` where ``<name>`` is the input
        file name without its extension.
    """
    File_name = os.path.basename(file_path)
    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            All_links_data = json.load(file)

        Total_lenght = len(All_links_data["Links"])
        print(f"Total articles in {File_name}: {Total_lenght}")

        # splitext keeps dots inside the name ("a.b.json" -> "a.b"), so
        # differently named inputs cannot collapse onto one output file.
        page_name = os.path.splitext(File_name)[0]
        page_key_code = page_name + "_Article"
        print(f"Page key code: {page_key_code}")

        processed_data = get_content(All_links_data, Total_lenght, page_key_code)
        save_file_name = f"scrape_{page_name}_ALL_content.json"
        if processed_data:
            print(save_file_name)
            save_json(output_dir, save_file_name, processed_data)
        else:
            # Covers both a failed scrape (None) and an empty links list ({}).
            print(f"failed to save the {save_file_name}")

    except json.JSONDecodeError as e:
        print(f"Error decoding JSON in file {File_name}: {str(e)}")
    except Exception as e:
        print(f"Error processing file {File_name}: {str(e)}")


In [None]:

# Gather every .json file found directly inside the links directory.
directory = "./data/links/"
json_files = [
    os.path.join(directory, entry)
    for entry in os.listdir(directory)
    if entry.endswith('.json')
]
print(f"Total Files {len(json_files)}")

# json_files = json_files[:5]
# Up to ten link files are processed concurrently; list() drains the lazy
# map so the per-file progress bar advances as files finish.
with ThreadPoolExecutor(max_workers=10) as executor:
    list(
        tqdm(
            executor.map(process_json_file, json_files),
            total=len(json_files),
        )
    )

print(f"Processed {len(json_files)} files")


Total Files 43
Total articles in Page 2.json: 345
Page key code: Page 2_Article
Total articles in Page 1.json: 345
Page key code: Page 1_Article
Total articles in Page 4.json: 345
Page key code: Page 4_Article


  0%|          | 0/345 [00:00<?, ?it/s]
  0%|          | 0/345 [00:00<?, ?it/s][A

Total articles in Page 6.json: 345
Page key code: Page 6_Article
Total articles in Page 3.json: 345
Page key code: Page 3_Article
Total articles in Page 5.json: 345
Page key code: Page 5_Article
Total articles in Page 7.json: 345
Page key code: Page 7_Article
Total articles in Page 8.json: 345
Page key code: Page 8_Article
Total articles in Page 9.json: 345
Page key code: Page 9_Article
Total articles in Page 10.json: 345
Page key code: Page 10_Article




  0%|          | 0/345 [00:00<?, ?it/s][A[A



  0%|          | 0/345 [00:00<?, ?it/s][A[A[A[A




  0%|          | 0/345 [00:00<?, ?it/s][A[A[A[A[A


  0%|          | 0/345 [00:00<?, ?it/s][A[A[A





  0%|          | 0/345 [00:00<?, ?it/s][A[A[A[A[A[A








  0%|          | 0/345 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A









  0%|          | 0/345 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A[A







  0%|          | 0/345 [00:00<?, ?it/s][A[A[A[A[A[A[A[A






  0%|          | 0/43 [00:00<?, ?it/s][A[A[A[A[A[A[A





  0%|          | 1/345 [00:06<38:04,  6.64s/it][A[A[A[A[A[A









  0%|          | 1/345 [00:07<41:07,  7.17s/it][A[A[A[A[A[A[A[A[A[A
  0%|          | 1/345 [00:07<42:54,  7.48s/it][A



  0%|          | 1/345 [00:07<43:45,  7.63s/it][A[A[A[A

  0%|          | 1/345 [00:07<43:58,  7.67s/it][A[A




  0%|          | 1/345 [00:07<45:43,  7.97s/it][A[A[A[A[A








  0%|          | 1/345 [00:08<4

scrape_Page 3_ALL_content.json
Successfully saved: scrape_Page 3_ALL_content.json
Total articles in Page 11.json: 345
Page key code: Page 11_Article






  0%|          | 0/345 [00:00<?, ?it/s][A[A[A[A


 96%|█████████▌| 331/345 [34:35<01:31,  6.55s/it][A[A[A




 98%|█████████▊| 339/345 [34:36<00:39,  6.65s/it][A[A[A[A[A





 97%|█████████▋| 335/345 [34:38<01:00,  6.09s/it][A[A[A[A[A[A









 96%|█████████▌| 330/345 [34:38<01:26,  5.76s/it][A[A[A[A[A[A[A[A[A[A

 97%|█████████▋| 334/345 [34:38<01:15,  6.84s/it][A[A







 95%|█████████▍| 327/345 [34:38<01:46,  5.92s/it][A[A[A[A[A[A[A[A








 95%|█████████▌| 329/345 [34:39<01:46,  6.63s/it][A[A[A[A[A[A[A[A[A
 95%|█████████▌| 329/345 [34:39<01:52,  7.01s/it][A



  0%|          | 1/345 [00:08<47:18,  8.25s/it][A[A[A[A

 97%|█████████▋| 335/345 [34:40<00:54,  5.44s/it][A[A




 99%|█████████▊| 340/345 [34:40<00:29,  5.97s/it][A[A[A[A[A





 96%|█████████▌| 332/345 [34:40<01:18,  6.06s/it][A[A[A[A[A[A


 96%|█████████▌| 332/345 [34:40<01:19,  6.13s/it][A[A[A







 97%|█████████▋| 336/345 [34:46<00:59,  6.65s/

scrape_Page 5_ALL_content.json
Successfully saved: scrape_Page 5_ALL_content.json
Total articles in Page 12.json: 345
Page key code: Page 12_Article







  0%|          | 0/345 [00:00<?, ?it/s][A[A[A[A[A





 97%|█████████▋| 336/345 [35:09<01:01,  6.82s/it][A[A[A[A[A[A








 99%|█████████▊| 340/345 [35:09<00:31,  6.39s/it][A[A[A[A[A[A[A[A[A
 97%|█████████▋| 335/345 [35:11<00:51,  5.12s/it][A


 98%|█████████▊| 337/345 [35:11<00:48,  6.10s/it][A[A[A







 96%|█████████▌| 332/345 [35:11<01:21,  6.27s/it][A[A[A[A[A[A[A[A




  0%|          | 1/345 [00:02<14:32,  2.54s/it][A[A[A[A[A



  2%|▏         | 6/345 [00:40<36:48,  6.51s/it][A[A[A[A









 97%|█████████▋| 336/345 [35:16<00:58,  6.53s/it][A[A[A[A[A[A[A[A[A[A

 99%|█████████▉| 341/345 [35:16<00:25,  6.38s/it][A[A








 99%|█████████▉| 341/345 [35:17<00:26,  6.62s/it][A[A[A[A[A[A[A[A[A





 98%|█████████▊| 337/345 [35:17<00:56,  7.08s/it][A[A[A[A[A[A


 98%|█████████▊| 338/345 [35:18<00:45,  6.44s/it][A[A[A







 97%|█████████▋| 333/345 [35:19<01:19,  6.58s/it][A[A[A[A[A[A[A[A



  2%|▏  

scrape_Page 2_ALL_content.json
Successfully saved: scrape_Page 2_ALL_content.json
Total articles in Page 13.json: 345
Page key code: Page 13_Article


  0%|          | 0/345 [00:00<?, ?it/s]



  3%|▎         | 10/345 [01:08<36:36,  6.56s/it][A[A[A[A
 98%|█████████▊| 339/345 [35:39<00:38,  6.41s/it][A









 99%|█████████▊| 340/345 [35:39<00:30,  6.08s/it][A[A[A[A[A[A[A[A[A[A




  1%|▏         | 5/345 [00:30<37:02,  6.54s/it][A[A[A[A[A







 97%|█████████▋| 336/345 [35:40<01:02,  6.97s/it][A[A[A[A[A[A[A[A

100%|██████████| 345/345 [35:42<00:00,  6.21s/it][A[A

 99%|█████████▊| 340/345 [35:42<00:26,  5.38s/it][A




scrape_Page 4_ALL_content.json


 99%|█████████▉| 342/345 [35:42<00:18,  6.15s/it][A[A[A





 99%|█████████▉| 341/345 [35:43<00:25,  6.46s/it][A[A[A[A[A[A









 99%|█████████▉| 341/345 [35:43<00:20,  5.15s/it][A[A[A[A[A[A[A[A[A[A







 98%|█████████▊| 337/345 [35:43<00:45,  5.65s/it][A[A[A[A[A[A[A[A








 98%|█████████▊| 339/345 [35:43<00:38,  6.49s/it][A[A[A[A[A[A[A[A[A



  0%|          | 1/345 [00:04<24:19,  4.24s/it]][A[A[A[A

Successfully saved: scrape_Page 4_ALL_content.json
Total articles in Page 14.json: 345
Page key code: Page 14_Article




  0%|          | 0/345 [00:00<?, ?it/s][A[A




  2%|▏         | 6/345 [00:38<39:41,  7.02s/it][A[A[A[A[A


 99%|█████████▉| 343/345 [35:50<00:13,  6.61s/it][A[A[A








 99%|█████████▊| 340/345 [35:50<00:33,  6.78s/it][A[A[A[A[A[A[A[A[A









 99%|█████████▉| 342/345 [35:50<00:17,  5.90s/it][A[A[A[A[A[A[A[A[A[A



  3%|▎         | 12/345 [01:19<34:50,  6.28s/it][A[A[A[A
 99%|█████████▉| 341/345 [35:50<00:24,  6.19s/it][A





 99%|█████████▉| 342/345 [35:50<00:20,  6.97s/it][A[A[A[A[A[A







  1%|          | 2/345 [00:12<36:26,  6.37s/it]t][A[A[A[A[A[A[A[A

  0%|          | 1/345 [00:08<47:03,  8.21s/it][A[A




  2%|▏         | 7/345 [00:43<35:55,  6.38s/it][A[A[A[A[A


  1%|          | 3/345 [00:19<38:01,  6.67s/it]t][A[A[A








 99%|█████████▉| 341/345 [35:58<00:28,  7.06s/it][A[A[A[A[A[A[A[A[A



  4%|▍         | 13/345 [01:26<36:56,  6.68s/it][A[A[A[A





 99%|█████████▉| 343/345 [35:58<00:14,  7.

scrape_Page 6_ALL_content.json
Successfully saved: scrape_Page 6_ALL_content.json
Total articles in Page 15.json: 345
Page key code: Page 15_Article





  0%|          | 0/345 [00:00<?, ?it/s][A[A[A




  3%|▎         | 9/345 [00:58<38:37,  6.90s/it][A[A[A[A[A







 99%|█████████▉| 341/345 [36:09<00:24,  6.17s/it][A[A[A[A[A[A[A[A





100%|██████████| 345/345 [36:10<00:00,  6.29s/it][A[A[A[A[A[A


scrape_Page 7_ALL_content.json
Successfully saved: scrape_Page 7_ALL_content.json
Total articles in Page 16.json: 345
Page key code: Page 16_Article








  0%|          | 0/345 [00:00<?, ?it/s][A[A[A[A[A[A

  1%|▏         | 5/345 [00:31<37:20,  6.59s/it][A[A
100%|█████████▉| 344/345 [36:11<00:06,  6.60s/it][A








 99%|█████████▉| 343/345 [36:11<00:14,  7.07s/it][A[A[A[A[A[A[A[A[A









100%|██████████| 345/345 [36:11<00:00,  6.30s/it][A[A[A[A[A[A[A[A[A[A



  0%|          | 1/345 [00:08<47:24,  8.27s/it][A[A[A



  4%|▍         | 15/345 [01:40<38:17,  6.96s/it][A[A[A[A

scrape_Page 10_ALL_content.json
Successfully saved: scrape_Page 10_ALL_content.json
Total articles in Page 17.json: 345
Page key code: Page 17_Article












  0%|          | 0/345 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A[A




  3%|▎         | 10/345 [01:03<35:08,  6.29s/it][A[A[A[A[A







 99%|█████████▉| 342/345 [36:12<00:16,  5.37s/it][A[A[A[A[A[A[A[A





  0%|          | 1/345 [00:02<15:15,  2.66s/it][A[A[A[A[A[A

  2%|▏         | 6/345 [00:39<40:01,  7.08s/it][A[A









  0%|          | 1/345 [00:06<39:59,  6.97s/it][A[A[A[A[A[A[A[A[A[A
100%|██████████| 345/345 [36:19<00:00,  6.32s/it][A







  2%|▏         | 1/43 [36:19<25:25:24, 2179.16s/it][A[A[A[A[A[A[A

scrape_Page 1_ALL_content.json
Successfully saved: scrape_Page 1_ALL_content.json
Total articles in Page 18.json: 345
Page key code: Page 18_Article



  0%|          | 0/345 [00:00<?, ?it/s][A








100%|█████████▉| 344/345 [36:19<00:07,  7.36s/it][A[A[A[A[A[A[A[A[A


  1%|          | 2/345 [00:16<45:43,  8.00s/it][A[A[A



  5%|▍         | 16/345 [01:48<39:38,  7.23s/it][A[A[A[A




  3%|▎         | 11/345 [01:10<36:31,  6.56s/it][A[A[A[A[A







 99%|█████████▉| 343/345 [36:20<00:12,  6.01s/it][A[A[A[A[A[A[A[A





  1%|          | 2/345 [00:10<31:32,  5.52s/it][A[A[A[A[A[A





  1%|          | 3/345 [00:13<25:07,  4.41s/it][A[A[A[A[A[A




  3%|▎         | 12/345 [01:14<31:48,  5.73s/it][A[A[A[A[A



  5%|▍         | 17/345 [01:53<34:17,  6.27s/it][A[A[A[A

  2%|▏         | 6/345 [00:41<36:42,  6.50s/it][A[A








100%|██████████| 345/345 [36:24<00:00,  6.47s/it][A[A[A[A[A[A[A[A[A
  2%|▏         | 7/345 [00:45<37:19,  6.63s/it][A









  1%|          | 2/345 [00:12<35:10,  6.15s/it][A[A[A[A[A[A[A[A[A[A


100%|██████████| 345/345 [36:24<00:00,  6.33s

scrape_Page 9_ALL_content.json
Successfully saved: scrape_Page 9_ALL_content.json
Total articles in Page 19.json: 345
Page key code: Page 19_Article











  0%|          | 0/345 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A







100%|█████████▉| 344/345 [36:28<00:06,  6.74s/it][A[A[A[A[A[A[A[A




  4%|▍         | 13/345 [01:22<35:00,  6.33s/it][A[A[A[A[A



  5%|▌         | 18/345 [02:00<36:35,  6.72s/it][A[A[A[A
  1%|          | 2/345 [00:12<37:25,  6.55s/it][A


  1%|          | 4/345 [00:28<39:14,  6.90s/it][A[A[A





  2%|▏         | 8/345 [00:54<40:36,  7.23s/it][A[A[A[A[A[A

  2%|▏         | 7/345 [00:49<41:31,  7.37s/it][A[A









  1%|          | 3/345 [00:21<41:20,  7.25s/it][A[A[A[A[A[A[A[A[A[A








  0%|          | 1/345 [00:08<49:12,  8.58s/it][A[A[A[A[A[A[A[A[A




  4%|▍         | 14/345 [01:24<28:23,  5.15s/it][A[A[A[A[A







100%|██████████| 345/345 [36:34<00:00,  6.28s/it][A[A[A[A[A[A[A[A



100%|██████████| 345/345 [36:34<00:00,  6.36s/it][A[A[A[A

  1%|          | 3/345 [00:15<26:41,  4.68s/it][A






 19%|█▊        | 8/43 [36:34<1:56:

scrape_Page 8_ALL_content.json
Successfully saved: scrape_Page 8_ALL_content.json
Total articles in Page 20.json: 345
Page key code: Page 20_Article










  0%|          | 0/345 [00:00<?, ?it/s][A[A[A[A[A[A[A[A









  1%|          | 4/345 [00:27<39:57,  7.03s/it][A[A[A[A[A[A[A[A[A[A


  1%|▏         | 5/345 [00:36<42:15,  7.46s/it][A[A[A








  3%|▎         | 9/345 [01:01<41:08,  7.35s/it][A[A[A[A[A[A[A[A[A





  1%|▏         | 5/345 [00:30<39:15,  6.93s/it][A[A[A[A[A[A




  4%|▍         | 15/345 [01:31<31:30,  5.73s/it][A[A[A[A[A

  2%|▏         | 8/345 [00:58<43:21,  7.72s/it][A[A







  0%|          | 1/345 [00:07<41:52,  7.30s/it][A[A[A[A[A[A[A[A
  1%|          | 4/345 [00:23<34:06,  6.00s/it][A



  6%|▌         | 20/345 [02:11<34:12,  6.32s/it][A[A[A[A







  1%|          | 2/345 [00:10<28:31,  4.99s/it][A[A[A[A[A[A[A[A

  3%|▎         | 9/345 [01:02<36:08,  6.45s/it][A[A




  5%|▍         | 16/345 [01:36<29:04,  5.30s/it][A[A[A[A[A


  2%|▏         | 6/345 [00:42<37:32,  6.65s/it][A[A[A









  1%|▏         | 5/345 [00:33<36:59,  6.5

scrape_Page 11_ALL_content.json
Successfully saved: scrape_Page 11_ALL_content.json
Total articles in Page 21.json: 345
Page key code: Page 21_Article






  0%|          | 0/345 [00:00<?, ?it/s][A[A[A[A

 94%|█████████▍| 324/345 [34:51<02:29,  7.11s/it][A[A








 94%|█████████▎| 323/345 [34:10<02:39,  7.26s/it][A[A[A[A[A[A[A[A[A





 93%|█████████▎| 322/345 [34:24<02:38,  6.90s/it][A[A[A[A[A[A







 94%|█████████▍| 325/345 [34:01<02:08,  6.42s/it][A[A[A[A[A[A[A[A
 95%|█████████▌| 328/345 [34:17<01:40,  5.93s/it][A


 94%|█████████▍| 324/345 [34:32<02:15,  6.44s/it][A[A[A




 96%|█████████▌| 332/345 [35:27<01:14,  5.71s/it][A[A[A[A[A



  0%|          | 1/345 [00:07<40:19,  7.03s/it][A[A[A[A









 96%|█████████▌| 331/345 [34:58<01:30,  6.45s/it][A[A[A[A[A[A[A[A[A[A








 94%|█████████▍| 324/345 [34:14<02:16,  6.49s/it][A[A[A[A[A[A[A[A[A







 94%|█████████▍| 326/345 [34:05<01:47,  5.64s/it][A[A[A[A[A[A[A[A

 94%|█████████▍| 325/345 [34:57<02:11,  6.56s/it][A[A




 97%|█████████▋| 333/345 [35:31<01:00,  5.04s/it][A[A[A[A[A


 94%|█████████▍| 32

scrape_Page 12_ALL_content.json
Successfully saved: scrape_Page 12_ALL_content.json
Total articles in Page 22.json: 345
Page key code: Page 22_Article







  0%|          | 0/345 [00:00<?, ?it/s][A[A[A[A[A









 96%|█████████▌| 332/345 [35:45<01:23,  6.45s/it][A[A[A[A[A[A[A[A[A[A



  4%|▍         | 14/345 [01:27<33:55,  6.15s/it][A[A[A[A




  0%|          | 1/345 [00:06<39:14,  6.84s/it][A[A[A[A[A





 97%|█████████▋| 335/345 [35:47<01:03,  6.33s/it][A[A[A[A[A[A








 97%|█████████▋| 336/345 [35:33<00:57,  6.42s/it][A[A[A[A[A[A[A[A[A

100%|█████████▉| 344/345 [36:19<00:06,  6.56s/it][A[A
 99%|█████████▉| 341/345 [35:39<00:26,  6.54s/it][A


 98%|█████████▊| 337/345 [35:54<00:52,  6.59s/it][A[A[A







 98%|█████████▊| 339/345 [35:24<00:39,  6.58s/it][A[A[A[A[A[A[A[A

 98%|█████████▊| 338/345 [36:18<00:40,  5.77s/it][A[A





 97%|█████████▋| 336/345 [35:51<00:52,  5.81s/it][A[A[A[A[A[A



  4%|▍         | 15/345 [01:32<32:09,  5.85s/it][A[A[A[A
 99%|█████████▉| 342/345 [35:43<00:17,  5.79s/it][A




  1%|          | 2/345 [00:11<32:35,  5.70s/it][A[A[A[A

scrape_Page 13_ALL_content.json
Successfully saved: scrape_Page 13_ALL_content.json
Total articles in Page 23.json: 345
Page key code: Page 23_Article


  0%|          | 0/345 [00:00<?, ?it/s]


 98%|█████████▊| 338/345 [36:03<00:49,  7.12s/it][A[A[A

 98%|█████████▊| 339/345 [36:25<00:37,  6.17s/it][A[A




  1%|          | 3/345 [00:18<35:44,  6.27s/it][A[A[A[A[A
 99%|█████████▉| 343/345 [35:50<00:12,  6.18s/it][A







 99%|█████████▉| 341/345 [35:35<00:24,  6.13s/it][A[A[A[A[A[A[A[A








 98%|█████████▊| 338/345 [35:44<00:43,  6.25s/it][A[A[A[A[A[A[A[A[A









 97%|█████████▋| 334/345 [35:57<01:11,  6.54s/it][A[A[A[A[A[A[A[A[A[A





  0%|          | 1/345 [00:08<46:49,  8.17s/it]t][A[A[A[A[A[A



  5%|▍         | 16/345 [01:41<36:38,  6.68s/it][A[A[A[A
100%|█████████▉| 344/345 [35:52<00:05,  5.13s/it][A

 99%|█████████▊| 340/345 [36:28<00:26,  5.26s/it][A[A




  1%|          | 4/345 [00:21<28:12,  4.96s/it][A[A[A[A[A








 98%|█████████▊| 339/345 [35:47<00:31,  5.20s/it][A[A[A[A[A[A[A[A[A


 98%|█████████▊| 339/345 [36:08<00:39,  6.59s/it][A[A[A





  

scrape_Page 18_ALL_content.json
Successfully saved: scrape_Page 18_ALL_content.json
Total articles in Page 24.json: 345
Page key code: Page 24_Article



  0%|          | 0/345 [00:00<?, ?it/s][A








 99%|█████████▊| 340/345 [35:56<00:30,  6.17s/it][A[A[A[A[A[A[A[A[A




  1%|▏         | 5/345 [00:30<35:26,  6.25s/it][A[A[A[A[A








 99%|█████████▉| 341/345 [35:58<00:19,  4.92s/it][A[A[A[A[A[A[A[A[A



  5%|▌         | 18/345 [01:53<33:03,  6.06s/it][A[A[A[A







 99%|█████████▉| 343/345 [35:49<00:12,  6.24s/it][A[A[A[A[A[A[A[A





  1%|          | 3/345 [00:20<37:28,  6.57s/it]t][A[A[A[A[A[A









 97%|█████████▋| 336/345 [36:11<00:58,  6.50s/it][A[A[A[A[A[A[A[A[A[A

 99%|█████████▉| 342/345 [36:40<00:16,  5.34s/it][A[A
  0%|          | 1/345 [00:03<21:12,  3.70s/it][A


 99%|█████████▉| 341/345 [36:19<00:24,  6.01s/it][A[A[A




  2%|▏         | 6/345 [00:37<37:44,  6.68s/it][A[A[A[A[A








 99%|█████████▉| 342/345 [36:04<00:16,  5.48s/it][A[A[A[A[A[A[A[A[A









 98%|█████████▊| 337/345 [36:18<00:52,  6.61s/it][A[A[A[A[A[A[A[A[A[A




scrape_Page 20_ALL_content.json
Successfully saved: scrape_Page 20_ALL_content.json
Total articles in Page 25.json: 345
Page key code: Page 25_Article










  0%|          | 0/345 [00:00<?, ?it/s][A[A[A[A[A[A[A[A





 99%|█████████▉| 341/345 [36:29<00:29,  7.25s/it][A[A[A[A[A[A









 98%|█████████▊| 339/345 [36:28<00:36,  6.16s/it][A[A[A[A[A[A[A[A[A[A
  1%|          | 4/345 [00:21<31:40,  5.57s/it][A








100%|█████████▉| 344/345 [36:16<00:05,  5.87s/it][A[A[A[A[A[A[A[A[A




  2%|▏         | 8/345 [00:51<38:28,  6.85s/it][A[A[A[A[A


100%|█████████▉| 344/345 [36:39<00:06,  6.22s/it][A[A[A

100%|██████████| 345/345 [37:00<00:00,  6.44s/it][A[A








  0%|          | 1/345 [00:05<28:22,  4.95s/it][A[A[A[A[A[A[A[A



  2%|▏         | 6/345 [00:41<37:39,  6.66s/it]][A[A[A[A





 99%|█████████▉| 342/345 [36:33<00:19,  6.55s/it][A[A[A[A[A[A






 33%|███▎      | 14/43 [1:12:44<1:50:26, 228.49s/it][A[A[A[A[A[A[A

scrape_Page 14_ALL_content.json
Successfully saved: scrape_Page 14_ALL_content.json
Total articles in Page 26.json: 345
Page key code: Page 26_Article




  0%|          | 0/345 [00:00<?, ?it/s][A[A









 99%|█████████▊| 340/345 [36:37<00:34,  6.85s/it][A[A[A[A[A[A[A[A[A[A
  1%|▏         | 5/345 [00:29<37:56,  6.70s/it][A




  3%|▎         | 9/345 [00:59<40:24,  7.22s/it][A[A[A[A[A








100%|██████████| 345/345 [36:25<00:00,  6.33s/it][A[A[A[A[A[A[A[A[A


scrape_Page 19_ALL_content.json
Successfully saved: scrape_Page 19_ALL_content.json
Total articles in Page 27.json: 345
Page key code: Page 27_Article











  0%|          | 0/345 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A


100%|██████████| 345/345 [36:47<00:00,  6.40s/it][A[A[A







 35%|███▍      | 15/43 [1:12:50<1:23:50, 179.68s/it][A[A[A[A[A[A[A

scrape_Page 15_ALL_content.json
Successfully saved: scrape_Page 15_ALL_content.json
Total articles in Page 28.json: 345
Page key code: Page 28_Article





  0%|          | 0/345 [00:00<?, ?it/s][A[A[A

  0%|          | 1/345 [00:06<39:47,  6.94s/it][A[A







  1%|          | 2/345 [00:12<35:46,  6.26s/it][A[A[A[A[A[A[A[A





 99%|█████████▉| 343/345 [36:41<00:13,  6.95s/it][A[A[A[A[A[A



  2%|▏         | 7/345 [00:50<41:11,  7.31s/it]][A[A[A[A
  2%|▏         | 6/345 [00:34<34:32,  6.11s/it][A


  0%|          | 1/345 [00:03<21:20,  3.72s/it][A[A[A




  3%|▎         | 10/345 [01:04<36:37,  6.56s/it][A[A[A[A[A









 99%|█████████▉| 341/345 [36:42<00:25,  6.43s/it][A[A[A[A[A[A[A[A[A[A

  1%|          | 2/345 [00:10<27:56,  4.89s/it][A[A







  1%|          | 3/345 [00:15<28:40,  5.03s/it][A[A[A[A[A[A[A[A








  2%|▏         | 8/345 [00:57<41:21,  7.36s/it][A[A[A[A[A[A[A[A[A





100%|█████████▉| 344/345 [36:50<00:07,  7.47s/it][A[A[A[A[A[A



  7%|▋         | 23/345 [02:31<39:56,  7.44s/it][A[A[A[A
  2%|▏         | 7/345 [00:41<35:44,  6.35s/it][A




 

scrape_Page 16_ALL_content.json
Successfully saved: scrape_Page 16_ALL_content.json
Total articles in Page 29.json: 345
Page key code: Page 29_Article








  0%|          | 0/345 [00:00<?, ?it/s][A[A[A[A[A[A







  1%|▏         | 5/345 [00:31<38:47,  6.85s/it][A[A[A[A[A[A[A[A









100%|█████████▉| 344/345 [37:01<00:06,  6.25s/it][A[A[A[A[A[A[A[A[A[A


  1%|          | 4/345 [00:22<33:29,  5.89s/it][A[A[A
  3%|▎         | 9/345 [00:53<35:14,  6.29s/it][A

  1%|▏         | 5/345 [00:28<33:36,  5.93s/it][A[A




  4%|▍         | 13/345 [01:23<35:50,  6.48s/it][A[A[A[A[A



  7%|▋         | 25/345 [02:44<37:04,  6.95s/it][A[A[A[A








  1%|          | 4/345 [00:23<35:07,  6.18s/it][A[A[A[A[A[A[A[A[A





  3%|▎         | 10/345 [01:12<41:33,  7.44s/it][A[A[A[A[A[A


  1%|▏         | 5/345 [00:24<26:45,  4.72s/it][A[A[A









100%|██████████| 345/345 [37:03<00:00,  6.45s/it][A[A[A[A[A[A[A[A[A[A








  2%|▏         | 6/345 [00:36<35:20,  6.25s/it][A[A[A[A[A[A[A[A






 40%|███▉      | 17/43 [1:13:16<46:24, 107.11s/it]  [A[A[A[A[A[A[A

scrape_Page 17_ALL_content.json
Successfully saved: scrape_Page 17_ALL_content.json
Total articles in Page 30.json: 345
Page key code: Page 30_Article












  0%|          | 0/345 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A[A




  4%|▍         | 14/345 [01:30<37:22,  6.77s/it][A[A[A[A[A



  8%|▊         | 26/345 [02:51<37:48,  7.11s/it][A[A[A[A

  2%|▏         | 6/345 [00:37<38:23,  6.79s/it][A[A
  3%|▎         | 10/345 [01:02<39:32,  7.08s/it][A








  1%|▏         | 5/345 [00:32<39:11,  6.92s/it][A[A[A[A[A[A[A[A[A





  3%|▎         | 11/345 [01:19<41:43,  7.49s/it][A[A[A[A[A[A







  2%|▏         | 7/345 [00:44<37:33,  6.67s/it][A[A[A[A[A[A[A[A









  0%|          | 1/345 [00:07<45:26,  7.93s/it][A[A[A[A[A[A[A[A[A[A


  2%|▏         | 6/345 [00:33<33:39,  5.96s/it][A[A[A





  1%|          | 3/345 [00:19<33:32,  5.89s/it][A[A[A[A[A[A




  4%|▍         | 15/345 [01:35<34:02,  6.19s/it][A[A[A[A[A









  1%|          | 2/345 [00:10<26:29,  4.64s/it][A[A[A[A[A[A[A[A[A[A







  2%|▏         | 8/345 [00:47<30:35,  5.45s/it][A[A[A[A[A[A[A

scrape_Page 21_ALL_content.json










 94%|█████████▎| 323/345 [35:39<02:46,  7.57s/it][A[A[A[A[A[A[A[A






 49%|████▉     | 21/43 [1:48:18<2:10:14, 355.19s/it][A[A[A[A[A[A[A

Successfully saved: scrape_Page 21_ALL_content.json
Total articles in Page 31.json: 345
Page key code: Page 31_Article






  0%|          | 0/345 [00:00<?, ?it/s][A[A[A[A




 96%|█████████▌| 332/345 [36:28<01:37,  7.53s/it][A[A[A[A[A








 94%|█████████▎| 323/345 [35:32<02:37,  7.15s/it][A[A[A[A[A[A[A[A[A





 93%|█████████▎| 322/345 [35:15<02:32,  6.65s/it][A[A[A[A[A[A
 96%|█████████▌| 330/345 [36:02<01:48,  7.25s/it][A


 96%|█████████▌| 331/345 [35:31<01:35,  6.79s/it][A[A[A

 96%|█████████▌| 330/345 [35:37<01:51,  7.42s/it][A[A









 94%|█████████▍| 324/345 [35:09<02:28,  7.08s/it][A[A[A[A[A[A[A[A[A[A



  0%|          | 1/345 [00:06<39:05,  6.82s/it][A[A[A[A







 97%|█████████▋| 336/345 [36:24<01:05,  7.33s/it][A[A[A[A[A[A[A[A




 97%|█████████▋| 333/345 [36:36<01:33,  7.77s/it][A[A[A[A[A

 96%|█████████▌| 331/345 [35:44<01:41,  7.24s/it][A[A





 94%|█████████▎| 323/345 [35:22<02:30,  6.83s/it][A[A[A[A[A[A








 94%|█████████▍| 324/345 [35:39<02:32,  7.27s/it][A[A[A[A[A[A[A[A[A


 96%|█████████▌| 332/345 [3

scrape_Page 23_ALL_content.json
Successfully saved: scrape_Page 23_ALL_content.json
Total articles in Page 32.json: 345
Page key code: Page 32_Article


  0%|          | 0/345 [00:00<?, ?it/s]




 99%|█████████▉| 341/345 [37:34<00:29,  7.38s/it][A[A[A[A[A



  3%|▎         | 10/345 [01:09<39:33,  7.09s/it][A[A[A[A

 99%|█████████▊| 340/345 [36:44<00:35,  7.01s/it][A[A





 97%|█████████▋| 333/345 [36:24<01:09,  5.82s/it][A[A[A[A[A[A









 97%|█████████▋| 334/345 [36:14<01:12,  6.56s/it][A[A[A[A[A[A[A[A[A[A







 97%|█████████▋| 334/345 [36:51<01:07,  6.17s/it][A[A[A[A[A[A[A[A


 99%|█████████▉| 341/345 [36:40<00:27,  6.93s/it][A[A[A
  0%|          | 1/345 [00:07<44:20,  7.74s/it]t][A








 97%|█████████▋| 335/345 [36:42<01:00,  6.07s/it][A[A[A[A[A[A[A[A[A









 97%|█████████▋| 335/345 [36:17<00:53,  5.39s/it][A[A[A[A[A[A[A[A[A[A







 97%|█████████▋| 335/345 [36:54<00:50,  5.02s/it][A[A[A[A[A[A[A[A





 97%|█████████▋| 334/345 [36:27<00:54,  4.94s/it][A[A[A[A[A[A




 99%|█████████▉| 342/345 [37:43<00:23,  7.77s/it][A[A[A[A[A

 99%|█████████▉

scrape_Page 22_ALL_content.json
Successfully saved: scrape_Page 22_ALL_content.json
Total articles in Page 33.json: 345
Page key code: Page 33_Article







  0%|          | 0/345 [00:00<?, ?it/s][A[A[A[A[A


100%|██████████| 345/345 [37:08<00:00,  6.46s/it][A[A[A










 98%|█████████▊| 339/345 [36:43<00:38,  6.39s/it][A[A[A[A[A[A[A[A[A[A

scrape_Page 28_ALL_content.json
Successfully saved: scrape_Page 28_ALL_content.json
Total articles in Page 34.json: 345
Page key code: Page 34_Article





  0%|          | 0/345 [00:00<?, ?it/s][A[A[A








 98%|█████████▊| 339/345 [37:09<00:41,  6.90s/it][A[A[A[A[A[A[A[A[A

100%|██████████| 345/345 [37:15<00:00,  6.48s/it][A[A


scrape_Page 26_ALL_content.json
Successfully saved: scrape_Page 26_ALL_content.json
Total articles in Page 35.json: 345
Page key code: Page 35_Article




  1%|▏         | 5/345 [00:36<41:22,  7.30s/it]
100%|██████████| 345/345 [37:40<00:00,  6.55s/it][A







 56%|█████▌    | 24/43 [1:50:00<1:05:44, 207.60s/it][A[A[A[A[A[A[A

scrape_Page 24_ALL_content.json
Successfully saved: scrape_Page 24_ALL_content.json
Total articles in Page 36.json: 345
Page key code: Page 36_Article



  0%|          | 0/345 [00:00<?, ?it/s][A







 98%|█████████▊| 339/345 [37:21<00:40,  6.67s/it][A[A[A[A[A[A[A[A



  5%|▍         | 16/345 [01:42<31:04,  5.67s/it][A[A[A[A





 98%|█████████▊| 338/345 [36:55<00:45,  6.47s/it][A[A[A[A[A[A




  0%|          | 1/345 [00:07<43:49,  7.64s/it][A[A[A[A[A
  0%|          | 1/345 [00:03<21:29,  3.75s/it][A









 99%|█████████▊| 340/345 [36:48<00:30,  6.02s/it][A[A[A[A[A[A[A[A[A[A

  0%|          | 1/345 [00:04<25:23,  4.43s/it][A[A








  2%|▏         | 6/345 [00:40<35:50,  6.34s/it]t][A[A[A[A[A[A[A[A[A



  5%|▍         | 17/345 [01:45<27:29,  5.03s/it][A[A[A[A


  0%|          | 1/345 [00:05<32:07,  5.60s/it][A[A[A







 99%|█████████▊| 340/345 [37:25<00:29,  5.88s/it][A[A[A[A[A[A[A[A





 98%|█████████▊| 339/345 [37:02<00:40,  6.75s/it][A[A[A[A[A[A




  1%|          | 2/345 [00:14<40:45,  7.13s/it][A[A[A[A[A



  2%|▏         | 7/345 [00:48<37:59,  6.75s/

scrape_Page 30_ALL_content.json
Successfully saved: scrape_Page 30_ALL_content.json
Total articles in Page 37.json: 345
Page key code: Page 37_Article












  0%|          | 0/345 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A[A








100%|██████████| 345/345 [37:51<00:00,  6.58s/it][A[A[A[A[A[A[A[A[A


scrape_Page 27_ALL_content.json
Successfully saved: scrape_Page 27_ALL_content.json
Total articles in Page 38.json: 345
Page key code: Page 38_Article







  2%|▏         | 6/345 [00:45<43:12,  7.65s/it][A[A[A[A[A








  0%|          | 0/345 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A







100%|██████████| 345/345 [38:01<00:00,  6.61s/it][A[A[A[A[A[A[A[A







 58%|█████▊    | 25/43 [1:50:41<52:39, 175.55s/it]  [A[A[A[A[A[A[A

scrape_Page 25_ALL_content.json
Successfully saved: scrape_Page 25_ALL_content.json
Total articles in Page 39.json: 345
Page key code: Page 39_Article










  0%|          | 0/345 [00:00<?, ?it/s][A[A[A[A[A[A[A[A
  2%|▏         | 7/345 [00:41<37:27,  6.65s/it][A


  2%|▏         | 7/345 [00:43<32:57,  5.85s/it][A[A[A



  3%|▎         | 12/345 [01:19<34:50,  6.28s/it][A[A[A[A





100%|█████████▉| 344/345 [37:37<00:07,  7.12s/it][A[A[A[A[A[A

  2%|▏         | 7/345 [00:45<36:09,  6.42s/it][A[A







  0%|          | 1/345 [00:04<25:28,  4.44s/it][A[A[A[A[A[A[A[A
  2%|▏         | 8/345 [00:45<33:35,  5.98s/it][A


  2%|▏         | 8/345 [00:47<29:13,  5.20s/it][A[A[A









  0%|          | 1/345 [00:05<31:23,  5.47s/it][A[A[A[A[A[A[A[A[A[A








  0%|          | 1/345 [00:05<29:09,  5.09s/it][A[A[A[A[A[A[A[A[A



  7%|▋         | 24/345 [02:27<27:47,  5.19s/it][A[A[A[A




  2%|▏         | 7/345 [00:52<43:31,  7.73s/it][A[A[A[A[A





100%|██████████| 345/345 [37:44<00:00,  6.56s/it][A[A[A[A[A[A







 67%|██████▋   | 29/43 [1:50:50<19:49, 84.95s/it] [A[A

scrape_Page 29_ALL_content.json
Successfully saved: scrape_Page 29_ALL_content.json
Total articles in Page 40.json: 345
Page key code: Page 40_Article








  4%|▍         | 13/345 [01:27<37:07,  6.71s/it][A[A[A







  1%|          | 2/345 [00:11<33:45,  5.91s/it][A[A[A[A[A[A[A[A

  2%|▏         | 8/345 [00:52<37:09,  6.62s/it][A[A
  3%|▎         | 9/345 [00:52<35:07,  6.27s/it][A









  1%|          | 2/345 [00:13<39:16,  6.87s/it][A[A[A[A[A[A[A[A[A[A



  7%|▋         | 25/345 [02:35<31:50,  5.97s/it][A[A[A[A


  3%|▎         | 9/345 [00:55<34:53,  6.23s/it][A[A[A








  1%|          | 2/345 [00:13<40:48,  7.14s/it][A[A[A[A[A[A[A[A[A
  3%|▎         | 10/345 [00:55<29:24,  5.27s/it][A







  1%|          | 3/345 [00:14<27:11,  4.77s/it][A[A[A[A[A[A[A[A





  4%|▍         | 14/345 [01:32<33:54,  6.15s/it][A[A[A[A[A[A




  2%|▏         | 8/345 [01:00<42:19,  7.54s/it][A[A[A[A[A



  8%|▊         | 26/345 [02:42<33:12,  6.25s/it][A[A[A[A

  3%|▎         | 9/345 [01:01<40:32,  7.24s/it][A[A









  1%|          | 3/345 [00:21<42:35,  7.47s/it][A[A[A[A[

scrape_Page 31_ALL_content.json
Successfully saved: scrape_Page 31_ALL_content.json
Total articles in Page 41.json: 345
Page key code: Page 41_Article






 99%|█████████▉| 342/345 [38:37<00:21,  7.22s/it][A




 88%|████████▊ | 303/345 [38:06<05:30,  7.88s/it][A[A[A[A[A









 94%|█████████▍| 324/345 [37:24<02:20,  6.68s/it][A[A[A[A[A[A[A[A[A[A
 97%|█████████▋| 333/345 [38:04<01:20,  6.70s/it][A

 98%|█████████▊| 337/345 [38:05<01:00,  7.58s/it][A[A







 95%|█████████▌| 329/345 [37:24<02:00,  7.56s/it][A[A[A[A[A[A[A[A





 99%|█████████▉| 343/345 [38:43<00:13,  6.86s/it][A[A[A[A[A[A








 97%|█████████▋| 333/345 [37:26<01:25,  7.16s/it][A[A[A[A[A[A[A[A[A



  0%|          | 1/345 [00:06<36:43,  6.40s/it][A[A[A[A


 97%|█████████▋| 336/345 [38:08<01:06,  7.37s/it][A[A[A




 88%|████████▊ | 304/345 [38:13<05:05,  7.45s/it][A[A[A[A[A









 94%|█████████▍| 325/345 [37:30<02:14,  6.72s/it][A[A[A[A[A[A[A[A[A[A
 97%|█████████▋| 334/345 [38:11<01:13,  6.72s/it][A

 98%|█████████▊| 338/345 [38:12<00:51,  7.36s/it][A[A







 96%|█████████▌| 330/345 [37:32<01:57,

scrape_Page 32_ALL_content.json
Successfully saved: scrape_Page 32_ALL_content.json
Total articles in Page 42.json: 345
Page key code: Page 42_Article


  0%|          | 0/345 [00:00<?, ?it/s]




 89%|████████▊ | 306/345 [38:28<04:58,  7.65s/it][A[A[A[A[A
 97%|█████████▋| 336/345 [38:25<01:00,  6.72s/it][A








 97%|█████████▋| 336/345 [37:44<00:58,  6.52s/it][A[A[A[A[A[A[A[A[A

 99%|█████████▊| 340/345 [38:26<00:36,  7.24s/it][A[A


 98%|█████████▊| 339/345 [38:27<00:40,  6.74s/it][A[A[A









 95%|█████████▍| 327/345 [37:46<02:10,  7.27s/it][A[A[A[A[A[A[A[A[A[A







  0%|          | 1/345 [00:07<40:41,  7.10s/it]t][A[A[A[A[A[A[A[A



  1%|          | 4/345 [00:28<41:18,  7.27s/it][A[A[A[A





 95%|█████████▌| 329/345 [37:39<02:05,  7.86s/it][A[A[A[A[A[A








 98%|█████████▊| 337/345 [37:51<00:52,  6.55s/it][A[A[A[A[A[A[A[A[A
 98%|█████████▊| 337/345 [38:32<00:55,  6.94s/it][A




 89%|████████▉ | 307/345 [38:36<04:53,  7.72s/it][A[A[A[A[A

 99%|█████████▉| 341/345 [38:34<00:30,  7.51s/it][A[A


 99%|█████████▊| 340/345 [38:35<00:35,  7.10s/it][A[A[A






scrape_Page 35_ALL_content.json
Successfully saved: scrape_Page 35_ALL_content.json
Total articles in Page 43.json: 345
Page key code: Page 43_Article




  2%|▏         | 6/345 [00:42<39:17,  6.95s/it]









 96%|█████████▌| 332/345 [38:25<01:40,  7.77s/it][A[A[A[A[A[A[A[A[A[A



  3%|▎         | 9/345 [01:05<42:00,  7.50s/it][A[A[A[A







 98%|█████████▊| 337/345 [38:25<01:01,  7.66s/it][A[A[A[A[A[A[A[A





 97%|█████████▋| 334/345 [38:16<01:24,  7.65s/it][A[A[A[A[A[A
 99%|█████████▉| 342/345 [39:07<00:20,  6.92s/it][A




 90%|█████████ | 312/345 [39:12<03:59,  7.24s/it][A[A[A[A[A


100%|██████████| 345/345 [39:10<00:00,  6.81s/it][A[A[A


scrape_Page 34_ALL_content.json
Successfully saved: scrape_Page 34_ALL_content.json











  2%|▏         | 7/345 [00:49<39:56,  7.09s/it]t][A[A[A[A[A[A[A[A[A



  3%|▎         | 10/345 [01:11<39:12,  7.02s/it][A[A[A[A
 99%|█████████▉| 343/345 [39:12<00:12,  6.32s/it][A









 97%|█████████▋| 333/345 [38:32<01:31,  7.59s/it][A[A[A[A[A[A[A[A[A[A

  0%|          | 1/345 [00:08<49:06,  8.56s/it][A[A







 98%|█████████▊| 338/345 [38:33<00:54,  7.78s/it][A[A[A[A[A[A[A[A




 91%|█████████ | 313/345 [39:19<03:48,  7.14s/it][A[A[A[A[A





 97%|█████████▋| 335/345 [38:25<01:18,  7.85s/it][A[A[A[A[A[A








 99%|█████████▉| 343/345 [38:35<00:14,  7.11s/it][A[A[A[A[A[A[A[A[A



  2%|▏         | 8/345 [00:58<41:53,  7.46s/it]][A[A[A[A

  1%|          | 2/345 [00:16<45:33,  7.97s/it][A[A
100%|█████████▉| 344/345 [39:20<00:06,  6.87s/it][A









 97%|█████████▋| 334/345 [38:40<01:26,  7.83s/it][A[A[A[A[A[A[A[A[A[A







 98%|█████████▊| 339/345 [38:40<00:44,  7.48s/it][A[A[A[A[A[A[A[A





scrape_Page 36_ALL_content.json
Successfully saved: scrape_Page 36_ALL_content.json




  1%|          | 3/345 [00:24<46:33,  8.17s/it][A[A





 98%|█████████▊| 337/345 [38:38<00:58,  7.30s/it][A[A[A[A[A[A







 99%|█████████▊| 340/345 [38:48<00:38,  7.67s/it][A[A[A[A[A[A[A[A




 91%|█████████▏| 315/345 [39:34<03:34,  7.14s/it][A[A[A[A[A








100%|██████████| 345/345 [38:50<00:00,  6.76s/it][A[A[A[A[A[A[A[A[A


scrape_Page 38_ALL_content.json
Successfully saved: scrape_Page 38_ALL_content.json


  3%|▎         | 10/345 [01:11<40:10,  7.20s/it]



  4%|▍         | 13/345 [01:33<41:05,  7.43s/it][A[A[A[A









 97%|█████████▋| 336/345 [38:55<01:08,  7.56s/it][A[A[A[A[A[A[A[A[A[A





 98%|█████████▊| 338/345 [38:45<00:50,  7.22s/it][A[A[A[A[A[A







 99%|█████████▉| 341/345 [38:56<00:30,  7.53s/it][A[A[A[A[A[A[A[A




 92%|█████████▏| 316/345 [39:41<03:31,  7.30s/it][A[A[A[A[A

  1%|          | 4/345 [00:32<46:53,  8.25s/it][A[A



  3%|▎         | 11/345 [01:19<40:47,  7.33s/it][A[A[A[A









 98%|█████████▊| 337/345 [39:02<00:59,  7.42s/it][A[A[A[A[A[A[A[A[A[A







 99%|█████████▉| 342/345 [39:03<00:22,  7.47s/it][A[A[A[A[A[A[A[A

  1%|▏         | 5/345 [00:39<43:40,  7.71s/it][A[A





 98%|█████████▊| 339/345 [38:53<00:44,  7.47s/it][A[A[A[A[A[A




 92%|█████████▏| 317/345 [39:50<03:33,  7.64s/it][A[A[A[A[A



  3%|▎         | 12/345 [01:26<40:49,  7.36s/it][A[A[A[A









 98%|█████████▊|

scrape_Page 39_ALL_content.json
Successfully saved: scrape_Page 39_ALL_content.json








 99%|█████████▉| 342/345 [39:15<00:21,  7.33s/it][A[A[A[A[A[A

  2%|▏         | 8/345 [01:01<41:33,  7.40s/it][A[A




 93%|█████████▎| 320/345 [40:13<03:17,  7.88s/it][A[A[A[A[A



  5%|▌         | 18/345 [02:10<40:43,  7.47s/it][A[A[A[A









  4%|▍         | 15/345 [01:49<40:50,  7.43s/it]][A[A[A[A[A[A[A[A[A[A





 99%|█████████▉| 343/345 [39:23<00:15,  7.62s/it][A[A[A[A[A[A

  3%|▎         | 9/345 [01:09<41:36,  7.43s/it][A[A




 93%|█████████▎| 321/345 [40:21<03:09,  7.88s/it][A[A[A[A[A









  5%|▍         | 16/345 [01:56<40:03,  7.31s/it]][A[A[A[A[A[A[A[A[A[A



  6%|▌         | 19/345 [02:17<40:38,  7.48s/it][A[A[A[A





100%|█████████▉| 344/345 [39:31<00:07,  7.76s/it][A[A[A[A[A[A

  3%|▎         | 10/345 [01:17<43:06,  7.72s/it][A[A




 93%|█████████▎| 322/345 [40:29<03:02,  7.93s/it][A[A[A[A[A









  5%|▍         | 17/345 [02:04<40:29,  7.41s/it]][A[A[A[A[A[A[A[A[A[A



  6%|▌   

scrape_Page 40_ALL_content.json
Successfully saved: scrape_Page 40_ALL_content.json







 94%|█████████▎| 323/345 [40:36<02:47,  7.63s/it][A[A[A[A[A









100%|█████████▉| 344/345 [39:52<00:07,  7.05s/it][A[A[A[A[A[A[A[A[A[A



  5%|▌         | 18/345 [02:11<39:45,  7.30s/it][A[A[A[A

  3%|▎         | 12/345 [01:31<40:31,  7.30s/it][A[A




 94%|█████████▍| 324/345 [40:44<02:39,  7.57s/it][A[A[A[A[A









100%|██████████| 345/345 [40:00<00:00,  6.96s/it][A[A[A[A[A[A[A[A[A[A


scrape_Page 37_ALL_content.json
Successfully saved: scrape_Page 37_ALL_content.json


  6%|▌         | 19/345 [02:18<39:43,  7.31s/it]



  6%|▋         | 22/345 [02:40<39:52,  7.41s/it][A[A[A[A

  4%|▍         | 13/345 [01:40<42:17,  7.64s/it][A[A




  6%|▌         | 20/345 [02:26<40:18,  7.44s/it]][A[A[A[A[A



  7%|▋         | 23/345 [02:48<40:33,  7.56s/it][A[A[A[A

  4%|▍         | 14/345 [01:46<40:31,  7.34s/it][A[A




 94%|█████████▍| 326/345 [40:59<02:26,  7.70s/it][A[A[A[A[A



  6%|▌         | 21/345 [02:33<40:34,  7.51s/it][A[A[A[A

  4%|▍         | 15/345 [01:54<41:41,  7.58s/it][A[A



  7%|▋         | 25/345 [03:01<38:11,  7.16s/it][A[A[A[A




  6%|▋         | 22/345 [02:41<41:15,  7.66s/it]][A[A[A[A[A

  5%|▍         | 16/345 [02:02<41:58,  7.66s/it][A[A



  8%|▊         | 26/345 [03:09<38:45,  7.29s/it][A[A[A[A




  7%|▋         | 23/345 [02:49<41:34,  7.75s/it]][A[A[A[A[A

  5%|▍         | 17/345 [02:09<40:56,  7.49s/it][A[A




 95%|█████████▌| 329/345 [41:22<02:00,  7.53s/it][A[A[A[A[A



  7

scrape_Page 33_ALL_content.json
Successfully saved: scrape_Page 33_ALL_content.json






 12%|█▏        | 43/345 [04:58<32:03,  6.37s/it][A[A[A[A

 11%|█         | 37/345 [04:17<33:56,  6.61s/it][A[A



 13%|█▎        | 44/345 [05:02<29:00,  5.78s/it][A[A[A[A

 13%|█▎        | 45/345 [05:10<31:45,  6.35s/it][A[A



 14%|█▍        | 48/345 [05:32<32:32,  6.57s/it][A[A[A[A

 13%|█▎        | 46/345 [05:18<34:06,  6.84s/it][A[A



 14%|█▍        | 49/345 [05:40<34:12,  6.93s/it][A[A[A[A

 12%|█▏        | 40/345 [04:37<33:18,  6.55s/it][A[A



 14%|█▍        | 50/345 [05:45<31:01,  6.31s/it][A[A[A[A

 14%|█▍        | 48/345 [05:30<32:28,  6.56s/it][A[A

 12%|█▏        | 42/345 [04:48<31:33,  6.25s/it][A[A



 15%|█▍        | 51/345 [05:52<32:33,  6.64s/it][A[A[A[A



 14%|█▍        | 49/345 [05:35<29:56,  6.07s/it][A[A[A[A

 12%|█▏        | 43/345 [04:53<29:28,  5.86s/it][A[A



 14%|█▍        | 50/345 [05:43<32:15,  6.56s/it][A[A[A[A

 13%|█▎        | 44/345 [05:02<33:11,  6.62s/it][A[A



 15%|█▍        | 51/345 [05:47<29:00