In [4]:
import requests
from bs4 import BeautifulSoup
# Exploratory fetch of a single result page (student 305600).
url = 'https://shbabbek.com/natega/305600'
# Always pass a timeout: without one, requests waits indefinitely on a stalled server.
page = requests.get(url, timeout=20)
soup = BeautifulSoup(page.text.strip(), 'html.parser')

In [8]:
import requests
from bs4 import BeautifulSoup
import logging
from concurrent.futures import ThreadPoolExecutor
import csv

# Root logger setup: show INFO and above, rendered as "<LEVEL>: <message>".
logging.basicConfig(
    format='%(levelname)s: %(message)s',
    level=logging.INFO,
)

# Placeholder stored in any result field that could not be scraped.
_NOT_AVAILABLE = 'N/A'


def _li_text(items, index):
    """Return the stripped text of ``items[index]``, or 'N/A' if ``items`` is too short.

    ``index`` must be non-negative.
    """
    if len(items) > index:
        return items[index].text.strip()
    return _NOT_AVAILABLE


def _status_row(student_id, message):
    """Return a 17-field row holding only the ID and a status message."""
    return (student_id, message) + (_NOT_AVAILABLE,) * 15


def fetch_student_result(student_id: str) -> tuple:
    """Fetch one student's result page and return its fields as a 17-tuple.

    Args:
        student_id: ID appended to ``https://shbabbek.com/natega/`` to build
            the page URL.

    Returns:
        A tuple of 17 strings: student ID, name, district, specialty, total
        grade, followed by twelve subject grades (first language, second
        language, pure maths, history, geography, philosophy/logic,
        psychology, chemistry, biology, geology, applied maths, physics).

        * On success the ID field is the text of the ``<li>`` whose class
          attribute is "col resultItem" (not the argument), and any field
          whose ``<li>`` position does not exist is 'N/A'.
        * If that ``<li>`` is absent, the row is
          ``(student_id, "No result found", 'N/A', ...)``.
        * If the HTTP request fails (connection error, timeout, 4xx/5xx), the
          row is ``(student_id, "Error: <exception>", 'N/A', ...)``.

    Request failures are logged and reported in the row, never raised.
    """
    url = f'https://shbabbek.com/natega/{student_id}'

    try:
        # A timeout keeps a stalled connection from blocking the caller forever.
        response = requests.get(url, timeout=20)
        response.raise_for_status()  # Turn 4xx/5xx responses into HTTPError
    except requests.exceptions.RequestException as e:
        logging.error("Error fetching result for Student ID %s: %s", student_id, e)
        return _status_row(student_id, f"Error: {e}")

    soup = BeautifulSoup(response.content, 'html.parser')

    result_item = soup.find('li', class_="col resultItem")
    if not result_item:
        logging.warning("No result found for Student ID: %s", student_id)
        return _status_row(student_id, "No result found")

    # Collect the <li> elements once; every field below is read by position.
    items = soup.find_all('li')

    # Kept separate from the `student_id` argument so the requested ID is not lost.
    page_student_id = result_item.text.strip()
    name = _li_text(items, 3)
    district = _li_text(items, 5)
    specialty = _li_text(items, 7)
    # The total grade is read from the fourth <li> counting from the end.
    total_grade = items[-4].text.strip() if len(items) > 4 else _NOT_AVAILABLE

    # Subject grades sit at positions 9..20, already in output column order.
    subject_grades = tuple(_li_text(items, position) for position in range(9, 21))

    logging.info(
        "Student ID: %s, Name: %s, District: %s, Specialty: %s, Total Grade: %s",
        page_student_id, name, district, specialty, total_grade,
    )
    return (page_student_id, name, district, specialty, total_grade) + subject_grades

def main(start_id: int = 312233, stop_id: int = 313231,
         output_path: str = 'student_results.csv', max_workers: int = 32):
    """Fetch results for a range of student IDs and write them to a CSV file.

    Args:
        start_id: First student ID to fetch (inclusive).
        stop_id: End of the ID range (exclusive).
        output_path: CSV file to create; an existing file is overwritten.
        max_workers: Number of worker threads, i.e. the maximum number of
            requests in flight at once. Must be greater than 0
            (``ThreadPoolExecutor`` raises ``ValueError`` otherwise).

    The file gets one header row followed by one row per student ID, in ID
    order, each row being the tuple returned by ``fetch_student_result``.
    """
    header = ['Student ID', 'Name', 'District', 'Specialty', 'Total Grade',
              'First Language', 'Second Language', 'Pure Maths',
              'History', 'Geography', 'Philosophy/Logic', 'Psychology',
              'Chemistry', 'Biology', 'Geology', 'Applied Maths', 'Physics']

    student_ids = [str(i) for i in range(start_id, stop_id)]

    with open(output_path, mode='w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(header)

        # Keep the pool bounded: a run with 1000 workers, which sent nearly
        # every request to the single host at once, logged widespread read
        # timeouts.
        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            # executor.map yields results in the same order as student_ids,
            # so rows are written in ID order even though fetches overlap.
            writer.writerows(executor.map(fetch_student_result, student_ids))

# Start the scrape only when this code is executed directly, not when imported.
if __name__ == "__main__":
    main()


ERROR: Error fetching result for Student ID 312257: HTTPSConnectionPool(host='shbabbek.com', port=443): Read timed out. (read timeout=20)
ERROR: Error fetching result for Student ID 312247: HTTPSConnectionPool(host='shbabbek.com', port=443): Read timed out. (read timeout=20)
ERROR: Error fetching result for Student ID 312325: HTTPSConnectionPool(host='shbabbek.com', port=443): Read timed out. (read timeout=20)
ERROR: Error fetching result for Student ID 312359: HTTPSConnectionPool(host='shbabbek.com', port=443): Read timed out. (read timeout=20)
ERROR: Error fetching result for Student ID 312332: HTTPSConnectionPool(host='shbabbek.com', port=443): Read timed out. (read timeout=20)
ERROR: Error fetching result for Student ID 312318: HTTPSConnectionPool(host='shbabbek.com', port=443): Read timed out. (read timeout=20)
ERROR: Error fetching result for Student ID 312376: HTTPSConnectionPool(host='shbabbek.com', port=443): Read timed out. (read timeout=20)
ERROR: Error fetching result for S