In [1]:
import requests
import pandas as pd
import time

def get_max_vacancies(search_term, timeout=10):
    """Return the number of vacancies hh.ru reports for a search term.

    Sends a single search request (one item per page, first page) and reads
    the ``found`` field of the JSON reply; the items themselves are ignored.

    Args:
        search_term: Text sent as the ``text`` query parameter.
        timeout: Seconds to wait for the server. Without a timeout
            ``requests`` can block indefinitely on a stalled connection.

    Returns:
        The value of the ``found`` field of the response.

    Raises:
        requests.exceptions.HTTPError: If the server answers with an error
            status code.
        requests.exceptions.Timeout: If the server does not answer within
            ``timeout`` seconds.
    """
    url = 'https://api.hh.ru/vacancies'
    params = {
        'text': search_term,
        'area': '40',  # area 40 == Kazakhstan
        'per_page': '1',  # Only need to get one item to find the total count
        'page': 0
    }
    response = requests.get(url, params=params, timeout=timeout)
    response.raise_for_status()  # Raise an exception if the request failed
    return response.json()['found']  # Total number of vacancies found

def get_raw_vacancies(search_term, amount):
    """Download the raw search-result pages for a search term.

    Pages are requested sequentially, 10 vacancies per page, for area 40.
    A page that answers ``403 Forbidden`` is retried up to three times with
    a pause in between; if it still fails the page is skipped (with a
    message) and the download continues, so the result may be incomplete.
    Any other error aborts the download and discards what was collected.

    Args:
        search_term: Text sent as the ``text`` query parameter.
        amount: Total number of vacancies to fetch (typically the value
            returned by ``get_max_vacancies``).

    Returns:
        A list with one decoded JSON response per successfully fetched page,
        or an empty list if a non-403 error occurred.
    """
    url = 'https://api.hh.ru/vacancies'
    per_page = 10
    # hh.ru limits pagination depth to 2000 results; asking for a page past
    # that depth is answered with an error, which would otherwise make this
    # function throw away everything already downloaded.
    max_depth = 2000
    retries = 3
    retry_delay = 5  # seconds to wait between 403 retries
    request_timeout = 30  # seconds; prevents hanging on a stalled connection

    reachable = min(amount, max_depth)
    total_pages = -(-reachable // per_page)  # ceiling division

    raw_vacancies_data = []
    for page in range(total_pages):
        params = {
            'text': search_term,
            'area': '40',  # area 40 == Kazakhstan
            'per_page': str(per_page),
            'page': page
        }

        # Retry mechanism
        for attempt in range(1, retries + 1):
            try:
                response = requests.get(url, params=params, timeout=request_timeout)
                response.raise_for_status()  # Raise an exception if the request failed
                raw_vacancies_data.append(response.json())
                break  # Exit the retry loop if successful
            except requests.exceptions.HTTPError as e:
                if e.response.status_code != 403:
                    print(f"HTTP error occurred: {e}")
                    return []  # Return empty list on other errors
                if attempt < retries:
                    print(f"403 Forbidden for page {page}. Retrying...")
                    time.sleep(retry_delay)  # Wait before retrying
                else:
                    # Last attempt failed: no point in sleeping again, and
                    # say explicitly that this page is missing from the result.
                    print(f"403 Forbidden for page {page}. Giving up after {retries} attempts; page skipped.")
            except Exception as e:
                print(f"An error occurred: {e}")
                return []

    return raw_vacancies_data

def extract_salary(vacancy):
    """Return ``(from, to, currency)`` of a vacancy's salary.

    All three values are ``None`` when the vacancy's ``salary`` entry is
    ``None``.
    """
    salary = vacancy['salary']
    if salary is None:
        return None, None, None
    return salary['from'], salary['to'], salary['currency']

def extract_employment_type(vacancy):
    """Return the ``name`` of the vacancy's ``employment`` entry, or ``None`` if it is unset."""
    employment_info = vacancy['employment']
    return employment_info['name'] if employment_info is not None else None

def extract_schedule(vacancy):
    """Return the ``name`` of the vacancy's ``schedule`` entry, or ``None`` if it is unset."""
    schedule_info = vacancy['schedule']
    if schedule_info is None:
        return None
    return schedule_info['name']

def extract_experience(vacancy):
    """Return the ``name`` of the vacancy's ``experience`` entry, or ``None`` if it is unset."""
    experience_info = vacancy['experience']
    return None if experience_info is None else experience_info['name']

def extract_address(vacancy):
    """Return ``(raw, lat, lng)`` of a vacancy's address.

    All three values are ``None`` when the vacancy's ``address`` entry is
    ``None``.
    """
    location = vacancy['address']
    if location is None:
        return None, None, None
    return location['raw'], location['lat'], location['lng']

def extract_profesisonal_roles(vacancy):
    """Return the vacancy's professional role names joined by ``", "``.

    Returns ``None`` when the ``professional_roles`` entry is ``None``; an
    empty list of roles yields an empty string.
    """
    roles = vacancy['professional_roles']
    if roles is None:
        return None
    return ", ".join(entry['name'] for entry in roles)

def save_to_csv(data, filename):
    """Write extracted vacancy rows to a UTF-8 CSV file.

    Args:
        data: Rows to write; each row supplies one value per output column,
            in the order of the header below.
        filename: Path of the CSV file to create. The DataFrame index is
            written as the first (unnamed) column.
    """
    header = (
        'Job Title', 'Company Name', 'Company ID',
        'Employment Type', 'Schedule',
        'Salary From', 'Salary To', 'Salary Currency',
        'Professional Roles', 'Required Experience',
        'Region', 'Full Address', 'Address Latitude', 'Address Longitude',
        'Apply URL', 'Vacancy URL',
        'Publication Time', 'Archived',
        'Requirements', 'Responsibilities',
    )
    frame = pd.DataFrame(data, columns=list(header))
    frame.to_csv(filename, index=True, encoding='utf-8')

def extract_information(search_term):
    """Fetch all vacancies for a search term and flatten them into rows.

    Asks ``get_max_vacancies`` for the total count, downloads the pages with
    ``get_raw_vacancies`` and converts every vacancy into a flat list whose
    element order matches the column order used by ``save_to_csv``.

    Args:
        search_term: Text to search vacancies for.

    Returns:
        A list of rows (lists), one per vacancy; empty if nothing was
        downloaded.
    """
    clear_data = []
    amount = get_max_vacancies(search_term)

    for elem in get_raw_vacancies(search_term, amount):
        for vacancy in elem['items']:
            salary_from, salary_to, currency = extract_salary(vacancy)
            employment = extract_employment_type(vacancy)
            experience = extract_experience(vacancy)
            schedule = extract_schedule(vacancy)
            address_raw, address_latitude, address_longitude = extract_address(vacancy)
            professional_roles = extract_profesisonal_roles(vacancy)

            employer = vacancy['employer']
            snippet = vacancy['snippet']

            clear_data.append([
                vacancy['name'],
                employer['name'],
                # Read with .get(): an employer record without an 'id'
                # (presumably hidden/anonymous employers - verify against
                # the API docs) would otherwise raise KeyError and abort the
                # whole export after every page has been downloaded.
                employer.get('id'),
                employment,
                schedule,
                salary_from,
                salary_to,
                currency,
                professional_roles,
                experience,
                vacancy['area']['name'],
                address_raw,
                address_latitude,
                address_longitude,
                vacancy['apply_alternate_url'],
                vacancy['alternate_url'],
                vacancy['published_at'],
                vacancy['archived'],
                snippet['requirement'],
                snippet['responsibility']
            ])

    return clear_data

if __name__ == '__main__':
    # Script entry point: ask for a search term, download the matching
    # vacancies and store them in a CSV file named after the term.
    print('Please enter vacancy name:')
    search_term = input().strip()
    extracted_data = extract_information(search_term)

    # The search term is free-form user input. Characters such as '/' or '\'
    # would be interpreted as path separators (and several others are not
    # allowed in Windows file names), so replace them for the file name only;
    # the search itself still uses the term exactly as typed.
    forbidden = '\\/:*?"<>|'
    safe_term = ''.join(
        '_' if (ch in forbidden or ch < ' ') else ch
        for ch in search_term
    )
    filename = f'hh_kz_vacancies_{safe_term}.csv'
    save_to_csv(extracted_data, filename)
    print(f'Successfully saved {len(extracted_data)} vacancies to {filename}')

Please enter vacancy name:
учитель
403 Forbidden for page 119. Retrying...
403 Forbidden for page 119. Retrying...
403 Forbidden for page 119. Retrying...
Successfully saved 1385 vacancies to hh_kz_vacancies_учитель.csv
