# In [12]:
import requests
from bs4 import BeautifulSoup
from typing import Dict, List

# In [24]:
class MeteoInfoParser:
    """Scraper for the forecast pages of https://meteoinfo.ru/forecasts/.

    Calling an instance walks the ``<select>`` menus of the site
    (countries, then regions, then cities), records every discovered page
    in ``self.pages`` and attaches the parsed forecast table to each city.

    ``self.pages`` has the keys ``'mainPage'`` (the start URL) and
    ``'countries'``, ``'regions'``, ``'cities'`` (dicts keyed by URL slug).
    Every territory entry holds ``'name'``, ``'url'``, ``'response'`` and
    ``'has_regions'``; regions additionally hold ``'country'``; cities hold
    ``'country'``, ``'region'`` (only when reached through a region) and,
    after the crawl, ``'information'``.
    """

    # Seconds to wait for a single HTTP request.  Without a timeout
    # ``requests`` can block forever on a stalled connection.
    REQUEST_TIMEOUT = 30

    def __init__(self, *args, **kwargs):
        """Download the main forecasts page and create the empty registry."""
        self.url_mainPage = "https://meteoinfo.ru/forecasts/"
        self.response_mainPage = requests.get(
            self.url_mainPage, timeout=self.REQUEST_TIMEOUT
        )
        self.soup_mainPage = BeautifulSoup(self.response_mainPage.content,
                                           'html.parser')
        self.pages = {
            'mainPage': self.url_mainPage,
            'countries': {},
            'regions': {},
            'cities': {},
        }

    def __call__(self, *args, **kwargs) -> Dict:
        """Crawl countries, regions and cities, then parse every city forecast.

        Returns:
            The ``self.pages`` registry, filled in place.
        """
        self.__page_parser('countries', 'sel_country')

        for country_name, country_info in self.pages['countries'].items():
            if not country_info['has_regions']:
                self.__page_parser('cities', 'sel_city', has_regions=False,
                                   field_territory_name=country_name)
                continue

            self.__page_parser('regions', 'sel_reg', has_regions=True,
                               field_territory_name=country_name)

            # Only visit the regions that belong to this country.  Walking
            # the whole registry here would re-download the regions (and all
            # city pages) of every previously processed country.
            own_regions = [
                region_name
                for region_name, region_info in self.pages['regions'].items()
                if region_info.get('country', {}).get('name') == country_name
            ]
            for region_name in own_regions:
                self.__page_parser('cities', 'sel_city', has_regions=True,
                                   field_territory_name=region_name)

        for city_info in self.pages['cities'].values():
            city_info['information'] = self.__weather_parser(city_info['url'])

        return self.pages

    def __page_parser(self, type_territory: str, field_id: str,
                      has_regions: bool = False,
                      field_territory_name: str = '',
                      *args, **kwargs) -> None:
        """Register every territory listed in one ``<select>`` menu.

        Downloads the listing page of the parent territory (or the main page
        for countries), reads the ``<option>`` elements of every
        ``<select id=field_id>``, downloads each option's own page and stores
        an entry in ``self.pages[type_territory]`` keyed by the option's slug.

        Args:
            type_territory: ``'countries'``, ``'regions'`` or ``'cities'``.
            field_id: ``id`` attribute of the ``<select>`` element to read.
            has_regions: For cities, whether ``field_territory_name`` is a
                key of ``self.pages['regions']`` (True) or of
                ``self.pages['countries']`` (False).
            field_territory_name: Slug of the parent territory; unused for
                countries.

        Raises:
            ValueError: If ``type_territory`` is not one of the three kinds.
            KeyError: If the parent territory is not registered yet.
        """
        # slug_index selects which "/"-separated segment of the option value
        # is used as this territory's own slug.
        if type_territory == 'countries':
            slug_index = 0
            parent = None
        elif type_territory == 'regions':
            slug_index = 1
            parent = self.pages['countries'][field_territory_name]
        elif type_territory == 'cities':
            slug_index = -1
            parent_kind = 'regions' if has_regions else 'countries'
            parent = self.pages[parent_kind][field_territory_name]
        else:
            raise ValueError(
                "Invalid type_territory: {!r}".format(type_territory)
            )

        if parent is None:
            # The main page URL already ends with "/".
            listing_url = self.pages['mainPage']
            base_url = listing_url
        else:
            listing_url = parent['url']
            base_url = listing_url + '/'

        listing = requests.get(listing_url, timeout=self.REQUEST_TIMEOUT)
        soup = BeautifulSoup(listing.content, 'html.parser')

        for select in soup.find_all('select', id=field_id):
            for option in select.find_all('option'):
                segments = (option.get('value') or '').split('/')
                try:
                    slug = segments[slug_index]
                except IndexError:
                    # Option value has fewer path segments than expected.
                    continue
                if not slug:
                    # An empty slug would only alias the listing page itself.
                    continue

                page_url = base_url + slug
                page_response = requests.get(page_url,
                                             timeout=self.REQUEST_TIMEOUT)
                page_soup = BeautifulSoup(page_response.content,
                                          'html.parser')

                entry = {
                    'name': option.contents[0] if option.contents else '',
                    'url': page_url,
                    'response': page_response,
                    'has_regions':
                        page_soup.find('select', id='sel_reg') is not None,
                }

                # Fresh dicts per entry so entries never share mutable state.
                if type_territory == 'regions':
                    entry['country'] = {
                        'name': field_territory_name,
                        'url': parent['url'],
                    }
                elif type_territory == 'cities':
                    if has_regions:
                        entry['region'] = {
                            'name': field_territory_name,
                            'url': parent['url'],
                        }
                        entry['country'] = {
                            'name': parent['country']['name'],
                            'url': parent['country']['url'],
                        }
                    else:
                        entry['country'] = {
                            'name': field_territory_name,
                            'url': parent['url'],
                        }

                self.pages[type_territory][slug] = entry

    @staticmethod
    def __weather_parser(url, *args, **kwargs) -> List[Dict[str, str]]:
        """Download a city page and parse its forecast table.

        Args:
            url: Address of the city page.

        Returns:
            One dict per usable table row with the keys ``'date'``,
            ``'day_period'``, ``'temperature'``, ``'precipitation'``,
            ``'wind_speed'`` and ``'pressure'``.  The list is empty when the
            page has no ``div.hidden-desktop`` or no table inside it.
        """
        response = requests.get(url, timeout=MeteoInfoParser.REQUEST_TIMEOUT)
        soup = BeautifulSoup(response.content, 'html.parser')
        information = []

        table_div = soup.find('div', class_='hidden-desktop')
        if table_div is None:
            print("Таблица с погодными данными не найдена.")
            return information

        table = table_div.find('table')
        if table is None:
            print("Таблица не найдена.")
            return information

        # The first two rows are skipped (presumably table headers).
        for row in table.find_all('tr')[2:]:
            cells = row.find_all('td')

            # Rows with fewer than five cells are not forecast rows.  Skipping
            # them before indexing avoids an IndexError on very short rows.
            if len(cells) < 5:
                continue

            date = ''
            temperature = ''
            # Date and temperature sit somewhere in the first five cells;
            # the last match wins.
            for cell in cells[:5]:
                temp_span = cell.find('span', class_='fc_temp_short')
                if temp_span:
                    temperature = temp_span.text.strip()
                date_cell = cell.find('nobr')
                if date_cell:
                    date = date_cell.text.strip()

            # A row without its own date inherits the date of the previous
            # row; if there is no previous row the date stays empty.
            if not date and information:
                date = information[-1]['date']

            information.append({
                'date': date,
                'day_period': 'Ночь' if 'Ночь' in row.text else 'День',
                'temperature': temperature,
                'precipitation': cells[-3].text.strip(),
                'wind_speed': cells[-2].text.strip(),
                'pressure': cells[-1].text.strip(),
            })

        return information


# Build the scraper; the constructor already downloads the main forecasts page.
meteo = MeteoInfoParser()
# Run the full crawl (countries, regions, cities, then each city's forecast).
# This issues one HTTP request per discovered page.
meteo()


# KeyboardInterrupt: