In [1]:
import re

import pandas as pd
from bs4 import BeautifulSoup
from bs4.element import Tag

In [2]:
# Decode the saved results page explicitly instead of relying on the platform's
# default encoding, which differs between systems and can garble the
# non-ASCII text on the page (Cyrillic labels, the rouble sign).
# NOTE(review): assumes FlightResults.html was saved as UTF-8 — confirm.
with open('FlightResults.html', encoding='utf-8') as file:
    # Parse while the handle is still open; `soup` holds the whole parsed tree.
    soup = BeautifulSoup(file, 'html.parser')

In [3]:
# Sample card used to develop the getters: the first top-level child that is an
# actual element. Text nodes (e.g. whitespace between elements) are skipped so
# the `Tag` annotation holds. Raises StopIteration if there is no such child.
flight_card: Tag = next(
    child for child in soup.children if isinstance(child, Tag)
)

In [4]:
def get_airlines(card: Tag) -> str:
    """Return the airline label of a flight card.

    Finds the first descendant whose class matches ``LogoImage_container``,
    then the first ``span`` inside it, and returns that span's text with
    surrounding whitespace stripped.

    Raises ``AttributeError`` if either lookup finds nothing, because
    ``find`` returns ``None`` in that case.
    """
    logo_container = card.find(class_=re.compile('LogoImage_container'))
    label = logo_container.find('span')
    return label.text.strip()

In [5]:
def get_departure_time(card: Tag) -> str:
    """Return the departure time text of a flight card.

    Narrows the search in three steps: the first descendant whose class
    matches ``LegInfo_routePartialDepart``, inside it the first element whose
    class matches ``LegInfo_routePartialTime``, and inside that the first
    ``span``. The span's text is returned stripped of surrounding whitespace.

    Raises ``AttributeError`` if any step finds nothing (``find`` returns
    ``None``).
    """
    depart_part = card.find(class_=re.compile('LegInfo_routePartialDepart'))
    time_part = depart_part.find(class_=re.compile('LegInfo_routePartialTime'))
    return time_part.find('span').text.strip()

In [6]:
def get_arrival_time(card: Tag) -> str:
    """Return the arrival time text of a flight card.

    Narrows the search in three steps: the first descendant whose class
    matches ``LegInfo_routePartialArrive``, inside it the first element whose
    class matches ``LegInfo_routePartialTime``, and inside that the first
    ``span``. The span's text is returned stripped of surrounding whitespace.

    Raises ``AttributeError`` if any step finds nothing (``find`` returns
    ``None``).
    """
    arrive_part = card.find(class_=re.compile('LegInfo_routePartialArrive'))
    time_part = arrive_part.find(class_=re.compile('LegInfo_routePartialTime'))
    return time_part.find('span').text.strip()

In [7]:
def get_duration(card: Tag) -> str:
    """Return the duration text of a flight card.

    Takes the text of the first descendant whose class matches
    ``Duration_duration`` and strips surrounding whitespace.

    Raises ``AttributeError`` if no such element exists (``find`` returns
    ``None``).
    """
    duration_node = card.find(class_=re.compile('Duration_duration'))
    return duration_node.text.strip()

In [8]:
def get_departure_airport(card: Tag) -> str:
    """Return the departure airport text of a flight card.

    Finds the first descendant whose class matches
    ``LegInfo_routePartialDepart`` and, inside it, the first element whose
    class matches ``LegInfo_routePartialCityTooltip``; returns that element's
    text stripped of surrounding whitespace.

    Raises ``AttributeError`` if either lookup finds nothing (``find``
    returns ``None``).
    """
    depart_part = card.find(class_=re.compile('LegInfo_routePartialDepart'))
    airport_node = depart_part.find(
        class_=re.compile('LegInfo_routePartialCityTooltip')
    )
    return airport_node.text.strip()

In [9]:
def get_arrival_airport(card: Tag) -> str:
    """Return the arrival airport text of a flight card.

    Finds the first descendant whose class matches
    ``LegInfo_routePartialArrive`` and, inside it, the first element whose
    class matches ``LegInfo_routePartialCityTooltip``; returns that element's
    text stripped of surrounding whitespace.

    Raises ``AttributeError`` if either lookup finds nothing (``find``
    returns ``None``).
    """
    arrive_part = card.find(class_=re.compile('LegInfo_routePartialArrive'))
    airport_node = arrive_part.find(
        class_=re.compile('LegInfo_routePartialCityTooltip')
    )
    return airport_node.text.strip()

In [10]:
def get_min_price(card: Tag) -> str:
    """Return the main price text of a flight card, unparsed.

    Finds the first descendant whose class matches
    ``Price_mainPriceContainer``, then the first ``span`` inside it, and
    returns that span's text stripped of surrounding whitespace. The value is
    returned as displayed; no numeric conversion is done here.

    Raises ``AttributeError`` if either lookup finds nothing (``find``
    returns ``None``).
    """
    price_container = card.find(class_=re.compile('Price_mainPriceContainer'))
    price_label = price_container.find('span')
    return price_label.text.strip()

In [11]:
def get_stops_count(card: Tag) -> str:
    """Return the stops label text of a flight card, unparsed.

    Finds the first descendant whose class matches
    ``LegInfo_stopsLabelContainer``, then the first ``span`` inside it, and
    returns that span's text stripped of surrounding whitespace. The label is
    returned as displayed text, not as a number.

    Raises ``AttributeError`` if either lookup finds nothing (``find``
    returns ``None``).
    """
    stops_container = card.find(
        class_=re.compile('LegInfo_stopsLabelContainer')
    )
    stops_label = stops_container.find('span')
    return stops_label.text.strip()

In [12]:
# Output column name -> getter that extracts that column's value from one
# flight card. Insertion order defines the column order of the result table.
columns = dict(
    airlines=get_airlines,
    departure_time=get_departure_time,
    arrival_time=get_arrival_time,
    duration=get_duration,
    departure_airport=get_departure_airport,
    arrival_airport=get_arrival_airport,
    min_price=get_min_price,
    stops_count=get_stops_count,
)

# Select the top-level nodes to parse. Only real elements are considered:
# text nodes (e.g. whitespace between elements) have no `has_attr` and would
# raise AttributeError. Elements carrying an `id` or `class` attribute are
# skipped — presumably those are not flight cards; verify against the page.
flight_cards = [
    child
    for child in soup.children
    if isinstance(child, Tag)
    and not (child.has_attr('id') or child.has_attr('class'))
]

# Build every row first and construct the frame once, instead of creating a
# one-row DataFrame per card and concatenating them. The result has the same
# columns and a 0..n-1 index; with no cards it is an empty frame rather than
# the ValueError that pd.concat raises on an empty list.
df = pd.DataFrame(
    [[getter(card) for getter in columns.values()] for card in flight_cards],
    columns=list(columns),
)

df

Unnamed: 0,airlines,departure_time,arrival_time,duration,departure_airport,arrival_airport,min_price,stops_count
0,S7 Airlines,13:45,14:50,3 ч. 05 мин.,DME,BER,7 362 ₽,Прямой
1,Aeroflot,12:00,12:55,2 ч. 55 мин.,SVO,BER,10 727 ₽,Прямой
2,KLM,05:30,09:25,5 ч. 55 мин.,SVO,BER,15 403 ₽,1 пересадка
3,Lufthansa,06:05,09:40,5 ч. 35 мин.,DME,BER,18 409 ₽,1 пересадка
4,KLM,17:55,21:55,6 ч.,SVO,BER,17 398 ₽,1 пересадка
...,...,...,...,...,...,...,...,...
265,Air France + KLM + Lufthansa,16:20,07:55,41 ч. 35 мин.,SVO,BER,178 750 ₽,4 пересадки
266,Belavia + KLM + Air France,19:55,11:40,41 ч. 45 мин.,DME,BER,199 770 ₽,4 пересадки
267,Belavia + KLM + Air France,20:15,11:40,41 ч. 25 мин.,SVO,BER,201 630 ₽,4 пересадки
268,Belavia + KLM + Lufthansa,10:05,07:55,47 ч. 50 мин.,SVO,BER,178 590 ₽,4 пересадки
