### Imports

In [156]:
import requests
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup
# from concurrent.futures import ThreadPoolExecutor
from tqdm.contrib.concurrent import thread_map
pd.set_option('display.max_colwidth', None)

### GET AIRLINES URLS ###

In [157]:
url = 'https://www.pilotjobsnetwork.com/'
max_urls = 5

def get_airlines_urls(url):
    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'html.parser')
    links = soup.find_all('a')
    for link in links:
        href = link.get('href')
        if href and 'jobs/' in href:
            airline_url = url + href
            airline_name = link.text.strip()
            yield airline_url, airline_name # Generator creation

### GET INFO FROM PAGES ###

In [158]:
def get_airline_tables(airline_tuple):
    airline_url, airline_name = airline_tuple
    try:
        list_tables = pd.read_html(airline_url)
        salary = list_tables[3].iloc[2:4,1].tolist()
        # iloc iloc[2:4,2] gets you the dates, but remove the salary ... smthing to 
        last_update = list_tables[3].iloc[2:4,2].tolist()
        return [airline_url, airline_name] + salary + last_update
    except Exception as e:
        print(e)
        try:
            return list_tables[3]
        except NameError:
            return airline_url

### MAP IT !! ###

In [159]:
list_salaries = list(thread_map(get_airline_tables, get_airlines_urls(url)))
# print dans un excel le list_salaries

0it [00:00, ?it/s]

'ascii' codec can't encode characters in position 18-19: ordinal not in range(128)


### DF AS OUTPUT ###

In [160]:
df_salary = pd.DataFrame([salary for salary in list_salaries if type(salary) == list])
df_salary

Unnamed: 0,0,1,2,3,4,5
0,https://www.pilotjobsnetwork.com/jobs/Eurowings_GmbH,7Mar - Germany (Ma) - Eurowings GmbH,"14.540 x 13 = 189.021,43 euro TRE = +1.700 euro/month TRI = +1.600 euro/month LTC = +800 euro/month (+variable allowances for each check/training event)","8.611,37 x 13 = 111.947,81 euro (or at least +1.300 euro/month after upgrade to CPT). That means all FO with high seniority, who used to be capped at rank 11, will move to CPT rank 4 after upgrade.",15/Jan/23,15/Jan/23
1,https://www.pilotjobsnetwork.com/jobs/Greater_Bay_Airlines,7Mar - Hong Kong (Ma) - Greater Bay Airlines,,$80000 HKD/MTH,,22/Aug/22
2,https://www.pilotjobsnetwork.com/jobs/Eurowings_Europe_Ltd,6Mar - Malta (Ma) - Eurowings Europe Ltd,AT 117.252 â¬ gross/year ES 126.948 â¬ gross/year CZ 60.872 â¬ gross/year SE 811.368 SEK gross/year (This does not include 13th and 14th additional payment),AT 73.296 â¬ gross/year ES 87.132 â¬ gross/year CZ 43.470 â¬ gross/year SE 521.592 SEK gross /year (This does not include 13th and 14th additional payment),6/Mar/23,6/Mar/23
3,https://www.pilotjobsnetwork.com/jobs/Avion_Express,6Mar - Lithuania (Ch) - Avion Express,Captains 5500 EUR basic pay + 50 EUR/BH. Loyalty bonus 2500 EUR paid twice per year (applicable from 2nd year of service),Winter basic for CPT 5000 + 50 EUR / BHÂ,4/Jan/23,23/Jan/23
4,https://www.pilotjobsnetwork.com/jobs/Corendon_Airlines,5Mar - Turkey (Ma) - Corendon Airlines,6250,5250,5/Mar/23,5/Mar/23
...,...,...,...,...,...,...
494,https://www.pilotjobsnetwork.com/jobs/FlyinGroup,13Aug - Belgium (Fr) - FlyinGroup,,,,
495,https://www.pilotjobsnetwork.com/jobs/MS_AVIATION,12Aug - Austria (Fr) - MS AVIATION,,,,
496,https://www.pilotjobsnetwork.com/jobs/ESMA_Aviation,12Aug - France (Re) - ESMA Aviation,,,,
497,https://www.pilotjobsnetwork.com/jobs/Compass_Airlines,9Aug - USA (Ma) - Compass Airlines,,,,


### ERRORS LIST ###

In [161]:
[salary for salary in list_salaries if type(salary) == str]

['https://www.pilotjobsnetwork.com/jobs/Air_CaraÃ¯bes_Atlantique',

### DF REWORK ###

In [162]:
# First name columns
df_salary.columns = ["URL", "AirlineName", "CaptMax", "CaptMin", "DateCaptMax", "DateCaptMin"]
df_salary

Unnamed: 0,URL,AirlineName,CaptMax,CaptMin,DateCaptMax,DateCaptMin
0,https://www.pilotjobsnetwork.com/jobs/Eurowings_GmbH,7Mar - Germany (Ma) - Eurowings GmbH,"14.540 x 13 = 189.021,43 euro TRE = +1.700 euro/month TRI = +1.600 euro/month LTC = +800 euro/month (+variable allowances for each check/training event)","8.611,37 x 13 = 111.947,81 euro (or at least +1.300 euro/month after upgrade to CPT). That means all FO with high seniority, who used to be capped at rank 11, will move to CPT rank 4 after upgrade.",15/Jan/23,15/Jan/23
1,https://www.pilotjobsnetwork.com/jobs/Greater_Bay_Airlines,7Mar - Hong Kong (Ma) - Greater Bay Airlines,,$80000 HKD/MTH,,22/Aug/22
2,https://www.pilotjobsnetwork.com/jobs/Eurowings_Europe_Ltd,6Mar - Malta (Ma) - Eurowings Europe Ltd,AT 117.252 â¬ gross/year ES 126.948 â¬ gross/year CZ 60.872 â¬ gross/year SE 811.368 SEK gross/year (This does not include 13th and 14th additional payment),AT 73.296 â¬ gross/year ES 87.132 â¬ gross/year CZ 43.470 â¬ gross/year SE 521.592 SEK gross /year (This does not include 13th and 14th additional payment),6/Mar/23,6/Mar/23
3,https://www.pilotjobsnetwork.com/jobs/Avion_Express,6Mar - Lithuania (Ch) - Avion Express,Captains 5500 EUR basic pay + 50 EUR/BH. Loyalty bonus 2500 EUR paid twice per year (applicable from 2nd year of service),Winter basic for CPT 5000 + 50 EUR / BHÂ,4/Jan/23,23/Jan/23
4,https://www.pilotjobsnetwork.com/jobs/Corendon_Airlines,5Mar - Turkey (Ma) - Corendon Airlines,6250,5250,5/Mar/23,5/Mar/23
...,...,...,...,...,...,...
494,https://www.pilotjobsnetwork.com/jobs/FlyinGroup,13Aug - Belgium (Fr) - FlyinGroup,,,,
495,https://www.pilotjobsnetwork.com/jobs/MS_AVIATION,12Aug - Austria (Fr) - MS AVIATION,,,,
496,https://www.pilotjobsnetwork.com/jobs/ESMA_Aviation,12Aug - France (Re) - ESMA Aviation,,,,
497,https://www.pilotjobsnetwork.com/jobs/Compass_Airlines,9Aug - USA (Ma) - Compass Airlines,,,,


### PIVOT TABLE

In [166]:
df_stack = (df_salary.set_index(['URL', 'AirlineName', 'DateCaptMax', 'DateCaptMin'])
   .rename_axis(['Top/Base'], axis=1)
   .stack(dropna=False) # Put True to remove NaNs
   .reset_index())
df_stack.columns = ['URL', 'AirlineName', 'DateCaptMax', 'DateCaptMin', 'Top/Base', 'Salary']
df_stack

Unnamed: 0,URL,AirlineName,DateCaptMax,DateCaptMin,Top/Base,Salary
0,https://www.pilotjobsnetwork.com/jobs/Eurowings_GmbH,7Mar - Germany (Ma) - Eurowings GmbH,15/Jan/23,15/Jan/23,CaptMax,"14.540 x 13 = 189.021,43 euro TRE = +1.700 euro/month TRI = +1.600 euro/month LTC = +800 euro/month (+variable allowances for each check/training event)"
1,https://www.pilotjobsnetwork.com/jobs/Eurowings_GmbH,7Mar - Germany (Ma) - Eurowings GmbH,15/Jan/23,15/Jan/23,CaptMin,"8.611,37 x 13 = 111.947,81 euro (or at least +1.300 euro/month after upgrade to CPT). That means all FO with high seniority, who used to be capped at rank 11, will move to CPT rank 4 after upgrade."
2,https://www.pilotjobsnetwork.com/jobs/Greater_Bay_Airlines,7Mar - Hong Kong (Ma) - Greater Bay Airlines,,22/Aug/22,CaptMax,
3,https://www.pilotjobsnetwork.com/jobs/Greater_Bay_Airlines,7Mar - Hong Kong (Ma) - Greater Bay Airlines,,22/Aug/22,CaptMin,$80000 HKD/MTH
4,https://www.pilotjobsnetwork.com/jobs/Eurowings_Europe_Ltd,6Mar - Malta (Ma) - Eurowings Europe Ltd,6/Mar/23,6/Mar/23,CaptMax,AT 117.252 â¬ gross/year ES 126.948 â¬ gross/year CZ 60.872 â¬ gross/year SE 811.368 SEK gross/year (This does not include 13th and 14th additional payment)
...,...,...,...,...,...,...
993,https://www.pilotjobsnetwork.com/jobs/ESMA_Aviation,12Aug - France (Re) - ESMA Aviation,,,CaptMin,
994,https://www.pilotjobsnetwork.com/jobs/Compass_Airlines,9Aug - USA (Ma) - Compass Airlines,,,CaptMax,
995,https://www.pilotjobsnetwork.com/jobs/Compass_Airlines,9Aug - USA (Ma) - Compass Airlines,,,CaptMin,
996,https://www.pilotjobsnetwork.com/jobs/Luxaviation_UK,3Aug - UK (Ch) - Luxaviation UK,3/Nov/14,11/Jan/08,CaptMax,"GBP53,200 + loyalty bonus (see above)"


### Select date (min/max) based on "CaptMin/CaptMax"

In [167]:
# Create a unique column for date depending on Top/Base column value. Extract Year
df_stack['Date_reworked'] = np.where(df_stack['Top/Base'] == 'CaptMax', df_stack['DateCaptMax'], df_stack['DateCaptMin'])
df_stack['Date_reworked'] = pd.to_datetime(df_stack['Date_reworked'])
df_stack['Year'] = df_stack['Date_reworked'].dt.year
df_stack['Year'] = df_stack['Year'].astype('Int64')


# Split column AirlineName on " - " seperator
df_stack[["DateFromLink", "Country", "Name"]] = df_stack["AirlineName"].str.split(" - ", expand=True)

# # Cleaning columns. Droping non necessary
clean_order = ['URL', 'Country', 'Name', 'Year', 'Top/Base', 'Salary']
df_stack = df_stack[clean_order]
df_stack.to_excel('airline_list.xlsx')
df_stack

Unnamed: 0,URL,Country,Name,Year,Top/Base,Salary
0,https://www.pilotjobsnetwork.com/jobs/Eurowings_GmbH,Germany (Ma),Eurowings GmbH,2023,CaptMax,"14.540 x 13 = 189.021,43 euro TRE = +1.700 euro/month TRI = +1.600 euro/month LTC = +800 euro/month (+variable allowances for each check/training event)"
1,https://www.pilotjobsnetwork.com/jobs/Eurowings_GmbH,Germany (Ma),Eurowings GmbH,2023,CaptMin,"8.611,37 x 13 = 111.947,81 euro (or at least +1.300 euro/month after upgrade to CPT). That means all FO with high seniority, who used to be capped at rank 11, will move to CPT rank 4 after upgrade."
2,https://www.pilotjobsnetwork.com/jobs/Greater_Bay_Airlines,Hong Kong (Ma),Greater Bay Airlines,,CaptMax,
3,https://www.pilotjobsnetwork.com/jobs/Greater_Bay_Airlines,Hong Kong (Ma),Greater Bay Airlines,2022,CaptMin,$80000 HKD/MTH
4,https://www.pilotjobsnetwork.com/jobs/Eurowings_Europe_Ltd,Malta (Ma),Eurowings Europe Ltd,2023,CaptMax,AT 117.252 â¬ gross/year ES 126.948 â¬ gross/year CZ 60.872 â¬ gross/year SE 811.368 SEK gross/year (This does not include 13th and 14th additional payment)
...,...,...,...,...,...,...
993,https://www.pilotjobsnetwork.com/jobs/ESMA_Aviation,France (Re),ESMA Aviation,,CaptMin,
994,https://www.pilotjobsnetwork.com/jobs/Compass_Airlines,USA (Ma),Compass Airlines,,CaptMax,
995,https://www.pilotjobsnetwork.com/jobs/Compass_Airlines,USA (Ma),Compass Airlines,,CaptMin,
996,https://www.pilotjobsnetwork.com/jobs/Luxaviation_UK,UK (Ch),Luxaviation UK,2014,CaptMax,"GBP53,200 + loyalty bonus (see above)"
