# UNESCO

## UNESCO: World Heritage List

In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from tqdm import tqdm

# Page URL
url = 'https://whc.unesco.org/en/list'

# Make the GET request to the page
response = requests.get(url)

# Parse HTML with BeautifulSoup
soup = BeautifulSoup(response.text, 'html.parser')

# Find all <h4> elements containing country names.
countries = soup.find_all('h4', class_=None)

# List to store data
data = []

# Initialize the progress bar
progress_bar = tqdm(total=len(countries), desc="Scraping")

# Iterate on countries
for country in countries:
    # Extract the country name
    country_name = country.find('a').get_text(strip=True)
    
    # Find all <li> elements within the <div> section containing the protected assets.
    sites = country.find_next_sibling('div', class_='list_site').find_all('li')
    
    # Iterate on protected assets and add them to the data list
    for site in sites:
        site_name = site.get_text(strip=True)
        # Obtain the type of danger
        danger_type = ""
        if "cultural_danger" in site.get('class', []):
            danger_type = "en peligro"
        elif "mixed" in site.get('class', []):
            danger_type = "mixto"
        elif "cultural" in site.get('class', []):
            danger_type = "cultural"
        elif "natural" in site.get('class', []):
            danger_type = "natural"
        
        data.append({'País': country_name, 'Bien Protegido': site_name, 'Catalogación': danger_type})
    
    # Update the progress bar
    progress_bar.update(1)

# End progress bar
progress_bar.close()

# Create a DataFrame with the data
WHLdf = pd.DataFrame(data)


WHLdf

Scraping: 100%|████████████████████████████| 169/169 [00:00<00:00, 15348.12it/s]


Unnamed: 0,País,Bien Protegido,Catalogación
0,Afghanistan,Minaret and Archaeological Remains of Jam,en peligro
1,Afghanistan,Cultural Landscape and Archaeological Remains ...,en peligro
2,Albania,Natural and Cultural Heritage of the Ohrid reg...,mixto
3,Albania,Butrint,cultural
4,Albania,Historic Centres of Berat and Gjirokastra,cultural
...,...,...,...
1298,Zimbabwe,"Mana Pools National Park, Sapi and Chewore Saf...",natural
1299,Zimbabwe,Great Zimbabwe National Monument,cultural
1300,Zimbabwe,Khami Ruins National Monument,cultural
1301,Zimbabwe,Mosi-oa-Tunya / Victoria Falls#*,natural


In [2]:
unique_values_WHLdf = WHLdf['País'].unique()
unique_values_WHLdf

array(['Afghanistan', 'Albania', 'Algeria', 'Andorra', 'Angola',
       'Antigua and Barbuda', 'Argentina', 'Armenia', 'Australia',
       'Austria', 'Azerbaijan', 'Bahrain', 'Bangladesh', 'Barbados',
       'Belarus', 'Belgium', 'Belize', 'Benin',
       'Bolivia (Plurinational State of)', 'Bosnia and Herzegovina',
       'Botswana', 'Brazil', 'Bulgaria', 'Burkina Faso', 'Cabo Verde',
       'Cambodia', 'Cameroon', 'Canada', 'Central African Republic',
       'Chad', 'Chile', 'China', 'Colombia', 'Congo', 'Costa Rica',
       "Côte d'Ivoire", 'Croatia', 'Cuba', 'Cyprus', 'Czechia',
       "Democratic People's Republic of Korea",
       'Democratic Republic of the Congo', 'Denmark', 'Dominica',
       'Dominican Republic', 'Ecuador', 'Egypt', 'El Salvador', 'Eritrea',
       'Estonia', 'Ethiopia', 'Fiji', 'Finland', 'France', 'Gabon',
       'Gambia', 'Georgia', 'Germany', 'Ghana', 'Greece', 'Guatemala',
       'Guinea', 'Haiti', 'Holy See', 'Honduras', 'Hungary', 'Iceland',
       'In

## UNESCO: List of World Heritage in Danger

In [3]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from tqdm import tqdm  # Import tqdm for progress bar

url = 'https://whc.unesco.org/es/list/'
response = requests.get(url)
soup = BeautifulSoup(response.text, 'html.parser')

# List for storing the data of the protected assets of each country
datos_bienes_protegidos = []

# Find all links containing countries
links = soup.find_all('a', href=lambda href: href and '/es/list/?iso=' in href)

# Configuring the progress bar
barra_progreso = tqdm(total=len(links), desc='Descargando datos')

for link in links:
    country_url = 'https://whc.unesco.org' + link['href']
    country_name = link.text.strip()
    
    country_response = requests.get(country_url)
    country_soup = BeautifulSoup(country_response.text, 'html.parser')
    
    # Find all the items in the list containing the protected properties
    heritage_items = country_soup.find_all('li', class_='cultural_danger')
    
    # Store data in the list
    for item in heritage_items:
        item_name = item.text.strip()
        datos_bienes_protegidos.append({'País': country_name, 'Bien Protegido': item_name})
    
    # Update the progress bar
    barra_progreso.update(1)


barra_progreso.close()

# Create DataFrame from the data list
df_bienes_protegidos = pd.DataFrame(datos_bienes_protegidos)


df_bienes_protegidos


Descargando datos: 100%|██████████████████████| 169/169 [00:40<00:00,  4.15it/s]


Unnamed: 0,País,Bien Protegido
0,Afganistán,Minarete y vestigios arqueológicos de Jam (2002)
1,Afganistán,Paisaje cultural y vestigios arqueológicos del...
2,Austria,Centro histórico de Viena (2001)
3,Bolivia (Estado Plurinacional de),Ciudad de Potosí­ (1987)
4,Egipto,Abu Mena (1979)
...,...,...
74,Venezuela (República Bolivariana de),Coro y su puerto (1993)
75,Yemen,Ciudad vieja amurallada de Shibam (1982)
76,Yemen,Ciudad vieja de Sana’a (1986)
77,Yemen,Ciudad histórica de Zabid (1993)


In [4]:
unique_values = df_bienes_protegidos['País'].unique()
unique_values

array(['Afganistán', 'Austria', 'Bolivia (Estado Plurinacional de)',
       'Egipto', 'Estado de Palestina', 'Iraq',
       'Jerusalem (Site proposed by Jordan)', 'Líbano', 'Libia', 'Malí',
       'Micronesia (Estados Federados de)', 'Panamá', 'Perú',
       'República Árabe Siria', 'Rumania', 'Serbia', 'Ucrania',
       'Uzbekistán', 'Venezuela (República Bolivariana de)', 'Yemen'],
      dtype=object)

# List of flight arrivals by airport in UNESCO WHL: International

In [5]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from tqdm import tqdm

def extraer_datos_vuelos(aeropuertos):
    datos_totales = []
    for aeropuerto in tqdm(aeropuertos, desc="Extrayendo datos", unit="aeropuerto"):
        url = f'https://es.flightaware.com/live/airport/{aeropuerto}'
        response = requests.get(url)
        soup = BeautifulSoup(response.text, 'html.parser')

        # Find the arrivals table
        tabla_llegadas = soup.find('table', {'data-type': 'arrivals'})
        if not tabla_llegadas:
            print(f"No se encontró la tabla de llegadas para el aeropuerto {aeropuerto}.")
            continue

        # Extract arrivals data
        filas = tabla_llegadas.find_all('tr', class_=lambda x: x and 'smallrow' in x)
        for fila in filas:
            elementos = fila.find_all('td')
            if len(elementos) >= 6:
                identificacion = elementos[0].text.strip()
                tipo = elementos[1].text.strip()
                origen = elementos[2].text.strip()
                hora_salida = elementos[3].text.strip()
                hora_llegada = elementos[5].text.strip()
                datos_totales.append({
                    'Aeropuerto': aeropuerto,
                    'Identificación': identificacion,
                    'Tipo': tipo,
                    'Origen': origen,
                    'Hora de salida': hora_salida,
                    'Hora de llegada': hora_llegada
                })

    return datos_totales

# List of airport codes
aeropuertos = ['KBL', 'TIA', 'ALG', 'LAD', 'ANU', 'EZE', 'EVN', 'SYD', 'VIE', 'GYD',
               'BAH', 'DAC', 'BGI', 'MSQ', 'BRU', 'BZE', 'COO', 'VVI', 'SJJ', 'GBE',
               'GRU', 'SOF', 'OUA', 'SID', 'PNH', 'DLA', 'YYZ', 'BGF', 'NDJ', 'SCL',
               'PEK', 'BOG', 'FIH', 'SJO', 'ABJ', 'ZAG', 'HAV', 'LCA', 'PRG', 'FIH',
               'CPH', 'SDQ', 'UIO', 'CAI', 'SAL', 'TLL', 'ADD', 'NAN', 'HEL', 'CDG',
               'LBV', 'BJL', 'TBS', 'FRA', 'ACC', 'ATH', 'GUA', 'CKY', 'PAP', 'SAP',
               'BUD', 'KEF', 'DEL', 'CGK', 'IKA', 'BGW', 'DUB', 'TLV', 'FCO', 'MBJ',
               'HND', 'AMM', 'ALA', 'NBO', 'FRU', 'VTE', 'RIX', 'BEY', 'MSU', 'MJI',
               'VNO', 'LUX', 'TNR', 'LLW', 'KUL', 'BKO', 'MLA', 'NKC', 'MRU', 'MEX',
               'PNI', 'ULN', 'TGD', 'CMN', 'MPM', 'RGN', 'WDH', 'KTM', 'AMS', 'AKL',
               'MGA', 'NIM', 'LOS', 'SKP', 'OSL', 'MCT', 'ISB', 'ROR', 'PTY', 'POM',
               'ASU', 'LIM', 'MNL', 'WAW', 'LIS', 'DOH', 'ICN', 'KIV', 'OTP', 'SVO',
               'KGL', 'SKB', 'UVF', 'RUH', 'DKR', 'BEG', 'SEZ', 'SIN', 'BTS', 'LJU',
               'HIR', 'JNB', 'MAD', 'CMB', 'GZA', 'KRT', 'PBM', 'ARN', 'ZRH', 'DAM',
               'DYU', 'BKK', 'LFW', 'TUN', 'IST', 'ASB', 'EBB', 'KBP', 'DXB', 'LHR',
               'JRO', 'LAX', 'MVD', 'TAS', 'VLI', 'CCS', 'SGN', 'SNA', 'LUN', 'HRE']  # Cód. IATA aeropuertos

datos_vuelos = extraer_datos_vuelos(aeropuertos)

# Create DataFrame with arrivals data
if datos_vuelos:
    datos_vuelos_df = pd.DataFrame(datos_vuelos)
    print(datos_vuelos_df)
else:
    print("No se pudieron obtener datos de llegadas para los aeropuertos")

datos_vuelos_df

Extrayendo datos: 100%|███████████████| 160/160 [03:09<00:00,  1.18s/aeropuerto]

     Aeropuerto Identificación  Tipo                                  Origen  \
0           KBL         ABY718  A320                  Int'l de Sharjah (SHJ)   
1           KBL         FDB301  B38M                    Int'l de Dubái (DXB)   
2           KBL         KMF904   737                 Int'l de Abu Dabi (AUH)   
3           KBL         AFG402   737                    Int'l de Dubái (DXB)   
4           KBL         KMF902  A343                    Int'l de Dubái (DXB)   
...         ...            ...   ...                                     ...   
2832        HRE          FJW6V  E135  Cerca de Hwange (Hwange National Park)   
2833        HRE         LNK382  E135       Int'l de la Ciudad del Cabo (CPT)   
2834        HRE         AZW456  B762              Int'l Soekarno-Hatta (CGK)   
2835        HRE          MWI34  DH8B      Lusaka International Airport (LUN)   
2836        HRE         ETH873  A359                        Int'l Bole (ADD)   

     Hora de salida    Hora de llegada 




Unnamed: 0,Aeropuerto,Identificación,Tipo,Origen,Hora de salida,Hora de llegada
0,KBL,ABY718,A320,Int'l de Sharjah (SHJ),04:17a +04,11:20a +0430
1,KBL,FDB301,B38M,Int'l de Dubái (DXB),04:59a +04,08:05a +0430
2,KBL,KMF904,737,Int'l de Abu Dabi (AUH),03:37a +04,06:50AM +0430 (?)
3,KBL,AFG402,737,Int'l de Dubái (DXB),03:24a +04,06:34AM +0430 (?)
4,KBL,KMF902,A343,Int'l de Dubái (DXB),03:18a +04,06:05AM +0430 (?)
...,...,...,...,...,...,...
2832,HRE,FJW6V,E135,Cerca de Hwange (Hwange National Park),01:38p CAT,02:15p CAT
2833,HRE,LNK382,E135,Int'l de la Ciudad del Cabo (CPT),11:27a SAST,02:04p CAT
2834,HRE,AZW456,B762,Int'l Soekarno-Hatta (CGK),08:02a WIB,01:35p CAT
2835,HRE,MWI34,DH8B,Lusaka International Airport (LUN),12:32p CAT,01:14p CAT


### Calculate the number of flights by origin in rankings

In [145]:
# Calculate the number of flights by origin
ranking_destino = datos_vuelos_df['Origen'].value_counts()

# Sort ranking by number of flights (from highest to lowest)
ranking_destino = ranking_origen.sort_values(ascending=False)


ranking_destino

                                          194
Istanbul Airport (IST)                     70
Int'l de Dubái (DXB)                       48
OR Tambo Int'l (JNB)                       45
Int'l de Miami (MIA)                       37
                                         ... 
Varanasi (VNS)                              1
Stavanger, Sola (SVG)                       1
Banjul Int'l (Yundum Int'l) (BJL)           1
Syamsudin Noor (BDJ)                        1
Cerca de Hwange (Hwange National Park)      1
Name: Origen, Length: 856, dtype: int64

## Check data: flights by airport

In [6]:
# Check the flights received by a specific airport:

filtro_eze = datos_vuelos_df[datos_vuelos_df['Aeropuerto'] == 'EZE']

# Count how many times 'EZE' appears in the column 'Airport'.
count_eze = len(filtro_eze)

# Count how many times 'EZE' appears in the 'Origin' column within the filtered DataFrame.
count_eze_origen = filtro_eze['Origen'].value_counts().get('EZE', 0)

print("El valor 'EZE' aparece {} veces en la columna 'Aeropuerto'.".format(count_eze))
print("En la columna 'Origen' correspondiente a 'EZE' hay {} datos.".format(count_eze_origen))

El valor 'EZE' aparece 20 veces en la columna 'Aeropuerto'.
En la columna 'Origen' correspondiente a 'EZE' hay 0 datos.


# List of flight arraivals by airport in UNESCO WHL: Spain

In [7]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from tqdm import tqdm

def extraer_datos_vuelos_nacionales(aeropuertos):
    datos_totales = []
    for aeropuerto in tqdm(aeropuertos, desc="Extrayendo datos", unit="aeropuerto"):
        url = f'https://es.flightaware.com/live/airport/{aeropuerto}'
        response = requests.get(url)
        soup = BeautifulSoup(response.text, 'html.parser')

        # Find the arrivals table
        tabla_llegadas = soup.find('table', {'data-type': 'arrivals'})
        if not tabla_llegadas:
            print(f"No se encontró la tabla de llegadas para el aeropuerto {aeropuerto}.")
            continue

        # Extract arrivals data
        filas = tabla_llegadas.find_all('tr', class_=lambda x: x and 'smallrow' in x)
        for fila in filas:
            elementos = fila.find_all('td')
            if len(elementos) >= 6:
                identificacion = elementos[0].text.strip()
                tipo = elementos[1].text.strip()
                origen = elementos[2].text.strip()
                hora_salida = elementos[3].text.strip()
                hora_llegada = elementos[5].text.strip()
                datos_totales.append({
                    'Aeropuerto': aeropuerto,
                    'Identificación': identificacion,
                    'Tipo': tipo,
                    'Origen': origen,
                    'Hora de salida': hora_salida,
                    'Hora de llegada': hora_llegada
                })

    return datos_totales

# List of national airport codes
aeropuertos_nacionales = ['LCG', 'BJZ', 'ODB', 'IBZ', 'MAD', 'SLM', 'TCI']

datos_vuelos_nacionales = extraer_datos_vuelos_nacionales(aeropuertos_nacionales)

# Create DataFrame with arrivals data
if datos_vuelos_nacionales:
    datos_vuelos_nacionales_df = pd.DataFrame(datos_vuelos_nacionales)
    print(datos_vuelos_nacionales_df)
else:
    print("No se pudieron obtener datos de llegadas para los aeropuertos nacionales")

datos_vuelos_nacionales_df

Extrayendo datos: 100%|███████████████████| 7/7 [00:07<00:00,  1.07s/aeropuerto]

No se encontró la tabla de llegadas para el aeropuerto TCI.
   Aeropuerto Identificación  Tipo                Origen Hora de salida  \
0         LCG        VOE3817  A319        Valencia (VLC)    08:40a CEST   
1         LCG        ANE8978  CRJX  Madrid-Barajas (MAD)    07:34a CEST   
2         LCG        VLG1292  A321       Barcelona (BCN)    06:42a CEST   
3         LCG        OVA7233  B738  Madrid-Barajas (MAD)    06:57a CEST   
4         LCG         IBE516  A320  Madrid-Barajas (MAD)    10:21p CEST   
..        ...            ...   ...                   ...            ...   
90        SLM                                                             
91        SLM                                                             
92        SLM                                                             
93        SLM                                                             
94        SLM                                                             

   Hora de llegada  
0      10:00a CEST




Unnamed: 0,Aeropuerto,Identificación,Tipo,Origen,Hora de salida,Hora de llegada
0,LCG,VOE3817,A319,Valencia (VLC),08:40a CEST,10:00a CEST
1,LCG,ANE8978,CRJX,Madrid-Barajas (MAD),07:34a CEST,08:30a CEST
2,LCG,VLG1292,A321,Barcelona (BCN),06:42a CEST,08:13a CEST
3,LCG,OVA7233,B738,Madrid-Barajas (MAD),06:57a CEST,07:56a CEST
4,LCG,IBE516,A320,Madrid-Barajas (MAD),10:21p CEST,11:16p CEST
...,...,...,...,...,...,...
90,SLM,,,,,
91,SLM,,,,,
92,SLM,,,,,
93,SLM,,,,,


# Import data tables in IATA data format

In [8]:
# table name airports
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Page URL
url = "https://www.flights.com.ar/codigos-aeropuertos-del-mundo-iata-icao/"

# Perform GET request to the URL
response = requests.get(url)

# Check if the request was successful
if response.status_code == 200:
    # Parse HTML content with BeautifulSoup
    soup = BeautifulSoup(response.content, "html.parser")
    
    # Find all <b> elements containing airport codes.
    codigos_aeropuertos = soup.find_all("b")
    
    # Create lists for storing airport codes and airport names
    codigos = []
    nombres = []
    
    # Iterate over the <b> elements to obtain the airport codes.
    for codigo in codigos_aeropuertos:
        codigos.append(codigo.text.strip().split(" ")[0])  # Get only the code
        # Get the following airport name
        nombre_aeropuerto = codigo.find_next_sibling(text=True).strip()
        nombres.append(nombre_aeropuerto)
    
    # Create a DataFrame with the data
    df_aeropuertos = pd.DataFrame({"Código de Aeropuerto": codigos, "Nombre de Aeropuerto": nombres})
    
else:
    print("Error al obtener el contenido de la página.")

In [9]:
df_aeropuertos

Unnamed: 0,Código de Aeropuerto,Nombre de Aeropuerto
0,AAE,"Annaba, Algeria – Les Salines"
1,AAL,"Aalborg, Denmark – Aalborg"
2,AAR,"Aarhus, Denmark – Tirstrup"
3,ABE,"Allentown, PA, USA – Allentown-Bethlehem-Easto..."
4,ABI,"Abilene, TX, USA – Municipal"
...,...,...
1898,ZQN,"Queenstown, New Zealand – Frankton"
1899,ZRF,"Rockford, IL, USA"
1900,ZRH,"Zurich, Switzerland – Zurich"
1901,ZSA,"San Salvador, Bahamas"


In [10]:
# Importar equivalencia de paises y códigos IATA

!pip install tabula-py



In [11]:
# Create df with the columns of the pdf of the IATA codes by country to cross-reference with the column of countries with protected properties UNESCO
import pandas as pd
import tabula

# PDF file path
file_path = "/Users/juanfransf/IRONHACK/COURSE/GitHub_LESSONS/3. WEEK 3/MINI PROJECT WEEK 3/DB axuliar/pdf_ciudad codigo IATA país.pdf"

# Extract data from PDF
IATA_country_df = tabula.read_pdf(file_path, pages='all')

# Combine the extracted data in a single DataFrame
IATA_country_df = pd.concat(IATA_country_df)

# Rename columns 
IATA_country_df.columns = ['Aeropuerto', 'País', 'IATA']

# Delete rows with null values
IATA_country_df = IATA_country_df.dropna()

# Reset the DataFrame index
IATA_country_df = IATA_country_df.reset_index(drop=True)


IATA_country_df

Error importing jpype dependencies. Fallback to subprocess.
No module named 'jpype'


Unnamed: 0,Aeropuerto,País,IATA
0,Aarhus,Denmark,AAR
1,Abadan,Iran,ABD
2,Abeche,Chad,AEH
3,Dhabi,Arab,AUH
4,Azikiwe,Nigeria,ABV
...,...,...,...
936,Zakynthos,Greece,ZTH
937,Zaragoza,Spain,ZAZ
938,Zhob,Pakistan,PZH
939,Zinder,Niger,ZND


## Cross table UNESCO countries with IATA code per country

In [12]:
# Cross data from WHLdf - df of UNESCO world heritage properties -> cross column Country from - IATA_country_df
# to set up a df including UNESCO countries with IATA codes to extract then the number of daily flights.
import pandas as pd

# Cross the DataFrames based on the column 'Country'
merged_df = pd.merge(WHLdf, IATA_country_df, left_on='País', right_on='País', how='inner')

IATAxPaís_df = merged_df[['IATA', 'País', 'Aeropuerto']]


In [13]:
IATAxPaís_df

Unnamed: 0,IATA,País,Aeropuerto
0,AAE,Algeria,Annaba
1,GJL,Algeria,Jijel
2,AAE,Algeria,Annaba
3,GJL,Algeria,Jijel
4,AAE,Algeria,Annaba
...,...,...,...
14828,BUQ,Zimbabwe,Bulawayo
14829,GWE,Zimbabwe,Gweru
14830,HWN,Zimbabwe,National
14831,MVZ,Zimbabwe,Masvingo


In [14]:
unique_values = IATAxPaís_df['IATA'].unique()
unique_values

array(['AAE', 'GJL', 'ALV', 'BUG', 'CAB', 'JMB', 'LAD', 'UGO', 'EZE',
       'COR', 'JUJ', 'JNI', 'MDZ', 'ROS', 'BRC', 'RSA', 'EVN', 'ADL',
       'ALH', 'ABX', 'AYR', 'BNK', 'ABM', 'BLT', 'ZBO', 'BNE', 'BHQ',
       'BME', 'BDB', 'CNS', 'CBR', 'CVQ', 'CSI', 'CED', 'CES', 'CMQ',
       'KCE', 'CTN', 'OOM', 'DBY', 'DRW', 'DRB', 'DPO', 'DBO', 'DYA',
       'EDR', 'EMD', 'EPR', 'GEX', 'GET', 'GLT', 'OOL', 'GOV', 'GKL',
       'GFF', 'GTE', 'GYP', 'HLT', 'HBA', 'HMH', 'IGH', 'IFL', 'JAD',
       'JCK', 'JUN', 'KGI', 'KTA', 'KRB', 'KTR', 'KGC', 'KNX', 'LVO',
       'LER', 'LNO', 'LSY', 'LRE', 'MKY', 'MTL', 'MIM', 'MQL', 'MOV',
       'MRZ', 'MYA', 'ISA', 'NAA', 'NRA', 'ZNE', 'NSA', 'OAG', 'PER',
       'PTJ', 'NSO', 'SIX', 'SOI', 'SYD', 'TMW', 'TRO', 'TEM', 'TPR',
       'TSV', 'WGA', 'WEI', 'WYA', 'WHM', 'WUN', 'UMR', 'WYN', 'GRZ',
       'INN', 'KLU', 'LNZ', 'SZG', 'VIE', 'BAK', 'BGI', 'MSQ', 'ANR',
       'BRU', 'LGG', 'GBE', 'JWA', 'MUB', 'AJU', 'BVB', 'CGB', 'CWB',
       'JCM', 'JLS',

# Tourism data with influence in historical and artistic heritage goods

## Tourism inbound data by country in WHL

In [15]:
import pandas as pd

# Specify the path to the CSV file
url = '/Users/juanfransf/IRONHACK/COURSE/GitHub_LESSONS/3. WEEK 3/MINI PROJECT WEEK 3/DB axuliar/datos de turismo/datos de recepción de turistas por países /datos de llegadas de turistas por paises.csv'

# Specify ';' delimiter when reading CSV file
tourism_data_df = pd.read_csv(url, sep=';')

# Display the loaded DataFrame
print(tourism_data_df)

              Países  Fecha Llegadas anuales      Var.
0         España [+]   2023       85.169.050   18,85 %
1       Alemania [+]   2019       39.563.217    1,75 %
2    Reino Unido [+]   2019       39.417.975    8,54 %
3        Francia [+]   2018       89.322.000    2,96 %
4         Italia [+]   2019       64.512.919    4,78 %
..               ...    ...              ...       ...
184        Samoa [+]   2019          172.284    5,05 %
185        Yemen [+]   2015          366.700  -63,96 %
186    Sudáfrica [+]   2019       10.228.593   -2,32 %
187       Zambia [+]   2019        1.266.000   18,10 %
188     Zimbabue [+]   2018        2.580.000    6,48 %

[189 rows x 4 columns]


In [16]:
tourism_data_df

Unnamed: 0,Países,Fecha,Llegadas anuales,Var.
0,España [+],2023,85.169.050,"18,85 %"
1,Alemania [+],2019,39.563.217,"1,75 %"
2,Reino Unido [+],2019,39.417.975,"8,54 %"
3,Francia [+],2018,89.322.000,"2,96 %"
4,Italia [+],2019,64.512.919,"4,78 %"
...,...,...,...,...
184,Samoa [+],2019,172.284,"5,05 %"
185,Yemen [+],2015,366.700,"-63,96 %"
186,Sudáfrica [+],2019,10.228.593,"-2,32 %"
187,Zambia [+],2019,1.266.000,"18,10 %"


In [56]:
# Cleaning and adaptation of columns

tourism_data_df['Países'] = tourism_data_df['Países'].str.replace(r'\s*\[\s*\+\s*\]\s*', '')
tourism_data_df = tourism_data_df.rename(columns={'Llegadas anuales': 'Llegadas_anuales'})
tourism_data_df['Llegadas_anuales'] = tourism_data_df['Llegadas_anuales'].astype(int)

  tourism_data_df['Países'] = tourism_data_df['Países'].str.replace(r'\s*\[\s*\+\s*\]\s*', '')


In [51]:
# Ranking of countries included in the WHL ordered from highest to lowest by tourist arrivals

tourism_data_df

Unnamed: 0,Países,Fecha,Llegadas_anuales,Var.
0,España,2023,85.169.050,"18,85 %"
1,Alemania,2019,39.563.217,"1,75 %"
2,Reino Unido,2019,39.417.975,"8,54 %"
3,Francia,2018,89.322.000,"2,96 %"
4,Italia,2019,64.512.919,"4,78 %"
...,...,...,...,...
184,Samoa,2019,172.284,"5,05 %"
185,Yemen,2015,366.700,"-63,96 %"
186,Sudáfrica,2019,10.228.593,"-2,32 %"
187,Zambia,2019,1.266.000,"18,10 %"


## Spanish Tourism data by origin in WHL

### Domestic tourism (CCAA)

In [19]:
import pandas as pd

# Specify the path to the Excel file
excel_path = '/Users/juanfransf/IRONHACK/COURSE/GitHub_LESSONS/3. WEEK 3/MINI PROJECT WEEK 3/DB axuliar/datos de turismo/turismo españa/turismo_interno_prov_ccaa.xlsx'

# Read the Excel file and load the data into a DataFrame
tourism_inbound_data_df = pd.read_excel(excel_path)

# Display the loaded DataFrame
tourism_inbound_data_df

Unnamed: 0,AÑO,MES,CCAA_ORIGEN,PROVINCIA_ORIGEN,CCAA_DESTINO,PROVINCIA_DESTINO,TURISTAS,PERNOCTACIONES,ESTANCIA_MEDIA
0,2021,1,Andalucía,Almería,Andalucía,Cádiz,1993,12504,6.3
1,2021,1,Andalucía,Almería,Andalucía,Córdoba,1976,11593,5.9
2,2021,1,Andalucía,Almería,Andalucía,Granada,36427,126961,3.5
3,2021,1,Andalucía,Almería,Andalucía,Huelva,964,5616,5.8
4,2021,1,Andalucía,Almería,Andalucía,Jaén,10277,46749,4.5
...,...,...,...,...,...,...,...,...,...
96412,2024,2,Región de Murcia,Murcia,Melilla,Melilla,160,922,5.8
96413,2024,2,Región de Murcia,Murcia,País Vasco,Araba/Álava,638,3854,6.0
96414,2024,2,Región de Murcia,Murcia,País Vasco,Bizkaia,881,7299,8.3
96415,2024,2,Región de Murcia,Murcia,País Vasco,Gipuzkoa,555,7421,13.4


### Domestic tourism (CCAA) in WHL

In [20]:
# List of World Heritage cities in Spain
ciudades_patrimonio = ['Madrid', 'Ávila', 'Jaén', 'Cáceres', 'Córdoba', 'Cuenca', 'Ibiza', 
                       'Badajoz', 'Salamanca', 'Tenerife', 'La Coruña', "A Coruña",
                       'Segovia', 'Tarragona', 'Toledo']


nacional_patrimonio_df = tourism_inbound_data_df[tourism_inbound_data_df['PROVINCIA_DESTINO'].isin(ciudades_patrimonio)]


nacional_patrimonio_df = nacional_patrimonio_df[['AÑO', 'CCAA_DESTINO', 'PROVINCIA_DESTINO', 'TURISTAS']]


nacional_patrimonio_df

Unnamed: 0,AÑO,CCAA_DESTINO,PROVINCIA_DESTINO,TURISTAS
1,2021,Andalucía,Córdoba,1976
4,2021,Andalucía,Jaén,10277
15,2021,Castilla - La Mancha,Cuenca,273
17,2021,Castilla - La Mancha,Toledo,1001
21,2021,Castilla y León,Salamanca,160
...,...,...,...,...
96397,2024,Cataluña,Tarragona,2636
96400,2024,Comunidad de Madrid,Madrid,37386
96404,2024,Extremadura,Badajoz,905
96405,2024,Extremadura,Cáceres,557


### Inbound tourism in Spain (international)

In [21]:
import pandas as pd

# Specify the path to the Excel file
excel_path = '/Users/juanfransf/IRONHACK/COURSE/GitHub_LESSONS/3. WEEK 3/MINI PROJECT WEEK 3/DB axuliar/datos de turismo/turismo españa/turismo_receptor_ccaa_pais.xlsx'

# Read the Excel file and load the data into a DataFrame
tourism_international_data_df = pd.read_excel(excel_path)

# Show the loaded DataFrame
tourism_international_data_df

Unnamed: 0,AÑO,MES,CCAA_DESTINO,CONTINENTE_ORIGEN,PAIS_ORIGEN,TURISTAS,PERNOCTACIONES,ESTANCIA_MEDIA
0,2021,1,Andalucía,América,Argentina,400,3221,8.1
1,2021,1,Andalucía,América,Brasil,434,2532,5.8
2,2021,1,Andalucía,América,Canadá,164,965,5.9
3,2021,1,Andalucía,América,Chile,114,1128,9.9
4,2021,1,Andalucía,América,Colombia,366,2637,7.2
...,...,...,...,...,...,...,...,...
51810,2024,2,Total Nacional,África,Tanzania,178,1095,6.2
51811,2024,2,Total Nacional,África,Togo,73,544,7.5
51812,2024,2,Total Nacional,África,Túnez,1994,10304,5.2
51813,2024,2,Total Nacional,África,Uganda,136,733,5.4


In [22]:
# List of World Heritage cities in Spain
ciudades_patrimonio = ['Madrid', 'Ávila', 'Jaén', 'Cáceres', 'Córdoba', 'Cuenca', 'Ibiza', 
                       'Badajoz', 'Salamanca', 'Tenerife', 'La Coruña', "A Coruña",
                       'Segovia', 'Tarragona', 'Toledo', 'Andalucía', 'Castilla y León',
                       'Extremadura', "Castilla - La Mancha", "Canarias", "Comunidad de Madrid",
                       "Galicia", "Cataluña"]


internacional_patrimonio_df = tourism_international_data_df[tourism_international_data_df['CCAA_DESTINO'].isin(ciudades_patrimonio)]


internacional_patrimonio_df = internacional_patrimonio_df[['AÑO', 'CCAA_DESTINO', 'PAIS_ORIGEN', 'TURISTAS']]


internacional_patrimonio_df

Unnamed: 0,AÑO,CCAA_DESTINO,PAIS_ORIGEN,TURISTAS
0,2021,Andalucía,Argentina,400
1,2021,Andalucía,Brasil,434
2,2021,Andalucía,Canadá,164
3,2021,Andalucía,Chile,114
4,2021,Andalucía,Colombia,366
...,...,...,...,...
51359,2024,Galicia,Argelia,70
51360,2024,Galicia,Egipto,40
51361,2024,Galicia,Ghana,51
51362,2024,Galicia,Marruecos,735


# Analisys UNESCO WHL vs. touristic potencial influence

## Places WHL Global

In [23]:
WHLdf

Unnamed: 0,País,Bien Protegido,Catalogación
0,Afghanistan,Minaret and Archaeological Remains of Jam,en peligro
1,Afghanistan,Cultural Landscape and Archaeological Remains ...,en peligro
2,Albania,Natural and Cultural Heritage of the Ohrid reg...,mixto
3,Albania,Butrint,cultural
4,Albania,Historic Centres of Berat and Gjirokastra,cultural
...,...,...,...
1298,Zimbabwe,"Mana Pools National Park, Sapi and Chewore Saf...",natural
1299,Zimbabwe,Great Zimbabwe National Monument,cultural
1300,Zimbabwe,Khami Ruins National Monument,cultural
1301,Zimbabwe,Mosi-oa-Tunya / Victoria Falls#*,natural


### Countries distribution by number of heritage elements

In [108]:
import pandas as pd

# Group by country and count the number of sites per country.
WHLdf_groupby = WHLdf.groupby('País').size().reset_index(name='Total')
WHLdf_groupby_asc = WHLdf_groupby.sort_values(by='Total', ascending=False)
WHLdf_groupby_asc

Unnamed: 0,País,Total
73,Italy,59
31,China,57
57,Germany,53
53,France,52
141,Spain,50
...,...,...
129,Saint Kitts and Nevis,1
130,Saint Lucia,1
131,San Marino,1
136,Singapore,1


### Countries distribution by risk

In [25]:
# Grouping by country and cataloging and counting the number of sites by country and cataloging

WHLdf_groupby = WHLdf.groupby(['Catalogación', 'País']).size().reset_index(name='Total')


WHLdf_pivot = WHLdf_groupby.pivot_table(index='País', columns='Catalogación', values='Total', fill_value=0)
WHLdf_pivot = WHLdf_pivot.sort_values(by='en peligro', ascending=False)

WHLdf_pivot

Catalogación,Unnamed: 1_level_0,cultural,en peligro,mixto,natural
País,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Syrian Arab Republic,0,0,6,0,0
Libya,0,0,5,0,0
Yemen,0,0,4,0,1
Iraq,0,2,3,1,0
Ukraine,0,4,3,0,1
...,...,...,...,...,...
Iran (Islamic Republic of),0,25,0,0,2
Belize,0,0,0,0,1
Ireland,0,2,0,0,0
Israel,0,9,0,0,0


In [141]:
# Select only the "at risk" column
en_peligro_column = WHLdf_pivot['en peligro']

# Sort column from highest to lowest
en_peligro_sorted = en_peligro_column.sort_values(ascending=False)

# Show sorted column
en_peligro_sorted

País
Syrian Arab Republic    6
Libya                   5
Yemen                   4
Ukraine                 3
Mali                    3
                       ..
Angola                  0
Vanuatu                 0
Andorra                 0
Viet Nam                0
Zimbabwe                0
Name: en peligro, Length: 169, dtype: int64

## Potential influence of tourism

### Flight with destination WHL International

In [26]:
datos_vuelos_df

Unnamed: 0,Aeropuerto,Identificación,Tipo,Origen,Hora de salida,Hora de llegada
0,KBL,ABY718,A320,Int'l de Sharjah (SHJ),04:17a +04,11:20a +0430
1,KBL,FDB301,B38M,Int'l de Dubái (DXB),04:59a +04,08:05a +0430
2,KBL,KMF904,737,Int'l de Abu Dabi (AUH),03:37a +04,06:50AM +0430 (?)
3,KBL,AFG402,737,Int'l de Dubái (DXB),03:24a +04,06:34AM +0430 (?)
4,KBL,KMF902,A343,Int'l de Dubái (DXB),03:18a +04,06:05AM +0430 (?)
...,...,...,...,...,...,...
2832,HRE,FJW6V,E135,Cerca de Hwange (Hwange National Park),01:38p CAT,02:15p CAT
2833,HRE,LNK382,E135,Int'l de la Ciudad del Cabo (CPT),11:27a SAST,02:04p CAT
2834,HRE,AZW456,B762,Int'l Soekarno-Hatta (CGK),08:02a WIB,01:35p CAT
2835,HRE,MWI34,DH8B,Lusaka International Airport (LUN),12:32p CAT,01:14p CAT


In [27]:
grupo_pais_aeropuerto_origen = datos_vuelos_df.groupby(['Aeropuerto', 'Origen']).size().reset_index(name='Count')

# Sort DataFrame by count in ascending order
grupo_pais_aeropuerto_origen = grupo_pais_aeropuerto_origen.sort_values(by='Count', ascending=False)

grupo_pais_aeropuerto_origen

Unnamed: 0,Aeropuerto,Origen,Count
776,GZA,,11
957,KRT,,11
524,DAM,,11
909,KBP,,11
1444,PAP,,11
...,...,...,...
763,GYD,"Cerca de Xiamen, Fujian",1
762,GYD,Cerca de Nakhichevan,1
761,GYD,Cerca de Istanbul,1
760,GYD,Astrakhan (ASF),1


In [28]:
grupo_por_origen = datos_vuelos_nacionales_df.groupby('Origen').size().reset_index(name='Count')

# Sort DataFrame by count in ascending order
grupo_por_origen = grupo_por_origen.sort_values(by='Count', ascending=False)

grupo_por_origen

Unnamed: 0,Origen,Count
0,,17
6,Cordoba (ODB / LEBA),14
25,Madrid-Barajas (MAD),12
3,Barcelona (BCN),8
32,Palma de Mallorca (or Son Sant Joan) (PMI),5
7,Cuatro Vientos (LECU),2
1,Alicante (ALC),2
37,Valencia (VLC),2
14,Granada (GRX / LEGR),2
36,Tenerife Norte (TFN),1


### Tourist arrival to WHL countries

> Worldwide protected assets vs. number of tourists arriving to these countries

In [52]:
tourism_data_df.head()

Unnamed: 0,Países,Fecha,Llegadas_anuales,Var.
0,España,2023,85.169.050,"18,85 %"
1,Alemania,2019,39.563.217,"1,75 %"
2,Reino Unido,2019,39.417.975,"8,54 %"
3,Francia,2018,89.322.000,"2,96 %"
4,Italia,2019,64.512.919,"4,78 %"


In [59]:
tourism_data_df = tourism_data_df.sort_values(by='Llegadas_anuales', ascending=False)
tourism_data_df

Unnamed: 0,Países,Fecha,Llegadas_anuales,Var.
3,Francia,2018,89322000,"2,96 %"
0,España,2023,85169050,"18,85 %"
6,Estados Unidos,2019,79256267,"-0,61 %"
8,China,2019,65700000,"4,45 %"
4,Italia,2019,64512919,"4,78 %"
...,...,...,...,...
113,Malí,2018,14000,"27,27 %"
168,Turkmenistán,2007,8200,"46,43 %"
90,Kiribati,2018,7100,"22,41 %"
111,Islas Marshall,2018,6800,"13,33 %"


In [60]:
!pip install googletrans==4.0.0-rc1
from googletrans import Translator
import pandas as pd



In [61]:
# tourism_data_df with a column 'Countries'.

# Create an instance of the translator
translator = Translator()

# Translate each country from Spanish to English and creates a new column 
tourism_data_df['Países'] = tourism_data_df['Países'].apply(lambda x: translator.translate(x, src='es', dest='en').text)

# Create a new DataFrame with only the desired columns
new_df = tourism_data_df[['Países']]

# Show the new DataFrame
print(new_df)

               Países
3              France
0               Spain
6                 USA
8               China
4               Italy
..                ...
113              Mali
168      Turkmenistan
90           Kiribati
111  Marshall Islands
173            Tuvalu

[189 rows x 1 columns]


In [63]:
from googletrans import Translator
import pandas as pd


# We create an instance of the translator
translator = Translator()

# We define a function to translate the values of the column 'Países'
def translate_country(country):
    # We use the translator to translate the country into English.
    translated = translator.translate(country, src='es', dest='en')
    # We return the translation
    return translated.text

# We apply the function to the column 'Countries' and store the results in a new column 'Translated_Countries'.
tourism_data_df['Translated_Countries'] = tourism_data_df['Países'].apply(translate_country)

# We show the new DataFrame with the column of translated countries
tourism_data_df

Unnamed: 0,Países,Fecha,Llegadas_anuales,Var.,Translated_Countries
3,France,2018,89322000,"2,96 %",France
0,Spain,2023,85169050,"18,85 %",Spain
6,USA,2019,79256267,"-0,61 %",USES
8,China,2019,65700000,"4,45 %",China
4,Italy,2019,64512919,"4,78 %",Italy
...,...,...,...,...,...
113,Mali,2018,14000,"27,27 %",Mali
168,Turkmenistan,2007,8200,"46,43 %",Turkmenistan
90,Kiribati,2018,7100,"22,41 %",Kiribati
111,Marshall Islands,2018,6800,"13,33 %",Marshall Islands


In [64]:
WHLdf

Unnamed: 0,País,Bien Protegido,Catalogación
0,Afghanistan,Minaret and Archaeological Remains of Jam,en peligro
1,Afghanistan,Cultural Landscape and Archaeological Remains ...,en peligro
2,Albania,Natural and Cultural Heritage of the Ohrid reg...,mixto
3,Albania,Butrint,cultural
4,Albania,Historic Centres of Berat and Gjirokastra,cultural
...,...,...,...
1298,Zimbabwe,"Mana Pools National Park, Sapi and Chewore Saf...",natural
1299,Zimbabwe,Great Zimbabwe National Monument,cultural
1300,Zimbabwe,Khami Ruins National Monument,cultural
1301,Zimbabwe,Mosi-oa-Tunya / Victoria Falls#*,natural


In [87]:
import pandas as pd

# Property included
WHLdf['WHL'] = 'included'

WHLdf

Unnamed: 0,País,Bien Protegido,Catalogación,WHL
0,Afghanistan,Minaret and Archaeological Remains of Jam,en peligro,included
1,Afghanistan,Cultural Landscape and Archaeological Remains ...,en peligro,included
2,Albania,Natural and Cultural Heritage of the Ohrid reg...,mixto,included
3,Albania,Butrint,cultural,included
4,Albania,Historic Centres of Berat and Gjirokastra,cultural,included
...,...,...,...,...
1298,Zimbabwe,"Mana Pools National Park, Sapi and Chewore Saf...",natural,included
1299,Zimbabwe,Great Zimbabwe National Monument,cultural,included
1300,Zimbabwe,Khami Ruins National Monument,cultural,included
1301,Zimbabwe,Mosi-oa-Tunya / Victoria Falls#*,natural,included


In [88]:
# Group the DataFrame by the values of the column 'Country'.
grupo = WHLdf.groupby('País')


conteo = grupo.size().sort_values(ascending=False)

# Display the count sorted in ascending order
print(conteo)

País
Italy                    59
China                    57
Germany                  53
France                   52
Spain                    50
                         ..
Saint Kitts and Nevis     1
Saint Lucia               1
San Marino                1
Singapore                 1
Dominican Republic        1
Length: 169, dtype: int64


In [89]:
import pandas as pd

# Fusionar los DataFrames utilizando las columnas 'Países' y 'País' respectivamente
tourism4WHL_df = pd.merge(tourism_data_df, WHLdf, left_on='Países', right_on='País', how='inner')

# Imprimir el nuevo DataFrame fusionado
tourism4WHL_df 

Unnamed: 0,Países,Fecha,Llegadas_anuales,Var.,Translated_Countries,País,Bien Protegido,Catalogación,WHL
0,France,2018,89322000,"2,96 %",France,France,Chartres Cathedral,cultural,included
1,France,2018,89322000,"2,96 %",France,France,Mont-Saint-Michel and its Bay,cultural,included
2,France,2018,89322000,"2,96 %",France,France,Palace and Park of Versailles,cultural,included
3,France,2018,89322000,"2,96 %",France,France,Prehistoric Sites and Decorated Caves of the V...,cultural,included
4,France,2018,89322000,"2,96 %",France,France,"Vézelay, Church and Hill",cultural,included
...,...,...,...,...,...,...,...,...,...
1030,Turkmenistan,2007,8200,"46,43 %",Turkmenistan,Turkmenistan,Parthian Fortresses of Nisa,cultural,included
1031,Turkmenistan,2007,8200,"46,43 %",Turkmenistan,Turkmenistan,Cold Winter Deserts of Turan*,natural,included
1032,Turkmenistan,2007,8200,"46,43 %",Turkmenistan,Turkmenistan,Silk Roads: Zarafshan-Karakum Corridor*,cultural,included
1033,Kiribati,2018,7100,"22,41 %",Kiribati,Kiribati,Phoenix Islands Protected Area,natural,included


In [93]:
# Create a pivot table with WHL and Flights Annual arrivals

tourism4WHL_pivot_df = tourism4WHL_df.pivot_table(index='País', columns='WHL', values=['Llegadas_anuales'], aggfunc='sum')
tourism4WHL_pivot_df

Unnamed: 0_level_0,Llegadas_anuales
WHL,included
País,Unnamed: 1_level_2
Albania,23677316
Algeria,16597000
Andorra,3042000
Angola,218000
Argentina,88788600
...,...
Vanuatu,116000
Viet Nam,144068800
Yemen,1833500
Zambia,1266000


In [147]:
# Ranking from highest to lowest of tourist arrivals to WHL countries

# Sort the DataFrame pivoted by annual arrivals in the category 'included' in descending order.
ranking = tourism4WHL_pivot_df['Llegadas_anuales']['included'].sort_values(ascending=False)

# Mostrar el ranking
ranking

País
France              4644744000
Spain               4258452500
Italy               3806262221
China               3744900000
Germany             2096850501
                       ...    
Mali                     56000
Turkmenistan             41000
Solomon Islands          28910
Kiribati                  7100
Marshall Islands          6800
Name: included, Length: 128, dtype: int64

### - Countries with more protected heritage

In [109]:
WHLdf_groupby_asc

Unnamed: 0,País,Total
73,Italy,59
31,China,57
57,Germany,53
53,France,52
141,Spain,50
...,...,...
129,Saint Kitts and Nevis,1
130,Saint Lucia,1
131,San Marino,1
136,Singapore,1


### - Countries by annual tourist arrivals

In [59]:
tourism_data_df = tourism_data_df.sort_values(by='Llegadas_anuales', ascending=False)
tourism_data_df

Unnamed: 0,Países,Fecha,Llegadas_anuales,Var.
3,Francia,2018,89322000,"2,96 %"
0,España,2023,85169050,"18,85 %"
6,Estados Unidos,2019,79256267,"-0,61 %"
8,China,2019,65700000,"4,45 %"
4,Italia,2019,64512919,"4,78 %"
...,...,...,...,...
113,Malí,2018,14000,"27,27 %"
168,Turkmenistán,2007,8200,"46,43 %"
90,Kiribati,2018,7100,"22,41 %"
111,Islas Marshall,2018,6800,"13,33 %"


### - Countries included in World Heritage List by annual tourist arrivals

In [103]:
# Print the sorted DataFrame
tourism4WHL_pivot_df_sorted = tourism4WHL_pivot_df.sort_values(by=('Llegadas_anuales', 'included'), ascending=False)


tourism4WHL_pivot_df_sorted

Unnamed: 0_level_0,Llegadas_anuales
WHL,included
País,Unnamed: 1_level_2
France,4644744000
Spain,4258452500
Italy,3806262221
China,3744900000
Germany,2096850501
...,...
Mali,56000
Turkmenistan,41000
Solomon Islands,28910
Kiribati,7100


In [149]:
# Top 6 countries with tourist arrivals and goods registered in WHL

tourism4WHL_pivot_df_sorted.head(6)

Unnamed: 0_level_0,Llegadas_anuales
WHL,included
País,Unnamed: 1_level_2
France,4644744000
Spain,4258452500
Italy,3806262221
China,3744900000
Germany,2096850501
Mexico,1575855855


### - Domestic Tourist with destination autonomous communities with WHL assets

In [129]:

nacional_patrimonio_df_pivot = nacional_patrimonio_df.pivot_table(index='CCAA_DESTINO', columns='AÑO', values=['TURISTAS'], aggfunc='sum')
nacional_patrimonio_df_pivot


Unnamed: 0_level_0,TURISTAS,TURISTAS,TURISTAS,TURISTAS
AÑO,2021,2022,2023,2024
CCAA_DESTINO,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Andalucía,4159725,4723959,4750257,736986
Castilla - La Mancha,7002414,7450418,7485490,1013218
Castilla y León,5808660,6595643,6874343,894899
Cataluña,6195426,6364281,6765431,669383
Comunidad de Madrid,14784757,17911541,18675228,3072849
Extremadura,3995604,4496633,4632675,659480
Galicia,3085987,3465346,3621350,487853


In [150]:
nacional_patrimonio_df_pivot_ord = nacional_patrimonio_df_pivot.sort_values(by=('TURISTAS', 2023), ascending=False)
nacional_patrimonio_df_pivot_ord

Unnamed: 0_level_0,TURISTAS,TURISTAS,TURISTAS,TURISTAS
AÑO,2021,2022,2023,2024
CCAA_DESTINO,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Comunidad de Madrid,14784757,17911541,18675228,3072849
Castilla - La Mancha,7002414,7450418,7485490,1013218
Castilla y León,5808660,6595643,6874343,894899
Cataluña,6195426,6364281,6765431,669383
Andalucía,4159725,4723959,4750257,736986
Extremadura,3995604,4496633,4632675,659480
Galicia,3085987,3465346,3621350,487853


### - International Tourist with destination autonomous communities with WHL assets

In [131]:
internacional_patrimonio_df_pivot = internacional_patrimonio_df.pivot_table(index='CCAA_DESTINO', columns='AÑO', values=['TURISTAS'], aggfunc='sum')
internacional_patrimonio_df_pivot

Unnamed: 0_level_0,TURISTAS,TURISTAS,TURISTAS,TURISTAS
AÑO,2021,2022,2023,2024
CCAA_DESTINO,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Andalucía,5499118,10291375,12545750,1599462
Canarias,4476042,12081247,13520566,2633071
Castilla - La Mancha,702673,885132,1102934,151719
Castilla y León,1629622,1990887,2531243,305265
Cataluña,8100352,14714860,18536471,2282286
Comunidad de Madrid,3080734,5776206,7241769,1172842
Extremadura,584506,675639,984240,138065
Galicia,1371595,1772287,2311192,256809


In [151]:
internacional_patrimonio_df_pivot_ord = internacional_patrimonio_df_pivot.sort_values(by=('TURISTAS', 2023), ascending=False)
internacional_patrimonio_df_pivot_ord

Unnamed: 0_level_0,TURISTAS,TURISTAS,TURISTAS,TURISTAS
AÑO,2021,2022,2023,2024
CCAA_DESTINO,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Cataluña,8100352,14714860,18536471,2282286
Canarias,4476042,12081247,13520566,2633071
Andalucía,5499118,10291375,12545750,1599462
Comunidad de Madrid,3080734,5776206,7241769,1172842
Castilla y León,1629622,1990887,2531243,305265
Galicia,1371595,1772287,2311192,256809
Castilla - La Mancha,702673,885132,1102934,151719
Extremadura,584506,675639,984240,138065


# Data sources.

https://datosmacro.expansion.com/comercio/turismo-internacional

https://whc.unesco.org/en/list/

https://es.flightaware.com/live/airport/SPJC

https://www.dataestur.es

## Save df data

In [None]:
import pickle

# Create order to save df's:
with open('WHLdf.pkl', 'wb') as f1:
    pickle.dump(WHLdf, f1)

with open('dataframe_df_bienes_protegidos.pkl', 'wb') as f2:
    pickle.dump(df_bienes_protegidos, f2)
    
with open('dataframe_datos_vuelos_df.pkl', 'wb') as f3:
    pickle.dump(datos_vuelos_df, f3)
    
with open('dataframe_df_aeropuertos.pkl', 'wb') as f4:
    pickle.dump(df_aeropuertos, f4)
    
with open('IATA_conuntry_df.pkl', 'wb') as f5:
    pickle.dump(df_aeropuertos, f5)

with open('Tourism_inbonund_data_df.pkl', 'wb') as f6:
    pickle.dump(df_aeropuertos, f6)
    
with open('nacional_patrimonio_df.pkl', 'wb') as f7:
    pickle.dump(df_aeropuertos, f7)
    
with open('Tourism_internacional_data_df.pkl', 'wb') as f8:
    pickle.dump(df_aeropuertos, f8)
    
with open('international_patrimonio_df.pkl', 'wb') as f9:
    pickle.dump(df_aeropuertos, f9)  
    
with open('WHLdf_groupby_asc.pkl', 'wb') as f10:
    pickle.dump(df_aeropuertos, f10) 
    
with open('datos_vuelos_df.pkl', 'wb') as f11:
    pickle.dump(df_aeropuertos, f11)
    
with open('grupo_pais_aeropuerto_origen.pkl', 'wb') as f12:
    pickle.dump(df_aeropuertos, f12)
    
with open('grupo_por_origen.pkl', 'wb') as f13:
    pickle.dump(df_aeropuertos, f13)
    
with open('Tourism_data_df.pkl', 'wb') as f14:
    pickle.dump(df_aeropuertos, f14)
    
with open('Tourism4WHL_df.pkl', 'wb') as f15:
    pickle.dump(df_aeropuertos, f15)
    
with open('tourism4WHL_pivot_df.pkl', 'wb') as f16:
    pickle.dump(df_aeropuertos, f16)

### Check download path

In [None]:
import os

# Directorio donde se espera que se hayan guardado los archivos
directorio = '.'  # Cambiar esto al directorio correcto si es diferente

# Lista de nombres de archivos que esperamos encontrar
nombres_archivos = [
    'dataframe_WHLdf.pkl',
    'dataframe_df_bienes_protegidos.pkl',
    'dataframe_datos_vuelos_df.pkl',
    'dataframe_df_aeropuertos.pkl'
]

# Verificar si los archivos existen en el directorio
for nombre_archivo in nombres_archivos:
    if os.path.exists(os.path.join(directorio, nombre_archivo)):
        print(f"El archivo {nombre_archivo} se ha guardado en el directorio {directorio}.")
    else:
        print(f"El archivo {nombre_archivo} no se ha encontrado en el directorio {directorio}.")


## Recuperar los df

In [None]:
#import pickle

## Cargar el DataFrame desde el archivo 'dataframe_WHLdf.pkl'
#with open('dataframe_WHLdf.pkl', 'rb') as f1:
#    WHLdf = pickle.load(f1)

## Cargar el DataFrame desde el archivo 'dataframe_df_bienes_protegidos.pkl'
#with open('dataframe_df_bienes_protegidos.pkl', 'rb') as f2:
#    df_bienes_protegidos = pickle.load(f2)

## Cargar el DataFrame desde el archivo 'dataframe_datos_vuelos_df.pkl'
#with open('dataframe_datos_vuelos_df.pkl', 'rb') as f3:
#    datos_vuelos_df = pickle.load(f3)

## Cargar el DataFrame desde el archivo 'dataframe_df_aeropuertos.pkl'
#with open('dataframe_df_aeropuertos.pkl', 'rb') as f4:
#    df_aeropuertos_df = pickle.load(f4)
