# 0. Imports

In [1]:
from bs4 import BeautifulSoup

import requests

import pandas as pd
import numpy as np

from time import sleep

from selenium import webdriver 
from webdriver_manager.chrome import ChromeDriverManager  
from selenium.webdriver.common.keys import Keys  
from selenium.webdriver.support.ui import Select 
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException 

from geopy.geocoders import Nominatim
geolocator = Nominatim(user_agent="my-geopy-app")
import random
import re
import datetime
import json
import math

# import support functions
import sys 
sys.path.append("..")
from src.data_extraction_support import extract_all_activities, create_country_airport_code_df, request_flight_itineraries, create_itineraries_dataframe

# 1. Conditions for travel itinerary planning

For this project, our clients requested that we organise and plan their holiday, including flight, accommodation and activity suggestions, according to the following restrictions:
- Holiday dates must be from Friday 8th to Sunday 17th November. 
- Flights should be direct: Madrid to Paris on the 8th November, Paris to Berlin on the 13th, and Berlin back to Madrid on the 17th.
- Accommodation must be under 100 euros per night for a couple
- Total activity budget is X
- Activities should be about X, Y and Z
- Activities suggested should not overlap

# 2. Extracting information

## 2.1 Flights

In [None]:
# Countries whose airport codes are looked up in the next cell.
# NOTE(review): "bali" is an island of Indonesia rather than a country, and "brasil"
# is the non-English spelling -- confirm the airport-code lookup resolves both as intended.
list_of_countries = [
    "france",
    "spain",
    "germany",
    "thailand",
    "bali",
    "philippines",
    "china",
    "australia",
    "italy",
    "russia",
    "romania",
    "united states",
    "argentina",
    "mexico",
    "brasil",
    "portugal",
    "austria",
    "belgium",
    "cuba",
    "colombia",
    "morocco",
    "south africa",
    "madagascar",
    "new zealand",
]

In [None]:
# Build the country / airport-code table for every entry in `list_of_countries`.
# (presumably returns a pandas DataFrame -- it is saved with `.to_csv` below; verify in
# src/data_extraction_support)
countries_airports = create_country_airport_code_df(list_of_countries)

# Save the table to disk; the path is relative to the notebook's working directory.
# NOTE(review): no `index=` argument is passed, so pandas' default applies and the index is
# written as an extra unnamed column -- confirm that whoever reads this file back expects it.
countries_airports.to_csv("../data/airport_codes/countries_airports.csv")

# Display the table as the cell output.
countries_airports

Information needed for extraction from each flight:
- Duration
- Price
- Stops
- Departure
- Arrival
- Company
- Self_transfer
- Fare_policy columns: 'isChangeAllowed', 'isPartiallyChangeable', 'isCancellationAllowed', 'isPartiallyRefundable'
- Score
- Luggage price (optional)
- Origin airport
- Destination airport


In [None]:
# Request flight itineraries for the first leg of the trip: Madrid -> Paris, 2 adults,
# departing 2024-11-08. `countries_airports` is passed so the helper can work with the
# airport-code table (presumably to resolve the city names to codes -- TODO confirm).
# NOTE(review): section 1 also lists Paris -> Berlin on the 13th and Berlin -> Madrid on the
# 17th; those legs are not requested in this part of the notebook.
itineraries_dict_list = request_flight_itineraries(countries_airports, "madrid","paris", n_adults=2, date="2024-11-08")
# Display the raw response as the cell output (may be large).
itineraries_dict_list

In [None]:
# Convert the raw itineraries returned by the previous cell into tabular form
# (presumably one row per itinerary with the fields listed above -- TODO confirm).
flight_itinerary_df = create_itineraries_dataframe(itineraries_dict_list)
# Display the result as the cell output.
flight_itinerary_df

## 2.2 Accommodations

## 2.3 Activities

As a first option, the range of activities proposed to our clients will come from the Civitatis catalog. If needed, more catalogs will be built on top of it to make it more compelling.

### 2.3.1 Civitatis - scraping


The information to extract from Civitatis is driven by the following requirements:
- Total activity budget is X
- Activities should be about X, Y and Z
- Activities suggested should not overlap

Therefore, the fields to be extracted should ideally be, at least:
- Date [X]
- Time [X]
- Address [X]
- Duration [X]
- Price [X]
- Name [X]
- Description [X]
- Category [X]
- Image [X]
- URL [X]
- Score (optional)
- Reviews (optional)
- Language (optional)

In [2]:
# Scrape the Civitatis activities on offer in Berlin during the Berlin leg of the trip.
# Per the itinerary in section 1, the clients fly to Berlin on 13 November and return to
# Madrid on 17 November, so that is the window to query (the earlier 9-13 November range
# falls on the Paris leg).
# NOTE(review): assumes the two date arguments are the start and end of the search window,
# in that order -- confirm against src/data_extraction_support.
# The result is kept in a variable so later cells can filter it by budget, category and overlap.
berlin_activities = extract_all_activities("berlin", "2024-11-13", "2024-11-17")
berlin_activities

Unnamed: 0,activity_name,description,url,image,image2,available_days,available_times,duration,latitude,longitude,address,price,currency,category
0,Entrada a la Torre de la Televisión,Podréis reservar la entrada a la Torre de la T...,www.civitatis.com/es/berlin/entrada-torre-tele...,www.civitatis.com/f/alemania/berlin/entrada-to...,,[],[],30 minutos,52.520849,13.4096,"Berliner Fernsehturm, 1a, Panoramastraße, Span...",2.65,EUR,Entradas
1,Visita guiada por el Museo Nuevo,¿Cómo vivían los faraones? Lo descubriremos en...,www.civitatis.com/es/berlin/visita-guiada-muse...,"www.civitatis.comdata:image/gif;base64,R0lGODl...",www.civitatis.com/f/alemania/berlin/visita-gui...,[],[],2 horas,52.51956430488067,13.39765149325871,"Museumsinsel, Burgstraße, Spandauer Vorstadt, ...",7.5,EUR,Español
2,Free tour de los misterios y leyendas de Berlín,Los secretos de la capital alemana son numeros...,www.civitatis.com/es/berlin/free-tour-misterio...,"www.civitatis.comdata:image/gif;base64,R0lGODl...",www.civitatis.com/f/alemania/berlin/free-tour-...,[],[],3 horas,52.52062505416855,13.4071427198706,"St. Marienkirche, 8, Karl-Liebknecht-Straße, S...",2.2,EUR,Español
3,Tour de la Guerra Fría + Muro de Berlín + Muse...,Si queréis saber cómo era la vida en una ciuda...,www.civitatis.com/es/berlin/berlin-guerra-fria/,"www.civitatis.comdata:image/gif;base64,R0lGODl...",www.civitatis.com/f/alemania/berlin/berlin-gue...,[],[],4 horas,52.52105,13.41017,"Gontardstraße, Mitte, Berlin, 10178, Deutschland",7.8,EUR,Español
4,Entrada al Icebar Berlín,Si queréis saber cómo se vive dentro de un igl...,www.civitatis.com/es/berlin/entrada-icebar-ber...,"www.civitatis.comdata:image/gif;base64,R0lGODl...",www.civitatis.com/f/alemania/berlin/entrada-ic...,[],[],45 minutos,52.521041,13.4040211,"2, Spandauer Straße, Spandauer Vorstadt, Mitte...",5.0,EUR,Entradas
5,Visita guiada por la Isla de los Museos,Acompañadnos en esta visita guiada por la Isla...,www.civitatis.com/es/berlin/visita-guiada-isla...,"www.civitatis.comdata:image/gif;base64,R0lGODl...",www.civitatis.com/f/alemania/berlin/visita-gui...,[],[],3 horas,52.517258,13.394698,"Unter den Linden, Friedrichswerder, Mitte, Ber...",4.0,EUR,Español
6,Tour en bicicleta por Berlín,En este tour en bicicleta por Berlín practicar...,www.civitatis.com/es/berlin/berlin-bicicleta/,"www.civitatis.comdata:image/gif;base64,R0lGODl...",www.civitatis.com/f/alemania/berlin/berlin-bic...,[],[],3h 30m,52.54005526,13.41342434,"Kulturbrauerei, 36, Schönhauser Allee, Bremer ...",6.8,EUR,Inglés
7,"Autobús turístico de Berlín, City Sightseeing",El autobús turístico recorre Berlín realizando...,www.civitatis.com/es/berlin/autobus-turistico-...,"www.civitatis.comdata:image/gif;base64,R0lGODl...",www.civitatis.com/f/alemania/berlin/autobus-tu...,[],[],1 - 2d,0.0,0.0,,9.6,EUR,Español
8,Entrada al Madame Tussauds de Berlín,Sacad a lucir vuestras mejores poses en el Mad...,www.civitatis.com/es/berlin/entrada-madame-tus...,"www.civitatis.comdata:image/gif;base64,R0lGODl...",www.civitatis.com/f/alemania/berlin/entrada-ma...,[],[],Entradas,52.5168789,13.3813841,"74, Unter den Linden, Dorotheenstadt, Mitte, B...",5.8,EUR,
9,Entrada a Panoramapunkt sin colas,Con esta entrada a Panoramapunkt subiréis en e...,www.civitatis.com/es/berlin/entrada-panoramapu...,"www.civitatis.comdata:image/gif;base64,R0lGODl...",www.civitatis.com/f/alemania/berlin/entrada-pa...,[],[],Entradas,52.509299280840885,13.375025669424874,"Jam Sound, 1, Potsdamer Straße, Tiergarten, Mi...",3.38,EUR,
