# Tourism in Europe

In [3]:
from google.cloud import bigquery
import pandas as pd
from pathlib import Path

In [4]:
# Export configuration: source BigQuery table and local CSV destination.
PROJECT_ID = "tourism-in-europe"
TABLE_FQN  = "tourism_analytics.airbnb_wiki_numbeo_partitioned"  # dataset.table
OUT_CSV    = "../data/clean/tourism_all_information_partitioned.csv"

client = bigquery.Client(project=PROJECT_ID)

sql = f"SELECT * FROM `{PROJECT_ID}.{TABLE_FQN}`"

# Submit the query exactly once, forcing the EU location.
# Keeping a handle on the job lets the fallback below re-read the results
# of the finished job instead of submitting (and paying for) a second query.
query_job = client.query(sql, location="EU")

try:
    # Preferred path: download through the BigQuery Storage API.
    df = query_job.result().to_dataframe(create_bqstorage_client=True)
except Exception as exc:
    # Best-effort fallback to the plain REST download. The failure is
    # reported rather than silently swallowed so the reader knows which
    # path produced the data.
    print(f"⚠️ BigQuery Storage download failed ({type(exc).__name__}: {exc}); falling back to REST download")
    df = query_job.result().to_dataframe(create_bqstorage_client=False)

# Make sure the destination folder exists so a fresh checkout can run this cell.
out_csv_path = Path(OUT_CSV)
out_csv_path.parent.mkdir(parents=True, exist_ok=True)

df.to_csv(out_csv_path, index=False)
print(f"✅ Écrit: {OUT_CSV} | lignes: {len(df)} | taille: {out_csv_path.stat().st_size/1_000_000:.2f} MB")

✅ Écrit: ../data/clean/tourism_all_information_partitioned.csv | lignes: 114212 | taille: 16.82 MB


In [5]:
# Load the two locally cleaned extracts in one pass:
#   - Wikipedia city data (international visitors)
#   - cleaned Airbnb listings
df_wiki, df_airbnb = map(
    pd.read_csv,
    (
        "../data/clean/wiki_city_international_visitors.csv",
        "../data/clean/airbnb_clean.csv",
    ),
)

# Preview the first five Airbnb rows.
df_airbnb.head()

Unnamed: 0,listing_name,neighbourhood_cleansed,latitude,longitude,room_type,accommodates,bedrooms,beds,bathrooms,bathrooms_text,price,minimum_nights,maximum_nights,availability_365,number_of_reviews,review_scores_rating,city
0,zen and calm,Observatoire,48.83191,2.3187,Entire home/apt,2,1.0,1.0,1.0,1 bath,135.0,2,30,355,7,5.0,Paris
1,Your perfect Paris studio on Île Saint-Louis,Hôtel-de-Ville,48.85247,2.35835,Entire home/apt,2,0.0,1.0,1.0,1 bath,114.0,1,730,69,452,4.62,Paris
2,MARAIS - 2ROOMS APT - 24 PEOPLE,Hôtel-de-Ville,48.85909,2.35315,Entire home/apt,4,2.0,1.0,1.0,1 bath,149.0,10,130,197,380,4.73,Paris
3,"Cozy, Central Paris WALK or VELIB EVERYWHERE !",Louvre,48.86006,2.34863,Entire home/apt,1,1.0,1.0,1.0,1 bath,75.0,180,360,358,0,,Paris
4,room in an artists flat with great view!,Buttes-Montmartre,48.88946,2.35867,Private room,1,1.0,1.0,1.0,1 shared bath,50.0,5,1125,82,63,4.63,Paris


In [6]:
# (rows, columns) of the cleaned Airbnb listings loaded from CSV.
df_airbnb.shape

(114213, 17)

## Add description

In [8]:
# 1. Join with Wikipedia (on city_name).
# Keep a single description per city: a left join repeats a left-hand row
# once per matching right-hand row, so duplicate cities in df_wiki would
# silently inflate the number of listings.
wiki_descriptions = df_wiki[["city", "description"]].drop_duplicates(subset="city")

df_final = df.merge(
    wiki_descriptions,
    how="left",
    left_on="city_name",
    right_on="city",
).drop(columns=["city"])  # drop the redundant "city" key column

# The join only adds a column; it must never add or remove listings.
assert len(df_final) == len(df), "Wikipedia join changed the number of rows"

df_final.head(5)

Unnamed: 0,price_int,price_range,listing_id,latitude,longitude,city_name,country_name,room_type,price,accommodates,...,mcdonalds,cappuccino,gasoline_1l,one_way_ticket,monthly_pass,taxi_1km,cinema,fitness_monthly,international_visitors_2018,description
0,1050,1000+,64220,40.40107,-3.71306,Madrid,Spain,Entire home/apt,1050.0,7,...,10.0,2.71,1.6,1.5,39.25,1.3,10.0,43.71,5440100,Madrid is the capital and most populous munici...
1,1053,1000+,81589,40.411193,-3.707307,Madrid,Spain,Entire home/apt,1053.0,15,...,10.0,2.71,1.6,1.5,39.25,1.3,10.0,43.71,5440100,Madrid is the capital and most populous munici...
2,1085,1000+,73295,40.43224,-3.70707,Madrid,Spain,Entire home/apt,1085.0,12,...,10.0,2.71,1.6,1.5,39.25,1.3,10.0,43.71,5440100,Madrid is the capital and most populous munici...
3,1098,1000+,80704,40.417948,-3.702761,Madrid,Spain,Private room,1098.0,6,...,10.0,2.71,1.6,1.5,39.25,1.3,10.0,43.71,5440100,Madrid is the capital and most populous munici...
4,1053,1000+,75343,40.41924,-3.70052,Madrid,Spain,Entire home/apt,1053.0,12,...,10.0,2.71,1.6,1.5,39.25,1.3,10.0,43.71,5440100,Madrid is the capital and most populous munici...


In [9]:
# (rows, columns) after the Wikipedia join; a left join is expected to keep
# the row count of the left-hand frame as long as the lookup has one row per city.
df_final.shape

(114212, 27)

In [10]:
# 2. Attach listing_name / neighbourhood_cleansed from the Airbnb extract.
#
# The two frames share no listing identifier here, so the join key is the
# (latitude, longitude) pair. Exact float equality is fragile: if one side
# carries more decimals than the other, the rows never match.
# NOTE(review): an exact-equality join left rows unmatched precisely where the
# coordinates have 6 decimals — presumably a precision mismatch between the
# BigQuery table and the CSV; confirm against the raw data.
# Both sides are therefore compared on coordinates rounded to COORD_DECIMALS
# (6 decimals of a degree is roughly 0.1 m), using temporary key columns so
# the original latitude/longitude values are left untouched in the result.
COORD_DECIMALS = 6
COORD_KEYS = ["_lat_key", "_lon_key"]

# One Airbnb row per rounded coordinate pair (first occurrence wins), so the
# left join below cannot multiply listings.
airbnb_rounded_coords = df_airbnb[["latitude", "longitude"]].round(COORD_DECIMALS)
df_airbnb_unique = df_airbnb.loc[~airbnb_rounded_coords.duplicated()]

airbnb_lookup = df_airbnb_unique[["listing_name", "neighbourhood_cleansed"]].assign(
    _lat_key=df_airbnb_unique["latitude"].round(COORD_DECIMALS),
    _lon_key=df_airbnb_unique["longitude"].round(COORD_DECIMALS),
)

df_airbnb_wiki_numbeo_partitioned = (
    df_final
    .assign(
        _lat_key=lambda frame: frame["latitude"].round(COORD_DECIMALS),
        _lon_key=lambda frame: frame["longitude"].round(COORD_DECIMALS),
    )
    .merge(airbnb_lookup, how="left", on=COORD_KEYS)
    .drop(columns=COORD_KEYS)  # temporary join keys are not part of the output
)

# The join only adds columns; it must never add or remove listings.
assert len(df_airbnb_wiki_numbeo_partitioned) == len(df_final), (
    "Airbnb join changed the number of rows"
)

df_airbnb_wiki_numbeo_partitioned.head(5)

Unnamed: 0,price_int,price_range,listing_id,latitude,longitude,city_name,country_name,room_type,price,accommodates,...,gasoline_1l,one_way_ticket,monthly_pass,taxi_1km,cinema,fitness_monthly,international_visitors_2018,description,listing_name,neighbourhood_cleansed
0,1050,1000+,64220,40.40107,-3.71306,Madrid,Spain,Entire home/apt,1050.0,7,...,1.6,1.5,39.25,1.3,10.0,43.71,5440100,Madrid is the capital and most populous munici...,Flat Madrid Rio Park 4br 2bth,Acacias
1,1053,1000+,81589,40.411193,-3.707307,Madrid,Spain,Entire home/apt,1053.0,15,...,1.6,1.5,39.25,1.3,10.0,43.71,5440100,Madrid is the capital and most populous munici...,,
2,1085,1000+,73295,40.43224,-3.70707,Madrid,Spain,Entire home/apt,1085.0,12,...,1.6,1.5,39.25,1.3,10.0,43.71,5440100,Madrid is the capital and most populous munici...,Chamberí Living - New Luxury Apartment DE,Arapiles
3,1098,1000+,80704,40.417948,-3.702761,Madrid,Spain,Private room,1098.0,6,...,1.6,1.5,39.25,1.3,10.0,43.71,5440100,Madrid is the capital and most populous munici...,,
4,1053,1000+,75343,40.41924,-3.70052,Madrid,Spain,Entire home/apt,1053.0,12,...,1.6,1.5,39.25,1.3,10.0,43.71,5440100,Madrid is the capital and most populous munici...,The wonder in Gran Via 5BR 5BH aa central,Sol


In [11]:
# (rows, columns) after attaching listing_name and neighbourhood_cleansed.
df_airbnb_wiki_numbeo_partitioned.shape

(114212, 29)

In [12]:
# Persist the enriched dataset as UTF-8 CSV.
# The target sub-folder is created first: pandas does not create missing
# directories and raises if the parent folder does not exist.
final_csv_path = Path("../data/clean/from_big_query/df_airbnb_wiki_numbeo_partitioned.csv")
final_csv_path.parent.mkdir(parents=True, exist_ok=True)
df_airbnb_wiki_numbeo_partitioned.to_csv(final_csv_path, index=False, encoding="utf-8")