# Import libraries + data

In [136]:
import pandas as pd
import numpy as np
import plotly.express as px

# Location of the processed dataset (df_clean.csv) in the project's GitHub
# repository; the literal is split only to keep the line length readable.
url = (
    "https://raw.githubusercontent.com/JBaptisteAll/Japan_tourism_analysis"
    "/refs/heads/main/data_processed/df_clean.csv"
)

# Read the whole CSV over HTTP and report (rows, columns) as a quick sanity check.
df = pd.read_csv(url)
print(df.shape)

(54, 42)


In [137]:
# Show the column index of the loaded frame to see which fields are available.
df.columns

Index(['Horodateur', 'nationality', 'country', 'age_group', 'family_situation',
       'household_income_in_€', 'travel_frequency', 'been_to_Japan',
       'Japan_vac_duration', 'rating_interest_culture_and_history',
       'rating_interest_food', 'rating_interest_nature_hiking',
       'rating_interest_shopping_and_techno',
       'rating_interest_events_and_festivals', 'rating_interest_wellness',
       'rating_interest_theme_park', 'Japan_budget_per_week',
       'Japan_prefered_accomodation', 'alternative_destination',
       'alt_dest_main_reason', 'alt_dest_prefered_accomodation',
       'alt_dest_budget_per_week', 'alt_dest_transportation', 'trip_prep',
       'booking_trip_channel', 'most_influencial_reason_to_choose_dest',
       'recomendation_to_improve_attractiveness',
       'most_wanted_pref_to_visit_1', 'most_wanted_pref_to_visit_2',
       'most_wanted_pref_to_visit_3', 'most_wanted_pref_to_visit_4',
       'most_wanted_pref_to_visit_5', 'Japan_most_difficulties_1',
   

In [138]:
# Preview the first rows of the frame using the notebook's rich table rendering.
display(df.head())

Unnamed: 0,Horodateur,nationality,country,age_group,family_situation,household_income_in_€,travel_frequency,been_to_Japan,Japan_vac_duration,rating_interest_culture_and_history,...,Japan_most_difficulties_1,Japan_most_difficulties_2,Japan_most_difficulties_3,Japan_most_difficulties_4,Japan_most_difficulties_5,alt_dest_most_difficulties_1,alt_dest_most_difficulties_2,alt_dest_most_difficulties_3,alt_dest_most_difficulties_4,alt_dest_most_difficulties_5
0,05/10/2025 13:09:39,France,France,35-44,Married_no_kids,1500-1999,Every 2–3 years,"Yes, several times",More than 4 weeks,Essential,...,Language,Crowded/Popularity,Translation,,,Language,Transportation,Car rental,,
1,06/10/2025 13:30:50,France,France,45-54,Relationship_with_kids,1500-1999,Every 2–3 years,"No, but I would like to go",2 weeks,Essential,...,Language,Expensive,Translation,,,Expensive,Crowded,Translation,,
2,06/10/2025 17:20:05,France,France,35-44,Single,1500-1999,Once a year,"No, but I would like to go",3 weeks,Very important,...,Language,Car rental,Expensive,,,Language,Car rental,Expensive,,
3,06/10/2025 19:47:27,France,France,45-54,Single,2000-2499,Every 2–3 years,"No, but I would like to go",1 week,Moderately important,...,Car rental,Expensive,,,,Transportation,,,,
4,06/10/2025 20:56:00,France,France,45-54,Married_no_kids,2500-2999,Every 2–3 years,"No, but I would like to go",3 weeks,Very important,...,Language,Expensive,,,,Language,,,,


# Slide 2
## Overall share of people interested in a trip to Japan

In [139]:
# Collapse the raw "been_to_Japan" answers into three presentation labels.
# The labels are in French because the charts display this column as-is.
mapping_xp_japan = {
    "Yes, several times": "Déjà allés",
    "Yes, once": "Déjà allés",
    "No, and I’m not interested": "Pas intéressés (jamais allés)",
    "No, but I would like to go": "Intéressés (jamais allés)"
}

# `replace` (rather than `map`) leaves values that are not keys of the mapping
# untouched. Re-running this cell therefore keeps the already-translated labels
# instead of turning the whole column into NaN, and an unexpected raw answer
# stays visible instead of silently vanishing from the counts.
df["been_to_Japan"] = df["been_to_Japan"].replace(mapping_xp_japan)

# Fail loudly if an answer unknown to the mapping is present (missing values
# are tolerated; they are simply not counted downstream).
assert (
    df["been_to_Japan"].dropna().isin(set(mapping_xp_japan.values())).all()
), "been_to_Japan contains answers that mapping_xp_japan does not cover"

In [140]:
# Count respondents per category (value_counts sorts by descending frequency)
# and flatten the result into a two-column frame for plotting.
# The column names are set explicitly: before pandas 2.0 the bare
# `value_counts().reset_index()` chain produced "index"/"been_to_Japan"
# instead of the "been_to_Japan"/"count" pair that the later cells select.
df_xp_japan = (
    df["been_to_Japan"]
    .value_counts()
    .rename_axis("been_to_Japan")
    .reset_index(name="count")
)

In [141]:
# Plain-text dump of the per-category counts.
print(df_xp_japan)

                   been_to_Japan  count
0      Intéressés (jamais allés)     35
1                     Déjà allés     12
2  Pas intéressés (jamais allés)      7


In [142]:
# Share of each category among all counted respondents, expressed as a
# percentage and rounded to two decimals.
df_xp_japan["percentage"] = (
    df_xp_japan["count"]
    .div(df_xp_japan["count"].sum())
    .mul(100)
    .round(2)
)

In [143]:
# Plain-text dump of the summary table, now including the percentage column.
print(df_xp_japan)

                   been_to_Japan  count  percentage
0      Intéressés (jamais allés)     35       64.81
1                     Déjà allés     12       22.22
2  Pas intéressés (jamais allés)      7       12.96


Pie chart

In [144]:
# Pie chart of the respondent split: one slice per category, sized by the
# number of respondents in that category. hole=0 keeps it a full pie.
fig = px.pie(
    data_frame=df_xp_japan,
    names="been_to_Japan",
    values="count",
    hole=0,
    title="Un intérêt fort… mais une conversion faible",
)

# Write both the share and the category name on each slice.
fig.update_traces(textinfo="percent+label")

# Kept as the last expression of the cell: update_layout returns the figure,
# which is what makes the notebook render it.
fig.update_layout(showlegend=True)

Bar chart

In [145]:
# Bar chart of the same split, one bar per category with its percentage as the
# bar height and as the bar label. Bars are shaded by respondent count.
fig = px.bar(
    data_frame=df_xp_japan,
    x="been_to_Japan",
    y="percentage",
    color="count",
    text="percentage",
    title="Un intérêt fort… mais une conversion faible",
)

# Append a "%" sign to each label and draw the labels above the bars.
fig.update_traces(textposition="outside", texttemplate="%{text}%")

# Fixed canvas size, no x-axis title, and the continuous colour scale is
# hidden. Kept as the last expression so the notebook renders the figure.
fig.update_layout(
    xaxis_title="",
    yaxis_title="Pourcentage de répondants",
    coloraxis_showscale=False,
    width=800,
    height=500,
)

In [152]:
# Bar labels: the percentage followed by a "%" sign. The column is rebuilt
# from "percentage" on every run, so re-running the cell never double-wraps.
df_xp_japan["text_display"] = df_xp_japan["percentage"].astype(str) + "%"

# Wrap the label of the highlighted category in <b>…</b> so that it is rendered
# in bold; every other label is left as it is.
df_xp_japan["text_display"] = df_xp_japan["text_display"].mask(
    df_xp_japan["been_to_Japan"] == "Intéressés (jamais allés)",
    "<b>" + df_xp_japan["text_display"] + "</b>",
)

In [155]:
# Bar chart with the highlighted category in a contrasting colour; the other
# two categories share the same dark colour. Labels come from "text_display".
fig = px.bar(
    data_frame=df_xp_japan,
    x="been_to_Japan",
    y="percentage",
    text="text_display",
    color="been_to_Japan",
    color_discrete_map={
        "Intéressés (jamais allés)": "#E6FF01",
        "Déjà allés": "#0C0241",
        "Pas intéressés (jamais allés)": "#0C0241",
    },
    title="Un intérêt fort… mais une conversion faible",
)

# Draw the labels above the bars with a 14-point font.
fig.update_traces(textposition="outside", textfont={"size": 14})

# Fixed canvas size and no x-axis title; the legend is turned off.
# Kept as the last expression so the notebook renders the figure.
fig.update_layout(
    xaxis_title="",
    yaxis_title="Part des répondants (%)",
    showlegend=False,
    width=800,
    height=500,
)



