In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import ipywidgets as widgets
from IPython.display import display, Markdown

# Calendar order of month names; reused by the ordered categorical and by plot axes.
month_order = [
    "January", "February", "March", "April", "May", "June",
    "July", "August", "September", "October", "November", "December",
]

# Load the merged dataset and derive both categorical columns in one chained step.
df = pd.read_csv(
    "../data/merged/ready_data.csv",
    parse_dates=["timestamp", "sunrise", "sunset", "extracted_at"],
).assign(
    # Ordered categorical: month values sort by position in `month_order`.
    month=lambda frame: pd.Categorical(frame["month"], categories=month_order, ordered=True),
    # Three temperature bands split at 18 and 30 (pd.cut bins are right-closed by default).
    temp_category=lambda frame: pd.cut(
        frame["temp_C"],
        bins=[-float("inf"), 18, 30, float("inf")],
        labels=["Too Cold", "Comfortable", "Too Hot"],
    ),
)


In [2]:
def _peak_month(monthly):
    """Return ``(month, value)`` for the largest non-missing entry, or ``None`` if every entry is missing."""
    valid = monthly.dropna()
    if valid.empty:
        return None
    return valid.idxmax(), valid.max()


def _finish_plot(ax, title, ylabel):
    """Apply the title/label/tick styling shared by every chart, then render the figure."""
    ax.set_title(title)
    ax.set_ylabel(ylabel)
    ax.tick_params(axis="x", labelrotation=45)
    ax.figure.tight_layout()
    plt.show()


def travel_insight(city):
    """Show a travel summary and three monthly charts for one city.

    Reads the module-level ``df`` (columns ``city``, ``month``, ``is_ideal_day``,
    ``comfort_score``, ``temp_category``) and ``month_order``.

    The signature is intentionally limited to ``city``: ``widgets.interact``
    builds its controls from the function signature.

    Parameters
    ----------
    city : str
        Value to match against ``df["city"]``.

    Returns
    -------
    None
        Output is rendered via ``display`` and ``plt.show``.
    """
    city_df = df[df["city"] == city]
    if city_df.empty:
        # Without this guard every monthly mean is NaN and idxmax() has nothing to pick.
        display(Markdown(f"### ✈️ No data available for **{city}**"))
        return

    # Monthly means; observed=False keeps all twelve months, empty ones become NaN.
    monthly = city_df.groupby("month", observed=False)
    ideal_ratio = monthly["is_ideal_day"].mean()
    comfort_avg = monthly["comfort_score"].mean()

    best_ideal = _peak_month(ideal_ratio)
    best_comfort = _peak_month(comfort_avg)

    # Build the summary line by line so a metric with no data degrades gracefully.
    summary = [f"### ✈️ Travel Recommendation for **{city}**", ""]
    if best_ideal is None:
        summary.append("- 📅 **Best month to travel** (most ideal days): not enough data")
    else:
        best_month, best_ratio = best_ideal
        summary.append(
            f"- 📅 **Best month to travel** (most ideal days): **{best_month}** ({best_ratio:.0%} ideal days)"
        )
    if best_comfort is None:
        summary.append("- 🧘 **Most comfortable month** (highest average score): not enough data")
    else:
        top_comfort_month, top_comfort_score = best_comfort
        summary.append(
            f"- 🧘 **Most comfortable month** (highest average score): **{top_comfort_month}** (score: {top_comfort_score:.2f})"
        )
    summary.append("- 🌡️ **Temperature comfort** trends shown below")
    display(Markdown("\n".join(summary)))

    # Plot 1: Ideal day ratio (skipped when there is nothing to draw)
    if best_ideal is not None:
        _, ax = plt.subplots(figsize=(10, 4))
        sns.barplot(x=ideal_ratio.index, y=ideal_ratio.values, ax=ax)
        _finish_plot(ax, "Proportion of Ideal Days by Month", "Ideal Day Ratio")

    # Plot 2: Comfort score (skipped when there is nothing to draw)
    if best_comfort is not None:
        _, ax = plt.subplots(figsize=(10, 4))
        sns.lineplot(x=comfort_avg.index, y=comfort_avg.values, marker="o", ax=ax)
        _finish_plot(ax, "Average Comfort Score by Month", "Comfort Score")

    # Plot 3: Temperature comfort categories
    _, ax = plt.subplots(figsize=(10, 4))
    sns.countplot(data=city_df, x="month", hue="temp_category", order=month_order, ax=ax)
    _finish_plot(ax, "Temperature Comfort Categories by Month", "Number of Days")


In [3]:
# City selector. Missing city values are dropped first so sorted() never has to
# compare NaN (a float) with city-name strings.
dropdown = widgets.Dropdown(
    options=sorted(df["city"].dropna().unique()),
    description="City:",
    style={"description_width": "initial"},
    layout=widgets.Layout(width="50%")
)

# interact() returns the wrapped function; the trailing semicolon stops the cell
# from echoing that function's repr underneath the widget.
widgets.interact(travel_insight, city=dropdown);


interactive(children=(Dropdown(description='City:', layout=Layout(width='50%'), options=('Mahajanga', 'New Yor…

<function __main__.travel_insight(city)>

In [4]:
# `month_order` and the ordered-categorical `month` column are defined once in the
# setup cell at the top of the notebook; they are reused here rather than rebuilt.


def compare_cities_by_month(selected_month):
    """Plot cities ranked by mean comfort score for one month.

    Reads the module-level ``df`` (columns ``month``, ``city``, ``comfort_score``).

    Parameters
    ----------
    selected_month : str
        Value to match against ``df["month"]``.

    Returns
    -------
    None
        Output is rendered via ``plt.show`` (or a short message if there is no data).
    """
    filtered = df[df["month"] == selected_month]
    comparison = (
        filtered.groupby("city", observed=False)["comfort_score"]
        .mean()
        .dropna()  # a city with no usable score has no place in a ranking
        .sort_values(ascending=False)
        .reset_index()
    )

    if comparison.empty:
        # Avoid drawing a blank chart under a "Best Cities" title.
        display(Markdown(f"No comfort-score data available for **{selected_month}**."))
        return

    _, ax = plt.subplots(figsize=(10, 5))
    # hue duplicates x only to colour each bar from the palette; legend is suppressed.
    sns.barplot(data=comparison, x="city", y="comfort_score", hue="city",
                palette="viridis", legend=False, ax=ax)
    ax.set_title(f"Best Cities to Visit in {selected_month} (Avg. Comfort Score)")
    ax.set_ylabel("Comfort Score")
    ax.tick_params(axis="x", labelrotation=45)
    ax.figure.tight_layout()
    plt.show()


In [5]:
# Month selector, listed in the order given by `month_order`.
month_dropdown = widgets.Dropdown(
    options=month_order,
    description="Select Month:",
    style={"description_width": "initial"},
    layout=widgets.Layout(width="50%")
)

# interact() returns the wrapped function; the trailing semicolon stops the cell
# from echoing that function's repr underneath the widget.
widgets.interact(compare_cities_by_month, selected_month=month_dropdown);


interactive(children=(Dropdown(description='Select Month:', layout=Layout(width='50%'), options=('January', 'F…

<function __main__.compare_cities_by_month(selected_month)>