In [None]:
import pandas as pd
import numpy as np  
import os
from prophet import Prophet
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import seaborn as sns
from matplotlib.backends.backend_pdf import PdfPages

In [None]:
# Locate the raw price files: <parent of the working directory>/data/other/...
current_dir = os.getcwd()
parent_dir = os.path.dirname(current_dir)
data = "data/other/From2021_to_Jan2025_PricesHotelsDates"
data_dir = os.path.join(parent_dir, data)

# List the CSV files up front. os.listdir returns entries in arbitrary order,
# so the names are sorted to make the row order of combined_df reproducible.
try:
    csv_file_names = sorted(name for name in os.listdir(data_dir) if name.endswith('.csv'))
except FileNotFoundError:
    print(f"Folder '{data_dir}' not found.")
    # Every later cell needs combined_df, so stop here with the real cause
    # instead of failing further down with a NameError.
    raise

if not csv_file_names:
    raise FileNotFoundError(f"No CSV files found in '{data_dir}'.")

# One DataFrame per file, plus a running row total used to verify the concat
dataframes = []
total_rows = 0

for file_name in csv_file_names:
    file_path = os.path.join(data_dir, file_name)

    df = pd.read_csv(file_path)
    dataframes.append(df)

    print(f"File: {file_name} | Dimensions: {df.shape}")
    total_rows += df.shape[0]

# Stack all files row-wise into a single frame with a fresh 0..n-1 index
combined_df = pd.concat(dataframes, ignore_index=True)
print(f"Combined DataFrame Dimensions: {combined_df.shape}")

# Sanity check: the combined frame must hold exactly the rows that were read
if total_rows == combined_df.shape[0]:
    print("Row count verification successful! Total rows match.")
else:
    print("Row count verification failed! Mismatch in row count.")

combined_df.head()

In [None]:
combined_df.shape

In [None]:
# Remove the loader's temporary names from the kernel namespace; combined_df is not in the list and stays available.
del current_dir, data, data_dir, dataframes, df, file_name, file_path, parent_dir, total_rows

In [None]:
# Currency lookup table. The path is built with os.path.join so it resolves on
# every OS (a literal backslash path only works on Windows).
data_lake_prd_314410_cz_moedas = pd.read_csv(
    os.path.join('..', 'data', 'lookups', 'data-lake-prd-314410.cz.moedas.csv')
)
# head must be called; the bare attribute only echoes the bound method
data_lake_prd_314410_cz_moedas.head()

In [None]:
# Hotel lookup table. The path is built with os.path.join so it resolves on
# every OS (a literal backslash path only works on Windows).
data_lake_prd_314410_cz_hoteis = pd.read_csv(
    os.path.join('..', 'data', 'lookups', 'data-lake-prd-314410.cz.hoteis.csv')
)
# head must be called; the bare attribute only echoes the bound method
data_lake_prd_314410_cz_hoteis.head()

In [None]:
data_lake_prd_314410_cz_hoteis.columns

In [None]:
# Keep only the hotel columns used further down: 'Hotel_ID', 'Moeda', 'Estrelas', 'Cidade_ID'
data_lake_prd_314410_cz_hoteis = data_lake_prd_314410_cz_hoteis.loc[
    :, ['Hotel_ID', 'Moeda', 'Estrelas', 'Cidade_ID']
]

In [None]:
# Attach the 'Cotacao_USD' column of the currency lookup to each hotel via 'Moeda'.
# The left join keeps hotels whose currency is absent from the lookup (NaN for those).
data_lake_prd_314410_cz_hoteis = data_lake_prd_314410_cz_hoteis.merge(
    data_lake_prd_314410_cz_moedas.loc[:, ['Moeda', 'Cotacao_USD']],
    how='left',
    on='Moeda',
)

In [None]:
# Remove every hotel row that has a missing value in any column.
data_lake_prd_314410_cz_hoteis = data_lake_prd_314410_cz_hoteis.dropna()

In [None]:
data_lake_prd_314410_cz_hoteis # if wanting to convert it all to dollars

In [None]:
combined_df

In [None]:
# Enrich every price row with its hotel's attributes (left join on 'Hotel_ID');
# rows whose hotel is missing from the lookup keep NaN in the added columns.
combined_df = combined_df.merge(
    data_lake_prd_314410_cz_hoteis,
    how='left',
    on='Hotel_ID',
)

In [None]:
combined_df

In [None]:
combined_df.isna().sum()

In [None]:
# Remove every row that has a missing value in any column (including price rows that found no hotel in the left join).
combined_df = combined_df.dropna()

In [None]:
combined_df

In [None]:
# One row per (city, currency) pair that occurs in the data
distinct_cidade_moeda = combined_df.loc[:, ['Cidade_ID', 'Moeda']].drop_duplicates()

# Number of distinct currencies per city, as a two-column frame (Cidade_ID, n)
cidade_moeda_count = (
    distinct_cidade_moeda
    .groupby('Cidade_ID')
    .size()
    .reset_index(name='n')
)

# Cities with the most currencies come first
cidade_moeda_count_sorted = cidade_moeda_count.sort_values('n', ascending=False)

print(cidade_moeda_count_sorted)

In [None]:
# Distribution of the currency count: 'nn' is the number of cities that have exactly 'n' distinct currencies.
cidade_moeda_count_sorted.groupby('n').size().reset_index(name='nn')


In [None]:
# Cities that are priced in more than one currency
cidade_moeda_2plus = cidade_moeda_count_sorted.loc[cidade_moeda_count_sorted['n'].gt(1)]
print(cidade_moeda_2plus)

In [None]:
combined_df

In [None]:
# Group by 'Cidade_ID' and calculate the mean of 'DiariaMedia'
combined_df.groupby('Cidade_ID')['DiariaMedia'].mean()

In [None]:
# Booking-weighted mean of "DiariaMedia" per (Data, Moeda, Cidade_ID, Estrelas),
# with "Reservas" as the weight: sum(DiariaMedia * Reservas) / sum(Reservas).
# It is computed from vectorised group sums rather than groupby.apply(np.average):
# apply runs a Python lambda once per group, and np.average raises
# ZeroDivisionError for any group whose weights sum to zero.
# combined_df has no missing values at this point (dropna ran earlier), so the
# group sums see every row.
weighted_totals = (
    combined_df
    .assign(_weighted_price=combined_df['DiariaMedia'] * combined_df['Reservas'])
    .groupby(['Data', 'Moeda', 'Cidade_ID', 'Estrelas'])[['_weighted_price', 'Reservas']]
    .sum()
)

# A group without any bookings has no defined weighted mean, so it is left out
weighted_totals = weighted_totals[weighted_totals['Reservas'] != 0]

combined_city_df = (
    (weighted_totals['_weighted_price'] / weighted_totals['Reservas'])
    .reset_index(name='Weighted_DiariaMedia')
)

In [None]:
# Display the result
combined_city_df.groupby('Cidade_ID')['Weighted_DiariaMedia'].mean()

In [None]:

# Spot check for one city (Cidade_ID 7562201): unweighted mean of 'DiariaMedia'
# per currency and star rating. The built-in grouped mean replaces
# groupby.apply(np.average), which calls a Python lambda for every group.
(
    combined_df[combined_df['Cidade_ID'] == 7562201.0]
    .groupby(['Cidade_ID', 'Moeda', 'Estrelas'])['DiariaMedia']
    .mean()
)

In [None]:

# Same spot check on the aggregated table: mean of the daily 'Weighted_DiariaMedia'
# per currency and star rating for Cidade_ID 7562201. The built-in grouped mean
# replaces groupby.apply(np.average), which calls a Python lambda for every group.
(
    combined_city_df[combined_city_df['Cidade_ID'] == 7562201.0]
    .groupby(['Cidade_ID', 'Moeda', 'Estrelas'])['Weighted_DiariaMedia']
    .mean()
)

In [None]:
combined_city_df

In [None]:
# Sort by city, then star rating, then currency, then date
combined_city_df = combined_city_df.sort_values(by=['Cidade_ID', 'Estrelas', 'Moeda', 'Data'])

In [None]:
combined_city_df

In [None]:
# Parse 'Data' as datetimes and strip any timezone so the column is tz-naive
combined_city_df['Data'] = pd.to_datetime(combined_city_df['Data']).dt.tz_localize(None)
# head must be called; the bare attribute only echoes the bound method
combined_city_df.head()

In [None]:
# IDs of the cities that have at least one observation dated in 2024
cities_2024 = combined_city_df.loc[
    combined_city_df['Data'].dt.year.eq(2024), 'Cidade_ID'
].unique()

# Keep the full history in combined_city_df, but only for those cities
combined_city_df = combined_city_df.loc[combined_city_df['Cidade_ID'].isin(cities_2024)]

# Show the filtered table
combined_city_df

In [None]:
combined_city_df.shape

In [None]:
# Rename the columns to the names used from here on and keep exactly
# these five, in this order: city_id, ds, y, currency, stars.
combined_city_df = combined_city_df.rename(
    columns={
        'Cidade_ID': 'city_id',
        'Data': 'ds',
        'Weighted_DiariaMedia': 'y',
        'Moeda': 'currency',
        'Estrelas': 'stars',
    }
)[['city_id', 'ds', 'y', 'currency', 'stars']]

In [None]:
# Collapse to one row per (city_id, ds, stars, currency), averaging 'y' within each key
combined_city_df = (
    combined_city_df
    .groupby(['city_id', 'ds', 'stars', 'currency'], as_index=False)['y']
    .mean()
)
combined_city_df.shape

In [None]:
combined_city_df

In [None]:
print(combined_city_df['ds'].min())
print(combined_city_df['ds'].max())

In [None]:
# Series key "<city_id>__<stars>__<currency>"; city_id and stars are cast to int
# first so the key carries no trailing ".0"
combined_city_df['city_stars_currency'] = (
    combined_city_df['city_id'].astype(int).astype(str).str.cat(
        [
            combined_city_df['stars'].astype(int).astype(str),
            combined_city_df['currency'],
        ],
        sep='__',
    )
)

In [None]:
combined_city_df

In [None]:
# Number of rows (one per date) available for each city/stars/currency series
city_stars_currencies_counts = combined_city_df.groupby('city_stars_currency').size()

# Keys of the series that have more than 183 rows/dates
city_stars_currencies_with_more_than_183_rows = city_stars_currencies_counts[city_stars_currencies_counts > 183].index

# Keep only those series. The explicit copy makes the result an independent frame,
# so the later column assignments on it do not raise SettingWithCopyWarning.
filtered_combined_city_df = combined_city_df[
    combined_city_df['city_stars_currency'].isin(city_stars_currencies_with_more_than_183_rows)
].copy()

# Display the filtered DataFrame
filtered_combined_city_df

In [None]:
print(combined_city_df.shape)
print(filtered_combined_city_df.shape)

In [None]:
city_stars_currency_list = filtered_combined_city_df['city_stars_currency'].drop_duplicates()
print(city_stars_currency_list.shape)

In [None]:
city_stars_currency_list = city_stars_currency_list.tolist()

In [None]:
filtered_combined_city_df

In [None]:
# Fit one Prophet model per city/stars/currency series and forecast 365 days ahead.

# Make sure the output folder exists before the fitting loop starts, so a missing
# folder is not discovered only after every model has been fitted.
os.makedirs("../out", exist_ok=True)

# Forecast frames are collected in a list and concatenated once at the end;
# calling pd.concat inside the loop re-copies all earlier rows on every iteration.
forecast_frames = []

for city_stars_currency in city_stars_currency_list:
    print(f"Processing forecast for City/Stars/Currency {city_stars_currency}...")

    # History of this series only
    city_stars_currency_data = filtered_combined_city_df[filtered_combined_city_df['city_stars_currency'] == city_stars_currency]

    # Fit the model on the date ('ds') and value ('y') columns
    model = Prophet()
    model.fit(city_stars_currency_data[["ds", "y"]])

    # Historical dates plus 365 future days
    future = model.make_future_dataframe(periods=365)
    forecast = model.predict(future)

    # Tag every row with the series it belongs to
    forecast["city_stars_currency"] = city_stars_currency

    # Rows up to the last observed date are fitted history, later rows are forecast.
    # The last date is computed once instead of once per forecast row.
    last_observed_date = city_stars_currency_data["ds"].max()
    forecast["data_type"] = np.where(forecast["ds"] <= last_observed_date, "historical", "forecast")

    forecast_frames.append(forecast)

# An empty key list yields an empty frame
all_forecasts = pd.concat(forecast_frames, ignore_index=True) if forecast_frames else pd.DataFrame()

# Save the results to a CSV file
all_forecasts.to_csv("../out/city_stars_currency_forecasts_From2021_to_Jan2025onwards.csv", index=False)
print("Forecasting complete. Results saved as 'city_stars_currency_forecasts_From2021_to_Jan2025onwards.csv'.")

In [None]:
all_forecasts

In [None]:
all_forecasts['data_type'].unique()

In [None]:
city_stars_currency_forecasts_From2021_to_Jan2025onwards = pd.read_csv("../out/city_stars_currency_forecasts_From2021_to_Jan2025onwards.csv")

In [None]:
combined_df['Data'] = pd.to_datetime(combined_df['Data']).dt.tz_localize(None)


In [None]:
filtered_combined_city_df
city_stars_currency_forecasts_From2021_to_Jan2025onwards

In [None]:
city_stars_currency_list = city_stars_currency_forecasts_From2021_to_Jan2025onwards['city_stars_currency'].drop_duplicates()

In [None]:
# Make 'ds' a datetime column in both tables. assign() returns new frames, so nothing
# is written into a filtered slice of another frame (no SettingWithCopyWarning).
filtered_combined_city_df = filtered_combined_city_df.assign(
    ds=pd.to_datetime(filtered_combined_city_df["ds"])
)
city_stars_currency_forecasts_From2021_to_Jan2025onwards = city_stars_currency_forecasts_From2021_to_Jan2025onwards.assign(
    ds=pd.to_datetime(city_stars_currency_forecasts_From2021_to_Jan2025onwards["ds"])
)

In [None]:
# Restrict both tables to the series keys listed in city_stars_currency_list
filtered_combined_city_df = filtered_combined_city_df.loc[
    filtered_combined_city_df['city_stars_currency'].isin(city_stars_currency_list)
]
city_stars_currency_forecasts_From2021_to_Jan2025onwards = city_stars_currency_forecasts_From2021_to_Jan2025onwards.loc[
    city_stars_currency_forecasts_From2021_to_Jan2025onwards['city_stars_currency'].isin(city_stars_currency_list)
]


In [None]:
filtered_combined_city_df

In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np
import pandas as pd

# Accuracy of each series' model against its own history.
# NOTE: the forecasts are joined to the same observations the models were fitted on,
# so these are in-sample (training-fit) errors, not out-of-sample forecast errors.

# Fixed column list, so error_df has these columns even when no series produced a row
metric_columns = ["city_currency", "MAE", "RMSE", "MAPE"]
error_metrics = []

# Loop through each city/stars/currency series
for city_stars_currency in city_stars_currency_list:
    # Only the columns needed for the comparison are carried into the merge
    hotel_forecast = city_stars_currency_forecasts_From2021_to_Jan2025onwards.loc[
        city_stars_currency_forecasts_From2021_to_Jan2025onwards["city_stars_currency"] == city_stars_currency,
        ["ds", "yhat"],
    ]
    historical_data = filtered_combined_city_df.loc[
        filtered_combined_city_df["city_stars_currency"] == city_stars_currency,
        ["ds", "y"],
    ]

    # Pair observed and predicted values on the dates present in both tables
    comparison = historical_data.merge(hotel_forecast, on="ds", how="inner")

    # Skip if no matching dates found
    if comparison.empty:
        print(f"Skipping hotel {city_stars_currency}: No matching dates found.")
        continue

    # Compute error metrics
    mae = mean_absolute_error(comparison["y"], comparison["yhat"])
    rmse = np.sqrt(mean_squared_error(comparison["y"], comparison["yhat"]))
    # MAPE is infinite when an observed y is 0; the MAPE filter in a later cell drops such series
    mape = np.mean(np.abs((comparison["y"] - comparison["yhat"]) / comparison["y"])) * 100

    # The key is stored under 'city_currency' because later cells read that column name
    error_metrics.append({"city_currency": city_stars_currency, "MAE": mae, "RMSE": rmse, "MAPE": mape})

# Convert results to a DataFrame
error_df = pd.DataFrame(error_metrics, columns=metric_columns)

# Display the error metrics table
print(error_df)

# Save to CSV for further analysis (create the folder first if it is missing)
os.makedirs("../out", exist_ok=True)
error_df.to_csv("../out/city_stars_currency_error_metrics_From2021_to_Jan2025onwards.csv", index=False)

print("Error metrics saved to 'city_stars_currency_error_metrics_From2021_to_Jan2025onwards.csv'.")


In [None]:
error_df

In [None]:
# Keep only series with 0 < MAPE < 100. Both comparisons are False for NaN, and inf fails the upper bound, so those rows are dropped as well.
error_df = error_df[ (error_df['MAPE'] < 100) & (error_df['MAPE'] > 0) ]

In [None]:
error_df

In [None]:
error_df['MAPE'].describe()

In [None]:
# Turn +/-inf into NaN and drop the rows whose MAPE is NaN.
# NOTE(review): when the cells run in order, the earlier 0 < MAPE < 100 filter has
# already removed every infinite or NaN MAPE, so this step changes nothing.
error_df = error_df.replace([np.inf, -np.inf], np.nan).dropna(subset=["MAPE"])


In [None]:
error_df['MAPE'].describe()

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Distribution of MAPE across series, restricted to 0 < MAPE < 100
fig, ax = plt.subplots(figsize=(8, 6))
sns.histplot(
    data=error_df.loc[error_df['MAPE'].lt(100) & error_df['MAPE'].gt(0)],
    x="MAPE",
    bins=100,
    kde=True,
    ax=ax,
)

# Axis labels and title
ax.set_xlabel("\n MAPE")
ax.set_ylabel("Frequency \n")
ax.set_title("Histogram of Mean Absolute Percent Error [MAPE]")

# Render the figure
plt.show()


In [None]:
# Ensure 'ds' is a datetime column in both tables. assign() returns new frames, so
# nothing is written into a filtered slice of another frame (no SettingWithCopyWarning).
filtered_combined_city_df = filtered_combined_city_df.assign(
    ds=pd.to_datetime(filtered_combined_city_df["ds"])
)
city_stars_currency_forecasts_From2021_to_Jan2025onwards = city_stars_currency_forecasts_From2021_to_Jan2025onwards.assign(
    ds=pd.to_datetime(city_stars_currency_forecasts_From2021_to_Jan2025onwards["ds"])
)

# Pair observations with predictions on series key and date; the inner join keeps
# only dates that exist in both tables.
comparison = filtered_combined_city_df.merge(
    city_stars_currency_forecasts_From2021_to_Jan2025onwards,
    on=["city_stars_currency", "ds"],
    how="inner"
)

In [None]:
error_df

In [None]:
# Series keys still present in error_df (its 'city_currency' column holds the city_stars_currency key)
city_stars_currency_to_keep_error = error_df['city_currency'].drop_duplicates()

In [None]:
city_stars_currency_to_keep_error

In [None]:
comparison.shape

In [None]:
comparison = comparison[comparison['city_stars_currency'].isin(city_stars_currency_to_keep_error)]


In [None]:
# Pick up to 50 series at random for the panel plot below.
# - n is capped at the number of available series, because sample() raises when
#   asked for more items than exist.
# - random_state fixes the draw so a re-run plots the same series.
# - No sort is applied first: ordering has no effect on a random sample.
series_sizes = comparison.groupby('city_stars_currency').size()
city_stars_currency_list = (
    series_sizes
    .sample(n=min(50, len(series_sizes)), random_state=42)
    .index
    .tolist()
)

In [None]:
city_stars_currency_list

In [None]:

# Apply the default seaborn look
sns.set_theme()

# One stacked panel per series (12 x 6 inches each), all sharing the date axis
n_panels = len(city_stars_currency_list)
fig, axes = plt.subplots(n_panels, 1, figsize=(12, 6 * n_panels), sharex=True)

# With a single panel plt.subplots returns a bare Axes, so wrap it for the loop below
if n_panels == 1:
    axes = [axes]

# Line colours for observed and predicted values
colors = {"historical": "#E195AB", "forecast": "#27445D"}

# Draw each series in its own panel
for ax, city_stars_currency in zip(axes, city_stars_currency_list):
    # Rows of the comparison table that belong to this series
    city_stars_currency_forecast = comparison[comparison["city_stars_currency"] == city_stars_currency]

    # Observed values
    sns.lineplot(
        data=city_stars_currency_forecast,
        x="ds",
        y="y",
        label="Historical",
        color=colors["historical"],
        ax=ax,
    )

    # Predicted values
    sns.lineplot(
        data=city_stars_currency_forecast,
        x="ds",
        y="yhat",
        label="Forecast",
        color=colors["forecast"],
        linestyle="dashed",
        ax=ax,
    )

    # Panel title, axis labels, legend and grid
    ax.set_title(f"City|Stars|Currency {city_stars_currency} Forecast")
    ax.set_xlabel("Date")
    ax.set_ylabel("Prices [Local Currency]")
    ax.legend()
    ax.grid(True)

    # y-axis runs from zero up to the largest observed or predicted value
    ax.set_ylim(0, city_stars_currency_forecast[["y", "yhat"]].max().max())

# Tighten spacing so the panels do not overlap, then render
plt.tight_layout()
plt.show()



In [None]:
city_stars_currency_forecasts_From2021_to_Jan2025onwards['city_stars_currency'].nunique()

In [None]:

# Keep, in both tables, only the series whose key is in city_stars_currency_to_keep_error
filtered_combined_city_df = filtered_combined_city_df.loc[
    filtered_combined_city_df['city_stars_currency'].isin(city_stars_currency_to_keep_error)
]

city_stars_currency_forecasts_From2021_to_Jan2025onwards = city_stars_currency_forecasts_From2021_to_Jan2025onwards.loc[
    city_stars_currency_forecasts_From2021_to_Jan2025onwards['city_stars_currency'].isin(city_stars_currency_to_keep_error)
]



In [None]:
# Pull out one series for a detailed plot, using the hard-coded key "6978686__3__BRL"
# (key format: <city_id>__<stars>__<currency>): its history and its forecast rows.
combined_df_hist = filtered_combined_city_df[filtered_combined_city_df['city_stars_currency']=="6978686__3__BRL"]
all_citiy_stars_currencies_forecasts_From2021_to_Jan2025onwards_future = city_stars_currency_forecasts_From2021_to_Jan2025onwards[city_stars_currency_forecasts_From2021_to_Jan2025onwards['city_stars_currency']=="6978686__3__BRL"]

In [None]:
all_citiy_stars_currencies_forecasts_From2021_to_Jan2025onwards_future

In [None]:
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import seaborn as sns

# Set the seaborn theme
sns.set_theme()

# Short alias for the selected series' forecast rows
selected_forecast = all_citiy_stars_currencies_forecasts_From2021_to_Jan2025onwards_future

# Key of the plotted series as plain text. Formatting the unique() array directly
# would put its brackets and quotes into the title and the file name.
series_label = ", ".join(combined_df_hist['city_stars_currency'].unique())

# Observed data and predictions in different colours
plt.figure(figsize=(16, 8))
plt.plot(combined_df_hist['ds'], combined_df_hist['y'], label="Observed Data", color="#E195AB", alpha=0.8)
plt.plot(selected_forecast['ds'], selected_forecast['yhat'], label="Predicted Data", color="#27445D", alpha=0.6)

# Shaded band between the lower and upper prediction bounds
plt.fill_between(
    selected_forecast['ds'],
    selected_forecast['yhat_lower'],
    selected_forecast['yhat_upper'],
    color="#27445D", alpha=0.2, label="Uncertainty Interval"
)

# Customize x-axis for better readability
plt.gca().xaxis.set_major_locator(mdates.MonthLocator(interval=1))  # One tick per month
plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))  # Format as Year-Month
plt.gcf().autofmt_xdate(rotation=90)  # Rotate x-axis labels vertically

# Customize the plot
plt.title(f"City Stars Currency Data Forecast - City ID + Hotel Stars + Currency {series_label}", fontsize=16)
plt.xlabel("\n Exact Date", fontsize=14)
plt.ylabel("Average Daily Price (Local Currency) \n", fontsize=14)
plt.legend(fontsize=12)
plt.grid(True, linestyle="--", alpha=0.6)

# Finalise the layout before saving so the saved image matches what is shown
plt.tight_layout()

# Save the plot before showing (create the folder first if it is missing)
os.makedirs("../out/figures", exist_ok=True)
plt.savefig(f"../out/figures/city_stars_currency_forecast_{series_label}.png", dpi=300, bbox_inches="tight")

# Show the plot
plt.show()


In [None]:

# Export the retained history and the matching forecasts as CSV files without the index column.
# NOTE(review): the first file name repeats "_final_selected" twice - looks unintended; confirm
# with whoever consumes the file before renaming it.
filtered_combined_city_df.to_csv("../out/all_city_stars_currency_historic_From2021_to_Jan2025_final_selected_final_selected.csv", index=False)
city_stars_currency_forecasts_From2021_to_Jan2025onwards.to_csv("../out/city_stars_currency_forecasts_From2021_to_Jan2025onwards_final_selected.csv", index=False)



In [None]:
filtered_combined_city_df


In [None]:
# Save one PNG per retained series: observed history, predictions and uncertainty band.
sns.set_theme()

# Create the target folder up front so savefig cannot fail on a missing directory
png_dir = "../out/figures/png"
os.makedirs(png_dir, exist_ok=True)

for city_stars_currency in city_stars_currency_to_keep_error:
    # History and forecast rows of the current series
    city_currency_hist = filtered_combined_city_df[filtered_combined_city_df["city_stars_currency"] == city_stars_currency]
    city_currency_forecast = city_stars_currency_forecasts_From2021_to_Jan2025onwards[
        city_stars_currency_forecasts_From2021_to_Jan2025onwards["city_stars_currency"] == city_stars_currency
    ]

    # New figure for this series
    fig, ax = plt.subplots(figsize=(16, 8))

    # Observed data
    ax.plot(city_currency_hist["ds"], city_currency_hist["y"], label="Observed Data", color="#E195AB", alpha=0.8)

    # Predicted data
    ax.plot(city_currency_forecast["ds"], city_currency_forecast["yhat"], label="Predicted Data", color="#27445D", alpha=0.6)

    # Shaded band between the lower and upper prediction bounds
    ax.fill_between(
        city_currency_forecast["ds"],
        city_currency_forecast["yhat_lower"],
        city_currency_forecast["yhat_upper"],
        color="#27445D", alpha=0.2, label="Uncertainty Interval"
    )

    # One tick per month, labelled Year-Month, rotated vertically
    ax.xaxis.set_major_locator(mdates.MonthLocator(interval=1))
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
    fig.autofmt_xdate(rotation=90)

    # Title, labels, legend and grid
    ax.set_title(f"City Stars Currency Data Forecast - City ID + Hotel Stars + Currency {city_stars_currency}", fontsize=16)
    ax.set_xlabel("\n Exact Date", fontsize=14)
    ax.set_ylabel("Average Daily Price (Local Currency) \n", fontsize=14)
    ax.legend(fontsize=12)
    ax.grid(True, linestyle="--", alpha=0.6)

    # File name carries the series key
    fig.savefig(f"{png_dir}/city_stars_currency_forecast_{city_stars_currency}.png", dpi=300, bbox_inches="tight")

    # Close the figure to free memory before the next iteration
    plt.close(fig)

print("All plots saved successfully!")

In [None]:
# Write one page per retained series into a single PDF: observed history,
# predictions and uncertainty band.
sns.set_theme()

# Create the target folder up front so PdfPages cannot fail on a missing directory
pdf_dir = "../out/figures/pdf"
pdf_name = "city_stars_currency_forecasts_prophet_365_2025.pdf"
os.makedirs(pdf_dir, exist_ok=True)

with PdfPages(f"{pdf_dir}/{pdf_name}") as pdf:
    for city_stars_currency in city_stars_currency_to_keep_error:
        # History and forecast rows of the current series
        city_currency_hist = filtered_combined_city_df[filtered_combined_city_df["city_stars_currency"] == city_stars_currency]
        city_currency_forecast = city_stars_currency_forecasts_From2021_to_Jan2025onwards[
            city_stars_currency_forecasts_From2021_to_Jan2025onwards["city_stars_currency"] == city_stars_currency
        ]

        # New figure for this series
        fig, ax = plt.subplots(figsize=(16, 8))

        # Observed data
        ax.plot(city_currency_hist["ds"], city_currency_hist["y"], label="Observed Data", color="#E195AB", alpha=0.8)

        # Predicted data
        ax.plot(city_currency_forecast["ds"], city_currency_forecast["yhat"], label="Predicted Data", color="#27445D", alpha=0.6)

        # Shaded band between the lower and upper prediction bounds
        ax.fill_between(
            city_currency_forecast["ds"],
            city_currency_forecast["yhat_lower"],
            city_currency_forecast["yhat_upper"],
            color="#27445D", alpha=0.2, label="Uncertainty Interval"
        )

        # One tick per month, labelled Year-Month, rotated vertically
        ax.xaxis.set_major_locator(mdates.MonthLocator(interval=1))
        ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
        fig.autofmt_xdate(rotation=90)

        # Title, labels, legend and grid
        ax.set_title(f"City Data Forecast - City ID + Hotel Stars + Currency {city_stars_currency}", fontsize=16)
        ax.set_xlabel("\n Exact Date", fontsize=14)
        ax.set_ylabel("Average Daily Price (Local Currency) \n", fontsize=14)
        ax.legend(fontsize=12)
        ax.grid(True, linestyle="--", alpha=0.6)

        # Add this figure as a page, then close it to free memory
        pdf.savefig(fig)
        plt.close(fig)

# The message names the file that was actually written
print(f"All plots saved successfully in '{pdf_name}'!")

In [None]:
import plotly.graph_objects as go
import pandas as pd

# Write one interactive HTML plot per retained series.
output_dir = "../out/figures/html/"

# Create the target folder up front so write_html cannot fail on a missing directory
os.makedirs(output_dir, exist_ok=True)

for city_stars_currency in city_stars_currency_to_keep_error:
    # History and forecast rows of the current series
    city_stars_currency_hist = filtered_combined_city_df[filtered_combined_city_df["city_stars_currency"] == city_stars_currency]
    city_stars_currency_forecast = city_stars_currency_forecasts_From2021_to_Jan2025onwards[
        city_stars_currency_forecasts_From2021_to_Jan2025onwards["city_stars_currency"] == city_stars_currency
    ]

    # Create interactive plot
    fig = go.Figure()

    # Observed data
    fig.add_trace(go.Scatter(
        x=city_stars_currency_hist["ds"],
        y=city_stars_currency_hist["y"],
        mode="lines",
        name="Observed Data",
        line=dict(color="#E195AB")
    ))

    # Predicted data
    fig.add_trace(go.Scatter(
        x=city_stars_currency_forecast["ds"],
        y=city_stars_currency_forecast["yhat"],
        mode="lines",
        name="Predicted Data",
        line=dict(color="#27445D", dash="dash")
    ))

    # Uncertainty band drawn as one closed polygon: the upper bound left to right,
    # then the lower bound right to left
    forecast_dates = city_stars_currency_forecast["ds"].tolist()
    fig.add_trace(go.Scatter(
        x=forecast_dates + forecast_dates[::-1],
        y=city_stars_currency_forecast["yhat_upper"].tolist() + city_stars_currency_forecast["yhat_lower"].tolist()[::-1],
        fill="toself",
        fillcolor="rgba(39,68,93,0.3)",
        line=dict(color="rgba(255,255,255,0)"),
        name="Uncertainty Interval"
    ))

    # Customize layout
    fig.update_layout(
        title=f"City Data Forecast - City ID + Hotel Stars + Currency {city_stars_currency}",
        xaxis_title="Exact Date",
        yaxis_title="Average Daily Price (Local Currency)",
        xaxis=dict(tickformat="%Y-%m", tickangle=90),
        template="plotly_white"
    )

    # Save as HTML file named after the series key
    output_path = os.path.join(output_dir, f"city_stars_currency_forecast_{city_stars_currency}.html")
    fig.write_html(output_path)

print("All interactive plots saved successfully!")
