In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px


In [3]:
df = pd.read_csv("df_resid_limpio.csv")

In [4]:
df

Unnamed: 0,RESIDENCIA/ORIGEN,Países,Viajeros y pernoctaciones,Total,Año,mes,Nom_mes
0,Total,,Viajero,10018537.0,2025,4,Abril
1,Total,,Viajero,7680722.0,2025,3,Marzo
2,Total,,Viajero,6539960.0,2025,2,Febrero
3,Total,,Viajero,5776075.0,2025,1,Enero
4,Total,,Viajero,6553489.0,2024,12,Diciembre
...,...,...,...,...,...,...,...
20219,Asia (sin Japón),,Pernoctaciones,25427.0,1999,5,Mayo
20220,Asia (sin Japón),,Pernoctaciones,26878.0,1999,4,Abril
20221,Asia (sin Japón),,Pernoctaciones,24271.0,1999,3,Marzo
20222,Asia (sin Japón),,Pernoctaciones,18530.0,1999,2,Febrero


In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20224 entries, 0 to 20223
Data columns (total 7 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   RESIDENCIA/ORIGEN          20224 non-null  object 
 1   Países                     17064 non-null  object 
 2   Viajeros y pernoctaciones  20224 non-null  object 
 3   Total                      18860 non-null  float64
 4   Año                        20224 non-null  int64  
 5   mes                        20224 non-null  int64  
 6   Nom_mes                    20224 non-null  object 
dtypes: float64(1), int64(2), object(4)
memory usage: 1.1+ MB


In [6]:
df

Unnamed: 0,RESIDENCIA/ORIGEN,Países,Viajeros y pernoctaciones,Total,Año,mes,Nom_mes
0,Total,,Viajero,10018537.0,2025,4,Abril
1,Total,,Viajero,7680722.0,2025,3,Marzo
2,Total,,Viajero,6539960.0,2025,2,Febrero
3,Total,,Viajero,5776075.0,2025,1,Enero
4,Total,,Viajero,6553489.0,2024,12,Diciembre
...,...,...,...,...,...,...,...
20219,Asia (sin Japón),,Pernoctaciones,25427.0,1999,5,Mayo
20220,Asia (sin Japón),,Pernoctaciones,26878.0,1999,4,Abril
20221,Asia (sin Japón),,Pernoctaciones,24271.0,1999,3,Marzo
20222,Asia (sin Japón),,Pernoctaciones,18530.0,1999,2,Febrero


In [7]:
pernoctaciones = df[df["Viajeros y pernoctaciones"].str.contains("Pernoctaciones",na=False)]

In [8]:
pernoctaciones[["RESIDENCIA/ORIGEN","Países","Viajeros y pernoctaciones","Total","Año","mes"]]

Unnamed: 0,RESIDENCIA/ORIGEN,Países,Viajeros y pernoctaciones,Total,Año,mes
316,Total,,Pernoctaciones,29070077.0,2025,4
317,Total,,Pernoctaciones,22344694.0,2025,3
318,Total,,Pernoctaciones,18412444.0,2025,2
319,Total,,Pernoctaciones,16910798.0,2025,1
320,Total,,Pernoctaciones,18033188.0,2024,12
...,...,...,...,...,...,...
20219,Asia (sin Japón),,Pernoctaciones,25427.0,1999,5
20220,Asia (sin Japón),,Pernoctaciones,26878.0,1999,4
20221,Asia (sin Japón),,Pernoctaciones,24271.0,1999,3
20222,Asia (sin Japón),,Pernoctaciones,18530.0,1999,2


In [12]:
# Ensure 'Total' is numeric
pernoctaciones["Total"] = pd.to_numeric(pernoctaciones["Total"], errors='coerce')

# Filter for years greater than 2022
pernoctaciones= pernoctaciones[pernoctaciones["Año"]>2022]



# Identify countries that never reach 2,000,000 pernoctaciones in any month
# First, find the maximum 'Total' for each 'Países'
max_pernoctaciones_per_pais = pernoctaciones.groupby("Países")["Total"].max()

# Get the list of countries where max pernoctaciones is less than 2,000,000
countries_to_group = max_pernoctaciones_per_pais[max_pernoctaciones_per_pais < 1000].index.tolist()

# Replace the 'Países' name for these identified countries with "conjunto"
pernoctaciones["Países_Agrupado"] = pernoctaciones["Países"].apply(
    lambda x: "conjunto" if x in countries_to_group else x
)



# Combine 'Año' and 'mes' into a single date-like column for better time series plotting
# Assuming 'mes' is a month number (1-12)
pernoctaciones["Fecha"] = pd.to_datetime(pernoctaciones["Año"].astype(str) + "-" + pernoctaciones["mes"].astype(str) + "-01")


# Aggregate the data by 'Fecha' and the new 'Países_Agrupado' column
# This is crucial to sum the 'Total' for the "conjunto" group
pernoctaciones_grouped = pernoctaciones.groupby(["Fecha", "Países_Agrupado"])["Total"].sum().reset_index()

# Sort by date for correct line plot order
pernoctaciones_grouped = pernoctaciones_grouped.sort_values(by="Fecha")




'''
# Sort by date for correct line plot order
pernoctaciones = pernoctaciones.sort_values(by="Fecha")
'''
# Define the custom green color palette
green_colors = [
    "#004d00",  # dark green
    "#237a23",
    "#4ca64c",
    "#80c080",
    "#b3d9b3",
    "#99cc99",
    "#66b366",
    "#339933",
    "#1a801a"
]


# Create the line plot using the grouped DataFrame
fig = px.line(
    pernoctaciones_grouped,
    x="Fecha",                  # X-axis: Time (Año and mes combined)
    y="Total",                  # Y-axis: Total pernoctaciones
    color="Países_Agrupado",    # Different lines for each 'Países_Agrupado'
    line_group="Países_Agrupado", # Ensures lines are drawn correctly for each group
    hover_name="Países_Agrupado", # Display 'Países_Agrupado' on hover
    hover_data={                # Additional data to show on hover
        "Total": True,
        "Fecha": False          # Hide the combined 'Fecha' from hover
    },
    title="Total de Pernoctaciones por País (Agrupado) a lo Largo del Tiempo (Año > 2022)", # Plot title
    color_discrete_sequence=green_colors, # Apply the custom green colors
    labels={                    # Axis labels
        "Fecha": "Fecha",
        "Total": "Total de Pernoctaciones",
        "Países_Agrupado": "Países/Conjunto"
    }
)

# Customize the plot layout for better readability
fig.update_layout(
    xaxis_title="Fecha",
    yaxis_title="Total de Pernoctaciones",
    legend_title="Países/Conjunto",
    hovermode="x unified" # Unify hover information across all lines at a given x-value
)

# Show the plot
fig.show()

'''# Export the plot to an HTML file
fig.write_html("pernoctaciones_grafico.html")
print("El gráfico se ha exportado a 'pernoctaciones_grafico.html'")
'''


'''
# Create the line plot
fig = px.line(
    pernoctaciones,
    x="Fecha",                  # X-axis: Time (Año and mes combined)
    y="Total",                  # Y-axis: Total pernoctaciones
    color="Países",             # Different lines for each 'Países'
    line_group="Países",        # Ensures lines are drawn correctly for each group
    hover_name="Países",        # Display 'Países' on hover
    hover_data={                # Additional data to show on hover
        "RESIDENCIA/ORIGEN": True,
        "Total": True,
        "Año": True,
        "mes": True,
        "Fecha": False          # Hide the combined 'Fecha' from hover
    },
    title="Total de Pernoctaciones por País a lo Largo del Tiempo", # Plot title
    color_discrete_sequence=green_colors, # Apply the custom green colors
    labels={                    # Axis labels
        "Fecha": "Fecha",
        "Total": "Total de Pernoctaciones",
        "Países": "Países"
    }
)

# Customize the plot layout for better readability
fig.update_layout(
    xaxis_title="Fecha",
    yaxis_title="Total de Pernoctaciones",
    legend_title="Países",
    hovermode="x unified" # Unify hover information across all lines at a given x-value
)

# Show the plot
fig.show()
'''

'\n# Create the line plot\nfig = px.line(\n    pernoctaciones,\n    x="Fecha",                  # X-axis: Time (Año and mes combined)\n    y="Total",                  # Y-axis: Total pernoctaciones\n    color="Países",             # Different lines for each \'Países\'\n    line_group="Países",        # Ensures lines are drawn correctly for each group\n    hover_name="Países",        # Display \'Países\' on hover\n    hover_data={                # Additional data to show on hover\n        "RESIDENCIA/ORIGEN": True,\n        "Total": True,\n        "Año": True,\n        "mes": True,\n        "Fecha": False          # Hide the combined \'Fecha\' from hover\n    },\n    title="Total de Pernoctaciones por País a lo Largo del Tiempo", # Plot title\n    color_discrete_sequence=green_colors, # Apply the custom green colors\n    labels={                    # Axis labels\n        "Fecha": "Fecha",\n        "Total": "Total de Pernoctaciones",\n        "Países": "Países"\n    }\n)\n\n# Customize t