In [4]:
import pandas as pd
import plotly_express as px


In [5]:
import os

# Location of the World Bank CSV export. Set the WORLD_BANK_CSV environment
# variable to load the file from somewhere else (e.g. on another machine);
# when it is not set, the original author-specific path is used, so existing
# behaviour is unchanged.
WORLD_BANK_CSV = os.environ.get(
    "WORLD_BANK_CSV",
    "/Users/fernandosotres/Desktop/PYTHON/Demography/blank-app/world_bank_data.csv",
)
demographics = pd.read_csv(WORLD_BANK_CSV)

In [None]:

# Shorten headers that contain ' [YR' (e.g. '1974 [YR1974]') to the text before
# their first space ('1974'); every other header is kept unchanged.
demographics.columns = list(map(
    lambda header: header.split(' ', 1)[0] if ' [YR' in header else header,
    demographics.columns,
))

In [13]:
# Year columns are the ones whose header is made only of digits (e.g. '1974').
year_cols = [header for header in demographics.columns if header.isdigit()]

# Parse those columns as numbers (values that cannot be parsed become NaN),
# then round to two decimals and write the result back into the frame.
numeric_years = demographics[year_cols].apply(pd.to_numeric, errors='coerce')
demographics[year_cols] = numeric_years.round(2)

In [49]:
# Show every distinct value found in the 'Country Name' column.
country_names = demographics['Country Name'].unique()
print(country_names)


['Africa Eastern and Southern' 'Arab World' 'Central African Republic'
 'East Asia & Pacific' 'European Union' 'High income'
 'Latin America & Caribbean' 'Low income' 'Lower middle income' 'Mexico'
 'Middle East & North Africa' 'Middle income' 'North America'
 'South Africa' 'South Asia' 'Spain' 'Sub-Saharan Africa'
 'Upper middle income' nan]


In [20]:
# Collect the distinct 'Series Name' values into a Series (one name per row).
series_labels = demographics['Series Name'].unique().tolist()
unique_series_names = pd.Series(series_labels)
# Discard the entries at positions 9, 10 and 11 and renumber from 0.
# NOTE(review): these positions are hard-coded; presumably they are blank or
# footer rows of the CSV export -- confirm against the data.
unique_series_names = unique_series_names.drop(index=[9, 10, 11]).reset_index(drop=True)
print(unique_series_names)

0                 Death rate, crude (per 1,000 people)
1              Life expectancy at birth, total (years)
2                                        Net migration
3             Fertility rate, total (births per woman)
4                                    Population, total
5             Rural population (% of total population)
6                                     Urban population
7    Total alcohol consumption per capita (liters o...
8                         Population growth (annual %)
dtype: object


In [63]:
# Re-bind `demographics` to a copy sorted in ascending order of 'Country Name'.
demographics = demographics.sort_values('Country Name', ascending=True)

In [52]:
import plotly.express as px

# Year columns from 1974 onwards (headers made only of digits).
year_cols = [col for col in demographics.columns if col.isdigit() and int(col) >= 1974]

# Income-group aggregates that are left out of this chart.
to_remove = [
    'Upper middle income', 'Middle income', 'Lower middle income', 'Low income', 'High income'
]

# Keep the rows of the two series of interest, without the income groups.
df_fertility = demographics[
    (demographics['Series Name'] == 'Fertility rate, total (births per woman)') &
    (~demographics['Country Name'].isin(to_remove))
]
df_urban = demographics[
    (demographics['Series Name'] == 'Urban population') &
    (~demographics['Country Name'].isin(to_remove))
]

# Melt to long format: one row per (Country Name, Year); missing values dropped.
fertility_melted = df_fertility.melt(
    id_vars=['Country Name'],
    value_vars=year_cols,
    var_name='Year',
    value_name='Fertility rate, total (births per woman)'
).dropna()

urban_melted = df_urban.melt(
    id_vars=['Country Name'],
    value_vars=year_cols,
    var_name='Year',
    value_name='Urban population'
).dropna()

# Join both long frames on country and year (only pairs present in both).
merged = pd.merge(
    fertility_melted,
    urban_melted,
    on=['Country Name', 'Year'],
    how='inner'
)

# Animated scatter: one frame per year, one colour per 'Country Name'.
fig = px.scatter(
    merged,
    x='Fertility rate, total (births per woman)',
    y='Urban population',
    color='Country Name',
    animation_frame='Year',
    title='Índice De Fertilidad vs  Población Urbanizada',
    labels={
        'Fertility rate, total (births per woman)': 'Tasa De Fertilidad (Hijos Por Mujer)',
        'Urban population': 'Población Urbanizada',
        # Relabelling 'Year' here is what makes the animation slider read
        # "Año=" (and the hover entry too). Plotly express names its frames
        # with the bare year value, so the previous loop replacing "Year=" in
        # frame names changed nothing, and patching fig.layout.sliders[0]
        # afterwards raised IndexError when the data held a single year.
        'Year': 'Año'
    }
)

# Fix both axis ranges so every point stays visible across all frames.
fig.update_xaxes(range=[0, 7])
fig.update_yaxes(range=[0, merged['Urban population'].max() * 1.05])

# Larger markers.
fig.update_traces(marker=dict(size=12))

fig.show()

In [53]:
import plotly.express as px

# Year columns from 1974 onwards (headers made only of digits).
year_cols = [col for col in demographics.columns if col.isdigit() and int(col) >= 1974]

# Income-group aggregates shown in this chart.
income_groups = [
    'Upper middle income', 'Middle income', 'Lower middle income', 'Low income', 'High income'
]

# Keep the rows of the two series of interest, income groups only.
df_fertility_income = demographics[
    (demographics['Series Name'] == 'Fertility rate, total (births per woman)') &
    (demographics['Country Name'].isin(income_groups))
]
df_urban_income = demographics[
    (demographics['Series Name'] == 'Urban population') &
    (demographics['Country Name'].isin(income_groups))
]

# Melt to long format: one row per (Country Name, Year); missing values dropped.
fertility_melted_income = df_fertility_income.melt(
    id_vars=['Country Name'],
    value_vars=year_cols,
    var_name='Year',
    value_name='Tasa De Fertilidad (Hijos Por Mujer)'
).dropna()

urban_melted_income = df_urban_income.melt(
    id_vars=['Country Name'],
    value_vars=year_cols,
    var_name='Year',
    value_name='Población Urbanizada'
).dropna()

# Join both long frames on income group and year (only pairs present in both).
merged_income = pd.merge(
    fertility_melted_income,
    urban_melted_income,
    on=['Country Name', 'Year'],
    how='inner'
)

# Animated scatter: one frame per year, one colour per income group.
fig = px.scatter(
    merged_income,
    x='Tasa De Fertilidad (Hijos Por Mujer)',
    y='Población Urbanizada',
    color='Country Name',
    animation_frame='Year',
    title='Índice De Fertilidad vs  Población Urbanizada (Grupos de Ingreso)',
    labels={
        'Tasa De Fertilidad (Hijos Por Mujer)': 'Tasa De Fertilidad (Hijos Por Mujer)',
        'Población Urbanizada': 'Población Urbanizada',
        'Country Name': 'Grupo de Ingreso',
        # Relabelling 'Year' here is what makes the animation slider read
        # "Año=" (and the hover entry too). Plotly express names its frames
        # with the bare year value, so the previous loop replacing "Year=" in
        # frame names changed nothing, and patching fig.layout.sliders[0]
        # afterwards raised IndexError when the data held a single year.
        'Year': 'Año'
    }
)

# Fixed axis ranges and larger markers, as in the per-country chart.
fig.update_xaxes(range=[0, 7])
fig.update_yaxes(range=[0, merged_income['Población Urbanizada'].max() * 1.05])
fig.update_traces(marker=dict(size=12))

fig.show()

In [73]:
import plotly.express as px
import pandas as pd
import itertools

# ...carga y procesamiento de datos como ya tienes...

# Fertility groups in display order, and the bar colour assigned to each one.
orden_grupos = ["Triplicación", "Duplicación", "Equilibrio", "Bajo El Equilibrio"]
colores = dict(zip(orden_grupos, ("green", "blue", "orange", "red")))

import itertools

# ...código anterior...

# Join country names with '<br>' line breaks so they can be stacked vertically.
def resumen_paises(lista, max_n=5):
    """Return the sorted names joined by '<br>', showing at most ``max_n`` of them.

    Args:
        lista: iterable of names; it is sorted before joining.
        max_n: how many names to show before truncating.

    Returns:
        The names joined with '<br>'. When there are more than ``max_n``
        names, only the first ``max_n`` (after sorting) are kept and
        '<br>y K más...' is appended, K being the number left out.
    """
    ordenados = sorted(lista)
    visibles = '<br>'.join(ordenados[:max_n])
    sobrantes = len(ordenados) - max_n
    if sobrantes <= 0:
        return visibles
    return visibles + f"<br>y {sobrantes} más..."

# NOTE(review): `fertility_long` is not defined in the cells shown here; it is
# expected to provide the columns 'Año', 'Grupo Fertilidad', 'Country Name'
# and 'Tasa de Fertilidad' -- confirm where it is built.
# Per (year, fertility group): the list of country names and how many
# non-null fertility rates there are.
agrupado = fertility_long.groupby(['Año', 'Grupo Fertilidad'])
conteo = agrupado.agg({
    'Country Name': list,
    'Tasa de Fertilidad': 'count'
})
conteo = conteo.reset_index()
conteo = conteo.rename(columns={'Tasa de Fertilidad': 'Número de Países'})

# Truncated '<br>'-separated text and the full comma-separated list of names.
conteo['Paises_texto'] = conteo['Country Name'].apply(
    lambda nombres: resumen_paises(nombres, max_n=5)
)
conteo['Paises_hover'] = conteo['Country Name'].apply(
    lambda nombres: ', '.join(sorted(nombres))
)

# --- Make sure every group is present for every year ---
all_years = sorted(conteo['Año'].unique(), key=int)
rejilla = [(anio, grupo) for anio in all_years for grupo in orden_grupos]
all_combinations = pd.DataFrame(rejilla, columns=['Año', 'Grupo Fertilidad'])
conteo = all_combinations.merge(conteo, on=['Año', 'Grupo Fertilidad'], how='left')
# (year, group) pairs with no data get a count of 0 and empty text.
conteo['Número de Países'] = conteo['Número de Países'].fillna(0).astype(int)
for columna_texto in ('Paises_texto', 'Paises_hover'):
    conteo[columna_texto] = conteo[columna_texto].fillna('')

# ...resto igual...

# Sort rows by year and group before plotting.
conteo_ordenado = conteo.sort_values(['Año', 'Grupo Fertilidad'])

# Axis / legend captions shown instead of the raw column names.
etiquetas = {
    'Grupo Fertilidad': 'Grupo de Tasa de Fertilidad',
    'Número de Países': 'Número de Países',
    'Año': 'Año'
}

# Animated bar chart: one bar per fertility group, one frame per year.
fig = px.bar(
    conteo_ordenado,
    x='Grupo Fertilidad',
    y='Número de Países',
    color='Grupo Fertilidad',
    animation_frame='Año',
    category_orders={'Grupo Fertilidad': orden_grupos, 'Año': all_years},
    color_discrete_map=colores,
    title='Distribución de países por grupo de tasa de fertilidad a través de los años',
    labels=etiquetas,
    hover_data=['Paises_hover'],
    text='Paises_texto'
)

# Draw the country names inside each bar, anchored in the middle.
estilo_texto = dict(
    textposition='inside',
    textfont_size=12,
    insidetextanchor='middle'
)
fig.update_traces(**estilo_texto)

# Fixed y range with 35% headroom above the largest count.
max_paises = conteo['Número de Países'].max()
fig.update_yaxes(range=[0, max_paises * 1.35])

fig.show()

In [74]:
import plotly.express as px

# Year columns from 1974 onwards (headers made only of digits).
year_cols = [col for col in demographics.columns if col.isdigit() and int(col) >= 1974]

# Income-group aggregates excluded from the first chart.
to_remove = [
    'Upper middle income', 'Middle income', 'Lower middle income', 'Low income', 'High income'
]


def _select_series(series_name, country_mask):
    """Return the rows of ``demographics`` for ``series_name`` where ``country_mask`` is True."""
    return demographics[(demographics['Series Name'] == series_name) & country_mask]


def _melt_years(wide, value_name):
    """Reshape ``year_cols`` of ``wide`` to long form (Country Name, Year, ``value_name``), dropping NaN rows."""
    return wide.melt(
        id_vars=['Country Name'],
        value_vars=year_cols,
        var_name='Year',
        value_name=value_name
    ).dropna()


def _add_urban_share(frame):
    """Add '% Urban Population' (100 * urban / total) to ``frame`` in place and return it."""
    frame['Urban population'] = pd.to_numeric(frame['Urban population'], errors='coerce')
    frame['Population, total'] = pd.to_numeric(frame['Population, total'], errors='coerce')
    frame['% Urban Population'] = 100 * frame['Urban population'] / frame['Population, total']
    return frame


def _animated_scatter(frame, x_col, title, labels):
    """Build the animated fertility vs. '% Urban Population' scatter (one frame per 'Year')."""
    figure = px.scatter(
        frame,
        x=x_col,
        y='% Urban Population',
        color='Country Name',
        animation_frame='Year',
        title=title,
        # Relabelling 'Year' is what makes the animation slider read "Año="
        # (and the hover entry too). Plotly express names its frames with the
        # bare year value, so the previous loop replacing "Year=" in frame
        # names changed nothing, and patching fig.layout.sliders[0] afterwards
        # raised IndexError when the data held a single year.
        labels={**labels, 'Year': 'Año'}
    )
    figure.update_xaxes(range=[0, 7])
    figure.update_yaxes(range=[0, 100])
    figure.update_traces(marker=dict(size=12))
    return figure


# --- BY COUNTRY ---
# Rows of the three series of interest, without the income groups.
not_income_group = ~demographics['Country Name'].isin(to_remove)
df_fertility = _select_series('Fertility rate, total (births per woman)', not_income_group)
df_urban = _select_series('Urban population', not_income_group)
df_total = _select_series('Population, total', not_income_group)

# Long format, one row per (Country Name, Year).
fertility_melted = _melt_years(df_fertility, 'Fertility rate, total (births per woman)')
urban_melted = _melt_years(df_urban, 'Urban population')
total_melted = _melt_years(df_total, 'Population, total')

# Join the three long frames on country and year (only pairs present in all).
merged = pd.merge(
    fertility_melted,
    urban_melted,
    on=['Country Name', 'Year'],
    how='inner'
)
merged = pd.merge(
    merged,
    total_melted,
    on=['Country Name', 'Year'],
    how='inner'
)

# Urban population as a percentage of total population.
merged = _add_urban_share(merged)

fig = _animated_scatter(
    merged,
    x_col='Fertility rate, total (births per woman)',
    title='Índice De Fertilidad vs % Población Urbanizada',
    labels={
        'Fertility rate, total (births per woman)': 'Tasa De Fertilidad (Hijos Por Mujer)',
        '% Urban Population': '% Población Urbanizada'
    }
)
fig.show()

# --- BY INCOME GROUP ---
income_groups = [
    'Upper middle income', 'Middle income', 'Lower middle income', 'Low income', 'High income'
]

# Same three series, this time keeping only the income-group rows.
is_income_group = demographics['Country Name'].isin(income_groups)
df_fertility_income = _select_series('Fertility rate, total (births per woman)', is_income_group)
df_urban_income = _select_series('Urban population', is_income_group)
df_total_income = _select_series('Population, total', is_income_group)

fertility_melted_income = _melt_years(df_fertility_income, 'Tasa De Fertilidad (Hijos Por Mujer)')
urban_melted_income = _melt_years(df_urban_income, 'Urban population')
total_melted_income = _melt_years(df_total_income, 'Population, total')

merged_income = pd.merge(
    fertility_melted_income,
    urban_melted_income,
    on=['Country Name', 'Year'],
    how='inner'
)
merged_income = pd.merge(
    merged_income,
    total_melted_income,
    on=['Country Name', 'Year'],
    how='inner'
)

merged_income = _add_urban_share(merged_income)

fig = _animated_scatter(
    merged_income,
    x_col='Tasa De Fertilidad (Hijos Por Mujer)',
    title='Índice De Fertilidad vs % Población Urbanizada (Grupos de Ingreso)',
    labels={
        'Tasa De Fertilidad (Hijos Por Mujer)': 'Tasa De Fertilidad (Hijos Por Mujer)',
        '% Urban Population': '% Población Urbanizada (%)',
        'Country Name': 'Grupo de Ingreso'
    }
)
fig.show()