In [1]:
# libraries we're gonna use
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.preprocessing import PolynomialFeatures
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.linear_model import LinearRegression
from sklearn.feature_selection import SelectKBest, mutual_info_classif
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.linear_model import Ridge, Lasso
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error, make_scorer, r2_score
import matplotlib.ticker as ticker

In [None]:
# Population at the start of 2024, split by sex and five-year age bracket.
# The three sequences are index-aligned: position k of `hombres` / `mujeres`
# is the head count for the bracket named by `edades[k]`.

# Bracket labels '0-4', '5-9', ..., '100-104' (21 in total).
# NOTE(review): the last label is generated as '100-104'; if the source figure
# is really an open-ended "100 and over" bracket, the label should say so.
edades = np.array(['{}-{}'.format(inicio, inicio + 4)
                   for inicio in range(0, 101, 5)])

# Men per bracket, youngest bracket first.
hombres = [
    929_686, 1_150_375, 1_296_012, 1_273_951, 1_242_864,
    1_283_120, 1_384_020, 1_563_840, 1_923_796, 2_019_607,
    1_869_683, 1_719_869, 1_496_882, 1_211_902, 1_028_803,
    814_281, 514_750, 365_040, 143_322, 29_196,
    4_382,
]

# Women per bracket, youngest bracket first.
mujeres = [
    880_082, 1_086_536, 1_221_361, 1_197_648, 1_173_169,
    1_242_519, 1_367_915, 1_568_488, 1_892_599, 1_977_575,
    1_869_996, 1_773_166, 1_591_651, 1_336_099, 1_196_824,
    1_026_618, 742_036, 639_505, 320_933, 90_062,
    15_257,
]

In [None]:
# Assemble the table behind the population pyramid. Every column is reversed,
# so the oldest age bracket ends up in the first row.
data = dict(
    edades=edades[::-1],
    hombres=hombres[::-1],
    mujeres=mujeres[::-1],
)

df = pd.DataFrame(data)

In [None]:
# Show the pyramid table; rows run from the oldest age bracket to the youngest.
df

In [None]:
# Rescale the counts to thousands of people. Male counts are made negative so
# that their bars extend to the left of zero in the pyramid.
# The frame is rebuilt from `data` instead of being modified in place, so
# re-running this cell cannot flip the sign or divide by 1000 a second time.
df = pd.DataFrame(data).assign(
    hombres=lambda t: t['hombres'] / -1000,
    mujeres=lambda t: t['mujeres'] / 1000,
)

In [None]:
# Draw the population pyramid: both series are drawn on the same Axes, one
# horizontal bar per age bracket.
ax1 = sns.barplot(data=df, y='edades', x='hombres')
ax2 = sns.barplot(data=df, y='edades', x='mujeres', ax=ax1)

ax2.set_title("Population pyramid for España, 2024 estimates")
ax2.set_xlabel("Hombres        |        Mujeres")
ax2.grid()

# Relabel the x axis so that both sides of zero read as positive magnitudes.
marcas = [-2000, -1000, 0, 1000, 2000]
rotulos = ['2M', '1M', '0', '1M', '2M']
plt.xticks(ticks=marcas, labels=rotulos)

In [None]:
# Peek at the projected-mortality workbook: rows 5-104 of the column at
# position 3.
workbook = pd.read_excel('36774.xlsx')
workbook.iloc[5:105, 3]

In [None]:
# Load the projected-mortality workbook and keep rows 5-104 of its first
# 49 columns.
sheet = pd.read_excel('36774.xlsx')
mort_hombres = sheet.iloc[5:105, :49]

# NOTE(review): this stores the *label* of the first column, not the values in
# it — confirm whether `mort_hombres.iloc[:, 0]` was intended.
ages = mort_hombres.columns[0]

In [None]:
# Plot mortality by age for a few projection years, one line per year.
# Valid years: 2024 <= i <= 2071 (the column for year i sits at position
# 2072 - i of `mort_hombres`).
años = 60  # oldest age shown on the x axis

plt.grid()

# Rows 2 .. años+2 of the table are plotted against ages 0 .. años.
for i in (2025, 2050, 2070):
    v = np.array(mort_hombres.iloc[2:años+3, 2072-i])
    plt.plot(np.arange(0, años+1, 1), v, label=str(i))

# Same axis labels as the bar-chart version of this figure.
plt.xlabel('edad')
plt.ylabel('muertes/1000')
plt.legend()
plt.show()


In [None]:
# Same comparison as the line plot above, drawn as overlaid bars.
plt.figure(figsize=(15, 6))

v = [2030, 2060, 2024, 2045]   # projection years to compare (2024 <= year <= 2071)
años = 60                      # oldest age shown on the x axis
edad = np.arange(0, años+1, 1)

# One distinct colour per year, so the legend entries can be told apart.
colores = sns.color_palette(n_colors=len(v))

# Years are drawn in ascending order, so each later year is painted on top of
# the earlier ones.
for color, i in zip(colores, sorted(v)):
    # Rows 2 .. años+2 of the column at position 2072 - i, paired with ages 0 .. años.
    prob = np.array(mort_hombres.iloc[2:años+3, 2072-i])

    # A dedicated name is used here so the population-pyramid `df` is not
    # overwritten and that cell can still be re-run afterwards.
    mortalidad_año = pd.DataFrame({
        'edad': edad,
        'muertes/1000': prob
    })

    sns.barplot(x='edad', y='muertes/1000', data=mortalidad_año,
                color=color, label=str(i))


# Add title and axis labels
plt.title('Histograma de defunciones por cada 1000 habitantes')
plt.xlabel('edad')
plt.ylabel('muertes/1000')
plt.legend()
# Rotate the x tick labels so the 61 age ticks stay readable
plt.xticks(rotation=90)
plt.grid(axis='y', linestyle='--', alpha=0.7)

plt.show()