In [29]:
import os
from collections import defaultdict
from urllib.parse import quote_plus

import pandas as pd
import requests
from pymongo import MongoClient

In [30]:
# Connection settings.
# Each value can be overridden through an environment variable so that
# credentials do not have to be edited into (and committed with) the notebook.
# The fallbacks reproduce the original local-development values.
# NOTE(review): remove the password fallback before sharing this notebook.
usuario = os.environ.get("MONGO_USER", "jalope")
contrasena = os.environ.get("MONGO_PASSWORD", "admin")
host = os.environ.get("MONGO_HOST", "127.0.0.1")
puerto = os.environ.get("MONGO_PORT", "27250")

# quote_plus percent-encodes the user and password so reserved characters
# cannot break the URI.
uri = f"mongodb://{quote_plus(usuario)}:{quote_plus(contrasena)}@{host}:{puerto}/?directConnection=true"
client = MongoClient(uri)
db = client["tfm_db"]

In [31]:
# Read both source collections completely and turn each into a DataFrame.
exp_documents = list(db["EUROSTAT_GOV10A_EXP_GDP_COMPARATIVA"].find())
df_exp = pd.DataFrame(exp_documents)

flow_documents = list(db["EUROSTAT_SPENDING_FLOWS"].find())
df_flows = pd.DataFrame(flow_documents)

In [32]:
# Preview the first rows of each raw frame to confirm the load.
for heading, frame in (
    ("Gasto/Educación base:", df_exp),
    ("\nFlujos de recaudación:", df_flows),
):
    print(heading)
    print(frame.head())

Gasto/Educación base:
                        _id  year  education_pct_gdp  pensions_pct_gdp country
0  685cfc7c9b9ce874eaa20f6e  1995                4.3               6.6      ES
1  685cfc7c9b9ce874eaa20f6f  1996                4.3               6.7      ES
2  685cfc7c9b9ce874eaa20f70  1997                4.2               6.6      ES
3  685cfc7c9b9ce874eaa20f71  1998                4.1               6.5      ES
4  685cfc7c9b9ce874eaa20f72  1999                4.2               6.4      ES

Flujos de recaudación:
                        _id geo time_period  gf01_general_public_services  \
0  685d23d139e248e523f55877  DE        1995                           7.4   
1  685d23d139e248e523f55878  DE        1996                           7.3   
2  685d23d139e248e523f55879  DE        1997                           7.3   
3  685d23d139e248e523f5587a  DE        1998                           7.2   
4  685d23d139e248e523f5587b  DE        1999                           6.9   

   gf07_health  g

In [33]:
# Reduce the expenditure frame to the four columns used downstream.
# The rename only matters if the frame still carries the raw 'geo' /
# 'time_period' headers; keys that are absent are ignored by rename.
harmonised_names = {'geo': 'country', 'time_period': 'year'}
expected_columns = ['country', 'year', 'education_pct_gdp', 'pensions_pct_gdp']

df_exp_base = df_exp.rename(columns=harmonised_names)[expected_columns]
# Cast the year to integer so it can be used as a merge key.
df_exp_base['year'] = df_exp_base['year'].astype(int)

In [34]:
# Columns of the spending-flows frame that are summed into revenue_pct_gdp.
# NOTE(review): presumably Eurostat tax / social-contribution codes - confirm.
tax_cols = [
    'd51c1',
    'd51c2',
    'd51c3',
    'd51d',
    'd51e',
    'd59',
    'd61',
    'd91',
]

In [35]:
# Inspect the schema of the flows frame: column names, then the first
# document rendered as a plain dict.
flow_columns = df_flows.columns.tolist()
print("Columnas en spending_flows:", flow_columns)

first_record = df_flows.head(1).to_dict(orient='records')[0]
print("Primer documento:", first_record)

Columnas en spending_flows: ['_id', 'geo', 'time_period', 'gf01_general_public_services', 'gf07_health', 'gf09_education', 'gf1002_old_age', 'd51c1', 'd51c2', 'd51c3', 'd51d', 'd51e', 'd59', 'd61', 'd91', 'inclusion_index', 'urban_rate']
Primer documento: {'_id': ObjectId('685d23d139e248e523f55877'), 'geo': 'DE', 'time_period': '1995', 'gf01_general_public_services': 7.4, 'gf07_health': 6.1, 'gf09_education': 4.1, 'gf1002_old_age': 0.0, 'd51c1': 0.0, 'd51c2': 0.0, 'd51c3': 0.0, 'd51d': 0.0, 'd51e': 0.0, 'd59': 0.0, 'd61': 0.0, 'd91': 0.0, 'inclusion_index': nan, 'urban_rate': nan}


In [36]:
# Harmonise the key columns of the flows frame and derive total revenue.
flows_named = df_flows.rename(columns={'geo': 'country', 'time_period': 'year'})

# revenue_pct_gdp is the row-wise sum of the columns listed in tax_cols.
# NOTE(review): the captured preview shows revenue_pct_gdp as 0.0 for every
# displayed row - confirm the tax columns are actually populated upstream.
df_flows_base = flows_named.assign(
    year=flows_named['year'].astype(int),
    revenue_pct_gdp=flows_named[tax_cols].sum(axis=1),
)[['country', 'year', 'revenue_pct_gdp', 'inclusion_index', 'urban_rate']]

In [37]:
# Preview the first rows of both derived frames.
for heading, frame in (
    ("=== Gasto base ===", df_exp_base),
    ("\n=== Recaudación + Modernización ===", df_flows_base),
):
    print(heading)
    print(frame.head())

=== Gasto base ===
  country  year  education_pct_gdp  pensions_pct_gdp
0      ES  1995                4.3               6.6
1      ES  1996                4.3               6.7
2      ES  1997                4.2               6.6
3      ES  1998                4.1               6.5
4      ES  1999                4.2               6.4

=== Recaudación + Modernización ===
  country  year  revenue_pct_gdp  inclusion_index  urban_rate
0      DE  1995              0.0              NaN         NaN
1      DE  1996              0.0              NaN         NaN
2      DE  1997              0.0              NaN         NaN
3      DE  1998              0.0              NaN         NaN
4      DE  1999              0.0              NaN         NaN


In [38]:
# Show one randomly chosen row (no seed is set, so it changes between runs).
df_flows_base.sample(n=1)

Unnamed: 0,country,year,revenue_pct_gdp,inclusion_index,urban_rate
76,EU27_2020,2013,0.0,,


In [39]:
# Display the year column to check its values and dtype.
df_exp_base.loc[:, 'year']

0     1995
1     1996
2     1997
3     1998
4     1999
      ... 
82    2019
83    2020
84    2021
85    2022
86    2023
Name: year, Length: 87, dtype: int64

In [40]:
# List the columns of both frames before merging.
# The second label is printed as 'df_flow_base' although the frame is df_flows_base.
for label, frame in (
    ("df_exp_base", df_exp_base),
    ("df_flow_base", df_flows_base),
):
    print(f"Columnas de {label}:", frame.columns.tolist())

Columnas de df_exp_base: ['country', 'year', 'education_pct_gdp', 'pensions_pct_gdp']
Columnas de df_flow_base: ['country', 'year', 'revenue_pct_gdp', 'inclusion_index', 'urban_rate']


In [41]:
# Make sure the merge key 'year' is an integer in both frames.
for frame in (df_exp_base, df_flows_base):
    frame['year'] = frame['year'].astype(int)

In [42]:
# Join expenditure and flows on (country, year). The inner join keeps only
# key pairs present in both frames. Rows are then ordered by country and year.
df_mod = (
    df_exp_base
    .merge(df_flows_base, how='inner', on=['country', 'year'])
    .sort_values(by=['country', 'year'])
)

# Year-over-year change in education spending within each country; the first
# row of every country has no predecessor and therefore gets NaN.
df_mod['delta_edu'] = df_mod['education_pct_gdp'].groupby(df_mod['country']).diff()

# Preview the merged table.
print(df_mod.head(10))

  country  year  education_pct_gdp  pensions_pct_gdp  revenue_pct_gdp  \
0      ES  1995                4.3               6.6              0.0   
1      ES  1996                4.3               6.7              0.0   
2      ES  1997                4.2               6.6              0.0   
3      ES  1998                4.1               6.5              0.0   
4      ES  1999                4.2               6.4              0.0   
5      ES  2000                4.1               6.3              0.0   
6      ES  2001                4.0               6.1              0.0   
7      ES  2002                4.0               6.2              0.0   
8      ES  2003                4.0               6.1              0.0   
9      ES  2004                4.1               6.0              0.0   

   inclusion_index  urban_rate  delta_edu  
0              NaN         NaN        NaN  
1              NaN         NaN        0.0  
2              NaN         NaN       -0.1  
3              NaN  

In [43]:
# Replace the MODERNIZATION_METRICS collection with the current rows of df_mod.
coll = db['MODERNIZATION_METRICS']
records = df_mod.to_dict('records')

# Validate before the destructive drop: insert_many rejects an empty list, and
# failing only after drop() would leave the collection deleted.
if not records:
    raise ValueError("df_mod está vacío; no se modifica la colección MODERNIZATION_METRICS")

coll.drop()
coll.insert_many(records)

# Both counts should match if every row was written.
print("Registros en la colección: ", coll.count_documents({}))
print("Número de filas de df_mod: ", len(df_mod))

Registros en la colección:  58
Número de filas de df_raw:  58


In [44]:
# Scenario grid and elasticity.
age_list   = [65, 66, 67]        # ages to simulate
beta_list  = [0.00, 0.05, 0.10]  # relative increase in education spending
gamma_list = [0.00, 0.05, 0.10]  # widening of the tax base
alpha      = 0.005               # elasticity: -0.5% pension spending per extra year
baseline_age = 65                # age at which pension spending is left unchanged

# Only the columns the scenarios read. itertuples avoids building a Series
# per row, which iterrows does.
scenario_inputs = df_mod[
    ['country', 'year', 'pensions_pct_gdp', 'education_pct_gdp', 'revenue_pct_gdp']
]

# One record per (age, tax-base, education) combination and source row.
scenarios = []
for age in age_list:
    # Loop-invariant for all rows of this age, so computed once.
    pension_factor = 1 - alpha * (age - baseline_age)
    for base_growth in gamma_list:
        for edu_growth in beta_list:
            # round() instead of int(): int() truncates, so a product such as
            # 0.29 * 100 (28.999...) would be labelled 28 rather than 29.
            scenario_name = (
                f"edad{age}"
                f"_base{round(base_growth * 100)}"
                f"_edu{round(edu_growth * 100)}"
            )
            for country, year, pensions, education, revenue in scenario_inputs.itertuples(
                index=False, name=None
            ):
                pens = pensions * pension_factor
                edu = education * (1 + edu_growth)
                rev = revenue * (1 + base_growth)
                scenarios.append({
                    "country": country,
                    "scenario": scenario_name,
                    "year": year,
                    # Key spelled 'pensiones_pct_gdp' (not 'pensions_pct_gdp');
                    # kept unchanged because it becomes the stored field name.
                    "pensiones_pct_gdp": pens,
                    "education_pct_gdp": edu,
                    "revenue_pct_gdp": rev,
                    "deficit_pct_gdp": pens + edu - rev
                })

df_scen = pd.DataFrame(scenarios)

# Preview
print("Primeros escenarios generados:")
print(df_scen.head(10))

Primeros escenarios generados:
  country           scenario  year  pensiones_pct_gdp  education_pct_gdp  \
0      ES  edad65_base0_edu0  1995                6.6                4.3   
1      ES  edad65_base0_edu0  1996                6.7                4.3   
2      ES  edad65_base0_edu0  1997                6.6                4.2   
3      ES  edad65_base0_edu0  1998                6.5                4.1   
4      ES  edad65_base0_edu0  1999                6.4                4.2   
5      ES  edad65_base0_edu0  2000                6.3                4.1   
6      ES  edad65_base0_edu0  2001                6.1                4.0   
7      ES  edad65_base0_edu0  2002                6.2                4.0   
8      ES  edad65_base0_edu0  2003                6.1                4.0   
9      ES  edad65_base0_edu0  2004                6.0                4.1   

   revenue_pct_gdp  deficit_pct_gdp  
0              0.0             10.9  
1              0.0             11.0  
2              0.0

In [None]:
# Target collection for the scenario table: SCENARIOS_METRICS

In [45]:
# Replace the SCENARIOS_METRICS collection with the current rows of df_scen.
coll = db['SCENARIOS_METRICS']
scenario_records = df_scen.to_dict('records')

# Validate before the destructive drop: insert_many rejects an empty list, and
# failing only after drop() would leave the collection deleted.
if not scenario_records:
    raise ValueError("df_scen está vacío; no se modifica la colección SCENARIOS_METRICS")

coll.drop()
coll.insert_many(scenario_records)

# Both counts should match if every row was written.
print("Registros en la colección: ", coll.count_documents({}))
print("Número de filas de df_scen: ", len(df_scen))

Registros en la colección:  1566
Número de filas de df_scene:  1566
