In [310]:
import pandas as pd 
import geopandas as gpd
import numpy as np 
import re

In [311]:
# Country-level layer read from the ne_50m_admin_0_countries shapefile; later cells keep
# only its ISO/name attribute columns and use them to look up matched countries.
countriesInfo = gpd.read_file('../Data/ne_50m_admin_0_countries/ne_50m_admin_0_countries.shp')

In [312]:
# Per-state GDP table; later cells use its 'estado' and '2021' columns.
pibStates = pd.read_csv('../Data/Day_28Data/pib_estados.csv')

In [313]:
# Per-country GDP table. The first four lines of the file are skipped before the header row
# (presumably a metadata preamble — confirm against the raw CSV).
gdpData =  pd.read_csv('../Data/GDP_per_country_worldbank/API_NY.GDP.MKTP.CD_DS2_en_csv_v2_9865.csv', skiprows=4)

In [314]:
# State geometries; read as UTF-8 because the state names carry accented characters.
mapStates = gpd.read_file('../Data/dest23gw_c/dest23cw.shp', encoding='utf-8')

In [315]:
# Normalise headers: lowercase for both tables, and spaces -> underscores for the GDP table.
gdpData.columns = gdpData.columns.str.lower().str.replace(' ', '_', regex=False)
mapStates.columns = mapStates.columns.str.lower()

In [316]:
# Keep only the state name and its 2021 figure, under the names used by the rest of the notebook.
pibStates = (
    pibStates
    .rename(columns={'estado': 'mexican_state', '2021': 'gdp_state'})
    .loc[:, ['mexican_state', 'gdp_state']]
)

In [317]:
# Keep the country identifiers plus the 2021 value, renamed to 'gdp_country'.
gdpData = (
    gdpData
    .rename(columns={'2021': 'gdp_country'})
    .loc[:, ['country_name', 'country_code', 'gdp_country']]
)

In [318]:
# Exchange-rate table; the next cell averages its 'tipo_de_cambio' column.
exchangeRate = pd.read_csv('../Data/Day_28Data/banxico_tipo_de_cambio_2021.csv')

In [319]:
# Mean of the 'tipo_de_cambio' column; used below as the divisor that turns
# state GDP figures into dollars.
avgExchangeRate = exchangeRate['tipo_de_cambio'].mean()
avgExchangeRate

np.float64(20.205254166666666)

In [320]:
# Remove the comma separators so the figures can be parsed as numbers in the next cell.
pibStates = pibStates.assign(gdp_state=pibStates['gdp_state'].str.replace(',', ''))

In [321]:
# Parse the cleaned text column into a numeric dtype.
pibStates = pibStates.assign(gdp_state=pd.to_numeric(pibStates['gdp_state']))

In [322]:
# Convert each state's figure to dollars with the average exchange rate, then scale by 1e6
# (presumably the source reports millions of pesos — confirm against the dataset's units).
pibStates = pibStates.assign(
    gdp_state_dollar=pibStates['gdp_state'].div(avgExchangeRate).mul(1e6)
)
pibStates

Unnamed: 0,mexican_state,gdp_state,gdp_state_dollar
0,Aguascalientes,318347,15755650000.0
1,Baja California,923218,45691980000.0
2,Baja California Sur,212813,10532560000.0
3,Campeche,483398,23924370000.0
4,Coahuila de Zaragoza,901801,44632000000.0
5,Colima,145781,7215005000.0
6,Chiapas,379227,18768730000.0
7,Chihuahua,878625,43484980000.0
8,Ciudad de México,3701686,183204100000.0
9,Durango,301207,14907360000.0


In [323]:
# Reduce the country layer to its code/name attributes and normalise the headers
# (lowercase, spaces -> underscores) in a single expression.
countriesInfo = (
    countriesInfo[['ISO_A2', 'SOV_A3', 'NAME_EN', 'ISO_A3']]
    .rename(columns=lambda label: label.lower().replace(' ', '_'))
)


In [324]:
# forex_python client; within the visible cells it is only referenced by get_rates.
# NOTE(review): this import lives mid-notebook rather than in the top import cell.
from forex_python.converter import CurrencyRates
c = CurrencyRates()

In [325]:
def get_rates(x):
    """Return the MXN -> USD rate reported by the module-level client ``c``.

    ``x`` is forwarded unchanged as the third argument of ``c.get_rate``
    (presumably the date of the quote — confirm against forex_python).
    """
    return c.get_rate('MXN', 'USD', x)

In [326]:
# Preview the first rows of the trimmed country GDP table.
gdpData.head()

Unnamed: 0,country_name,country_code,gdp_country
0,Aruba,ABW,3103184000.0
1,Africa Eastern and Southern,AFE,1086772000000.0
2,Afghanistan,AFG,14266500000.0
3,Africa Western and Central,AFW,845993000000.0
4,Angola,AGO,66505130000.0


In [327]:
def find_closest_country(pib, countries_df):
    """Return ``(country_name, country_code)`` of the row whose GDP is nearest to ``pib``.

    The distance is the absolute difference between ``countries_df['gdp_country']``
    and ``pib``; the row with the smallest distance is selected by index label.

    Note: a later cell defines ``find_closest_country`` again with validation and a
    three-value return, so this version is shadowed once that cell has run.
    """
    gap = (countries_df['gdp_country'] - pib).abs()
    nearest = countries_df.loc[gap.idxmin()]
    return nearest['country_name'], nearest['country_code']



In [328]:
def find_closest_country(pib, countries_df):
    """Find the row of ``countries_df`` whose GDP is closest to ``pib``.

    Parameters
    ----------
    pib : int or float
        Reference GDP value. Built-in numbers and NumPy integer/floating
        scalars (e.g. ``np.int64``, ``np.float64``) are accepted.
    countries_df : pandas.DataFrame
        Table with the columns ``gdp_country``, ``country_name`` and
        ``country_code``. The frame passed in is left untouched.

    Returns
    -------
    tuple
        ``(country_name, country_code, gdp_country)`` of the closest row, where
        ``gdp_country`` is the numeric value used for the comparison. When
        several rows are equally close, the first one is returned.

    Raises
    ------
    ValueError
        If ``pib`` is not numeric or is NaN, if ``countries_df`` is empty or
        lacks ``gdp_country``, or if fewer than two distinct numeric GDP values
        remain after dropping unparseable entries.
    """
    # Validate that 'pib' is numeric. NumPy integer scalars are not subclasses of
    # int, so they have to be listed explicitly.
    if not isinstance(pib, (int, float, np.integer, np.floating)):
        raise ValueError(f"El valor de 'pib' debe ser un número, pero se recibió: {type(pib)}")

    # A NaN reference makes every distance NaN; reject it up front instead of
    # failing later inside idxmin / .loc with an unrelated-looking error.
    if pd.isna(pib):
        raise ValueError("El valor de 'pib' no puede ser NaN.")

    # Validate that 'countries_df' has the expected column and at least one row.
    if 'gdp_country' not in countries_df.columns or countries_df.empty:
        raise ValueError("El DataFrame 'countries_df' está vacío o no contiene la columna 'gdp_country'.")

    # Coerce the GDP column to numeric on a derived frame and drop unparseable rows.
    # assign() returns a new DataFrame, so the caller's data is never modified
    # (writing into the argument also raised SettingWithCopyWarning on filtered frames).
    candidates = countries_df.assign(
        gdp_country=pd.to_numeric(countries_df['gdp_country'], errors='coerce')
    ).dropna(subset=['gdp_country'])

    # Validate that there are enough distinct values to compare against.
    if candidates['gdp_country'].nunique() <= 1:
        raise ValueError("No hay suficientes valores únicos en 'gdp_country' para encontrar el país más cercano.")

    # Absolute distance to the reference value; idxmin picks the first smallest one.
    differences = (candidates['gdp_country'] - pib).abs()
    closest_country = candidates.loc[differences.idxmin()]

    # Return the country's name, code and GDP.
    return closest_country['country_name'], closest_country['country_code'], closest_country['gdp_country']


In [329]:
# The GDP table mixes countries with aggregate rows (its first rows include
# 'Africa Eastern and Southern' / AFE and 'Africa Western and Central' / AFW).
# Dropping only 'CSS' by hand leaves every other aggregate as a candidate; a state
# matched to one of them would then vanish in the inner join on countriesInfo['iso_a3'].
# Keep only codes that exist in the country layer, in addition to excluding 'CSS'.
# .copy() makes the result an independent frame rather than a slice of the raw table.
gdpData = gdpData[
    gdpData['country_code'].ne('CSS')
    & gdpData['country_code'].isin(countriesInfo['iso_a3'])
].copy()

In [330]:
# Asignar país a cada estado
closest_matches = []
for _, row in pibStates.iterrows():
    closest_country, country_code, country_gdp = find_closest_country(row['gdp_state_dollar'], gdpData)
    closest_matches.append({'state': row['mexican_state'], 'country': closest_country, 'country_code': country_code, 'original_gdp': row['gdp_state_dollar'], 'country_gdp': country_gdp})

# Convertir resultados en DataFrame
closest_matches_df = pd.DataFrame(closest_matches)


In [331]:
# Inspect the state -> closest-country pairing.
closest_matches_df

Unnamed: 0,state,country,country_code,original_gdp,country_gdp
0,Aguascalientes,Guinea,GIN,15755650000.0,16091820000.0
1,Baja California,Jordan,JOR,45691980000.0,46296100000.0
2,Baja California Sur,New Caledonia,NCL,10532560000.0,10071350000.0
3,Campeche,Bosnia and Herzegovina,BIH,23924370000.0,23672710000.0
4,Coahuila de Zaragoza,Cameroon,CMR,44632000000.0,44993520000.0
5,Colima,Bermuda,BMU,7215005000.0,7286607000.0
6,Chiapas,Botswana,BWA,18768730000.0,18750950000.0
7,Chihuahua,Cameroon,CMR,43484980000.0,44993520000.0
8,Ciudad de México,Hungary,HUN,183204100000.0,182110000000.0
9,Durango,Niger,NER,14907360000.0,14915000000.0


In [332]:
# Attach the country attributes (iso_a2, sov_a3, name_en, iso_a3) to every matched state.
statesMatches = pd.merge(closest_matches_df, countriesInfo, how='inner', left_on='country_code', right_on='iso_a3')

# An inner join silently drops rows whose code has no partner and duplicates rows whose
# code has several; fail loudly instead of exporting an incomplete or inflated map.
lost_states = sorted(set(closest_matches_df['state']) - set(statesMatches['state']))
assert not lost_states, f"States lost when joining on country_code/iso_a3: {lost_states}"
assert len(statesMatches) == len(closest_matches_df), (
    "Joining on country_code/iso_a3 changed the number of rows; check for duplicated iso_a3 codes."
)

In [333]:
# Inspect the pairing after the country attributes were attached.
statesMatches

Unnamed: 0,state,country,country_code,original_gdp,country_gdp,iso_a2,sov_a3,name_en,iso_a3
0,Aguascalientes,Guinea,GIN,15755650000.0,16091820000.0,GN,GIN,Guinea,GIN
1,Baja California,Jordan,JOR,45691980000.0,46296100000.0,JO,JOR,Jordan,JOR
2,Baja California Sur,New Caledonia,NCL,10532560000.0,10071350000.0,NC,FR1,New Caledonia,NCL
3,Campeche,Bosnia and Herzegovina,BIH,23924370000.0,23672710000.0,BA,BIH,Bosnia and Herzegovina,BIH
4,Coahuila de Zaragoza,Cameroon,CMR,44632000000.0,44993520000.0,CM,CMR,Cameroon,CMR
5,Colima,Bermuda,BMU,7215005000.0,7286607000.0,BM,GB1,Bermuda,BMU
6,Chiapas,Botswana,BWA,18768730000.0,18750950000.0,BW,BWA,Botswana,BWA
7,Chihuahua,Cameroon,CMR,43484980000.0,44993520000.0,CM,CMR,Cameroon,CMR
8,Ciudad de México,Hungary,HUN,183204100000.0,182110000000.0,HU,HUN,Hungary,HUN
9,Durango,Niger,NER,14907360000.0,14915000000.0,NE,NER,Niger,NER


In [334]:
# Print the distinct state names on both sides of the upcoming join so they can be compared.
for frame, name_column in ((statesMatches, 'state'), (mapStates, 'nomgeo')):
    print(frame[name_column].unique())


['Aguascalientes' 'Baja California' 'Baja California Sur' 'Campeche'
 'Coahuila de Zaragoza' 'Colima' 'Chiapas' 'Chihuahua' 'Ciudad de México'
 'Durango' 'Guanajuato' 'Guerrero' 'Hidalgo' 'Jalisco' 'México'
 'Michoacán de Ocampo' 'Morelos' 'Nayarit' 'Nuevo León' 'Oaxaca' 'Puebla'
 'Querétaro' 'Quintana Roo' 'San Luis Potosí' 'Sinaloa' 'Sonora' 'Tabasco'
 'Tamaulipas' 'Tlaxcala' 'Veracruz de Ignacio de la Llave' 'Yucatán'
 'Zacatecas']
['Aguascalientes' 'Baja California' 'Baja California Sur' 'Campeche'
 'Coahuila de Zaragoza' 'Colima' 'Chiapas' 'Chihuahua' 'Ciudad de México'
 'Durango' 'Guanajuato' 'Guerrero' 'Hidalgo' 'Jalisco' 'México'
 'Michoacán de Ocampo' 'Morelos' 'Nayarit' 'Nuevo León' 'Oaxaca' 'Puebla'
 'Querétaro' 'Quintana Roo' 'San Luis Potosí' 'Sinaloa' 'Sonora' 'Tabasco'
 'Tamaulipas' 'Tlaxcala' 'Veracruz de Ignacio de la Llave' 'Yucatán'
 'Zacatecas']


In [335]:
# Attach each state's row from the state layer, joining on the state name.
states_before_join = set(statesMatches['state'])
statesMatches = pd.merge(statesMatches, mapStates, left_on='state', right_on='nomgeo')

# The default inner join drops states whose name has no exact match in 'nomgeo';
# make that an error rather than relying on a visual comparison of the two name lists.
states_without_shape = sorted(states_before_join - set(statesMatches['state']))
assert not states_without_shape, f"States with no match in mapStates['nomgeo']: {states_without_shape}"

In [336]:
# Lowercase both ISO code columns in place.
for code_column in ('iso_a2', 'iso_a3'):
    statesMatches[code_column] = statesMatches[code_column].str.lower()


In [337]:
# Re-wrap the merged table as a GeoDataFrame before it is written with to_file.
# NOTE(review): presumably needed because pd.merge was called with a plain DataFrame on the
# left and so did not return a GeoDataFrame — confirm.
statesMatches = gpd.GeoDataFrame(statesMatches)

In [338]:
# Persist the joined states/countries layer as GeoJSON.
# NOTE(review): coordinates are written in whatever CRS statesMatches carries; many GeoJSON
# consumers assume WGS84 lon/lat — confirm the state shapefile's CRS or reproject before export.
statesMatches.to_file('../Data/Day_28Data/pib_estados_mexico_vs_countries.geojson', driver="GeoJSON")

In [95]:
#historicalRates = pd.DataFrame(pd.date_range(start='2021-01-01', end='2021-12-31', freq='D'), columns=['date'])