# Merge operation

In this notebook you will find the implementation to map the codes from inegi dataset to our current dataset.

In [1]:
import pandas as pd

In [2]:
dtype_dict = {
    'CVE_ENT': str,
    'CVE_MUN': str
}

inegi_df = pd.read_csv('../../data/dataset_inegi.csv', encoding='cp1252', dtype=dtype_dict)

In [3]:
inegi_df.columns

Index(['MAPA', 'Estatus', 'CVE_ENT', 'NOM_ENT', 'NOM_ABR', 'CVE_MUN',
       'NOM_MUN', 'CVE_LOC', 'NOM_LOC', 'AMBITO', 'LATITUD', 'LONGITUD',
       'LAT_DECIMAL', 'LON_DECIMAL', 'ALTITUD', 'CVE_CARTA', 'POB_TOTAL',
       'POB_MASCULINA', 'POB_FEMENINA', 'TOTAL DE VIVIENDAS HABITADAS'],
      dtype='object')

In [4]:
inegi_df.drop(columns=['MAPA'])

Unnamed: 0,Estatus,CVE_ENT,NOM_ENT,NOM_ABR,CVE_MUN,NOM_MUN,CVE_LOC,NOM_LOC,AMBITO,LATITUD,LONGITUD,LAT_DECIMAL,LON_DECIMAL,ALTITUD,CVE_CARTA,POB_TOTAL,POB_MASCULINA,POB_FEMENINA,TOTAL DE VIVIENDAS HABITADAS
0,,01,Aguascalientes,Ags.,001,Aguascalientes,1,Aguascalientes,U,"21°52´47.362N""","102°17´45.768W""",21.879822,-102.296046,1878,F13D19,863893,419168,444725,246259
1,,01,Aguascalientes,Ags.,001,Aguascalientes,94,Granja Adelita,R,"21°52´18.749N""","102°22´24.710W""",21.871874,-102.373530,1901,F13D18,5,*,*,2
2,,01,Aguascalientes,Ags.,001,Aguascalientes,96,Agua Azul,R,"21°53´01.522N""","102°21´25.639W""",21.883756,-102.357122,1861,F13D18,41,24,17,12
3,,01,Aguascalientes,Ags.,001,Aguascalientes,100,Rancho Alegre,R,"21°51´16.556N""","102°22´21.884W""",21.854599,-102.372745,1879,F13D18,0,0,0,0
4,,01,Aguascalientes,Ags.,001,Aguascalientes,102,Los Arbolitos [Rancho],R,"21°46´48.650N""","102°21´26.261W""",21.780180,-102.357295,1861,F13D18,8,*,*,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
299563,,32,Zacatecas,Zac.,058,Santa María de la Paz,39,San Isidro,R,"21°29´11.130N""","103°20´14.164W""",21.486425,-103.337267,2036,F13D35,1,*,*,1
299564,,32,Zacatecas,Zac.,058,Santa María de la Paz,40,San José,R,"21°30´59.539N""","103°27´23.645W""",21.516539,-103.456568,1821,F13D25,0,0,0,0
299565,,32,Zacatecas,Zac.,058,Santa María de la Paz,41,San Miguel Tepetitlán,R,"21°30´15.168N""","103°20´09.356W""",21.504213,-103.335932,1977,F13D25,97,47,50,31
299566,,32,Zacatecas,Zac.,058,Santa María de la Paz,42,San Rafael,R,"21°31´39.341N""","103°22´20.134W""",21.527594,-103.372259,2041,F13D25,3,*,*,1


In [5]:
acreditados_df = pd.read_csv('../../data/productores_autorizados_fertilizantes.csv')

In [6]:
acreditados_df

Unnamed: 0,ESTADO,MUNICIPIO,ACUSE,APELLIDO PATERNO,APELLIDO MATERNO,NOMBRE (S),PAQUETE
0,Aguascalientes,Tepezalá,23-PRONAFE-FERT-006426-S000-AS,VERA,MARTINEZ,ROBERTO,2
1,Aguascalientes,Asientos,23-PRONAFE-FERT-001512-S000-AS,SANCHEZ,BERNAL,LIDIA,1
2,Aguascalientes,Asientos,23-PRONAFE-FERT-001872-S000-AS,SANCHEZ,BERNAL,VICTORIO,2
3,Aguascalientes,Asientos,23-PRONAFE-FERT-001538-S000-AS,SANCHEZ,CAMPOS,EVA,2
4,Aguascalientes,Asientos,23-PRONAFE-FERT-002131-S000-AS,SANCHEZ,CASTILLO,ALFONSO,2
...,...,...,...,...,...,...,...
1525715,Yucatán,San Lorenzo Axocomanitla,23-PRONAFE-FERT-012460-S000-YN,EK,YAH,RAUL,2
1525716,Yucatán,San Lorenzo Axocomanitla,23-PRONAFE-FERT-012459-S000-YN,EK,YAH,MARCELO,2
1525717,Yucatán,San Lorenzo Axocomanitla,23-PRONAFE-FERT-012455-S000-YN,EK,Y YAH,BACILIO,2
1525718,Yucatán,San Lorenzo Axocomanitla,23-PRONAFE-FERT-012489-S000-YN,HUCHIM,MARIN,FELIPE VIDAL,2


In [7]:
# Assuming 'df' is your original DataFrame with 'nom_ent' and 'cve_ent' columns
mapping_dict = inegi_df.set_index('NOM_ENT')['CVE_ENT'].to_dict()

# Create a new column 'cve_ent' in 'acreditados_df' DataFrame
acreditados_df['cve_ent'] = acreditados_df['ESTADO'].map(mapping_dict)

In [8]:
acreditados_df

Unnamed: 0,ESTADO,MUNICIPIO,ACUSE,APELLIDO PATERNO,APELLIDO MATERNO,NOMBRE (S),PAQUETE,cve_ent
0,Aguascalientes,Tepezalá,23-PRONAFE-FERT-006426-S000-AS,VERA,MARTINEZ,ROBERTO,2,01
1,Aguascalientes,Asientos,23-PRONAFE-FERT-001512-S000-AS,SANCHEZ,BERNAL,LIDIA,1,01
2,Aguascalientes,Asientos,23-PRONAFE-FERT-001872-S000-AS,SANCHEZ,BERNAL,VICTORIO,2,01
3,Aguascalientes,Asientos,23-PRONAFE-FERT-001538-S000-AS,SANCHEZ,CAMPOS,EVA,2,01
4,Aguascalientes,Asientos,23-PRONAFE-FERT-002131-S000-AS,SANCHEZ,CASTILLO,ALFONSO,2,01
...,...,...,...,...,...,...,...,...
1525715,Yucatán,San Lorenzo Axocomanitla,23-PRONAFE-FERT-012460-S000-YN,EK,YAH,RAUL,2,31
1525716,Yucatán,San Lorenzo Axocomanitla,23-PRONAFE-FERT-012459-S000-YN,EK,YAH,MARCELO,2,31
1525717,Yucatán,San Lorenzo Axocomanitla,23-PRONAFE-FERT-012455-S000-YN,EK,Y YAH,BACILIO,2,31
1525718,Yucatán,San Lorenzo Axocomanitla,23-PRONAFE-FERT-012489-S000-YN,HUCHIM,MARIN,FELIPE VIDAL,2,31


In [9]:
# Assuming 'df' is your original DataFrame with 'nom_ent' and 'cve_ent' columns
mapping_dict = inegi_df.set_index('NOM_MUN')['CVE_MUN'].to_dict()

# Create a new column 'cve_ent' in 'acreditados_df' DataFrame
acreditados_df['cve_mun'] = acreditados_df['MUNICIPIO'].map(mapping_dict)

In [10]:
acreditados_df

Unnamed: 0,ESTADO,MUNICIPIO,ACUSE,APELLIDO PATERNO,APELLIDO MATERNO,NOMBRE (S),PAQUETE,cve_ent,cve_mun
0,Aguascalientes,Tepezalá,23-PRONAFE-FERT-006426-S000-AS,VERA,MARTINEZ,ROBERTO,2,01,009
1,Aguascalientes,Asientos,23-PRONAFE-FERT-001512-S000-AS,SANCHEZ,BERNAL,LIDIA,1,01,002
2,Aguascalientes,Asientos,23-PRONAFE-FERT-001872-S000-AS,SANCHEZ,BERNAL,VICTORIO,2,01,002
3,Aguascalientes,Asientos,23-PRONAFE-FERT-001538-S000-AS,SANCHEZ,CAMPOS,EVA,2,01,002
4,Aguascalientes,Asientos,23-PRONAFE-FERT-002131-S000-AS,SANCHEZ,CASTILLO,ALFONSO,2,01,002
...,...,...,...,...,...,...,...,...,...
1525715,Yucatán,San Lorenzo Axocomanitla,23-PRONAFE-FERT-012460-S000-YN,EK,YAH,RAUL,2,31,054
1525716,Yucatán,San Lorenzo Axocomanitla,23-PRONAFE-FERT-012459-S000-YN,EK,YAH,MARCELO,2,31,054
1525717,Yucatán,San Lorenzo Axocomanitla,23-PRONAFE-FERT-012455-S000-YN,EK,Y YAH,BACILIO,2,31,054
1525718,Yucatán,San Lorenzo Axocomanitla,23-PRONAFE-FERT-012489-S000-YN,HUCHIM,MARIN,FELIPE VIDAL,2,31,054


In [11]:
final_column_names = {
    'ESTADO': 'estado',
    'MUNICIPIO': 'municipio',
    'ACUSE': 'acuse',
    'APELLIDO PATERNO': 'apellido_paterno',
    'APELLIDO MATERNO': 'apellido_materno',
    'NOMBRE (S)': 'nombre_propio',
    'PAQUETE': 'paquete',    # Add more column names here
}

acreditados_df.rename(columns=final_column_names, inplace=True)

In [12]:
acreditados_df

Unnamed: 0,estado,municipio,acuse,apellido_paterno,apellido_materno,nombre_propio,paquete,cve_ent,cve_mun
0,Aguascalientes,Tepezalá,23-PRONAFE-FERT-006426-S000-AS,VERA,MARTINEZ,ROBERTO,2,01,009
1,Aguascalientes,Asientos,23-PRONAFE-FERT-001512-S000-AS,SANCHEZ,BERNAL,LIDIA,1,01,002
2,Aguascalientes,Asientos,23-PRONAFE-FERT-001872-S000-AS,SANCHEZ,BERNAL,VICTORIO,2,01,002
3,Aguascalientes,Asientos,23-PRONAFE-FERT-001538-S000-AS,SANCHEZ,CAMPOS,EVA,2,01,002
4,Aguascalientes,Asientos,23-PRONAFE-FERT-002131-S000-AS,SANCHEZ,CASTILLO,ALFONSO,2,01,002
...,...,...,...,...,...,...,...,...,...
1525715,Yucatán,San Lorenzo Axocomanitla,23-PRONAFE-FERT-012460-S000-YN,EK,YAH,RAUL,2,31,054
1525716,Yucatán,San Lorenzo Axocomanitla,23-PRONAFE-FERT-012459-S000-YN,EK,YAH,MARCELO,2,31,054
1525717,Yucatán,San Lorenzo Axocomanitla,23-PRONAFE-FERT-012455-S000-YN,EK,Y YAH,BACILIO,2,31,054
1525718,Yucatán,San Lorenzo Axocomanitla,23-PRONAFE-FERT-012489-S000-YN,HUCHIM,MARIN,FELIPE VIDAL,2,31,054


In [13]:
n_unique3 = acreditados_df['cve_mun'].nunique()

In [14]:
n_unique3

560

In [15]:
n_unique4 = acreditados_df['municipio'].nunique()

In [16]:
n_unique4

1684

In [17]:
acreditados_df['estado'] = acreditados_df['estado'].astype(str)
acreditados_df['municipio'] = acreditados_df['municipio'].astype(str)
acreditados_df['acuse'] = acreditados_df['acuse'].astype(str)
acreditados_df['apellido_paterno'] = acreditados_df['apellido_paterno'].astype(str)
acreditados_df['apellido_materno'] = acreditados_df['apellido_materno'].astype(str)
acreditados_df['nombre_propio'] = acreditados_df['nombre_propio'].astype(str)
acreditados_df['paquete'] = acreditados_df['paquete'].astype(int)
acreditados_df['cve_ent'] = acreditados_df['cve_ent'].astype(str)
acreditados_df['cve_mun'] = acreditados_df['cve_mun'].astype(str)

In [18]:
acreditados_df.dtypes

estado              object
municipio           object
acuse               object
apellido_paterno    object
apellido_materno    object
nombre_propio       object
paquete              int64
cve_ent             object
cve_mun             object
dtype: object

In [19]:
acreditados_df.to_csv('../../data/productores_autorizados_final.csv', index=False)