# 1 Descarga de datos

Notebook dedicado a la descarga de los listados del Programa de Fertilizantes para el Bienestar: productores autorizados 2023 y beneficiarios 2019-2023.


In [1]:
import requests
from bs4 import BeautifulSoup
import os
import pandas as pd

In [2]:
def _scrape_links(url, extension, timeout=30):
    """Return the links on a page whose href starts with 'http' and ends with ``extension``.

    Args:
        url: Address of the HTML page to fetch.
        extension: Suffix (for example ``'.csv'``) every returned href must end with.
        timeout: Seconds to wait for the server before ``requests`` gives up.

    Returns:
        A list of href strings in document order.

    Raises:
        requests.HTTPError: If the page answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeout.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly rather than parsing an error page and silently returning [].
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    hrefs = (link.get('href') for link in soup.find_all('a'))
    return [
        href
        for href in hrefs
        # Anchors without an href yield None; relative links are skipped.
        if href and href.startswith('http') and href.endswith(extension)
    ]


def scrape_urls(url):
    """Return the absolute ``.csv`` links found on the page at ``url``.

    Raises:
        requests.RequestException: If the page cannot be fetched or answers
            with an HTTP error status.
    """
    return _scrape_links(url, '.csv')


def scrape_xlsx(url):
    """Return the absolute ``.xlsx`` links found on the page at ``url``.

    Raises:
        requests.RequestException: If the page cannot be fetched or answers
            with an HTTP error status.
    """
    return _scrape_links(url, '.xlsx')

def download_datasets(urls, destination_folder, timeout=60):
    """Download every URL in ``urls`` into ``destination_folder`` and print a summary.

    Each file is saved under the last path segment of its URL. A URL counts as
    failed when the server does not answer with status 200 or when the request
    itself raises (connection error, timeout); the remaining URLs are still
    processed.

    Args:
        urls: Iterable of file URLs to download.
        destination_folder: Directory that receives the files. It is created
            (including parents) when it does not exist.
        timeout: Seconds to wait for each request before giving up.

    Returns:
        None. Progress and the final success/failure lists are printed.
    """
    # Create the target folder up front; open() below would otherwise raise
    # FileNotFoundError when the folder does not exist yet.
    os.makedirs(destination_folder, exist_ok=True)

    failed_urls = []
    good_urls = []
    for i, url in enumerate(urls):
        try:
            response = requests.get(url, timeout=timeout)
            downloaded = response.status_code == 200
        except requests.RequestException:
            # A network error on one file must not abort the remaining downloads.
            downloaded = False

        if downloaded:
            # Name the local file after the last path segment of the URL.
            filename = url.split("/")[-1]
            destination_path = os.path.join(destination_folder, filename)
            with open(destination_path, 'wb') as file:
                file.write(response.content)
            print(f"Dataset {i+1} downloaded successfully.\n")
            good_urls.append(url)
        else:
            print(f"Failed to download dataset {i+1}. URL: {url}\n")
            failed_urls.append(url)

    print("====DOWNLOAD RESULTS====")
    print(f"Failed count: {len(failed_urls)}")
    print(",".join(failed_urls))
    print(f" \n\nSuccessfully count {len(good_urls)}")
    print(",".join(good_urls))

def convert_xlsx_to_csv_in_directory(directory_path):
    """Write a ``.csv`` copy next to every ``.xlsx`` file in ``directory_path``.

    Each workbook is read with ``pd.read_excel`` using its defaults and saved
    without the index column under the same base name with a ``.csv``
    extension. The original ``.xlsx`` files are left in place. Nothing happens
    when the directory contains no ``.xlsx`` files.

    Args:
        directory_path: Directory to scan (not recursive).

    Returns:
        None. One line is printed per converted file.
    """
    # Sort so files are processed and reported in a stable order;
    # os.listdir returns entries in arbitrary order.
    xlsx_files = sorted(f for f in os.listdir(directory_path) if f.endswith('.xlsx'))

    for filename in xlsx_files:
        file_path = os.path.join(directory_path, filename)
        df = pd.read_excel(file_path)
        # Swap only the trailing extension; a plain str.replace would also
        # rewrite any earlier '.xlsx' occurring inside the file name.
        csv_filename = os.path.splitext(filename)[0] + '.csv'
        csv_file_path = os.path.join(directory_path, csv_filename)
        df.to_csv(csv_file_path, index=False)
        print(f"Converted {filename} to {csv_filename}")

### 1.1 Productores Autorizados

In [3]:
# Dataset page listing the 2023 authorized-producer files.
url = "https://www.datos.gob.mx/busca/dataset/programa-de-fertilizantes-2023-listados-autorizados"
urls_autorizados = scrape_urls(url)
# Separate loop variable so `url` keeps pointing at the dataset page.
for csv_url in urls_autorizados:
    print(f"URL: {csv_url}\n")

URL: https://www.agricultura.gob.mx/sites/default/files/sagarpa/Publicaciones/datos_abiertos/2023/Fertilizantes_autorizados/anexo_unico_2do_listado_autori_fertilizantes_23_mor_convo_2.csv

URL: https://www.agricultura.gob.mx/sites/default/files/sagarpa/Publicaciones/datos_abiertos/2023/Fertilizantes_autorizados/anexo_unico_nvoingreso_listado_autori_fertilizantes_23_son.csv

URL: https://www.agricultura.gob.mx/sites/default/files/sagarpa/Publicaciones/datos_abiertos/2023/Fertilizantes_autorizados/anexo_unico_nvoingreso_listado_autori_fertilizantes_23_qur.csv

URL: https://www.agricultura.gob.mx/sites/default/files/sagarpa/Publicaciones/datos_abiertos/2023/Fertilizantes_autorizados/anexo_unico_nvoingreso_listado_autori_fertilizantes_23_bcs.csv

URL: https://www.agricultura.gob.mx/sites/default/files/sagarpa/Publicaciones/datos_abiertos/2023/Fertilizantes_autorizados/anexo_unico_1er_listado_autori_fertilizantes_23_coa.csv

URL: https://www.agricultura.gob.mx/sites/default/files/sagarpa/Pu

In [4]:
# Collect the CSV links to download for the authorized-producers dataset.
url = "https://www.datos.gob.mx/busca/dataset/programa-de-fertilizantes-2023-listados-autorizados"
urls = scrape_urls(url)
# Independent copy: rebinding or mutating `urls` later leaves this list intact.
download_urls_autorizados = list(urls)

In [5]:
# Relative folder that receives the authorized-producer files.
destination_folder = "../../data/productores_autorizados"
download_datasets(
    urls=download_urls_autorizados,
    destination_folder=destination_folder,
)

Dataset 1 downloaded successfully.

Dataset 2 downloaded successfully.

Dataset 3 downloaded successfully.

Dataset 4 downloaded successfully.

Dataset 5 downloaded successfully.

Dataset 6 downloaded successfully.

Dataset 7 downloaded successfully.

Dataset 8 downloaded successfully.

Dataset 9 downloaded successfully.

Dataset 10 downloaded successfully.

Dataset 11 downloaded successfully.

Dataset 12 downloaded successfully.

Dataset 13 downloaded successfully.

Dataset 14 downloaded successfully.

Dataset 15 downloaded successfully.

Dataset 16 downloaded successfully.

Dataset 17 downloaded successfully.

Dataset 18 downloaded successfully.

Dataset 19 downloaded successfully.

Dataset 20 downloaded successfully.

Dataset 21 downloaded successfully.

Dataset 22 downloaded successfully.

Dataset 23 downloaded successfully.

Dataset 24 downloaded successfully.

Dataset 25 downloaded successfully.

Dataset 26 downloaded successfully.

Dataset 27 downloaded successfully.

Dataset 28

### 1.2 Listado Beneficiarios 2023

In [6]:
# Dataset page listing the 2023 beneficiary files.
url = "https://www.datos.gob.mx/busca/dataset/programa-de-fertilizantes-2023-listados-de-beneficiarios"
url_beneficiarios = scrape_urls(url)
# Separate loop variable so `url` keeps pointing at the dataset page.
for csv_url in url_beneficiarios:
    print(f"URL: {csv_url}\n")

URL: https://www.agricultura.gob.mx/sites/default/files/sagarpa/Publicaciones/datos_abiertos/2023/Fertilizantes_beneficiarios/listado_2_beneficiarios_fertilizantes_2023_pe_corte_310324.csv

URL: https://www.agricultura.gob.mx/sites/default/files/sagarpa/Publicaciones/datos_abiertos/2023/Fertilizantes_beneficiarios/listado_4_beneficiarios_fertilizantes_2023_corte_310324.csv

URL: https://www.agricultura.gob.mx/sites/default/files/sagarpa/Publicaciones/datos_abiertos/2023/Fertilizantes_beneficiarios/listado_3_beneficiarios_fertilizantes_2023_corte_091023.csv

URL: https://www.agricultura.gob.mx/sites/default/files/sagarpa/Publicaciones/datos_abiertos/2023/Fertilizantes_beneficiarios/listado_1_beneficiarios_fertilizantes_2023_pe_corte_091023.csv

URL: https://www.agricultura.gob.mx/sites/default/files/sagarpa/Publicaciones/datos_abiertos/2023/Fertilizantes_beneficiarios/listado_2_beneficiarios_fertilizantes_2023_corte_090823.csv

URL: https://www.agricultura.gob.mx/sites/default/files/sag

In [7]:
# Collect the CSV links to download for the 2023 beneficiaries dataset.
url = "https://www.datos.gob.mx/busca/dataset/programa-de-fertilizantes-2023-listados-de-beneficiarios"
urls = scrape_urls(url)
# Independent copy: rebinding or mutating `urls` later leaves this list intact.
download_urls_beneficiarios = list(urls)

In [8]:
# Relative folder that receives the 2023 beneficiary files.
destination_folder = "../../data/productores_beneficiarios"
download_datasets(
    urls=download_urls_beneficiarios,
    destination_folder=destination_folder,
)

Dataset 1 downloaded successfully.

Dataset 2 downloaded successfully.

Dataset 3 downloaded successfully.

Dataset 4 downloaded successfully.

Dataset 5 downloaded successfully.

Dataset 6 downloaded successfully.

====DOWNLOAD RESULTS====
Failed count: 0

 

Successfully count 6
https://www.agricultura.gob.mx/sites/default/files/sagarpa/Publicaciones/datos_abiertos/2023/Fertilizantes_beneficiarios/listado_2_beneficiarios_fertilizantes_2023_pe_corte_310324.csv,https://www.agricultura.gob.mx/sites/default/files/sagarpa/Publicaciones/datos_abiertos/2023/Fertilizantes_beneficiarios/listado_4_beneficiarios_fertilizantes_2023_corte_310324.csv,https://www.agricultura.gob.mx/sites/default/files/sagarpa/Publicaciones/datos_abiertos/2023/Fertilizantes_beneficiarios/listado_3_beneficiarios_fertilizantes_2023_corte_091023.csv,https://www.agricultura.gob.mx/sites/default/files/sagarpa/Publicaciones/datos_abiertos/2023/Fertilizantes_beneficiarios/listado_1_beneficiarios_fertilizantes_2023_pe_corte

### 1.3 Listado Beneficiarios 2019-2022

In [9]:
# One dataset page per program year, 2019 through 2022.
urls = [
    "https://datos.gob.mx/busca/dataset/programa-fertilizantes-2019",
    "https://datos.gob.mx/busca/dataset/programa-fertilizantes-2020",
    "https://datos.gob.mx/busca/dataset/programa-fertilizantes-2021",
    "https://datos.gob.mx/busca/dataset/programa-fertilizantes-2022",
]
# Gather the .xlsx links of every page into a single flat list.
download_urls_beneficiarios_19_22 = []
for url in urls:
    download_urls_beneficiarios_19_22.extend(scrape_xlsx(url))

In [10]:
# Relative folder that receives the 2019-2022 beneficiary workbooks.
destination_folder = "../../data/productores_beneficiarios 2019-2022"

download_datasets(download_urls_beneficiarios_19_22, destination_folder)

# Convert in the same folder the workbooks were just downloaded to.
convert_xlsx_to_csv_in_directory(destination_folder)

Dataset 1 downloaded successfully.

Dataset 2 downloaded successfully.

Dataset 3 downloaded successfully.

Dataset 4 downloaded successfully.

====DOWNLOAD RESULTS====
Failed count: 0

 

Successfully count 4
https://www.agricultura.gob.mx/sites/default/files/sagarpa/Publicaciones/datos_abiertos/2019_PEC/Fertilizantes/fertilizantes_2019.xlsx,https://www.agricultura.gob.mx/sites/default/files/sagarpa/Publicaciones/datos_abiertos/2020/Fertilizantes_rdc/listado_beneficiarios_fertilizantes_2020.xlsx,https://www.agricultura.gob.mx/sites/default/files/sagarpa/Publicaciones/datos_abiertos/2021/Fertilizantes_rdc/listado_beneficiarios_fertilizantes_2021.xlsx,https://www.agricultura.gob.mx/sites/default/files/sagarpa/Publicaciones/datos_abiertos/2022/Fertilizantes_rdc/listado_beneficiarios_fertilizantes_2022.xlsx
Converted fertilizantes_2019.xlsx to fertilizantes_2019.csv
Converted listado_beneficiarios_fertilizantes_2020.xlsx to listado_beneficiarios_fertilizantes_2020.csv
Converted listado_be