In [1]:
import requests
import pandas as pd

# Lista de URLs de los archivos Excel
urls = [
    'https://datos.sonora.gob.mx/dataset/092f6ac2-2714-44b3-a18a-bdca7893bb73/resource/b69e3632-2484-4aa3-a344-b6c1b6a93923/download/ganaderia_sonora_-2012.xlsx',
    'https://datos.sonora.gob.mx/dataset/092f6ac2-2714-44b3-a18a-bdca7893bb73/resource/c7055978-7577-4b1a-af1a-7e9f124ceb35/download/ganaderia_sonora_-2014.xlsx',
    'https://datos.sonora.gob.mx/dataset/092f6ac2-2714-44b3-a18a-bdca7893bb73/resource/b79aaf86-ed01-4e89-9fd9-e8f071a3bed4/download/ganaderia_sonora_-2015.xlsx',
    'https://datos.sonora.gob.mx/dataset/092f6ac2-2714-44b3-a18a-bdca7893bb73/resource/e4024afc-3ee8-4f84-900c-098b184e64ca/download/ganaderia_sonora_-2016.xlsx',
    'https://datos.sonora.gob.mx/dataset/092f6ac2-2714-44b3-a18a-bdca7893bb73/resource/0662440f-224a-4d53-8dad-746441053e03/download/ganaderia_sonora_-2017.xlsx',
    'https://datos.sonora.gob.mx/dataset/092f6ac2-2714-44b3-a18a-bdca7893bb73/resource/360b50f3-e1c7-43d5-a5e6-5f765c46ec5a/download/ganaderia_sonora_-2018.xlsx',
    'https://datos.sonora.gob.mx/dataset/092f6ac2-2714-44b3-a18a-bdca7893bb73/resource/5042b5e0-7c56-453b-8535-1fc479b4f867/download/ganaderia_sonora_-2021.xlsx',
    'https://datos.sonora.gob.mx/dataset/092f6ac2-2714-44b3-a18a-bdca7893bb73/resource/148571ca-009e-4cc2-84ee-b38484ca8fc7/download/ganaderia_sonora_-2022.xlsx',
]

# Valores para filtrar
valores_cmun = [1, 6, 13, 14, 19, 34, 53, 66]

# Lista para almacenar DataFrames filtrados
dataframes_filtrados = []

# Bucle para descargar y procesar cada archivo
for url in urls:
    # Extraer el año del nombre del archivo
    año = url.split('-')[-1].split('.')[0]
    nombre_archivo = f'ganaderia_{año}.xlsx'

    # Realizar la solicitud GET
    respuesta = requests.get(url)

    # Verificar si la solicitud fue exitosa
    if respuesta.status_code == 200:
        # Guardar el contenido del archivo
        with open(nombre_archivo, 'wb') as archivo:
            archivo.write(respuesta.content)
        print(f'Archivo descargado y guardado como {nombre_archivo}')

        # Cargar el archivo Excel en un DataFrame
        ganaderia = pd.read_excel(nombre_archivo)

        # Filtrar la tabla donde CMUN está en la lista especificada
        ganaderia_filtrada = ganaderia[ganaderia['CMUN'].isin(valores_cmun)]

        # Agregar el DataFrame filtrado a la lista
        dataframes_filtrados.append(ganaderia_filtrada)

        # Mostrar la tabla filtrada
        print(f'Tabla filtrada para el año {año}:')
        print(ganaderia_filtrada)

        # Guardar el DataFrame filtrado como CSV
        ganaderia_filtrada.to_csv(f'tabla_filtrada_{año}.csv', index=False)
        print(f'Tabla filtrada guardada como tabla_filtrada_{año}.csv')
    else:
        print(f'Error al descargar el archivo: {respuesta.status_code}')

# Realizar el merge de todos los DataFrames filtrados por 'CMUN'
if dataframes_filtrados:
    ganaderia_merge = pd.concat(dataframes_filtrados, ignore_index=True)

    # Guardar el DataFrame combinado como CSV
    ganaderia_merge.to_csv('ganaderia_merge.csv', index=False)
    print('Merge de todas las tablas guardado como ganaderia_merge.csv')


Archivo descargado y guardado como ganaderia_2012.xlsx
Tabla filtrada para el año 2012:
      ANO                           CIERYAVAN  CDDR             NDDR  CMUN  \
8    2012  CIERRE DE PRODUCCION PECUARIA 2012   142         142 URES    34   
9    2012  CIERRE DE PRODUCCION PECUARIA 2012   142         142 URES    34   
10   2012  CIERRE DE PRODUCCION PECUARIA 2012   142         142 URES    34   
11   2012  CIERRE DE PRODUCCION PECUARIA 2012   142         142 URES    34   
62   2012  CIERRE DE PRODUCCION PECUARIA 2012   142         142 URES     1   
63   2012  CIERRE DE PRODUCCION PECUARIA 2012   142         142 URES     1   
64   2012  CIERRE DE PRODUCCION PECUARIA 2012   142         142 URES     1   
65   2012  CIERRE DE PRODUCCION PECUARIA 2012   142         142 URES     1   
66   2012  CIERRE DE PRODUCCION PECUARIA 2012   142         142 URES     1   
79   2012  CIERRE DE PRODUCCION PECUARIA 2012   142         142 URES     6   
80   2012  CIERRE DE PRODUCCION PECUARIA 2012   142   

In [2]:
import pandas as pd
ganaderia_merge = pd.read_csv('ganaderia_merge.csv')

In [3]:
ganaderia_merge

Unnamed: 0,ANO,CIERYAVAN,CDDR,NDDR,CMUN,NMUN,CVEMES,NMES,CVESPE,ESPECIE,PRODUC,UMED,VOLTON,VALPROD
0,2012,CIERRE DE PRODUCCION PECUARIA 2012,142,142 URES,34,HUEPAC,,,1,BOVINO,CARNE,TONELADAS,235.000,11670.000
1,2012,CIERRE DE PRODUCCION PECUARIA 2012,142,142 URES,34,HUEPAC,,,6,OVINO,CARNE,TONELADAS,3.000,113.000
2,2012,CIERRE DE PRODUCCION PECUARIA 2012,142,142 URES,34,HUEPAC,,,2,BOVINO,LECHE,MILES DE LITROS,291.000,1495.000
3,2012,CIERRE DE PRODUCCION PECUARIA 2012,142,142 URES,34,HUEPAC,,,9,ABEJA,MIEL,TONELADAS,0.570,23.000
4,2012,CIERRE DE PRODUCCION PECUARIA 2012,142,142 URES,1,ACONCHI,,,1,BOVINO,CARNE,TONELADAS,275.000,13471.000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
267,2022,CIERRE DE PRODUCCIÓN PECUARIA 2022,142,URES,66,URES,1.0,ENERO,3,PORCINO,CARNE,TONELADAS,2458.370,107048.000
268,2022,CIERRE DE PRODUCCIÓN PECUARIA 2022,142,URES,66,URES,1.0,ENERO,4,CAPRINO,CARNE,TONELADAS,13.517,1045.610
269,2022,CIERRE DE PRODUCCIÓN PECUARIA 2022,142,URES,66,URES,1.0,ENERO,6,OVINO,CARNE,TONELADAS,37.901,2941.090
270,2022,CIERRE DE PRODUCCIÓN PECUARIA 2022,142,URES,66,URES,1.0,ENERO,7,AVE,HUEVO,TONELADAS,1406.420,34561.172


In [4]:
!pip install -U ydata-profiling

Collecting ydata-profiling
  Downloading ydata_profiling-4.10.0-py2.py3-none-any.whl.metadata (20 kB)
Collecting visions<0.7.7,>=0.7.5 (from visions[type_image_path]<0.7.7,>=0.7.5->ydata-profiling)
  Downloading visions-0.7.6-py3-none-any.whl.metadata (11 kB)
Collecting htmlmin==0.1.12 (from ydata-profiling)
  Downloading htmlmin-0.1.12.tar.gz (19 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting phik<0.13,>=0.11.1 (from ydata-profiling)
  Downloading phik-0.12.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.6 kB)
Collecting multimethod<2,>=1.4 (from ydata-profiling)
  Downloading multimethod-1.12-py3-none-any.whl.metadata (9.6 kB)
Collecting imagehash==4.3.1 (from ydata-profiling)
  Downloading ImageHash-4.3.1-py2.py3-none-any.whl.metadata (8.0 kB)
Collecting dacite>=1.8 (from ydata-profiling)
  Downloading dacite-1.8.1-py3-none-any.whl.metadata (15 kB)
Collecting PyWavelets (from imagehash==4.3.1->ydata-profiling)
  Downloading pywavelets-1.

In [5]:
from ydata_profiling import ProfileReport
ganaderia_profile = ProfileReport(ganaderia_merge, title="Reporte de ganaderia")


In [6]:
ganaderia_profile.to_notebook_iframe()

Summarize dataset:   0%|          | 0/5 [00:00<?, ?it/s]

Generate report structure:   0%|          | 0/1 [00:00<?, ?it/s]

Render HTML:   0%|          | 0/1 [00:00<?, ?it/s]

In [7]:
ganaderia_profile.to_file("ganaderia_profilling_report.html")

Export report to file:   0%|          | 0/1 [00:00<?, ?it/s]

In [8]:
!pip install sweetviz

Collecting sweetviz
  Downloading sweetviz-2.3.1-py3-none-any.whl.metadata (24 kB)
Downloading sweetviz-2.3.1-py3-none-any.whl (15.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m15.1/15.1 MB[0m [31m54.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: sweetviz
Successfully installed sweetviz-2.3.1


In [9]:
import sweetviz as sv
ganaderia_sv = sv.analyze(ganaderia_merge)

                                             |          | [  0%]   00:00 -> (? left)

In [10]:
ganaderia_sv.show_html(
    filepath='SWEETVIZ_REPORT.html',
    open_browser=True,
    layout='widescreen'
)

Report SWEETVIZ_REPORT.html was generated! NOTEBOOK/COLAB USERS: the web browser MAY not pop up, regardless, the report IS saved in your notebook/colab files.


In [11]:
!pip install summarytools

Collecting summarytools
  Downloading summarytools-0.3.0-py3-none-any.whl.metadata (3.5 kB)
Collecting jedi>=0.16 (from ipython>=7.20.0->summarytools)
  Using cached jedi-0.19.1-py2.py3-none-any.whl.metadata (22 kB)
Downloading summarytools-0.3.0-py3-none-any.whl (12 kB)
Using cached jedi-0.19.1-py2.py3-none-any.whl (1.6 MB)
Installing collected packages: jedi, summarytools
Successfully installed jedi-0.19.1 summarytools-0.3.0


In [12]:
from summarytools import dfSummary
dfSummary(ganaderia_merge)

No,Variable,Stats / Values,Freqs / (% of Valid),Graph,Missing
1,ANO [int64],1. 2018 2. 2021 3. 2022 4. 2015 5. 2017 6. 2016 7. 2012 8. 2014,36 (13.2%) 36 (13.2%) 36 (13.2%) 34 (12.5%) 34 (12.5%) 33 (12.1%) 32 (11.8%) 31 (11.4%),,0 (0.0%)
2,CIERYAVAN [object],1. CIERRE DE PRODUCCION PECUARIA 2. CIERRE DE PRODUCCIÓN PECUARIA 3. CIERRE DE PRODUCCIÓN PECUARIA 4. CIERRE DE PRODUCCION PECUARIA 5. CIERRE DE PRODUCCION PECUARIA 6. CIERRE DE PRODUCCIÓN PECUARIA 7. CIERRE DE PRODUCCION PECUARIA 8. CIERRE DE PRODUCCION PECUARIA,36 (13.2%) 36 (13.2%) 36 (13.2%) 34 (12.5%) 34 (12.5%) 33 (12.1%) 32 (11.8%) 31 (11.4%),,0 (0.0%)
3,CDDR [int64],1. 142 2. 141,259 (95.2%) 13 (4.8%),,0 (0.0%)
4,NDDR [object],1. URES 2. 142 URES 3. AGUA PRIETA 4. 141 AGUA PRIETA,198 (72.8%) 61 (22.4%) 11 (4.0%) 2 (0.7%),,0 (0.0%)
5,CMUN [int64],1. 66 2. 6 3. 1 4. 14 5. 34 6. 13 7. 53 8. 19,51 (18.8%) 41 (15.1%) 40 (14.7%) 40 (14.7%) 38 (14.0%) 32 (11.8%) 17 (6.2%) 13 (4.8%),,0 (0.0%)
6,NMUN [object],1. URES 2. ARIZPE 3. ACONCHI 4. BAVIACORA 5. HUEPAC 6. BANAMICHI 7. SAN FELIPE DE JESUS 8. CANANEA,51 (18.8%) 41 (15.1%) 40 (14.7%) 40 (14.7%) 38 (14.0%) 32 (11.8%) 17 (6.2%) 13 (4.8%),,0 (0.0%)
7,CVEMES [float64],1. 12.0 2. 1.0 3. nan,137 (50.4%) 72 (26.5%) 63 (23.2%),,63 (23.2%)
8,NMES [object],1. DICIEMBRE 2. ENERO 3. nan,137 (50.4%) 72 (26.5%) 63 (23.2%),,63 (23.2%)
9,CVESPE [int64],1. 1 2. 2 3. 6 4. 3 5. 9 6. 7 7. 4,64 (23.5%) 56 (20.6%) 53 (19.5%) 46 (16.9%) 39 (14.3%) 8 (2.9%) 6 (2.2%),,0 (0.0%)
10,ESPECIE [object],1. BOVINO 2. OVINO 3. PORCINO 4. ABEJA 5. AVE 6. CAPRINO 7. AVE,120 (44.1%) 53 (19.5%) 46 (16.9%) 39 (14.3%) 7 (2.6%) 6 (2.2%) 1 (0.4%),,0 (0.0%)


In [13]:
!pip install autoviz

Collecting autoviz
  Downloading autoviz-0.1.905-py3-none-any.whl.metadata (14 kB)
Collecting emoji (from autoviz)
  Downloading emoji-2.13.2-py3-none-any.whl.metadata (5.8 kB)
Collecting pyamg (from autoviz)
  Downloading pyamg-5.2.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (8.1 kB)
Collecting xgboost<1.7,>=0.82 (from autoviz)
  Downloading xgboost-1.6.2-py3-none-manylinux2014_x86_64.whl.metadata (1.8 kB)
Collecting pandas-dq>=1.29 (from autoviz)
  Downloading pandas_dq-1.29-py3-none-any.whl.metadata (19 kB)
Collecting hvplot>=0.9.2 (from autoviz)
  Downloading hvplot-0.11.0-py3-none-any.whl.metadata (15 kB)
Collecting matplotlib>3.7.4 (from autoviz)
  Downloading matplotlib-3.9.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (11 kB)
Downloading autoviz-0.1.905-py3-none-any.whl (67 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.5/67.5 kB[0m [31m1.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading hvplot-0.11.0-py3

In [14]:
%matplotlib inline

from autoviz import AutoViz_Class
AV = AutoViz_Class()
dft = AV.AutoViz(ganaderia_merge, chart_format ='png')

Imported v0.1.905. Please call AutoViz in this sequence:
    AV = AutoViz_Class()
    %matplotlib inline
    dfte = AV.AutoViz(filename, sep=',', depVar='', dfte=None, header=0, verbose=1, lowess=False,
               chart_format='svg',max_rows_analyzed=150000,max_cols_analyzed=30, save_plot_dir=None)
Shape of your Data Set loaded: (272, 14)
#######################################################################################
######################## C L A S S I F Y I N G  V A R I A B L E S  ####################
#######################################################################################
Classifying variables in data set...
    Number of Numeric Columns =  2
    Number of Integer-Categorical Columns =  2
    Number of String-Categorical Columns =  5
    Number of Factor-Categorical Columns =  0
    Number of String-Boolean Columns =  2
    Number of Numeric-Boolean Columns =  2
    Number of Discrete String Columns =  0
    Number of NLP String Columns =  0
    Number of 

Unnamed: 0,Data Type,Missing Values%,Unique Values%,Minimum Value,Maximum Value,DQ Issue
ANO,int64,0.0,2.0,2012.0,2022.0,Possible date-time colum: transform before modeling step.
CIERYAVAN,object,0.0,2.0,,,No issue
CDDR,int64,0.0,0.0,141.0,142.0,No issue
NDDR,object,0.0,1.0,,,1 rare categories: ['141 AGUA PRIETA']. Group them into a single category or drop the categories.
CMUN,int64,0.0,2.0,1.0,66.0,No issue
NMUN,object,0.0,2.0,,,No issue
CVEMES,float64,23.161765,0.0,,,"63 missing values. Impute them with mean, median, mode, or a constant value such as 123."
NMES,object,23.161765,0.0,,,"63 missing values. Impute them with mean, median, mode, or a constant value such as 123., Mixed dtypes: has 2 different data types: float, object,"
CVESPE,int64,0.0,2.0,1.0,9.0,No issue
ESPECIE,object,0.0,2.0,,,1 rare categories: ['AVE ']. Group them into a single category or drop the categories.


Number of All Scatter Plots = 3
All Plots done
Time to run AutoViz = 9 seconds 

 ###################### AUTO VISUALIZATION Completed ########################


In [15]:
!pip install dtale

Collecting dtale
  Downloading dtale-3.14.1-py2.py3-none-any.whl.metadata (17 kB)
Collecting dash-colorscales (from dtale)
  Downloading dash_colorscales-0.0.4.tar.gz (62 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.3/62.3 kB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting dash-daq (from dtale)
  Downloading dash_daq-0.5.0.tar.gz (642 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m642.7/642.7 kB[0m [31m13.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting Flask-Compress (from dtale)
  Downloading Flask_Compress-1.15-py3-none-any.whl.metadata (8.4 kB)
Collecting kaleido (from dtale)
  Downloading kaleido-0.2.1-py2.py3-none-manylinux1_x86_64.whl.metadata (15 kB)
Collecting squarify (from dtale)
  Downloading squarify-0.4.4-py3-none-any.whl.metadata (600 bytes)
Collecting strsimpy (from dtale)
  Downloading strsimpy-0.2.1-py3-

In [16]:
import dtale.app as dtale_app

#dtale_app.USE_NGROK = True
dtale_app.USE_COLAB = True

In [17]:
import dtale
dtale.show(ganaderia_merge)

https://p5gme3bmyvm-496ff2e9c6d22116-40000-colab.googleusercontent.com/dtale/main/1