In [131]:
# %pip install pandas
# %pip install matplotlib
# %pip install seaborn
# %pip install sklearn
# %pip install scipy
# %pip install statsmodels
# %pip install tqdm
# %pip install minio

In [132]:
import pandas as pd
import io
from minio import Minio
import json
from IPython.display import display

FILE_NAME = "pistacho.json"
BUCKET_FROM_NAME = "landing-zone"

# Connect to MinIO
minio_client = Minio(
    "localhost:9000",
    access_key="minio",
    secret_key="minio123",
    secure=False
)
# Get data from MinIO
data = minio_client.get_object(
    BUCKET_FROM_NAME, FILE_NAME).read().decode("utf-8")
json_data = json.loads(data)
display(json_data)
df_parcel = pd.DataFrame(json_data['Exploitation Parcel'])
# {'FarmHolder': {'Name': 'PISTACYL S.L',
#    'Identifier_Code': [{'Type': 'N. Registro de Explotaciones Autonomico',
#      'Code': '0847030810'}],
#    'Address': {'Street': 'LAGARTILLOS,2',
#     'Village': 'POZAL DE GALLINAS',
#     'Province': 'VALLADOLID',
#     'ZIP': '47450'},
#    'Phone': ['637812948'],
#    'Email': ''},
#   'HeatlhAdvisor': [{'HAd_Id': '37',
#     'Name': 'NAVAS GARRIDO, VICTOR',
#     'NIF_Code': '12342065N',
#     'ROPO_Code': '07/47/05604',
#     'Carnet Type': 'Basic'}],
df_farm = pd.json_normalize(json_data['Farm'], record_path=['FarmHolder'], meta=['Farm_Id'])
display(df_farm.describe())
df_parcel.describe()

{'Farm': {'FarmHolder': {'Name': 'PISTACYL S.L',
   'Identifier_Code': [{'Type': 'N. Registro de Explotaciones Autonomico',
     'Code': '0847030810'}],
   'Address': {'Street': 'LAGARTILLOS,2',
    'Village': 'POZAL DE GALLINAS',
    'Province': 'VALLADOLID',
    'ZIP': '47450'},
   'Phone': ['637812948'],
   'Email': ''},
  'HeatlhAdvisor': [{'HAd_Id': '37',
    'Name': 'NAVAS GARRIDO, VICTOR',
    'NIF_Code': '12342065N',
    'ROPO_Code': '07/47/05604',
    'Carnet Type': 'Basic'},
   {'HAd_Id': '67',
    'Name': 'PAREDES GARCIA JUAN RAMON',
    'NIF_Code': '12343911H',
    'ROPO_Code': '07/47/05604',
    'Carnet Type': 'Basic'},
   {'HAd_Id': '57',
    'Name': 'PEREZ HERAS,CARLOS',
    'NIF_Code': '12337236J',
    'ROPO_Code': '07/47/01256',
    'Carnet Type': 'Basic'},
   {'HAd_Id': '73',
    'Name': 'VELASCO PEREZ DAVID',
    'NIF_Code': '71181253W',
    'ROPO_Code': '07/47/05604',
    'Carnet Type': 'Basic'}]},
 'HeatlhApplicator': [{'HAp_Id': '42',
   'Description': 'ATOMIZADOR

TypeError: {'FarmHolder': {'Name': 'PISTACYL S.L', 'Identifier_Code': [{'Type': 'N. Registro de Explotaciones Autonomico', 'Code': '0847030810'}], 'Address': {'Street': 'LAGARTILLOS,2', 'Village': 'POZAL DE GALLINAS', 'Province': 'VALLADOLID', 'ZIP': '47450'}, 'Phone': ['637812948'], 'Email': ''}, 'HeatlhAdvisor': [{'HAd_Id': '37', 'Name': 'NAVAS GARRIDO, VICTOR', 'NIF_Code': '12342065N', 'ROPO_Code': '07/47/05604', 'Carnet Type': 'Basic'}, {'HAd_Id': '67', 'Name': 'PAREDES GARCIA JUAN RAMON', 'NIF_Code': '12343911H', 'ROPO_Code': '07/47/05604', 'Carnet Type': 'Basic'}, {'HAd_Id': '57', 'Name': 'PEREZ HERAS,CARLOS', 'NIF_Code': '12337236J', 'ROPO_Code': '07/47/01256', 'Carnet Type': 'Basic'}, {'HAd_Id': '73', 'Name': 'VELASCO PEREZ DAVID', 'NIF_Code': '71181253W', 'ROPO_Code': '07/47/05604', 'Carnet Type': 'Basic'}]} has non list value {'Name': 'PISTACYL S.L', 'Identifier_Code': [{'Type': 'N. Registro de Explotaciones Autonomico', 'Code': '0847030810'}], 'Address': {'Street': 'LAGARTILLOS,2', 'Village': 'POZAL DE GALLINAS', 'Province': 'VALLADOLID', 'ZIP': '47450'}, 'Phone': ['637812948'], 'Email': ''} for path FarmHolder. Must be list or null.

In [None]:
df_parcel.dtypes

Parcel_Id            object
CAP_Code             object
Use                  object
Official_Area        object
Used_Area            object
Crop                 object
Rainfed_Irrigated    object
Protected_Area?      object
dtype: object

In [None]:
# Show missing values
df_parcel.isnull().sum()

Parcel_Id            0
CAP_Code             0
Use                  0
Official_Area        0
Used_Area            0
Crop                 0
Rainfed_Irrigated    0
Protected_Area?      0
dtype: int64

In [None]:
# Melt CAP_Code and Crop columns
display(df_parcel['CAP_Code'][0])
display(df_parcel['Crop'][0])
# Crop column
df_parcel['Crop_Species'] = df_parcel['Crop'].apply(
    lambda x: x['Species'])
df_parcel['Crop_Variety'] = df_parcel['Crop'].apply(
    lambda x: x['Variety'])
# Drop Crop column
df_parcel.drop(columns=['Crop'], inplace=True)
df_parcel
# CAP_Code column
df_parcel['CAP_Code'] = df_parcel['CAP_Code'].apply(
    lambda x: f'{x["Province"]["Province_Code"]}-{x["Municipality"]["Municipality_Code"]}-{x["Agregate"]}-{x["Zone"]}-{x["Polygon"]}-{x["Parcel"]}-{x["Enclosure"]}')
# Put 0s between tow -
df_parcel['CAP_Code'] = df_parcel['CAP_Code'].apply(
    lambda x: x.replace('---', '-0-0-'))
df_parcel

{'Province': {'Province_Code': '47', 'Province_Name': ''},
 'Municipality': {'Municipality_Code': '96',
  'Municipality_Name': 'MORALEJA DE LAS PANADERAS'},
 'Agregate': '',
 'Zone': '',
 'Polygon': '5',
 'Parcel': '20',
 'Enclosure': '1'}

{'Species': 'PISTACHO', 'Variety': 'KERMAN'}

Unnamed: 0,Parcel_Id,CAP_Code,Use,Official_Area,Used_Area,Rainfed_Irrigated,Protected_Area?,Crop_Species,Crop_Variety
0,29,47-96-0-0-5-20-1,FS,13.63,13.63,Irrigated,N,PISTACHO,KERMAN
1,27,47-124-0-0-5-5170-2,TA,0.73,0.73,Irrigated,N,PISTACHO,KERMAN
2,27,47-124-0-0-4-560-1,FS,2.14,2.14,Irrigated,N,PISTACHO,KERMAN
3,26,47-124-0-0-5-585-2,VI,1.46,1.46,Irrigated,N,PISTACHO,KERMAN
4,26,47-124-0-0-5-5221-1,FS,3.4,3.4,Irrigated,N,PISTACHO,KERMAN
5,26,47-124-0-0-5-5226-1,FS,1.12,1.12,Irrigated,N,PISTACHO,KERMAN
6,28,47-124-0-0-1-70-1,FS,4.35,4.35,Irrigated,N,PISTACHO,KERMAN
7,28,47-124-0-0-1-71-1,FS,0.81,0.81,Irrigated,N,PISTACHO,KERMAN
8,28,47-124-0-0-1-74-1,FS,5.34,5.34,Irrigated,N,PISTACHO,KERMAN
9,28,47-124-0-0-1-77-1,FS,2.17,2.17,Irrigated,N,PISTACHO,KERMAN
