In [1]:
import os
import pandas as pd

In [34]:
# Table I: variable name -> [prefix, suffix] of the source column name.
# NOTE(review): the two parts are presumably joined around a product code
# (prefix + code + suffix); the code doing so is not in this section - confirm.
table_I_dict = dict(
    # --- Defined ---
    # Quantity of Sold Production ([tons])
    quantitySold=["I_SA_", "_Q"],
    # Value of Sales (PLV - [€])
    valueSales=["I_SA_", "_V"],
    # Value of total production (PLT - [€])
    # NOTE(review): "_Q" is the suffix used for quantities above - confirm the unit.
    cropProduction=["I_PR_", "_Q"],
    # Irrigated Area (IA - [ha])
    irrigatedArea=["I_A_", "_IR"],
    # Utilized Agricultural Area (UAA - [ha])
    cultivatedArea=["I_A_", "_TA"],
    # Quantity of Used Production ([tons])
    # NOTE(review): "_V" is the suffix used for values above - confirm the unit.
    quantityUsed=["I_FU_", "_V"],

    # --- Not available -> Generated from other data ---
    # These entries have an empty prefix and a ".<variable name>" suffix.
    organicProductionType=["", ".organicProductionType"],
    # Variable Costs per produced unit (CV - [€/ton])
    variableCostsCrops=["", ".variableCostsCrops"],
    # Land Value (PVF - [€])
    landValue=["", ".landValue"],
    # Unit selling price (PVU - [€/unit])
    sellingPrice=["", ".sellingPrice"],
)


# Table J: variable name -> [prefix, suffix] of the source column name.
# Every entry is defined in the source data; each triple below is
# (variable name, column prefix, column suffix).
table_J_dict = {
    name: [prefix, suffix]
    for name, prefix, suffix in (
        ("numberOfAnimals",              "J_AN_", "_A"),  # Number of Animals [units]
        ("numberOfAnimalsSold",          "J_SA_", "_N"),  # Number of Animals Sold [units]
        ("valueSoldAnimals",             "J_SA_", "_V"),  # Value of Sold Animals ([€])
        ("numberAnimalsRearingBreading", "J_SR_", "_N"),  # Number of Animals for Rearing/Breeding [units]
        ("valueAnimalsRearingBreading",  "J_SR_", "_V"),  # Value of Animals for Rearing/Breeding ([€])
        ("numberAnimalsForSlaughtering", "J_SS_", "_N"),  # Number of Animals for Slaughtering [units]
        ("valueSlaughteredAnimals",      "J_SS_", "_V"),  # Value of Slaughtered Animals ([€])
    )
}


# Table K: variable name -> [prefix, suffix] of the source column name.
#
# Animal products codes
#   261 Cows' milk
#   262 Buffalo's cows' milk
#   311 Sheep's milk
#   319 Other sheep
#   321 Goat's milk
#   Any other code -> VALUE = 0
#
# Milk, wool, eggs and manure share the same prefix/suffix pairs.
# NOTE(review): presumably the product code is what tells them apart - confirm.
table_K_dict = {}

# Milk - defined in the source data
table_K_dict.update({
    "milkTotalProduction": ["K_PR_", "_Q"],  # Number of tons of milk produced [tons]
    "milkProductionSold":  ["K_SA_", "_Q"],  # Number of tons of milk sold [tons]
    "milkTotalSales":      ["K_SA_", "_V"],  # Value of milk sold ([€])
})

# Not available: empty prefix and a ".<variable name>" suffix
table_K_dict.update({
    "milkVariableCosts":    ["", ".milkVariableCosts"],     # Variable Costs per produced unit (CV - [€/ton])
    "dairyCows":            ["", ".dairyCows"],             # Number of dairy cows [UBA - [units]]
    "variableCostsAnimals": ["", ".variableCostsAnimals"],  # Average variable cost per unit of product [€/ton]
})

# Wool
table_K_dict.update({
    "woolTotalProduction": ["K_PR_", "_Q"],  # Wool Production Quantity
    "woolProductionSold":  ["K_SA_", "_Q"],  # Wool Sales Quantity
})

# Eggs
table_K_dict.update({
    "eggsTotalProduction": ["K_PR_", "_Q"],  # Eggs Production Quantity
    "eggsProductionSold":  ["K_SA_", "_Q"],  # Eggs Sales Quantity
    "eggsTotalSales":      ["K_SA_", "_V"],  # Eggs Sales Value
})

# Manure
table_K_dict.update({
    "manureTotalSales": ["K_SA_", "_V"],  # Sales Value
})

# Table M: variable name -> [prefix, suffix] of the source column name.
# Only "value" is mapped. The entries for policyIdentifier, policyDescription
# and isCoupled (each with an empty prefix and a ".<name>" suffix) are disabled:
#   "policyIdentifier":  ["", ".policyIdentifier"],
#   "policyDescription": ["", ".policyDescription"],
#   "isCoupled":         ["", ".isCoupled"],
table_M_dict = dict(
    value=["M_S_", "_FI_BU_V"],
)


# Farm-level fields: variable name -> source column name.
# Entries that map a name to itself have no separate source code in this table.
Farm_dict = dict([
    # Location
    ("lat",      "A_LO_20_DG"),
    ("long",     "A_LO_30_DG"),
    ("altitude", "A_CL_170_C"),

    # Identification and typology
    ("farmCode",                     "A_ID_10_H"),
    ("technicalEconomicOrientation", "A_TY_90_TF"),
    ("weight_ra",                    "A_TY_80_W"),

    # Region names
    ("regionLevel1Name", "regionLevel1Name"),
    ("regionLevel2Name", "regionLevel2Name"),
    ("regionLevel3Name", "regionLevel3Name"),

    # Marked "Not available" in the original.
    # NOTE(review): several entries below do map to coded columns - confirm
    # which of them the "Not available" label was meant to cover.
    ("regionLevel1", "A_LO_40_N2"),    # NUTS2
    ("regionLevel2", "A_LO_40_N"),     # NUTS3
    ("regionLevel3", "regionLevel3"),  # Region

    ("weight_reg",     "weight_reg"),
    ("rentBalanceIn",  "B_UT_20_A"),
    ("rentBalanceOut", "I_A_90100_TA"),
])

# Closing values: variable name -> source column name.
# "machinery" (D_CV_4010_V) is intentionally left out: it duplicates
# "machineryAndEquipment".
ClosingValue_dict = dict(
    # Land
    agriculturalLandArea="SE025",  # Total Area of type Agricultural Land [ha]
    agriculturalLandValue="D_CV_3010_V",  # Total value of Agricultural Land [€]
    agriculturalLandHectaresAdquisition="agriculturalLandHectaresAdquisition",  # Acquired Agricultural Land [ha]
    landImprovements="D_CV_3020_V",  # Investment in Land improvements [€]
    forestLandArea="SE075",  # Total Area of type Forest Land [ha]
    forestLandValue="D_CV_5010_V",  # Total value of Forest Land [€]

    # Buildings, equipment and other assets
    farmBuildingsValue="D_CV_3030_V",  # Value of Buildings in the farm [€]
    machineryAndEquipment="D_CV_4010_V",  # Value of Machinery and Equipment in the farm [€]
    intangibleAssetsTradable="D_CV_7010_V",  # Value of intangible assets that are tradable [€]
    intangibleAssetsNonTradable="D_CV_7020_V",  # Value of intangible assets that are non-tradable [€]
    otherNonCurrentAssets="D_CV_8010_V",  # Value of other non-current assets [€]

    # Standard-results style codes (SE***)
    longAndMediumTermLoans="SE490",  # Total value of established long and medium term loans [€]
    totalCurrentAssets="SE465",  # Total value of current assets [€]
    farmNetIncome="SE420",  # Farm Net Income [€]
    grossFarmIncome="SE410",  # Gross Farm Income [€]
    subsidiesOnInvestments="SE406",  # Total value of subsidies on investments [€]
    vatBalanceOnInvestments="SE408",  # Balance of Taxes on Investments [€]
    totalOutputCropsAndCropProduction="SE135",  # Total value of Agricultural Production [€]
    totalOutputLivestockAndLivestockProduction="SE206",  # Total value of Livestock Production [€]
    otherOutputs="SE256",  # Total value of other outputs [€]
    totalIntermediateConsumption="SE275",  # Total value of intermediate consumption [€]
    taxes="SE390",  # Value of Taxes (>0 received , <0 paid) [€]
    vatBalanceExcludingInvestments="SE395",  # Balance of VAT excluding investments [€]
    fixedAssets="SE441",  # Total value of Fixed Assets [€]
    depreciation="SE360",  # Yearly Depreciation [€]
    totalExternalFactors="SE365",  # Total value of External Factors [€]

    # Rent
    rentBalance="I_SA_90100_V",  # Balance (>0 received , <0 paid) of rent operations [€]
)
        
# Holder / farm-year fields: variable name -> source column name.
# The original kept commented-out two-column alternatives next to the first two
# entries (C_UR_10_G/C_UR_20_G beside holderAge, C_UR_10_B/C_UR_20_B beside
# holderGender); only the single columns below are active.
HolderFarmYearData_dict = {
    "holderAge":    "C_UR_10_B",
    "holderGender": "C_UR_10_G",

    # Not available: these map to their own name
    **{
        name: name
        for name in ("holderSuccessors", "holderSuccessorsAge", "holderFamilyMembers")
    },

    # See the note below this dictionary
    "yearNumber": "YEAR",
}

# Note: although year is a parameter that appears in other agent fields, it has been included here
# because no csv modification is required

# Organic-farming fields: variable name -> source column name.
# They are needed to build the organic version of crops, although the agents
# themselves do not require them.
Organic_variables_dict = dict(
    organicFarming="A_CL_140_C",         # Organic farming
    sectorsOrganicFarming="A_CL_141_C",  # Sectors in organic farming
)



In [36]:
# Flat list of every variable name (dictionary key) declared in the mapping
# dictionaries, in declaration order. Duplicates across dictionaries are kept.
agricore_vars = [
    variable_name
    for mapping in (
        table_I_dict,
        table_J_dict,
        table_K_dict,
        table_M_dict,
        Farm_dict,
        ClosingValue_dict,
        HolderFarmYearData_dict,
        Organic_variables_dict,
    )
    for variable_name in mapping
]

In [196]:
# Load the farm-level CSV.
# The location can be overridden with the AGRICORE_FARM_CSV environment variable,
# so the notebook can run on another machine; when the variable is unset, the
# original relative path is used unchanged.
farm_csv_path = os.environ.get(
    "AGRICORE_FARM_CSV",
    "./../../../../../../mnt/c/users/idener/downloads/df_farm_complete.csv",
)
data = pd.read_csv(farm_csv_path)

# Normalise every column name in one pass: remove the "ABM_" marker, then
# rename "subsidy" to "value" (same order as applying the two replacements
# one after the other).
data = data.rename(columns=lambda c: c.replace("ABM_", "").replace("subsidy", "value"))

# Columns ending in ".variableCosts" are split into animal and crop costs.
# The new names are collected first and applied with a single rename, instead
# of copying the whole frame once per matching column.
animal_cost_columns = ["DAIRY.variableCosts", "OTHER_LIVESTOCK.variableCosts"]
cost_renames = {}
for var in data.columns:
    if var.endswith(".variableCosts"):
        if var in animal_cost_columns:
            # Animal costs
            cost_renames[var] = var.replace("variableCosts", "variableCostsAnimals")
        else:
            # Crop costs
            cost_renames[var] = var.replace("variableCosts", "variableCostsCrops")

data = data.rename(columns=cost_renames)
            


# Source-file column names -> names used in the rest of the notebook.
# The "OTE" -> "A_TY_90_TF" mapping is disabled.
column_aliases = {
    # Region codes and names
    "region_level_1": "regionLevel1",
    "region_level_1_name": "regionLevel1Name",
    "region_level_2": "regionLevel2",
    "region_level_2_name": "regionLevel2Name",
    "region_level_3": "regionLevel3",
    "region_level_3_name": "regionLevel3Name",

    # Year, farm and holder
    "Anno": "yearNumber",
    "Cod_Azienda": "farmCode",
    "Genere": "holderGender",

    # Land areas
    "SAU": "agriculturalLandArea",
    "SAU_Proprietà": "B_UO_10_A",
    "SAU_Affitto": "B_UT_20_A",
    "Superficie_Forestale": "forestLandArea",

    # Other
    "Classe_Altre_Att_Lucrative": "A_CL_140_C",
}

data = data.rename(columns=column_aliases)

# Source columns with no identified counterpart among the mapped variables.
# Each label is listed exactly once ("SAU_Comodato" and "SAU_Irrigata" were
# previously repeated); membership and order of first occurrence are unchanged.
UNKNOWN = [
    "ZSVA",
    "Cod_Zona_Altimetrica_3",
    "Zona_Altimetrica_3",
    "Zona_Altimetrica_5",
    "Cod_Zona_Altimetrica_5",
    "Cod_Reg_Agraria",
    "Regione_Agraria",
    "OTE",
    "ID_PoloOTE",
    "PoloOTE",
    "UDE_INEA",
    "UDE",
    "UDE10",
    "UDE_EU",
    "Gruppo_DE",
    "Produzione_Standard_Aziendale",
    "Cod_Conduzione",
    "Conduzione",
    "Forma_Giuridica",
    "Cod_Forma_Giuridica",

    "Cod_Insediamento",
    "Insediamento",
    "Giovane",
    "Diversificata",
    "Biologica",
    "Num_Corpi_Aziendali",
    "Num_Centri_Aziendali",

    "SAU_Comodato",
    "SAU_Irrigata",
    "UBA_Totale",
    "KW_Macchine",
    "Ore_Totali",

    "UL",
    "ULF",

    "Cod_Dim_Economica_BDR",
    "Dim_Economica_BDR",
    "Unnamed: 63",
    "plantationsValue",
]

# Columns removed from `data` in addition to those listed in UNKNOWN.
# Each label appears once ("Cod_Regione_UE" and "Sigla_Ripartizione" were
# previously repeated). ZSVA, the Zona_Altimetrica_* columns, Cod_Reg_Agraria
# and Regione_Agraria are not listed here because they are part of UNKNOWN.
redundant_columns = [
    # Region identifiers
    "COD_NUTS3",          # COD_NUTS3 = regionLevel2
    "Cod_Provincia",
    "Provincia",          # Provincia = regionLevel2Name
    "Sigla_Prov",
    "Regione",            # Regione = regionLevel1Name
    "Cod_Regione_ISTAT",
    "Cod_Regione_UE",
    "Sigla_Ripartizione",
    "COD_NUTS2",          # COD_NUTS2 = regionLevel1

    "machinery",
    "Superficie_Totale",                 # Duplicated
    "Costo_Opp_Lavoro_Uomo_Orario",      # Hourly human cost
    "Costo_Opp_Lavoro_Macchina_Orario",  # Hourly machine cost

    "Cod_Profilo_Strategico",
    "Profilo_Strategico",
    "Cod_Classe_di_SAU",
    "Classe_di_SAU",
    "Incidenza_Altre_Att_Lucrative",
    "Cod_Polo_BDR",
    "Descrizione_Polo_BDR",
]

# dict.fromkeys keeps the first occurrence of each label in order, so every
# column is requested exactly once even if the two lists overlap.
# As before, a label missing from `data` makes drop() raise KeyError.
data = data.drop(columns=list(dict.fromkeys(redundant_columns + UNKNOWN)))


In [197]:
# Classify the remaining columns of `data`:
#   * variables declared in agricore_vars are collected in `identified`;
#   * anything that is neither declared nor listed in UNKNOWN is printed so it
#     can be reviewed (quoted and comma-terminated, except names ending in
#     "variableCosts", which are printed bare).
# Columns named in `already_handled` are skipped entirely.
already_handled = ["A_TY_90_TF", "B_UO_10_A", "B_UT_20_A", "A_CL_140_C"]
identified = []

for column in data.columns:
    if column in already_handled:
        continue

    # For "<PREFIX>.<variable>" columns only the part after the first dot is
    # compared; other columns are compared as they are.
    variable = column.split(".", 1)[-1]

    if variable in UNKNOWN:
        # Neither reported nor collected.
        continue

    if variable in agricore_vars:
        # Fix: this test previously read a stale loop variable left over from
        # another cell instead of the name derived from the current column.
        identified.append(variable)
    elif variable.endswith("variableCosts"):
        print(variable)
    else:
        print(f'"{column}", ')


In [None]:
# Mean of "SAU_Comodato" over the rows where it is positive.
comodato_mean = None

if "SAU_Comodato" in data.columns:
    if not pd.api.types.is_numeric_dtype(data["SAU_Comodato"]):
        # Text values use a decimal comma; switch to a dot before casting.
        # When the column is already numeric (e.g. the cell is run again) the
        # conversion is skipped, so no blanket try/except is needed and real
        # conversion errors are no longer hidden.
        data["SAU_Comodato"] = (
            data["SAU_Comodato"]
            .astype(str)
            .str.replace(",", ".", regex=False)
            .astype(float)
        )
    comodato_mean = data.loc[data["SAU_Comodato"] > 0, "SAU_Comodato"].mean()
else:
    # "SAU_Comodato" is listed in UNKNOWN, and the UNKNOWN columns are dropped
    # from `data` when it is cleaned, so on a top-to-bottom run it is absent.
    print('Column "SAU_Comodato" is not present in data; nothing to summarise.')

comodato_mean

In [None]:
# Print every declared variable that has no column of the same name in `data`.
for var in agricore_vars:
    # Compare only the part after the first dot when the name contains one
    # (the original slice was missing its closing bracket and did not parse).
    var_ = var.split(".", 1)[-1]
    if var_ not in data.columns:
        print(var)

In [None]:
# Display the two weight columns side by side.
data.loc[:, ["weight_ra", "weight_reg"]]