In [8]:
# Configure the upload parameters here.
# Every value a user is expected to tune lives in this cell; the selections are
# validated immediately so a typo fails here instead of as a NameError in a later cell.
destination_server = 2 # ABM server receiving the upload: 1 for production (.es), 2 for staging (.ai)
this_server = 1 # server this notebook runs on: 1 for production (.es), 2 for staging (.ai)
use_case = "andalusia" # must be one of the keys of rents_dict below
SYNTHETIC_POPULATION_FILE = "Synthetic-Population-andalusia-2014-8-19-18-21.csv"

year = 2014 # check from synthetic population file
population_name_description = "Andalucia UC - 2014 - v5.0"

# Rent price per hectare (€) used for each use case
rents_dict = {
    "andalusia": 250,
    "italy": 589,
    "greece": 452.25,
    "poland": 163,
    }

if use_case not in rents_dict:
    raise ValueError(f"Unknown use_case {use_case!r}; expected one of {sorted(rents_dict)}")
rent_ha_price = rents_dict[use_case]
batch_size = 5000 # for uploading farms in parts of this size

# Base URL of the ABM service for each destination_server value
_BASE_PATHS = {
    1: "https://abm.agricore.idener.es",
    2: "https://abm.agricore.idener.ai",
}
# Folder containing the notebook code and ./data for each this_server value
_CODE_PATHS = {
    1: "/home/jovyan/work/carlos/complete_execution_andalucia",
    2: "/home/jovyan/AGRICORE-synthetic-population-notebook/complete_execution_andalucia",
}

if destination_server not in _BASE_PATHS:
    raise ValueError(f"destination_server must be one of {sorted(_BASE_PATHS)}, got {destination_server!r}")
basePath = _BASE_PATHS[destination_server]

if this_server not in _CODE_PATHS:
    raise ValueError(f"this_server must be one of {sorted(_CODE_PATHS)}, got {this_server!r}")
codePath = _CODE_PATHS[this_server]

In [9]:
from pydantic import BaseModel
from typing import List, Tuple, Dict, NamedTuple
import bisect
import pandas as pd
import random
from enum import Enum
from agricore_sp_models.abm_interface import ABMInterface
from agricore_sp_models.common_models import OrganicProductionType, PolicyJsonDTO, ProductGroupJsonDTO, FADNProductJsonDTO, PolicyGroupRelationJsonDTO, LandRentJsonDTO
from agricore_sp_models.import_models import PopulationJsonDTO, SyntheticPopulationJsonDTO, AgriculturalProductionJsonDTO, LivestockProductionJsonDTO, FarmJsonDTO, HolderFarmYearDataJsonDTO, ClosingValFarmValueDTO, FarmYearSubsidyDTO, GreeningFarmYearDataJsonDTO
from warnings import simplefilter
#simplefilter(action="ignore", category=pd.errors.PerformanceWarning)

class SpecieTypeEnum(Enum):
    """Kind of product an aggregation refers to.

    Used by FADNObjectConverter to decide whether an aggregation is exported
    as an agricultural (VEGETAL) or a livestock (ANIMAL) production.
    """
    VEGETAL = 1
    ANIMAL = 2
    
class PGSelector(NamedTuple):
    """Hashable key identifying a product of the source dataset.

    Used as the key of the product-code -> aggregation dictionary built by
    FADNObjectConverter.getProductGroups.
    """
    # Product code from the mapping CSV (stripped, spaces removed by getProductGroups)
    product_code: str
    # Organic flag of the product group the code belongs to
    organicProductionType: OrganicProductionType
    # Whether the product is a crop or a livestock product
    specieType: SpecieTypeEnum
    
class FADNAndPGNames(NamedTuple):
    """Pair of names a product code maps to: its FADN identifier(s) and its product group."""
    # Value of the 'FADN Included products IDs' column (crops) or 'code' column (livestock)
    fadnName: str
    # Name of the product group (aggregation) the product belongs to
    pgName: str
    
class FADNObjectConverter:
    """Converts a synthetic-population table plus its metadata CSVs into the JSON DTOs uploaded to the ABM.

    Typical use: create an instance, call ``processAll`` with the metadata CSV
    paths, then call ``getSyntheticPopulationJson`` with the population table.
    """
    # Class-level defaults. NOTE: these are mutable objects shared by the class;
    # __init__ assigns per-instance replacements for the dictionaries and lists.
    dictProductCodeToAggregation: Dict[PGSelector,FADNAndPGNames] = {}
    dictAggregationToProductCode: Dict[str, PGSelector] = {}
    productGroups: List[ProductGroupJsonDTO] = []
    policies: List[PolicyJsonDTO] = []
    df_farm: pd.DataFrame = pd.DataFrame()
    
    
    def __init__(self):
        self.dictProductCodeToAggregation: Dict[PGSelector,FADNAndPGNames] = {}
        self.dictAggregationToProductCode: Dict[str, PGSelector] = {}
        self.productGroups: List[ProductGroupJsonDTO] = []
        self.policies : List[PolicyJsonDTO] = []
        df_farm: pd.DataFrame = pd.DataFrame()
    
    def getSyntheticPopulationJson(self, data: pd.DataFrame, year: int, description: str, name: str, representativeness_csv: str, rent_ha_price: float = 250) -> SyntheticPopulationJsonDTO:
        """Build the synthetic-population DTO tree for one year from a farm table.

        The method works in three stages:

        1. Copies ``self.productGroups`` and fills the representativeness fields of
           each FADN product from ``representativeness_csv``.
        2. Creates one ``FarmJsonDTO`` per row of ``data`` with its agricultural and
           livestock productions, holder data, closing values, subsidies and, when
           the farm receives a 'Greening' subsidy, its greening surface (nitrogen
           fixing area capped at 5% of the cultivated area).
        3. Pairs, region by region (``regionLevel3``), farms with a positive
           ``rentBalance`` with farms with a negative one to create
           ``LandRentJsonDTO`` operations. If fewer farms have a positive balance
           than a negative one, the two roles are swapped and the sign of the
           ``rentBalance`` stored in each farm's closing values is inverted.

        Args:
            data: One row per farm. Besides the farm, holder and closing-value
                columns read below, it holds ``<aggregation>.<field>`` columns for
                productions and ``<policyIdentifier>.value`` columns for subsidies.
            year: Year number stored in every yearly DTO; also used to filter the
                representativeness file when it has a ``Year`` column.
            description: Description of the population and of the synthetic population.
            name: Name of the synthetic population.
            representativeness_csv: Path of a CSV with the columns
                ``n_appearances_abs``, ``total_area``, ``sales_value``, ``fadn_code``
                and ``product_group``.
            rent_ha_price: Rent price per hectare, used to convert a rent value
                into a rented area. Must be positive.

        Returns:
            The ``SyntheticPopulationJsonDTO`` wrapping the population, its farms,
            product groups, policies, policy/product-group relations and land rents.

        Raises:
            ValueError: If ``rent_ha_price`` is not positive.
        """
        if rent_ha_price <= 0:
            raise ValueError(f"rent_ha_price must be a positive price per hectare, got {rent_ha_price}")

        farms:List[FarmJsonDTO] = []

        # Processing the Representativeness csv file to improve the productGroup information
        df_representativeness = pd.read_csv(representativeness_csv)
        df_representativeness['Ocurrence'] = df_representativeness['n_appearances_abs'].astype(float)
        df_representativeness['Area'] = df_representativeness['total_area'].astype(float)
        df_representativeness['Value'] = df_representativeness['sales_value'].astype(float)
        df_representativeness['fadn_code'] = df_representativeness['fadn_code'].astype(str)
        if 'Year' in df_representativeness.columns:
            df_representativeness = df_representativeness[df_representativeness['Year'] == year]
        product_groups: List[ProductGroupJsonDTO] = []
        for pg in self.productGroups:
            # Work on copies so the metadata stored in the converter is left untouched
            new_pg = pg.copy()
            new_pg.fadnProducts = []
            for fadn in pg.fadnProducts:
                this_fadn = fadn.copy()
                representativenessOcurrence: float = 0
                representativenessArea: float = 0
                representativenessValue: float = 0
                entries = df_representativeness[(df_representativeness['fadn_code'] == fadn.fadnIdentifier) & (df_representativeness['product_group'] == pg.name)]
                if len(entries) > 1:
                    print("More than one entry for the same FADN/ProductGroup combination. This should not happen. Ignoring it")
                elif len(entries) == 1:
                    representativenessOcurrence = entries['Ocurrence'].values[0]
                    representativenessArea = entries['Area'].values[0]
                    representativenessValue = entries['Value'].values[0]
                else:
                    print(f"No entry for {fadn.fadnIdentifier} in {pg.name}. Remember this can happen when a FADN product is linked to both organic and not organic products and only have production in one of them")
                this_fadn.representativenessOcurrence = representativenessOcurrence
                this_fadn.representativenessArea = representativenessArea
                this_fadn.representativenessValue = representativenessValue
                new_pg.fadnProducts.append(this_fadn)
            product_groups.append(new_pg)

        # Used to store the total UAA of each farm
        totalUtilizedArea: Dict[str, float] = {}

        # Identifiers (not policy objects) of the policies labelled as greening.
        # They are compared against FarmYearSubsidyDTO.policyIdentifier below.
        greening_policy_ids = {x.policyIdentifier for x in self.policies if x.modelLabel == 'Greening'}

        for index, row in data.iterrows():
            farm_code = row['farmCode']
            # Register every farm, even without crops, so the rent matching below
            # never fails with a KeyError for a farm that has no cultivated area.
            totalUtilizedArea.setdefault(farm_code, 0)

            agriculturalProductions: List[AgriculturalProductionJsonDTO] = []
            livestockProductions: List[LivestockProductionJsonDTO] = []
            for aggregationName, aggregation_sel in self.dictAggregationToProductCode.items():
                if aggregation_sel.specieType == SpecieTypeEnum.VEGETAL:
                    if f'{aggregationName}.cultivatedArea' in data.columns and (row[f'{aggregationName}.cropProduction'] > 0 or row[f'{aggregationName}.cultivatedArea'] >0):
                        agrProduction: AgriculturalProductionJsonDTO =AgriculturalProductionJsonDTO (
                            yearNumber = year,
                            productName =aggregationName,
                            organicProductionType= aggregation_sel.organicProductionType,
                            cultivatedArea= row[f'{aggregationName}.cultivatedArea'],
                            irrigatedArea= row[f'{aggregationName}.irrigatedArea'],
                            cropProduction= row[f'{aggregationName}.cropProduction'],
                            quantitySold= row[f'{aggregationName}.quantitySold'],
                            quantityUsed = row[f'{aggregationName}.quantityUsed'],
                            valueSales= row[f'{aggregationName}.valueSales'],
                            variableCosts= row[f'{aggregationName}.variableCostsCrops'],
                            landValue= row[f'{aggregationName}.landValue'],
                            sellingPrice = row[f'{aggregationName}.sellingPrice'],
                        )
                        totalUtilizedArea[farm_code] += agrProduction.cultivatedArea

                        agriculturalProductions.append(agrProduction)
                elif aggregation_sel.specieType == SpecieTypeEnum.ANIMAL:
                    if f'{aggregationName}.numberOfAnimals' in data.columns and (row[f'{aggregationName}.numberOfAnimals'] > 0 or row[f'{aggregationName}.valueSoldAnimals'] > 0):
                        lstockProduction: LivestockProductionJsonDTO = LivestockProductionJsonDTO(
                            yearNumber= year,
                            productName = aggregationName,
                            numberOfAnimals = row[f'{aggregationName}.numberOfAnimals'],
                            dairyCows = row[f'{aggregationName}.dairyCows'],
                            numberOfAnimalsSold = row[f'{aggregationName}.numberOfAnimalsSold'],
                            valueSoldAnimals = row[f'{aggregationName}.valueSoldAnimals'],
                            numberAnimalsForSlaughtering = row[f'{aggregationName}.numberAnimalsForSlaughtering'],
                            valueSlaughteredAnimals = row[f'{aggregationName}.valueSlaughteredAnimals'],
                            numberAnimalsRearingBreading = row[f'{aggregationName}.numberAnimalsRearingBreading'],
                            valueAnimalsRearingBreading = row[f'{aggregationName}.valueAnimalsRearingBreading'],
                            milkTotalProduction = row[f'{aggregationName}.milkTotalProduction'],
                            milkProductionSold = row[f'{aggregationName}.milkProductionSold'],
                            milkTotalSales = row[f'{aggregationName}.milkTotalSales'],
                            milkVariableCosts = row[f'{aggregationName}.milkVariableCosts'],
                            woolTotalProduction = row[f'{aggregationName}.woolTotalProduction'],
                            woolProductionSold = row[f'{aggregationName}.woolProductionSold'],
                            eggsTotalSales = row[f'{aggregationName}.eggsTotalSales'],
                            eggsTotalProduction = row[f'{aggregationName}.eggsTotalProduction'],
                            eggsProductionSold = row[f'{aggregationName}.eggsProductionSold'],
                            manureTotalSales = row[f'{aggregationName}.manureTotalSales'],
                            variableCosts = row[f'{aggregationName}.variableCostsAnimals'],
                            # This variable should not exist for livestock
                            sellingPrice=0
                        )
                        livestockProductions.append(lstockProduction)

            holderFarmYearsData: List[HolderFarmYearDataJsonDTO] = []
            hfyd:HolderFarmYearDataJsonDTO = HolderFarmYearDataJsonDTO (
                yearNumber= year,
                holderAge = row['holderAge'],
                holderFamilyMembers = row['holderFamilyMembers'],
                holderSuccessorsAge = round(float(row['holderSuccessorsAge'])),
                holderGender = row['holderGender'],
                holderSuccessors = row['holderSuccessors'],
            )
            holderFarmYearsData.append(hfyd)

            closingValFarmValues: List[ClosingValFarmValueDTO] = []
            cvfv: ClosingValFarmValueDTO = ClosingValFarmValueDTO(
                agriculturalLandArea = row['agriculturalLandArea'],
                agriculturalLandValue = row['agriculturalLandValue'],
                agriculturalLandHectaresAdquisition = row['agriculturalLandHectaresAdquisition'],
                landImprovements = row['landImprovements'],
                forestLandArea = row['forestLandArea'],
                forestLandValue = row['forestLandValue'],
                farmBuildingsValue = row['farmBuildingsValue'],
                machineryAndEquipment = row['machineryAndEquipment'],
                intangibleAssetsTradable = row['intangibleAssetsTradable'],
                intangibleAssetsNonTradable = row['intangibleAssetsNonTradable'],
                otherNonCurrentAssets = row['otherNonCurrentAssets'],
                longAndMediumTermLoans = row['longAndMediumTermLoans'],
                totalCurrentAssets = row['totalCurrentAssets'],
                farmNetIncome = row['farmNetIncome'],
                grossFarmIncome = row['grossFarmIncome'],
                subsidiesOnInvestments = row['subsidiesOnInvestments'],
                vatBalanceOnInvestments = row['vatBalanceOnInvestments'],
                totalOutputCropsAndCropProduction = row['totalOutputCropsAndCropProduction'],
                totalOutputLivestockAndLivestockProduction = row['totalOutputLivestockAndLivestockProduction'],
                otherOutputs = row['otherOutputs'],
                totalIntermediateConsumption = row['totalIntermediateConsumption'],
                taxes = row['taxes'],
                vatBalanceExcludingInvestments = row['vatBalanceExcludingInvestments'],
                fixedAssets = row['fixedAssets'],
                depreciation = row['depreciation'],
                totalExternalFactors = row['totalExternalFactors'],
                # 'machinery' is optional in the input table
                machinery = row['machinery'] if 'machinery' in row else 0,
                yearNumber = year,
                rentBalance = row['rentBalance']
            )
            closingValFarmValues.append(cvfv)

            # One subsidy entry per policy with a non-zero '<policyIdentifier>.value' column
            farmYearSubsidies: List[FarmYearSubsidyDTO] = []
            policyValues:Dict[str,float] = {}
            for policy in self.policies:
                policy_column = f'{policy.policyIdentifier}.value'
                if policy_column in data.columns and row[policy_column] != 0:
                    policyValues[policy.policyIdentifier] = policyValues.get(policy.policyIdentifier, 0) + row[policy_column]
            for key, value in policyValues.items():
                subsidy = FarmYearSubsidyDTO(
                            yearNumber = year,
                            value = value,
                            policyIdentifier = key
                        )
                farmYearSubsidies.append(subsidy)

            landTransactions = []

            # Include greening Area accounting that greening area should be less or equal to 5% of the total land and that only
            # area used for crops with the Nitrogen fixing flag should be accounted for it. This is only done if the
            # farm receives a subsidy for greening
            greeningFarmYearData:List[GreeningFarmYearDataJsonDTO] = []

            greening_subsidies = [x for x in farmYearSubsidies if x.policyIdentifier in greening_policy_ids]
            greening_subsidies_income = sum([x.value for x in greening_subsidies])

            if greening_subsidies_income > 0:
                farm_total_area = sum([x.cultivatedArea for x in agriculturalProductions])
                nitrogen_fixing_products = [x.name for x in self.productGroups if 'FixingNitrogen' in x.modelSpecificCategories]
                nitrogen_fixing_area = sum([x.cultivatedArea for x in agriculturalProductions if x.productName in nitrogen_fixing_products])
                greeningSurface = min(nitrogen_fixing_area, farm_total_area*0.05)
                if greeningSurface > 0:
                    greeningFarmYearData = [GreeningFarmYearDataJsonDTO(yearNumber=year, greeningSurface=greeningSurface)]

            this_farm: FarmJsonDTO = FarmJsonDTO (
                farmCode = farm_code,
                lat = row['lat'],
                long = row['long'],
                altitude= row['altitude'],
                regionLevel1= row['regionLevel1'],
                regionLevel1Name= row['regionLevel1Name'],
                regionLevel2= row['regionLevel2'],
                regionLevel2Name= row['regionLevel2Name'],
                regionLevel3= row['regionLevel3'],
                regionLevel3Name= row['regionLevel3Name'],
                technicalEconomicOrientation= row['technicalEconomicOrientation'],
                landTransactions = landTransactions,
                farmYearSubsidies = farmYearSubsidies,
                closingValFarmValues = closingValFarmValues,
                holderFarmYearsData =holderFarmYearsData,
                livestockProductions =livestockProductions,
                agriculturalProductions = agriculturalProductions,
                greeningFarmYearData = greeningFarmYearData,
            )
            farms.append(this_farm)

        rent_operations: List[LandRentJsonDTO] = []

        rentPrice = rent_ha_price # (86%) 177 non-irrigated to (14%) 720 if irrigated -> ~250 €

        # Single pass over the table: group (farmCode, amount) pairs by region.
        # Amounts are stored as positive numbers on both sides.
        rent_in_by_region: Dict[object, List[Tuple[str, float]]] = {}
        rent_out_by_region: Dict[object, List[Tuple[str, float]]] = {}
        for code, region, balance in zip(data['farmCode'], data['regionLevel3'], data['rentBalance']):
            if balance > 0.0:
                rent_in_by_region.setdefault(region, []).append((code, balance))
            elif balance < 0.0:
                rent_out_by_region.setdefault(region, []).append((code, (-1)*balance))

        total_rent_in = sum(len(v) for v in rent_in_by_region.values())
        total_rent_out = sum(len(v) for v in rent_out_by_region.values())
        change_sign_rent_balance_needed = False
        if total_rent_in < total_rent_out:
            print("Detected strange balance of rent in/rent out, swapping them")
            change_sign_rent_balance_needed = True

        # rent Processing per region
        regions = list(data['regionLevel3'].unique())
        left_amount = 0
        for region in regions:
            print(f'Region processing for region {region}')
            rent_in = rent_in_by_region.get(region, [])
            rent_out = rent_out_by_region.get(region, [])

            if change_sign_rent_balance_needed:
                rent_in, rent_out = rent_out, rent_in
            print(f"rent in: {len(rent_in)} farms; rent out: {len(rent_out)} farms")

            # Ascending order, so pop() always returns the largest pending amount
            rent_in_sorted = sorted(rent_in, key=lambda a: a[1])
            rent_out_sorted = sorted(rent_out, key = lambda a: a[1])
            while rent_in_sorted and rent_out_sorted:
                entry_in = rent_in_sorted.pop()
                entry_out = rent_out_sorted.pop()
                available_land_in_farm = totalUtilizedArea.get(entry_in[0], 0)
                rent = min(entry_in[1], entry_out[1])
                # we cap the maximum rented in area of the farm to 95% of the total utilized area of the destination farm, and we consider that the rent_price(/ha) of such operation (if it is cap), is increased
                rent_area = min(rent/rentPrice, available_land_in_farm*0.95)
                totalUtilizedArea[entry_in[0]] = available_land_in_farm - rent_area
                entry_in = (entry_in[0], entry_in[1] - rent)
                entry_out = (entry_out[0], entry_out[1] - rent)
                rent_operations.append(LandRentJsonDTO(yearNumber=year, originFarmCode=entry_out[0], destinationFarmCode=entry_in[0], rentArea=rent_area, rentValue = rent))
                # Whichever side still has a pending (positive) amount goes back
                # into its queue; the other side was fully matched and is dropped.
                if entry_in[1] > 0:
                    bisect.insort(rent_in_sorted, entry_in, key = lambda a: a[1])
                if entry_out[1] > 0:
                    bisect.insort(rent_out_sorted, entry_out, key = lambda a: a[1])
            left_amount = left_amount + sum([x[1] for x in rent_in_sorted if x[1] > 0.0]) + sum([x[1] for x in rent_out_sorted if x[1] > 0.0])
        print(f"not assigned rents for {left_amount} € -> {left_amount/rentPrice} ha")

        if change_sign_rent_balance_needed:
            # The in/out roles were swapped above, so invert the sign of the stored balance
            for farm in farms:
                for cv in farm.closingValFarmValues:
                    cv.rentBalance = (-1) * cv.rentBalance

        population = PopulationJsonDTO(
            description= description,
            farms = farms,
            productGroups = product_groups,
            policies= self.policies,
            # Only set by processAll; fall back to no relations on a fresh instance
            policyGroupRelations= getattr(self, 'policyProductGroupRelations', []),
            landTransactions=[],
            landRents=rent_operations
            )

        synthetic_population = SyntheticPopulationJsonDTO(
            description = description,
            name = name,
            yearNumber = year,
            population = population)

        return synthetic_population
        
    def processAll(self, product_groups_csv:str, product_mapping_csv: str, policies_csv: str, animal_codes_csv: str) -> None:
        (policies, productGroups, policyProductGroupRelations, dictProductCodeToAggregation) = self.getPoliciesAndProductGroups(product_groups_csv, product_mapping_csv, policies_csv, animal_codes_csv)
        self.policies = policies
        self.productGroups = productGroups
        self.dictProductCodeToAggregation = dictProductCodeToAggregation
        self.policyProductGroupRelations = policyProductGroupRelations
        self.dictAggregationToProductCode = {value.pgName: key for key, value in dictProductCodeToAggregation.items()}
        
    @staticmethod
    def getProductGroups(product_groups_csv:str, product_mapping_csv: str, animal_codes_csv: str) -> Tuple[Dict[PGSelector,FADNAndPGNames], List[ProductGroupJsonDTO]]:
        """Build the product groups and the product-code lookup from the metadata CSVs.

        Crop groups come from ``product_mapping_csv`` (columns 'product_code',
        'FADN Included products IDs' and 'CUSTOM GROUP (EN)'); livestock groups
        come from ``animal_codes_csv`` (columns 'code' and 'aggregation'). Both
        are joined with ``product_groups_csv`` on its 'PRODUCT GROUP' column,
        which also provides the 'Organic' and 'Categories' columns.

        Args:
            product_groups_csv: Path of the product-group definitions CSV.
            product_mapping_csv: Path of the crop product -> group mapping CSV.
            animal_codes_csv: Path of the livestock code -> aggregation CSV.

        Returns:
            A tuple ``(productGroupsDict, productGroups)``: the dictionary maps a
            ``PGSelector`` to the FADN identifier(s) and group name of that
            product; the list holds one ``ProductGroupJsonDTO`` per group, crops
            first (productType 0) and livestock afterwards (productType 1).
        """
        productGroupsDict: Dict[PGSelector,FADNAndPGNames] = {}
        productGroups: List[ProductGroupJsonDTO] = []
        # Description,FADN Included products,FADN Included products IDs,CUSTOM GROUP (EN)
        # Agricultural products
        df_link = pd.read_csv(product_mapping_csv)
        # Missing codes become empty strings so the rows can be filtered out below
        df_link['product_code'] = df_link['product_code'].fillna('')
        df_link['FADN Included products IDs'] = df_link['FADN Included products IDs'].fillna('')
        df_link = df_link.astype({"FADN Included products IDs": str, "product_code" : str})
        
        df_categories = pd.read_csv(product_groups_csv)
        # Lookup entries: only mapping rows that have a product code and whose group
        # exists in the product-group definitions (inner merge)
        df_link_dict = df_link[df_link['product_code'] != '']
        df_link_dict = pd.merge(df_link_dict, df_categories, left_on='CUSTOM GROUP (EN)', right_on='PRODUCT GROUP')
        for index, row in df_link_dict.iterrows():
            # The product code is stored stripped and without inner spaces
            productGroupsDict[PGSelector(row['product_code'].strip().replace(' ' , ''), row['Organic'], SpecieTypeEnum.VEGETAL)] = FADNAndPGNames(row['FADN Included products IDs'], row['CUSTOM GROUP (EN)'])
        
        df_link2 = df_link[df_link['FADN Included products IDs'] != '']
        df_link3 = df_link[df_link['product_code'] != '']

        # Collapse to one row per group, joining its FADN ids / product codes with ';'
        df_link2=df_link2.groupby(['CUSTOM GROUP (EN)'], group_keys=False)['FADN Included products IDs'].apply(lambda x: ';'.join(x)).reset_index()
        df_link3=df_link3.groupby(['CUSTOM GROUP (EN)'], group_keys=False)['product_code'].apply(lambda x: ';'.join(x)).reset_index()
        # Outer merge: a group may have FADN ids without product codes or vice versa
        df_link4=pd.merge(df_link2, df_link3, how='outer', on=['CUSTOM GROUP (EN)'])
        df_categories = pd.read_csv(product_groups_csv)
        df_link5=pd.merge(df_link4, df_categories, left_on='CUSTOM GROUP (EN)', right_on='PRODUCT GROUP')

        # The same (initially empty) list object is passed as `policies` to every group
        policies: List[PolicyJsonDTO] = []

        for index,row in df_link5.iterrows():
            # 'Categories' is a list written as text, e.g. "['A', 'B']"; strip the
            # quotes and brackets and split it into its items
            modelSpecificCategories = row['Categories'].replace("'",'').replace('[', '').replace(']','').split(', ')
            # After the outer merge the FADN ids may be missing (not a str): use no ids then
            fadnCodeList = [ x.strip() for x in row["FADN Included products IDs"].strip().split(';')] if type(row["FADN Included products IDs"]) is str else []
            productGroup = ProductGroupJsonDTO(
                name = row["CUSTOM GROUP (EN)"], 
                productType = 0, 
                originalNameDatasource = "FADN",
                organic = row['Organic'],
                modelSpecificCategories= modelSpecificCategories,
                productsIncludedInOriginalDataset = row['product_code'],
                policies= policies,
                fadnProducts= [FADNProductJsonDTO (
                    fadnIdentifier= fadnEntry,
                    description='',
                    productType=0,
                    arable=False
                    ) for fadnEntry in fadnCodeList]
                )
            productGroups.append(productGroup)
                
        del df_link
        del df_link2
        del df_link3
        del df_link4
        del df_categories
        del df_link_dict
        
        # Livestock products
        df_link = pd.read_csv(animal_codes_csv)
        # For livestock both the product code and the FADN id are taken from 'code'
        df_link['product_code'] = df_link['code'].fillna('')
        df_link['FADN Included products IDs'] = df_link['code'].fillna('')
        df_link = df_link.astype({"FADN Included products IDs": str, "product_code" : str})
        
        df_categories = pd.read_csv(product_groups_csv)
        #df_link_dict = df_link[df_link['FADN Included products IDs'] != '']
        df_link_dict = df_link[df_link['product_code'] != '']
        # In the dataframe df_link_dict, the column 'aggregation' holds the product-group
        # name that is matched against 'PRODUCT GROUP'
        df_link_dict = pd.merge(df_link_dict, df_categories, left_on='aggregation', right_on='PRODUCT GROUP')
        for index, row in df_link_dict.iterrows():
            productGroupsDict[PGSelector(row['product_code'].strip().replace(' ' , ''), OrganicProductionType.Undetermined, SpecieTypeEnum.ANIMAL)] = FADNAndPGNames(row['FADN Included products IDs'], row['aggregation'] )
        
        df_link2 = df_link[df_link['FADN Included products IDs'] != '']
        df_link3 = df_link[df_link['product_code'] != '']
        # Same per-group collapse as for crops, keyed by 'aggregation'
        df_link2=df_link2.groupby(['aggregation'], group_keys=False)['FADN Included products IDs'].apply(lambda x: ';'.join(x)).reset_index()
        df_link3=df_link3.groupby(['aggregation'], group_keys=False)['product_code'].apply(lambda x: ';'.join(x)).reset_index()
        df_link4=pd.merge(df_link2, df_link3, how='outer', on=['aggregation'])
        df_categories = pd.read_csv(product_groups_csv)
        df_link5=pd.merge(df_link4, df_categories, left_on='aggregation', right_on='PRODUCT GROUP')
        for index,row in df_link5.iterrows():
            modelSpecificCategories = row['Categories'].replace("'",'').replace('[', '').replace(']','').split(', ')
            fadnCodeList = [ x.strip() for x in row["FADN Included products IDs"].strip().split(';')] if type(row["FADN Included products IDs"]) is str else []

            productGroup = ProductGroupJsonDTO(
                name = row["aggregation"], 
                productType = 1, 
                modelSpecificCategories= modelSpecificCategories,
                organic = OrganicProductionType.Undetermined,
                originalNameDatasource = "RICA", 
                productsIncludedInOriginalDataset = row['product_code'],
                policies= policies,
                fadnProducts= [FADNProductJsonDTO (
                    fadnIdentifier= fadnEntry,
                    description='',
                    productType=1,
                    arable=False
                    ) for fadnEntry in fadnCodeList]
                )
            productGroups.append(productGroup)
                
        del df_link
        del df_link2
        del df_link3
        del df_link4
        del df_link_dict
        del df_categories
        
        # Remove repeated FADN codes for the same product group
        # (keeps the first occurrence of each identifier, preserving order)
        for pg in productGroups:
            uniqueList = []
            uniqueListNames = []
            for fadnProduct in pg.fadnProducts:
                if fadnProduct.fadnIdentifier not in uniqueListNames:
                    uniqueList.append(fadnProduct)
                    uniqueListNames.append(fadnProduct.fadnIdentifier)
            pg.fadnProducts = uniqueList
        
        return (productGroupsDict,productGroups)
    
    @staticmethod
    def getPoliciesAndProductGroups(product_groups_csv:str, product_mapping_csv: str, policies_csv: str, animal_codes_csv: str) -> Tuple[List[PolicyJsonDTO], List[ProductGroupJsonDTO], List[PolicyGroupRelationJsonDTO],Dict[PGSelector,FADNAndPGNames]]:
        """Parse the policies CSV and combine it with the product groups.

        The product groups and the product-code lookup come from
        ``getProductGroups``. Each row of ``policies_csv`` (columns 'Subsidy_Code',
        'Description', 'Coupled', 'Economic_compensation', 'StartYear', 'EndYear',
        'Label' and 'Aggregated_product') yields a policy; only the first policy
        seen for each identifier is kept. A row naming an existing product group
        in 'Aggregated_product' also yields a policy/product-group relation that
        carries the row's economic compensation, and the economic compensation of
        the policy object built for that row is set to 0.

        Returns:
            ``(policies, productGroups, policyProductGroupRelations, productGroupsDict)``.
        """
        # Get productgroups:
        productGroupsDict, productGroups = FADNObjectConverter.getProductGroups(product_groups_csv, product_mapping_csv, animal_codes_csv)

        # Get the df of policies with their corresponding PGs
        df_policies = pd.read_csv(policies_csv)
        df_policies['Aggregated_product'] = df_policies['Aggregated_product'].fillna('')

        policies: List[PolicyJsonDTO] = []
        registered_identifiers = []
        policyProductGroupRelations: List[PolicyGroupRelationJsonDTO] = []

        # Create the list of policies and fix the policy list in products
        for _, policy_row in df_policies.iterrows():
            policy = PolicyJsonDTO(
                policyIdentifier = policy_row['Subsidy_Code'],
                policyDescription = policy_row['Description'],
                isCoupled = bool(policy_row['Coupled'] == 'Y'),
                populationId = 0,
                economicCompensation = policy_row['Economic_compensation'],
                endYearNumber=policy_row['EndYear'],
                modelLabel=policy_row['Label'],
                startYearNumber=policy_row['StartYear']
            )
            # Keep only the first policy created for each identifier
            if policy.policyIdentifier not in registered_identifiers:
                registered_identifiers.append(policy.policyIdentifier)
                policies.append(policy)

            group_name = policy_row['Aggregated_product']
            if group_name == '':
                continue

            # The compensation is carried by the relation instead of the policy
            policy.economicCompensation = 0
            if any(pg.name == group_name for pg in productGroups):
                policyProductGroupRelations.append(
                    PolicyGroupRelationJsonDTO(
                        populationId = 0,
                        policyIdentifier = policy_row['Subsidy_Code'],
                        productGroupName = group_name,
                        economicCompensation = policy_row['Economic_compensation'],
                    )
                )

        return (policies, productGroups, policyProductGroupRelations, productGroupsDict)

    

In [10]:
import requests
import json
import urllib.parse

def importSyntheticPopulation( population: SyntheticPopulationJsonDTO, basePath:str = "https://abm.agricore.idener.es") -> Tuple[int, int]:
    """POST a synthetic population to ``<basePath>/synthetic/import``.

    Returns the ``(id, populationId)`` pair read from the JSON response when the
    server answers 201. On any other status the error is printed and ``(0, 0)``
    is returned.
    """
    endpoint = urllib.parse.urljoin(basePath, "/synthetic/import")
    response = requests.post(
        endpoint,
        json.dumps(population.dict()),
        headers={'Accept': 'text/plain', 'Content-Type': 'application/json'},
    )

    if response.status_code != 201:
        print("Request Error")
        print ("{0}-{1}-{2}".format(response.status_code,response.content, response.url))
        return 0,0

    print("Request successful")
    created = response.json()
    return created["id"], created["populationId"]
    
def sendPartialFarmData( population_id: int, farms: List[FarmJsonDTO], basePath:str = "https://abm.agricore.idener.es") -> bool:
    """POST a batch of farms to ``<basePath>/population/<population_id>/addPartialData``.

    Returns True when the server answers 200; otherwise prints the error and
    returns False.
    """
    endpoint = urllib.parse.urljoin(basePath, f"/population/{population_id}/addPartialData")
    farm_payload = [farm.dict() for farm in farms]
    response = requests.post(
        endpoint,
        json=farm_payload,
        headers={'Accept': 'text/plain', 'Content-Type': 'application/json'},
    )

    if response.status_code != 200:
        print("Request Error")
        print ("{0}-{1}-{2}".format(response.status_code,response.content, response.url))
        return False

    print("Request successful")
    return True

# Split the population into chunks of size chunkSize
def splitPopulationJsonDto(population: SyntheticPopulationJsonDTO, chunkSize: int) -> Tuple[SyntheticPopulationJsonDTO, List[List[FarmJsonDTO]]]:
    """Split the farms of a population into batches of at most ``chunkSize`` farms.

    The population is modified in place: it keeps only the first batch (or no
    farms when it had none). The remaining batches are returned, in order, as
    the second element of the result.
    """
    all_farms = population.population.farms
    population.population.farms = []

    batches = [all_farms[start:start + chunkSize] for start in range(0, len(all_farms), chunkSize)]
    if batches:
        population.population.farms = batches[0]
    return population, batches[1:]

In [11]:
import pandas as pd
import os



# Work from the project folder so the relative ./data paths below resolve
os.chdir(codePath)

rica = FADNObjectConverter()

print("Initialising the Database")
abmInterface = ABMInterface()
abmInterface.setBaseURL(basePath)
abmInterface.cleanDicts()
abmInterface.loadFADNProducts()

# Use case main folder
use_case_folder = f'./data/use_case_{use_case}/'

# Metadata and results files of the use case
product_groups_csv = os.path.join(use_case_folder, 'metadata/Product_Groups.csv')
product_mapping_csv = os.path.join(use_case_folder, 'metadata/Product_Mapping.csv')
policies_csv= os.path.join(use_case_folder, 'metadata/subsidies.csv')
animal_codes_csv = os.path.join(use_case_folder, 'metadata/animal_codes.csv')
representativeness_csv = os.path.join(use_case_folder, f'results/FADN_Representativeness_{year}.csv')

# Single file import; farm codes are handled as text and regions as integers
synthetic_population = pd.read_csv(
    os.path.join(use_case_folder, "synthetic_population/", SYNTHETIC_POPULATION_FILE)
).astype({"farmCode": "str", "regionLevel3": "int"})

# Load product groups, policies and their relations into the converter
rica.processAll(product_groups_csv, product_mapping_csv, policies_csv, animal_codes_csv)



In [12]:
# Replace the farm codes with a random permutation of 0..entries-1, stored as strings.
# NOTE(review): no random seed is set, so the codes differ on every run.
import numpy as np

entries = len(synthetic_population)
print(f"The population has {entries} farms")
ids = [str(code) for code in np.random.permutation(entries)]
synthetic_population["farmCode"] = ids

In [13]:
# Build the population DTO from the table loaded above
sp = rica.getSyntheticPopulationJson(
    synthetic_population,
    year = year,
    description = population_name_description,
    name = population_name_description,
    representativeness_csv = representativeness_csv,
    rent_ha_price = rent_ha_price)

# The first batch of farms travels with the population itself; the remaining
# batches are sent one by one once the population exists on the server
sp, list_partial = splitPopulationJsonDto(sp, batch_size)
sp_id, pop_id = importSyntheticPopulation(sp, basePath = basePath)
if sp_id == 0:
    print("Failed to save the synthetic population")
else:
    print(f"synthetic population created with id {sp_id}")
    for i, partial_farms in enumerate(list_partial):
        result = sendPartialFarmData(pop_id, partial_farms, basePath = basePath)
        print("Partial data sent: {0}".format(result))

In [14]:
# Disabled one-off maintenance snippet (never runs because of `if False`).
# If enabled, it overwrites the CSV at sp_path IN PLACE after setting
# 'technicalEconomicOrientation' to 0 and 'altitude' to 1 for every row.
if False:
    #sp_path = "./data/use_case_greece/synthetic_population/Synthetic-Population-greece-2014-6-25-23-19.csv"
    sp_path = "./data/use_case_greece/synthetic_population/Synthetic-Population-greece-2018-6-25-23-51.csv"

    sp_2018_ = pd.read_csv(sp_path)
    # Constant replacement values for the two columns
    sp_2018_["technicalEconomicOrientation"] = sp_2018_["technicalEconomicOrientation"].apply(lambda x: 0)
    sp_2018_["altitude"] = sp_2018_["altitude"].apply(lambda x: 1)

    # Writes back to the same path the data was read from
    sp_2018_.to_csv(sp_path, index=False)
