In [1]:
import os
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
# Use cases for which a data folder is expected to exist.
USE_CASES = [
    "andalusia",
    "italy",
    "greece",
    "poland",
]

# Use case analysed by the exploratory cells of this notebook.
use_case = "andalusia"

# Root folder holding the selected use case's microdata and metadata.
BASE_PATH = "./../data/use_case_" + use_case

In [3]:
# Maps each use-case name to the prefix used in its microdata file names
# (files are named "<prefix><year>.csv").
UC_LINK = dict(
    andalusia="AND",
    italy="ITA",
    greece="ELL",
    poland="POL",
)

In [4]:
# Load one microdata CSV per year for the selected use case and stack them,
# tagging every row with the year of the file it came from.
MICRODATA_YEARS = range(2014, 2021)  # 2014..2020 inclusive

yearly_frames = []
for y in MICRODATA_YEARS:
    # The file prefix is looked up in UC_LINK so the cell follows `use_case`
    # instead of always reading the Andalusian files.
    MICRODATA_FILEPATH = os.path.join(BASE_PATH, f"microdata/{UC_LINK[use_case]}{y}.csv")
    yearly_frames.append(pd.read_csv(MICRODATA_FILEPATH).assign(YEAR=y))

# Concatenate once: growing a DataFrame inside the loop re-copies all
# previously loaded rows on every iteration.
microdata_ = pd.concat(yearly_frames)


In [None]:
# Display the stacked multi-year microdata (one row per record, plus the YEAR column added on load).
microdata_

## Relevant variables to perform crop representativeness analysis
| Variable      | Description | Formula |
| :------------ | :------- | :------- |
| I_A_{code}_TA | Total area |  |
| I_A_{code}_IR | Irrigated area |  |
| I_PR_{code}_Q | Production quantity |  |
| I_SA_{code}_Q | Sales quantity |  |
| I_SA_{code}_V | Sales value |  |
| SE025         | Total Utilised Agricultural Area | (B_UO_10_A + B_UT_20_A + B_US_30_A) / 100 |
  

In [10]:
class RentBalanceComputer():
    """
    Build a per-farm table of rented-in / leased-out land figures from the
    microdata file of one use case and one year.

    Typical usage: ``RentBalanceComputer(data_path, use_case, year).main()``.
    """

    # Use-case name -> prefix of the yearly microdata file names.
    UC_LINK = {
        "andalusia": "AND",
        "italy": "ITA",
        "greece": "ELL",
        "poland": "POL"}

    # Crop code given to farms that have no value in any crop-area column,
    # so that such rows are kept instead of aborting the whole computation.
    NO_CROP_CODE = 0

    def __init__(self, data_path, use_case, year):
        """
        Resolve the input file locations and load the crop code table.

        Parameters
        ----------
        data_path : str
            Folder that contains the ``use_case_<name>`` sub-folders.
        use_case : str
            One of the keys of ``UC_LINK``.
        year : int
            Year of the microdata file to process.

        Raises
        ------
        KeyError
            If ``use_case`` is not a key of ``UC_LINK``.
        """
        self.MICRODATA_FILEPATH = os.path.join(
            data_path, f"use_case_{use_case}/microdata/{self.UC_LINK[use_case]}{year}.csv")
        self.METADATA_PATH = os.path.join(data_path, f"use_case_{use_case}/metadata")

        crops_codes = pd.read_csv(os.path.join(self.METADATA_PATH, "crops_codes.csv"))

        # Crop catalogue (code + description) kept alongside the computer.
        self.results = crops_codes[["code", "Description"]].copy()

    def _load_external_files(self):
        """
        Read the microdata CSV resolved in the constructor.

        Returns
        -------
        pandas.DataFrame
            The microdata with a fresh 0..n-1 index. Its shape is printed.
        """
        microdata = pd.read_csv(self.MICRODATA_FILEPATH).reset_index(drop=True)
        print(microdata.shape)
        return microdata

    def _compute_rent_balance(self, microdata):
        """
        Derive rent (tenant side) and lease (landlord side) figures per farm.

        leaser: agent taking a property for rent
        renter: agent letting a property for rent

        TENANT -> rent in
        B_UT_20_A: Rented UAA
        H_FO_5071_V: Rent paid for land

        LANDLORD -> lease out
        I_A_90100_TA: Total area of rented agricultural land
        I_SA_90100_V: Receipts from renting out agricultural land

        Parameters
        ----------
        microdata : pandas.DataFrame
            One row per farm. Must contain the four columns above plus
            ``A_OT_210_C`` and ``A_TY_90_TF``. Every ``I_A_<code>_TA`` column
            is treated as a crop area. The frame is not modified.

        Returns
        -------
        pandas.DataFrame
            Columns, in order: ``Majoritary crop`` (integer code of the crop
            with the largest total area, or ``NO_CROP_CODE`` when the farm has
            no area value at all), ``rent area``, ``rent value``,
            ``lease area``, ``lease value``, ``A_OT_210_C``, ``A_TY_90_TF``,
            one ``IR ratio <code>`` column per crop that has both an irrigated
            and a total area column, ``rent price`` and ``lease price``.
            Areas are divided by 100 (conversion to hectares); prices are
            value / area, or 0 where the area is not positive. Missing values
            are replaced by 0.
        """
        columns = microdata.columns

        # Parse each area column name once: column name -> integer crop code.
        # NOTE(review): I_A_90100_TA (land rented out) also matches this
        # pattern and therefore competes as a "crop" — confirm this is intended.
        code_of = {
            c: int(c.replace("I_A_", "").replace("_TA", ""))
            for c in columns if c.startswith("I_A") and c.endswith("TA")
        }
        areas = microdata[list(code_of)]

        # Majoritary crop. idxmax is only evaluated on rows that hold at least
        # one area value: an all-missing row has no maximum to pick.
        has_area = areas.notna().any(axis=1).to_numpy()
        majoritary = pd.Series(
            self.NO_CROP_CODE, index=microdata.index, dtype="int64", name="Majoritary crop")
        if has_area.any():
            dominant_col = areas.loc[has_area].idxmax(axis=1)
            majoritary.loc[has_area] = dominant_col.map(code_of).astype("int64").to_numpy()

        # Irrigated share of every crop, computed column-wise. The names are
        # rebuilt from the integer code, so both columns are checked again.
        ratios = []
        for code in code_of.values():
            ir_col, ta_col = f"I_A_{code}_IR", f"I_A_{code}_TA"
            if ir_col in columns and ta_col in columns:
                total = microdata[ta_col]
                ratio = (microdata[ir_col] / total).where(total > 0, 0)
                ratios.append(ratio.rename(f"IR ratio {code}"))

        base = microdata[[
            "B_UT_20_A",
            "H_FO_5071_V",
            "I_A_90100_TA",
            "I_SA_90100_V",
            "A_OT_210_C",
            "A_TY_90_TF",
        ]]
        # Assemble in a single concat rather than inserting columns one by one.
        rent_balance = pd.concat([majoritary, base] + ratios, axis=1).fillna(0).rename(columns={
            "B_UT_20_A": "rent area",
            "H_FO_5071_V": "rent value",
            "I_A_90100_TA": "lease area",
            "I_SA_90100_V": "lease value",
        })

        for side in ("rent", "lease"):
            # Convert to hectares
            rent_balance[f"{side} area"] = rent_balance[f"{side} area"] / 100

        for side in ("rent", "lease"):
            # Price per hectare; 0 when there is no area to divide by.
            area = rent_balance[f"{side} area"]
            rent_balance[f"{side} price"] = (rent_balance[f"{side} value"] / area).where(area > 0, 0)

        return rent_balance

    def main(self):
        """
        Load the microdata and return its rent balance table.

        Returns
        -------
        pandas.DataFrame
            Result of ``_compute_rent_balance`` on the loaded microdata.
        """
        # 0. Import external files
        microdata = self._load_external_files()

        # 1. Compute rent balance
        rent_balance = self._compute_rent_balance(microdata)

        return rent_balance
        

In [None]:
# Inputs of the run: data root folder, use case and microdata year.
data_path, use_case, year = "./../data", "andalusia", 2015

# Build the computer for that selection and produce the per-farm rent balance.
rbc = RentBalanceComputer(data_path=data_path, use_case=use_case, year=year)
rent_balance = rbc.main()

In [None]:
# Show the majoritary crop code assigned to each farm.
rent_balance["Majoritary crop"]


In [None]:
# Per majoritary crop: print the number of farms with the mean and standard
# deviation of their rent price, then plot the distribution of the rent prices
# of the farms that actually pay rent (rent price > 0).
for crop in sorted(rent_balance["Majoritary crop"].unique()):
    crop_rows = rent_balance[rent_balance["Majoritary crop"] == crop]
    sel = crop_rows["rent price"]

    # Formatting is used instead of round(): the standard deviation is NaN for
    # a crop with a single farm, and round(NaN) raises ValueError.
    print(f'{crop}: {sel.shape[0]} {sel.mean():.0f} {sel.std():.0f}')

    sel_ = crop_rows[crop_rows["rent price"] > 0]
    if sel_.empty:
        # No farm of this crop pays rent: nothing to plot.
        continue

    fig, ax = plt.subplots()
    sel_["rent price"].hist(ax=ax)
    ax.set(title=f"Rent price - majoritary crop {crop}", xlabel="rent price", ylabel="farms")
    plt.show()
    plt.close(fig)

In [None]:
# One histogram per column of the rent balance, restricted to strictly positive values.
for c in rent_balance.columns:
    is_positive = rent_balance[c] > 0
    plt.title(c)
    plt.hist(rent_balance.loc[is_positive, c], bins=50)
    plt.show()

In [None]:
# Rent area against rent value for the farms whose rent price is below the limit
# (this includes farms with a rent price of 0).
rent_price_limit = 10
below_limit = rent_balance["rent price"] < rent_price_limit
plt.scatter(
    rent_balance.loc[below_limit, "rent area"],
    rent_balance.loc[below_limit, "rent value"],
)

In [None]:
# Distribution of the strictly positive values of every rent balance column.
for c in rent_balance.columns:
    positive_values = rent_balance.loc[rent_balance[c] > 0, c]
    plt.title(c)
    plt.hist(positive_values, bins=50)
    plt.show()