In [None]:
import pandas as pd
import os
os.chdir('/home/jovyan/work/carlos/complete_execution_andalucia')
from src.SPG_module4 import SyntheticPopulationGenerator
from warnings import simplefilter
simplefilter(action="ignore", category=pd.errors.PerformanceWarning)

# Loguru could be added for cross-process logging support
def seconds_to_hours(time_):
    """
    Print a duration given in seconds, first raw and then as ``hours:minutes:seconds``.

    Parameters
    ----------
    time_: int or float
        Duration in seconds. Fractional seconds are dropped in the formatted
        output. Non-negative values are expected.

    Returns
    ----------
    None
        The function only prints; nothing is returned.
    """
    # Echo the raw value first, as callers may rely on seeing it
    print(time_)

    # Discard the fractional part once, then split with integer arithmetic
    whole_seconds = int(time_ // 1)
    total_minutes, seconds = divmod(whole_seconds, 60)

    # Hours are the most significant unit and must NOT be wrapped modulo 60,
    # otherwise durations of 60 hours or more are reported incorrectly
    hours, minutes = divmod(total_minutes, 60)

    print(f"{hours}:{minutes}:{seconds}")


def compute_totals_ratio(synthetic_population, nuts3, totals_variables, use_case, eps=1e-6):
    """
    Build a table comparing the summed totals of a synthetic population with the
    reference totals stored for one NUTS3 region.

    Parameters
    ----------
    synthetic_population: pd.DataFrame
        dataframe containing the synthetic population
    nuts3: str
        NUTS3 code used to select the reference row from the totals file
    totals_variables: list
        columns of ``synthetic_population`` that are summed
    use_case: str
        use case name, inserted into the path of the reference totals file
        (``./DATA_WAREHOUSE/use_case_<use_case>/metadata/totals_nuts3.csv``)
    eps: float
        small constant added to both numerator and denominator of the ratio

    Returns
    ----------
    pd.DataFrame
        dataframe with three rows labelled "synthetic", "original" and "ratio",
        where "ratio" is (synthetic + eps) / (original + eps) for each column
    """

    def _synthetic_over_original(column):
        # `column` holds the "synthetic" and "original" entries of one variable
        return (column["synthetic"] + eps) / (column["original"] + eps)

    # Column-wise sums of the synthetic population, reshaped into a single row
    synthetic_sums = synthetic_population[totals_variables].sum()
    synthetic_row = synthetic_sums.to_frame().transpose()

    # Reference totals for the requested NUTS3 region (kept as a one-row frame).
    # An alternative source noted in the original code: ./results/totals_nuts3.csv
    reference_table = pd.read_csv(
        f"./DATA_WAREHOUSE/use_case_{use_case}/metadata/totals_nuts3.csv",
        index_col="Unnamed: 0",
    )
    reference_row = reference_table.loc[[nuts3]]

    # Stack both rows and give them readable labels
    stacked = pd.concat([synthetic_row, reference_row])
    stacked = stacked.rename(index={0: "synthetic", nuts3: "original"})

    # Per-variable ratio, appended as a third row
    ratio_row = stacked.apply(_synthetic_over_original, axis=0).to_frame().transpose()
    with_ratio = pd.concat([stacked, ratio_row], axis=0)

    return with_ratio.rename(index={0: "ratio"})

In [None]:
# Use cases to process. Each entry is iterated by the generation loop and used
# as a key into the per-use-case dictionaries (years and requested farm counts).
selected_use_case = ["andalusia", "italy", "greece", "poland"]

In [None]:
# Requested number of synthetic farms, keyed by use case and then by year (string).
# NOTE(review): 139377 appears for many years and also under "italy" -- confirm
# these are real figures and not placeholders copied between entries.
number_of_farms_per_uc_and_yeaer = {
    "andalusia": {
        "2014": 116533,  # alternative value noted in the original: 139377
        "2015": 139377,
        "2016": 139377,
        "2017": 139377,
        "2018": 112309,  # alternative value noted in the original: 141345
        "2019": 139377,
        "2020": 139377,
    },
    "italy": {
        "2014": 139377,
        "2015": 139377,
        "2016": 139377,
        "2017": 45759,
        "2018": 45490,
        # extracted from processing RICA data and summing up the 'PESO_REG_OTE' columns for this year
        "2019": 42546,
        "2020": 139377,
    },
    "greece": {"2014": 60860, "2018": 47620},
    "poland": {"2014": 96511, "2018": 94680},
}

# Parallelism settings forwarded to the generator
threads_per_worker = 1
n_workers = 22

# Years for which a synthetic population is generated, per use case
years_dict = {
    "andalusia": ["2014", "2018"],
    "italy": ["2019"],
    "greece": ["2014", "2018"],
    "poland": ["2014", "2018"],
}

# Run the generator once per (use case, year) pair
for uc in selected_use_case:
    BASE_PATH = f"./data/use_case_{uc}"

    for year in years_dict[uc]:
        requested_number_of_farms = number_of_farms_per_uc_and_yeaer[uc][year]

        print(f"Generating synthetic population, {uc}, {year}...")
        spg = SyntheticPopulationGenerator(
            requested_number_of_farms,
            BASE_PATH,
            uc,
            year,
            threads_per_worker=threads_per_worker,
            n_workers=n_workers,
        )

        # Only the result of the most recent run is kept in `result`
        result = spg.main()
