In [1]:
from tesi.zappai.utils.genetic import GeneticAlgorithm
from tesi.zappai.di import (
    get_session_maker,
    get_cds_api,
    get_location_repository,
    get_past_climate_data_repository,
    get_future_climate_data_repository,
    get_climate_generative_model_repository,
    get_crop_repository,
    get_crop_yield_data_repository,
    get_crop_yield_model_service,
)

print("Before everything")

session_maker = get_session_maker()
cds_api = get_cds_api()
location_repository = get_location_repository(session_maker=session_maker)
past_climate_data_repository = get_past_climate_data_repository(
    session_maker=session_maker,
    cds_api=cds_api,
    location_repository=location_repository,
)
future_climate_data_repository = get_future_climate_data_repository(
    session_maker=session_maker, cds_api=cds_api
)
climate_generative_model_repository = get_climate_generative_model_repository(
    session_maker=session_maker,
    location_repository=location_repository,
    past_climate_data_repository=past_climate_data_repository,
    future_climate_data_repository=future_climate_data_repository,
)
crop_repository = get_crop_repository(session_maker=session_maker)
crop_yield_data_repository = get_crop_yield_data_repository(
    crop_repository=crop_repository,
    location_repository=location_repository,
    past_climate_data_repository=past_climate_data_repository,
)
crop_yield_model_service = get_crop_yield_model_service(
    past_climate_data_repository=past_climate_data_repository,
    location_repository=location_repository,
    crop_yield_data_repository=crop_yield_data_repository,
    crop_repository=crop_repository,
)

2024-07-18 17:27:00.679595: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-07-18 17:27:00.682279: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-07-18 17:27:00.715504: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Before everything


In [2]:
import pandas as pd
from typing import cast
from tesi.zappai.dtos import CropDTO
from tesi.zappai.utils.common import enrich_data_frame_with_stats
from tesi.zappai.utils.genetic import (
    Individual,
    Population,
    individual_to_int,
)
from tesi.zappai.services.crop_yield_model_service import (
    FEATURES as CROP_YIELD_MODEL_FEATURES,
)
from sklearn.ensemble import RandomForestRegressor
from tesi.zappai.utils.common import calc_months_delta
from concurrent.futures import ProcessPoolExecutor


location = await location_repository.get_location_by_country_and_name(
    "Italy", "Policoro"
)

if location is None:
    raise Exception()

crop = await crop_repository.get_crop_by_name("maize")
if crop is None:
    raise Exception()

model = crop.crop_yield_model
if model is None:
    raise Exception()

forecast = await climate_generative_model_repository.generate_climate_data_from_last_past_climate_data(
    location_id=location.id, months=24
)

POPULATIONS = 20


def fitness(individual: Individual) -> float:
    if len(individual) != 10:
        raise Exception(f"Bro individual must be of size 10...")
    sowing = individual_to_int(individual[:5])
    harvesting = individual_to_int(individual[5:])

    if (sowing >= len(forecast)) | (harvesting >= len(forecast)):
        return 0

    sowing_year, sowing_month = forecast.index[sowing]
    harvest_year, harvest_month = forecast.index[harvesting]

    duration = calc_months_delta(
        start_year=sowing_year,
        start_month=sowing_month,
        end_year=harvest_year,
        end_month=harvest_month,
    )

    if duration <= 0:
        return 0
    if (duration < cast(int, cast(CropDTO, crop).min_farming_months)) | (
        duration > cast(int, cast(CropDTO, crop).max_farming_months)
    ):
        return 0

    forecast_for_individual = forecast[
        (
            (forecast.index.get_level_values("year") < harvest_year)
            | (
                (forecast.index.get_level_values("year") == harvest_year)
                & (forecast.index.get_level_values("year") <= harvest_month)
            )
        )
        | (
            (forecast.index.get_level_values("year") > sowing_year)
            | (
                (forecast.index.get_level_values("year") == sowing_year)
                & (forecast.index.get_level_values("year") >= sowing_month)
            )
        )
    ]

    enriched_forecast = enrich_data_frame_with_stats(
        df=forecast_for_individual, ignore=["sin_year, cos_year"]
    )

    x_df = pd.DataFrame(
        {
            "sowing_year": [sowing_year],
            "sowing_month": [sowing_month],
            "harvest_year": [harvest_year],
            "harvest_month": [harvest_month],
            "duration_months": calc_months_delta(
                start_year=sowing_year,
                start_month=sowing_month,
                end_year=harvest_year,
                end_month=harvest_month,
            ),
        }
    )
    x_df = pd.concat([x_df, enriched_forecast], axis=1)

    x_df.to_csv("x_df.csv")

    pred = cast(RandomForestRegressor, model).predict(
        x_df[CROP_YIELD_MODEL_FEATURES].to_numpy()
    )
    return pred[0]


def on_population_created(i: int, population: Population):
    return
    print(f"\rPOPULATION {i}/{POPULATIONS} PROCESSED!!!", end="")
    if i == POPULATIONS:
        print()


ga = GeneticAlgorithm(
    fitness=fitness,
    chromosome_length=10,
    population_size=POPULATIONS,
    mutation_rate=0.01,
    crossover_rate=0.7,
    generations=20,
    on_population_created=on_population_created,
)


def job(job_id: int):
    print(f"Starting job {job_id}")
    results, fitnesses = ga.run()

    sowing = individual_to_int(results[-1][:5])
    harvesting = individual_to_int(results[-1][5:])

    sowing_year, sowing_month = forecast.index[sowing]
    harvest_year, harvest_month = forecast.index[harvesting]
    duration = calc_months_delta(
        start_year=sowing_year,
        start_month=sowing_month,
        end_year=harvest_year,
        end_month=harvest_month,
    )

    print(f"Finish job {job_id}")

    return (
        sowing_year,
        sowing_month,
        harvest_year,
        harvest_month,
        duration,
        fitnesses[-1],
    )


with ProcessPoolExecutor(max_workers=4) as executor:
    futures = [executor.submit(job, job_id=i) for i in range(4)]
    for i, future in enumerate(futures):
        sowing_year, sowing_month, harvest_year, harvest_month, duration, _fitness = (
            future.result()
        )
        print(f"Sowing: {sowing_year}-{sowing_month}")
        print(f"Harvesting: {harvest_year}-{harvest_month}")
        duration = calc_months_delta(
            start_year=sowing_year,
            start_month=sowing_month,
            end_year=harvest_year,
            end_month=harvest_month,
        )
        print(f"Duration: {duration}")
        print(f"Est. yield: {_fitness}")

2024-07-18 17:27:02.259949: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-07-18 17:27:02.260432: W tensorflow/core/common_runtime/gpu/gpu_device.cc:2251] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


Starting job 3Starting job 1Starting job 2Starting job 0



Finish job 2
Finish job 0
Sowing: 2025-9
Harvesting: 2025-12
Duration: 3
Est. yield: 8635.79017996543
Finish job 3
Finish job 1
Sowing: 2024-9
Harvesting: 2025-10
Duration: 13
Est. yield: 8635.79017996543
Sowing: 2024-10
Harvesting: 2025-10
Duration: 12
Est. yield: 8635.79017996543
Sowing: 2025-4
Harvesting: 2026-4
Duration: 12
Est. yield: 8607.154208221495
