# import

In [1]:
import json
import os
import re
import torch
from transformers.generation import (
    StoppingCriteria,
    StoppingCriteriaList,
    BeamSearchScorer,
    LogitsProcessorList,
    MaxLengthCriteria,
)
from transformers import LogitsProcessor
import numpy as np
from transformers import AutoTokenizer, AutoModelForCausalLM
from typing import Optional, Union, Tuple, List
from datasets import load_from_disk
from tqdm import tqdm
from transformers import GPT2LMHeadModel, GenerationConfig
from transformers import HfArgumentParser
import os
import sys
from dataclasses import dataclass, field
from datasets import Dataset
import pandas as pd
import os
import json
import time
import matplotlib.pyplot as plt


  from .autonotebook import tqdm as notebook_tqdm


# model

In [2]:
# Write one JSON configuration per (domain, decoding mode) pair and echo it back.
domains = ["satellite"]

for domain in domains:
    for mode in ["greedy"]:
        print("### Generazione con {} e {}".format(domain, mode))
        path = "config.json"
        # Keyword order fixes the key order of the serialized JSON.
        params = dict(
            domain=domain,
            model_dir="./models/" + domain + "/",
            dataset_dir="./" + domain + "-dataset-tesi/",
            num_beams=1,
            num_return_beams=1,
            split_dataset="test",
            output_dir=domain + "-generations-tesi/greedy/",
        )
        tmp = json.dumps(params, indent=4)
        with open(path, "w") as file:
            file.write(tmp)
        # Read the file back so that what is printed is what is actually on disk.
        with open(path, "r") as file:
            data = json.loads(file.read())
        print(f"### Generazione con {path}")
        print(json.dumps(data, indent=4))

### Generazione con satellite e greedy
### Generazione con config.json
{
    "domain": "satellite",
    "model_dir": "./models/satellite/",
    "dataset_dir": "./satellite-dataset-tesi/",
    "num_beams": 1,
    "num_return_beams": 1,
    "split_dataset": "test",
    "output_dir": "satellite-generations-tesi/greedy/"
}


In [3]:
def _plan_argument(default, help_text):
    """Build a dataclass field with a default value and a ``help`` metadata entry."""
    return field(default=default, metadata={"help": help_text})


@dataclass
class PlanGeneratorArguments:
    """Options of the plan-generation run: data/model locations plus decoding parameters."""

    # locations and dataset selection
    domain: str = _plan_argument("satellite", "The domain of the problems")
    model_dir: str = _plan_argument(
        "output_new_finetuning_more_batch/checkpoint-258502/", "The model directory"
    )
    dataset_dir: str = _plan_argument(
        "data/ipc/satellite/random_new_correct_with_invariants_and_types/",
        "The dataset directory",
    )
    add_start: bool = _plan_argument(False, "Add the start token to the prompt")
    split_dataset: str = _plan_argument("validation", "The split of the dataset to use")
    output_dir: str = _plan_argument("output_coverage_beam_search/", "The output directory")

    # generation parameters
    num_beams: int = _plan_argument(1, "The number of beams to track")
    num_return_beams: int = _plan_argument(1, "The number of beams to return")
    max_length: int = _plan_argument(2048, "The maximum length of the generated plan")
    do_sample: bool = _plan_argument(False, "If to sample the generation")
    top_p: float = _plan_argument(1.0, "The top_p parameter for the generation")

In [4]:
# Parse the JSON configuration file referenced by `path` into a PlanGeneratorArguments instance.
parser = HfArgumentParser(PlanGeneratorArguments)
(args,) = parser.parse_json_file(json_file=path)

# Shorthand names used by the following cells.
dataset_dir, model_path, domain = args.dataset_dir, args.model_dir, args.domain
print(dataset_dir, model_path, domain)

# Create the output directory on first use.
if os.path.exists(args.output_dir) is False:
    os.makedirs(args.output_dir, exist_ok=True)

./satellite-dataset-tesi/ ./models/satellite/ satellite


In [5]:
# Load the tokenizer and the language model stored in `model_path`.
tokenizer = AutoTokenizer.from_pretrained(model_path)
if "ape" in model_path:
    # GPT2_ape is a custom model class that this notebook neither defines nor imports;
    # fail with an actionable message instead of an opaque NameError.
    if "GPT2_ape" not in globals():
        raise NameError(
            "model_path contains 'ape' but the GPT2_ape class is not defined; "
            "import or define it before running this cell"
        )
    model = GPT2_ape.from_pretrained(model_path)
else:
    model = AutoModelForCausalLM.from_pretrained(model_path)
# Use the GPU when one is available and fall back to the CPU otherwise, so the cell
# also runs on machines without CUDA (later cells read the device from model.device).
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(254, 768)
    (wpe): Embedding(2048, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-11): 12 x GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D(nf=2304, nx=768)
          (c_proj): Conv1D(nf=768, nx=768)
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D(nf=3072, nx=768)
          (c_proj): Conv1D(nf=768, nx=3072)
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=768, out_features=254, bias=False)
)

In [6]:
# Decoding options, all taken from the parsed arguments.
generation_kwargs = {
    "num_beams": args.num_beams,
    "max_length": args.max_length,
    "do_sample": args.do_sample,
    "num_return_sequences": args.num_return_beams,
    "top_p": args.top_p,
}
generation_config = GenerationConfig(**generation_kwargs)
# generation_config.get_generation_mode() could be printed here, but it works only with transformers 4.40 +
for message in ("Generation config loaded", "Model and Tokenizer loaded"):
    print(message)

Generation config loaded
Model and Tokenizer loaded


# dict 

In [8]:
# --- Satellite domain: name conversions ---------------------------------------------
# Maps underscore-style predicate/action names to their hyphenated form; the loops below
# add the object renamings (named targets -> directionN, named modes -> modeN).
convert_action_sn = {
    "switch_on": "switch-on",
    "switch_off": "switch-off",
    "take_image": "take-image",
    "turn_to": "turn-to",
    "have_image": "have-image",
    "power_on": "power-on",
    "power_avail": "power-avail",
    "on_board": "on-board",
    "calibration_target": "calibration-target",
}

# Per-problem conversions; starts empty in this cell.
convert_action_sn1 = {}

for i in range(45):
    convert_action_sn.update(
        {
            f"{prefix}{i}": f"direction{i+1}"
            for prefix in ("star", "groundstation", "phenomenon", "planet")
        }
    )

for i in range(5):
    convert_action_sn.update(
        {
            f"{prefix}{i}": f"mode{i+1}"
            for prefix in ("image", "infrared", "spectrograph", "thermograph")
        }
    )

# --- Satellite domain: object types --------------------------------------------------
# For every type, the digit-free object names accepted for it.
dict_objects_sn = {
    "satellite": ["sat", "satellite"],
    "instrument": ["ins", "instrument"],
    "direction": ["dir", "direction"],
    "mode": ["mod", "mode"],
}

# --- Satellite domain: predicates ----------------------------------------------------
# Values are kept exactly as in the original definition (1-tuples wrapping the argument
# types for the relational predicates, plain lists for the type predicates).
dict_predicates_sn = {
    "on-board": (["instrument", "satellite"],),
    "supports": (["instrument", "mode"],),
    "pointing": (["satellite", "direction"],),
    "have-image": (["direction", "mode"],),
    "calibration-target": (["instrument", "direction"],),
    "power-avail": (["satellite"],),
    "power-on": (["instrument"],),
    "calibrated": (["instrument"],),
    "satellite": ["satellite"],
    "direction": ["direction"],
    "instrument": ["instrument"],
    "mode": ["mode"],
}

# --- Satellite domain: actions -------------------------------------------------------
# Each entry is (parameter types, preconditions, add effects, delete effects); the numbers
# inside a predicate string are positions in the action's parameter list.
dict_actions_sn = {
    "turn-to": (
        ["satellite", "direction", "direction"],
        ["pointing 0 2"],
        ["pointing 0 1"],
        ["pointing 0 2"],
    ),
    "switch-on": (
        ["instrument", "satellite"],
        ["on-board 0 1", "power-avail 1"],
        ["power-on 0"],
        ["power-avail 1", "calibrated 0"],
    ),
    "switch-off": (
        ["instrument", "satellite"],
        ["on-board 0 1", "power-on 0"],
        ["power-avail 1"],
        ["power-on 0"],
    ),
    "calibrate": (
        ["satellite", "instrument", "direction"],
        ["on-board 1 0", "calibration-target 1 2", "pointing 0 2", "power-on 1"],
        ["calibrated 1"],
        [],
    ),
    "take-image": (
        ["satellite", "direction", "instrument", "mode"],
        ["calibrated 2", "on-board 2 0", "supports 2 3", "power-on 2", "pointing 0 1"],
        ["have-image 1 3"],
        [],
    ),
}

# --- Per-domain registries -----------------------------------------------------------
dict_actions_domain = {"satellite": dict_actions_sn}
dict_predicates_domain = {"satellite": dict_predicates_sn}
dict_objects_domain = {"satellite": dict_objects_sn}
convert_action_domain = {"satellite": convert_action_sn}
convert_action_domain1 = {}

# funzioni gestione input ed estrazione dati

In [9]:
def estrai(input):
    """Return the distinct ``instrument<N>`` and ``satellite<N>`` names found in ``input``.

    The result is a pair ``(instruments, satellites)`` of lists without duplicates;
    the order inside each list is not defined.
    """

    def distinct_matches(pattern):
        return list(set(re.findall(pattern, input)))

    satellites = distinct_matches(r'satellite\d+')
    instruments = distinct_matches(r'instrument\d+')
    return instruments, satellites

In [10]:
def _apply_conversions(text, conversions):
    """Replace every key of ``conversions`` found in ``text`` with its value.

    Longer keys are replaced first so that a key which is a prefix of another one
    (``star1`` vs ``star10``) cannot corrupt the longer name.
    """
    for source in sorted(conversions, key=len, reverse=True):
        text = text.replace(source, conversions[source])
    return text


def unite_actions(input, keywords, domain, separator="_"):
    """Rename objects/predicates in ``input`` and group its tokens into atoms.

    The text is first rewritten with the per-problem table ``convert_action_domain1[domain]``
    (skipped when the domain has no entry there) and then with ``convert_action_domain[domain]``.
    The converted text is split on whitespace; every token contained in ``keywords`` starts a
    new group that runs up to the next keyword, and each group is joined with ``separator``.
    Tokens that precede the first keyword are discarded.

    Args:
        input: whitespace-separated text to convert.
        keywords: container of tokens that start a new group (compared after conversion).
        domain: key used to look up the conversion tables.
        separator: string placed between the tokens of a group.

    Returns:
        A list with one joined string per group, in order of appearance.

    Raises:
        KeyError: if ``domain`` is missing from ``convert_action_domain``.
    """
    input = _apply_conversions(input, convert_action_domain1.get(domain, {}))
    input = _apply_conversions(input, convert_action_domain[domain])

    tokens = input.split()
    # Positions where a group starts, plus a sentinel closing the last group.
    starts = [idx for idx, token in enumerate(tokens) if token in keywords]
    starts.append(len(tokens))
    # Tokens come from str.split() and contain no whitespace, so joining them with
    # `separator` is the same as joining with spaces and replacing the spaces.
    return [separator.join(tokens[begin:end]) for begin, end in zip(starts, starts[1:])]

In [11]:
def initial_state(path):
    """Extract the ``:init`` section of the file at ``path`` as one normalized string.

    Every line is stripped, has its parentheses removed and is lower-cased. Collection
    starts after a line that normalizes to ``:init`` and stops at the first line that
    normalizes to the empty string. Raw lines already collected are skipped when they
    show up again. While scanning, lines containing ``"mode "`` and ``"direction "``
    are counted (in the whole file, not only inside the init section).

    Returns:
        ``(state, num_direction, num_mode)`` where ``state`` is the concatenation of the
        collected lines, each followed by a single space.
    """
    with open(path, "r") as f:
        raw_lines = f.readlines()

    num_mode = 0
    num_direction = 0
    collecting = False
    already_collected = set()  # raw lines seen while inside the init section
    pieces = []

    for raw in raw_lines:
        if raw in already_collected:
            continue
        if collecting:
            already_collected.add(raw)

        line = raw.strip().replace("(", "").replace(")", "").lower()
        if "mode " in line:
            num_mode += 1
        if "direction " in line:
            num_direction += 1

        # The order of the three checks matters: an empty line closes the section before
        # it can be collected, and the ":init" line itself is never collected.
        if line == "":
            collecting = False
        if collecting:
            pieces.append(line + " ")
        if line == ":init":
            collecting = True

    return "".join(pieces), num_direction, num_mode

In [12]:
def get_goals(path):
    """Read the file at ``path`` and return its lines normalized as goal strings.

    Each line is stripped, freed of ``(``, ``)`` and ``,`` characters and lower-cased;
    blank lines are kept as empty strings.
    """
    drop_punctuation = str.maketrans("", "", "(),")
    with open(path, "r") as f:
        return [line.strip().translate(drop_punctuation).lower() for line in f.readlines()]

In [13]:
def get_actions(path):
    """Read the file at ``path`` and return its lines normalized as action strings.

    Each line is stripped, freed of ``(`` and ``)`` characters and lower-cased;
    blank lines are kept as empty strings.
    """
    drop_parentheses = str.maketrans("", "", "()")
    with open(path, "r") as f:
        return [line.strip().translate(drop_parentheses).lower() for line in f.readlines()]

In [14]:
def create_starting_structures(plan, domain):
    """Turn a prompt/plan pair into the structures used by the plan validator.

    Args:
        plan: dict with two string entries. ``plan["input"]`` has the shape
            ``[<|startofplan|>] <initial state> <|goals|> <goals> [<|actions|> ...]`` and
            ``plan["actions"]`` holds the generated actions, optionally followed by
            ``<|endofplan|>``.
        domain: key into ``dict_predicates_domain`` / ``dict_actions_domain``.

    Returns:
        ``(dict_states, dict_goals, actions)`` where ``dict_states`` maps every initial
        atom to ``True``, ``dict_goals`` maps every goal atom to whether it already holds
        in the initial state, and ``actions`` is the list of grouped action strings.

    Raises:
        IndexError: if ``plan["input"]`` does not contain ``<|goals|>``.
    """
    predicate_names = list(dict_predicates_domain[domain].keys())
    action_names = list(dict_actions_domain[domain].keys())

    sections = plan["input"].split("<|goals|>")
    initial_text = sections[0].strip()
    # The start marker is optional: when it is absent the whole section is the state.
    if "<|startofplan|>" in initial_text:
        initial_text = initial_text.split("<|startofplan|>")[1]
    initial_text = initial_text.strip()
    goal_text = sections[1].strip().split("<|actions|>")[0].strip()

    initial_states = unite_actions(initial_text, predicate_names, domain)
    goal_states = unite_actions(goal_text, predicate_names, domain)

    # Keep only what precedes the end marker. str.split returns a list, so the first
    # element has to be selected before stripping; without the marker the list has the
    # whole string as its only element.
    actions_text = plan["actions"].split("<|endofplan|>")[0].strip()
    actions = unite_actions(actions_text, action_names, domain)

    dict_states = {init_state: True for init_state in initial_states}
    dict_goals = {goal_state: goal_state in dict_states for goal_state in goal_states}
    return (dict_states, dict_goals, actions)

In [15]:
def rimuovi_numeri(stringa):
    """Return ``stringa`` without the characters for which ``str.isdigit`` is true."""
    return "".join(filter(lambda ch: not ch.isdigit(), stringa))

# funzioni di validità

In [16]:
def check_goals(states, goal_states, domain):
    """Mark in ``goal_states`` which goals hold in ``states`` and report the outcome.

    A goal holds when it is a key of ``states`` and its value is not ``False``.
    ``goal_states`` is updated in place with ``True``/``False`` for every goal.
    ``domain`` is accepted for signature compatibility and is not used.

    Returns:
        ``(True, goal_states, "goals_succesfull")`` when every goal holds, otherwise
        ``(False, goal_states, "goals_not_succesfull", unmet_without_digits, unmet)``.
    """
    unmet = []
    for goal in goal_states.keys():
        holds = goal in states.keys() and states[goal] is not False
        goal_states[goal] = holds
        if not holds:
            unmet.append(goal)

    if not unmet:
        return (True, goal_states, "goals_succesfull")

    unmet_without_digits = [rimuovi_numeri(goal) for goal in unmet]
    return (False, goal_states, "goals_not_succesfull", unmet_without_digits, unmet)

In [17]:
def _ground_predicate(template, objects):
    """Instantiate a template such as ``"on-board 0 1"`` with the action's objects.

    The first token is the predicate name, the remaining tokens are positions in
    ``objects``; the result joins name and objects with underscores.
    """
    name, *positions = template.split(" ")
    return "_".join([name] + [objects[int(position)] for position in positions])


def execute_action(states, action, domain, verbose=True):
    """Apply one grounded action to ``states`` if it is well formed and applicable.

    Args:
        states: dict mapping grounded atoms (``predicate_obj1_obj2``) to booleans. It is
            updated in place when the action is applied.
        action: action name and objects joined by underscores, e.g.
            ``switch-on_instrument1_satellite1``.
        domain: key into ``dict_actions_domain`` and ``dict_objects_domain``.
        verbose: when true (the default) a diagnostic dump is printed if an object has
            the wrong type.

    Returns:
        A tuple whose first element tells whether the action was applied, whose second
        element is ``states`` and whose third element is an outcome label:

        * ``(True, states, "action_succesfull")``
        * ``(False, states, "action_name_wrong", name, action, "")``
        * ``(False, states, "object_number_wrong", name, expected_count, given_count)``
        * ``(False, states, "object_name_wrong", name, object_without_digits, object)``
        * ``(False, states, "violed_preconditions", violated_without_digits, violated, name)``
    """
    action_name, *action_objects = action.split("_")

    # An unknown domain and an unknown action name are reported in the same way.
    action_parameter = dict_actions_domain.get(domain, {}).get(action_name)
    if action_parameter is None:
        return (False, states, "action_name_wrong", action_name, action, "")
    expected_types, preconditions, add_effects, delete_effects = action_parameter

    if len(expected_types) != len(action_objects):
        return (
            False,
            states,
            "object_number_wrong",
            action_name,
            len(expected_types),
            len(action_objects),
        )

    # Type check: the object name without its digits must be one of the names
    # accepted for the type expected at that position.
    for expected_type, obj in zip(expected_types, action_objects):
        obj_nonumber = "".join([ch for ch in obj if not ch.isdigit()])
        accepted_names = dict_objects_domain[domain][expected_type]
        if obj_nonumber not in accepted_names:
            if verbose:
                print(action)
                print("splitted_action", action_name)
                print("action_parameter", action_parameter)
                print("splitted_objects", action_objects)
                print("action_objects", expected_types)
                print("Errore: " + obj_nonumber + " non è un oggetto valido per " + expected_type)
                print("Gli oggetti validi sono: " + str(accepted_names))
            return (False, states, "object_name_wrong", action_name, obj_nonumber, obj)

    # A precondition is violated when its atom is unknown or explicitly False.
    violated = []
    for template in preconditions:
        atom = _ground_predicate(template, action_objects)
        if atom not in states.keys() or states[atom] is False:
            violated.append(atom)
    if violated:
        return (
            False,
            states,
            "violed_preconditions",
            [rimuovi_numeri(atom) for atom in violated],
            violated,
            action_name,
        )

    # Delete effects go first so that an atom both deleted and added ends up True.
    for template in delete_effects:
        states[_ground_predicate(template, action_objects)] = False
    for template in add_effects:
        states[_ground_predicate(template, action_objects)] = True

    return (True, states, "action_succesfull")

In [18]:
def check_violated_preconditions(plan, domain):
    """Tell whether executing ``plan`` stops on an error that counts as a violation.

    The plan is parsed with ``create_starting_structures`` and its actions are executed
    in order with ``execute_action``. The first failing action decides the result:

    * wrong object name, violated preconditions or wrong action name -> ``True``
    * wrong number of objects -> ``False`` when more objects were expected than given,
      ``True`` otherwise
    * any other failure label is ignored and execution moves on to the next action

    ``True`` is also returned when no action could be parsed although ``plan["actions"]``
    is not empty. ``False`` is returned when every action is applied.
    """
    state, goal_state, actions = create_starting_structures(plan, domain)
    if not actions and len(plan["actions"]) != 0:
        # The generated text starts with tokens that are not actions.
        return True

    check_goals(state, goal_state, domain)

    always_violation = ("object_name_wrong", "violed_preconditions", "action_name_wrong")
    for action in actions:
        outcome = execute_action(state, action, domain)
        if outcome[0] is True:
            state = outcome[1]
            continue
        error_type = outcome[2]
        if error_type in always_violation:
            return True
        if error_type == "object_number_wrong":
            expected_count, given_count = outcome[4], outcome[5]
            return not (expected_count > given_count)
    return False

# funzione calcolo reward 

In [19]:
def calculate_reward(plan, domain):
    """Score a generated plan by the share of goals it reaches.

    Returns ``1`` as soon as all goals hold (already in the initial state or after some
    action). If an action fails, the result is the fraction of goals holding at that
    point, lowered by 1 when ``check_violated_preconditions(plan, domain)`` is ``True``.
    If all actions are applied without reaching every goal, the result is the fraction
    of goals holding at the end.
    """
    if domain == "satellite":
        dict_actions_domain['satellite'] = dict_actions_sn

    state, goal_state, actions = create_starting_structures(plan, domain)

    def satisfied_fraction(goal_flags):
        # Fraction of goals whose flag is exactly True.
        reached = sum(1 for goal in goal_flags.keys() if goal_flags[goal] is True)
        return reached / len(goal_flags.keys())

    result_goals = check_goals(state, goal_state, domain)
    if result_goals[0] is True:
        return 1

    for action in actions:
        result = execute_action(state, action, domain)
        if result[0] is not True:
            # Execution stops here: score what has been reached so far.
            result_goals = check_goals(state, goal_state, domain)
            violated = check_violated_preconditions(plan, domain) is True
            fraction = satisfied_fraction(result_goals[1])
            return (fraction - 1) if violated else fraction

        state = result[1]
        result_goals = check_goals(state, goal_state, domain)
        if result_goals[0] is True:
            return 1

    return satisfied_fraction(result_goals[1])

# funzione base che viene chiamata per singolo piano per controllare validità e riportare tutti i dati estratti

In [20]:
def parse_problem(plan, domain):
    """Execute a generated plan and report whether it reaches the goals.

    Returns one of three tuple shapes:

    * goals already hold initially:
      ``(True, goal_flags, "goals_succesfull", number_of_actions)``
    * an action fails: ``(False, goal_flags, error_label, info1, info2, info3,
      executed_actions)`` where ``error_label`` and the three info values are elements
      2..5 of the ``execute_action`` result
    * otherwise: ``(goals_reached, goal_flags, goal_label, unused_actions)`` where
      ``unused_actions`` is the number of actions left after the goals were reached
      (0 when the plan ran to its end)
    """
    if domain == "satellite":
        dict_actions_domain['satellite'] = dict_actions_sn

    # Initial atoms (all True), goal atoms (True/False) and the generated actions.
    state, goal_state, actions = create_starting_structures(plan, domain)

    goal_check = check_goals(state, goal_state, domain)
    if goal_check[0] is True:
        return (goal_check[0], goal_check[1], goal_check[2], len(actions))

    executed = 0
    for action in actions:
        outcome = execute_action(state, action, domain)
        if outcome[0] is not True:
            goal_check = check_goals(state, goal_state, domain)
            return (
                outcome[0],
                goal_check[1],
                outcome[2],
                outcome[3],
                outcome[4],
                outcome[5],
                executed,
            )

        state = outcome[1]
        executed += 1
        goal_check = check_goals(state, goal_state, domain)
        if goal_check[0] is True:
            break

    return (goal_check[0], goal_check[1], goal_check[2], len(actions) - executed)

# Approccio Online: creazione piani

In [26]:

# Online approach: for every problem folder in `dataset_dir`, build one prompt per candidate
# goal and per prefix of the observed actions, let the model generate a plan for each prompt,
# validate the plan with parse_problem / calculate_reward, and write all results to a JSON
# file plus a text summary in `args.output_dir`.
counter = 0  # written to the summary as "Correct plans"
last_generated = 0  # not used anywhere else in this cell
generation_output = []  # one record per generation, accumulated over all folders
total_plans = 0
tot_time = 0  # sum of the wall-clock time spent inside model.generate
tot_skip = 0  # number of generations skipped by the shortcut below
v=0  # 1-based folder counter, used only for the progress print

for folder in os.listdir(dataset_dir):   
    v+=1      
    print(v, folder)
    convert_action_sn1 = {}
    cont=0  # index of the candidate goal currently being processed
    cont_action=0  # position of the current prompt among the prompts of that goal
    # Load the problem files of this folder.
    # NOTE(review): `path` is reassigned here, so the config-file path that the
    # argument-parsing cell reads from `path` is lost once this cell has run.
    # NOTE(review): when a folder lacks one of these files the corresponding variable
    # keeps the value of the previous folder (or is undefined for the first folder).
    for file in os.listdir(os.path.join(dataset_dir, folder)):
        path = os.path.join(dataset_dir, folder, file)
        if file == "template.pddl":
            state, num_direction, num_mode = initial_state(path)                            
        elif file == "hyps.dat":
            goals = get_goals(path) 
        elif file == "obs.dat":
            actions = get_actions(path)  
        elif file == "real_hyp.dat":
            real_goal = get_goals(path)
   
    # Per-problem renaming: the first instrument/satellite name in string sort order is
    # mapped to the name carrying the number of distinct instruments/satellites.
    # NOTE(review): presumably this moves the 0-indexed object to index N so that the
    # names become 1..N - confirm. Raises IndexError when the state mentions no
    # instrument or no satellite.
    inst,sat = estrai(state)
    inst = sorted(inst)
    sat = sorted(sat)              
    convert_action_sn1[f"{inst[0]}"] = f"instrument{len(inst)}"           
    convert_action_sn1[f"{sat[0]}"] = f"satellite{len(sat)}"
    #print(convert_action_sn1)
    convert_action_domain1["satellite"] = convert_action_sn1    
    
    
   
    # Convert the initial state to the renamed vocabulary, one atom per list element,
    # then sort the atoms and join them back into a single space-separated string.
    state = unite_actions(
        state,
        list(dict_predicates_domain[domain].keys()),
        domain,
    )
    state = [x.replace("_", " ") for x in state]
    state = sorted(state)
    state = " ".join(state) 
        
    
    # Same normalization for the real goal.
    # NOTE(review): the loop variable is overwritten on every iteration, so after the loop
    # `r_goal` holds only the normalized form of the LAST line of real_hyp.dat.
    for r_goal in real_goal:
        r_goal = unite_actions(
            r_goal,
            list(dict_predicates_domain[domain].keys()),
            domain,
        )
        r_goal = [x.replace("_", " ") for x in r_goal]
        r_goal = sorted(r_goal)
        r_goal = " ".join(r_goal)
    
    # Build the prompts: for every candidate goal, first the prompt without any action and
    # then one prompt per growing prefix of the observed actions. `prompts` and
    # `prompts_tokenized` are flat lists over all goals.
    prompts = []
    prompts_tokenized = []
    goals_sorted = []
    for goal in goals:
        goal = unite_actions(
            goal,
            list(dict_predicates_domain[domain].keys()),
            domain,
        )

        goal = [x.replace("_", " ") for x in goal]
        goal = sorted(goal)
        goal = " ".join(goal)
        goals_sorted.append(goal)
        prompt = state + " <|goals|> " + goal + " <|actions|>"
        prompts.append(prompt)
        prompt_tokenized = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=args.max_length)
        prompt_tokenized = prompt_tokenized["input_ids"].to(model.device)
        prompts_tokenized.append(prompt_tokenized)
        for action in actions:    
            # Rename the objects of the observed action: per-problem table first, then the
            # domain table.
            # NOTE(review): plain str.replace in dictionary order lets a shorter key that is
            # a prefix of a longer one (e.g. star1 / star10) rewrite part of the longer name.
            for conversion in convert_action_domain1[domain].keys():
                action = action.replace(conversion, convert_action_domain1[domain][conversion])
            for conversion in convert_action_domain[domain].keys():         
                action = action.replace(conversion, convert_action_domain[domain][conversion])                         
            prompt = prompt + action + " "    
            #print(prompt)
            prompts.append(prompt)
            prompt_tokenized = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=args.max_length)
            prompt_tokenized = prompt_tokenized["input_ids"].to(model.device)
            prompts_tokenized.append(prompt_tokenized)    
            
    # Generate and validate one plan set per prompt. While skip_number is positive the
    # prompt is skipped without generating anything.
    skip_number=0
    for prompt_tokenized in prompts_tokenized:   
        if skip_number>0:            
            skip_number-=1 
        else:            
            with torch.inference_mode():                
                start_time = time.time()
                # max_length is passed both directly and through generation_config.
                generated = model.generate(
                    prompt_tokenized,
                    pad_token_id=tokenizer.eos_token_id,
                    max_length=args.max_length,
                    generation_config=generation_config,
                )
                end_time = time.time()
                delta_time = end_time - start_time
                tot_time+=delta_time
                
            found_true = False
            plans = []
            for index, output_tokenized in enumerate(generated):
                output = tokenizer.decode(output_tokenized)                 
                # Split the decoded text into the prompt part and the generated plan, and
                # cut the plan at the end marker / remove the special tokens.
                inputs = output.split("<|actions|>")[0]
                plan = output.split("<|actions|>")[1]
                if "<|endofplan|>" in plan:        
                    plan = plan.split("<|endofplan|>")[0]
                plan = plan.replace("<|endofplan|>", "")
                plan = plan.replace("<|startofplan|>", "")
                plan = plan.replace("<|pad|>", "")
                p = {"input": inputs, "actions": plan}       
                res = parse_problem(p, domain=domain)   
                # Overwritten for every returned sequence: the record below stores the
                # value of the last one.
                metric_goals_satisfated=calculate_reward(p,domain=domain)   
                plans.append({"plan": plan, "result": res})  
                if res[0] is True:
                    found_true = True
                
            # A generation counts as correct when at least one returned plan is valid.
            if found_true is True:
                counter += 1            
            
            actions_string = " ".join(actions) 
            
            ## check whether the generated plan is equal to the actions still to be added
            # NOTE(review): actions_string is built from the obs.dat actions BEFORE the
            # renaming applied to the prompts, and it always contains all the actions, not
            # only the remaining ones - confirm that this comparison can ever succeed.
            
            if plans[0]["plan"].strip() == actions_string:  
                # print(plans[0]["result"][0])  
                # print(goals[cont])        
                # Shortcut: skip the remaining prompts of this goal and account for them
                # in the totals as if they had been generated.
                skip_number = len(actions)-cont_action
                tot_skip+=skip_number
                if skip_number>0:
                    total_plans+=(cont_action+1)
                if plans[0]["result"][0]:
                    counter+=skip_number 
            
            # Is the current candidate goal the real one?
            if r_goal == goals_sorted[cont]:  
                true_goal = True                        
            else:
                true_goal = False
                
            # NOTE(review): `prompts` is a flat list over all goals and prefixes, so
            # prompts[cont] and prompts[cont_action] are not necessarily the prompt that was
            # just used; the original inline comment already questions the index.
            generation_output.append(
                {
                    "name": folder,
                    "goal": goals[cont],
                    "input": prompts[cont], #cont or cont_action
                    "action": prompts[cont_action].split("<|actions|>")[1],
                    "true_goal": true_goal,
                    "actions": actions_string,
                    "plans": plans,
                    "value of the metric goals satisfated": metric_goals_satisfated,
                    #"actions": actions,
                    "time": format(delta_time, ".2f"),
                }
            )  
            # Advance inside the current goal; after the last prefix move to the next goal.
            cont_action+=1
            if cont_action > len(actions):            
                total_plans+=cont_action            
                cont_action=0
                cont+=1 
            
            
            # When the shortcut fired, the rest of this goal is skipped: move to the next goal.
            if skip_number>0:
                cont_action=0
                cont+=1            
    
    
    # Save the results accumulated so far. This runs once per folder and rewrites the same
    # two files every time, so after the loop they hold the results of all folders.
    with open(
        os.path.join(args.output_dir, f"generation_output_100p_actions_NewTest.json"),
        "w",
    ) as f:
        json.dump(generation_output, f, indent=4) 
        
    with open(os.path.join(args.output_dir, f"generation_results_100p_actions_NewTest.txt"), "w") as f:
        f.write(f"Correct plans: {counter}\n")
        f.write(f"Skip plan generation: {tot_skip}\n")
        f.write(f"Total plans: {total_plans}\n")
        # NOTE(review): raises ZeroDivisionError while total_plans is still 0 (e.g. when the
        # first folder has no candidate goals).
        percent = counter / total_plans * 100.0
        f.write(f"Coverage: {percent}%\n")  
        f.write(f"Total time: {tot_time}\n")
        
      
    
    

1 satellite_p01_hyp-3_full
{'instrument0': 'instrument4', 'satellite0': 'satellite2'}
calibration-target instrument1 direction3 calibration-target instrument2 direction1 calibration-target instrument3 direction1 calibration-target instrument4 direction2 direction direction1 direction direction2 direction direction3 direction direction4 direction direction5 direction direction6 direction direction7 direction direction8 instrument instrument1 instrument instrument2 instrument instrument3 instrument instrument4 mode mode1 mode mode2 mode mode3 on-board instrument1 satellite2 on-board instrument2 satellite2 on-board instrument3 satellite1 on-board instrument4 satellite2 pointing satellite1 direction1 pointing satellite2 direction5 power-avail satellite1 power-avail satellite2 satellite satellite1 satellite satellite2 supports instrument1 mode2 supports instrument2 mode1 supports instrument2 mode2 supports instrument3 mode1 supports instrument3 mode2 supports instrument3 mode3 supports inst

7 satellite_p001143_hyp=hyp-2_100
switch-on_instrument10_direction21
splitted_action switch-on
action_parameter (['instrument', 'satellite'], ['on-board 0 1', 'power-avail 1'], ['power-on 0'], ['power-avail 1', 'calibrated 0'])
splitted_objects ['instrument10', 'direction21']
action_objects ['instrument', 'satellite']
Errore: direction non è un oggetto valido per satellite
Gli oggetti validi sono: ['sat', 'satellite']
switch-on_instrument10_direction21
splitted_action switch-on
action_parameter (['instrument', 'satellite'], ['on-board 0 1', 'power-avail 1'], ['power-on 0'], ['power-avail 1', 'calibrated 0'])
splitted_objects ['instrument10', 'direction21']
action_objects ['instrument', 'satellite']
Errore: direction non è un oggetto valido per satellite
Gli oggetti validi sono: ['sat', 'satellite']
switch-on_instrument10_direction21
splitted_action switch-on
action_parameter (['instrument', 'satellite'], ['on-board 0 1', 'power-avail 1'], ['power-on 0'], ['power-avail 1', 'calibrated 0'])
splitted_objects ['instrument10', 'direction21']
action_objects ['instrument', 'satellite']
Errore: direction non è un oggetto valido per satellite
Gli oggetti validi sono: ['sat', 'satellite']

52 satellite_p05_hyp-3_full
switch-on_instrument11_mode2
splitted_action switch-on
action_parameter (['instrument', 'satellite'], ['on-board 0 1', 'power-avail 1'], ['power-on 0'], ['power-avail 1', 'calibrated 0'])
splitted_objects ['instrument11', 'mode2']
action_objects ['instrument', 'satellite']
Errore: mode non è un oggetto valido per satellite
Gli oggetti validi sono: ['sat', 'satellite']
switch-on_instrument11_mode2
splitted_action switch-on
action_parameter (['instrument', 'satellite'], ['on-board 0 1', 'power-avail 1'], ['power-on 0'], ['power-avail 1', 'calibrated 0'])
splitted_objects ['instrument11', 'mode2']
action_objects ['instrument', 'satellite']
Errore: mode non è un oggetto valido per satellite
Gli oggetti validi sono: ['sat', 'satellite']
switch-on_instrument11_mode2
splitted_action switch-on
action_parameter (['instrument', 'satellite'], ['on-board 0 1', 'power-avail 1'], ['power-on 0'], ['power-avail 1', 'calibrated 0'])
splitted_objects ['instrument11', 'mode2']
action_objects ['instrument', 'satellite']
Errore: mode non è un oggetto valido per satellite
Gli oggetti validi sono: ['sat', 'satellite']

56 satellite_p07_hyp-2_full
turn-to_satellite4_instrument2_direction3
splitted_action turn-to
action_parameter (['satellite', 'direction', 'direction'], ['pointing 0 2'], ['pointing 0 1'], ['pointing 0 2'])
splitted_objects ['satellite4', 'instrument2', 'direction3']
action_objects ['satellite', 'direction', 'direction']
Errore: instrument non è un oggetto valido per direction
Gli oggetti validi sono: ['dir', 'direction']
turn-to_satellite4_instrument2_direction3
splitted_action turn-to
action_parameter (['satellite', 'direction', 'direction'], ['pointing 0 2'], ['pointing 0 1'], ['pointing 0 2'])
splitted_objects ['satellite4', 'instrument2', 'direction3']
action_objects ['satellite', 'direction', 'direction']
Errore: instrument non è un oggetto valido per direction
Gli oggetti validi sono: ['dir', 'direction']
turn-to_satellite4_instrument2_direction3
splitted_action turn-to
action_parameter (['satellite', 'direction', 'direction'], ['pointing 0 2'], ['pointing 0 1'], ['pointing 0 2'])
splitted_objects ['satellite4', 'instrument2', 'direction3']
action_objects ['satellite', 'direction', 'direction']
Errore: instrument non è un oggetto valido per direction
Gli oggetti validi sono: ['dir', 'direction']

72 satellite_p001257_hyp=hyp-6_100
calibrate_satellite1_instrument11_mode5
splitted_action calibrate
action_parameter (['satellite', 'instrument', 'direction'], ['on-board 1 0', 'calibration-target 1 2', 'pointing 0 2', 'power-on 1'], ['calibrated 1'], [])
splitted_objects ['satellite1', 'instrument11', 'mode5']
action_objects ['satellite', 'instrument', 'direction']
Errore: mode non è un oggetto valido per direction
Gli oggetti validi sono: ['dir', 'direction']
calibrate_satellite1_instrument11_mode5
splitted_action calibrate
action_parameter (['satellite', 'instrument', 'direction'], ['on-board 1 0', 'calibration-target 1 2', 'pointing 0 2', 'power-on 1'], ['calibrated 1'], [])
splitted_objects ['satellite1', 'instrument11', 'mode5']
action_objects ['satellite', 'instrument', 'direction']
Errore: mode non è un oggetto valido per direction
Gli oggetti validi sono: ['dir', 'direction']
calibrate_satellite1_instrument11_mode5
splitted_action calibrate
action_parameter (['satellite', 'instrument', 'direction'], ['on-board 1 0', 'calibration-target 1 2', 'pointing 0 2', 'power-on 1'], ['calibrated 1'], [])
splitted_objects ['satellite1', 'instrument11', 'mode5']
action_objects ['satellite', 'instrument', 'direction']
Errore: mode non è un oggetto valido per direction
Gli oggetti validi sono: ['dir', 'direction']

problemi su
33 satellite_p003215_hyp=hyp-2_100
37 satellite_p001037_hyp=hyp-2_100
49 satellite_p001266_hyp=hyp-1_100

# Approccio Offline: creazione piani

In [23]:
# Offline approach: for each problem folder, build one prompt per candidate goal,
# generate plan(s) with the model, check them with `parse_problem`, and dump both the
# detailed generations (JSON) and the aggregate counters (TXT) into `args.output_dir`.


def _canonical_facts(facts):
    """Return `facts` as one space-separated string of sorted facts.

    The facts go through `unite_actions` with the predicate names of the current
    `domain`, have underscores replaced by spaces, and are sorted before joining,
    so that two fact collections can be compared as plain strings.
    """
    united = unite_actions(
        facts,
        list(dict_predicates_domain[domain].keys()),
        domain,
    )
    return " ".join(sorted(x.replace("_", " ") for x in united))


counter = 0  # prompts for which at least one generated plan had result[0] == True
last_generated = 0  # not read in this cell; kept in case other cells rely on it
generation_output = []  # one record per (folder, candidate goal)
total_plans = 0  # number of prompts processed so far
tot_time = 0  # cumulative time spent inside model.generate
v = 0

# Make sure the destination exists before spending time on generation.
if args.output_dir:
    os.makedirs(args.output_dir, exist_ok=True)

for folder in os.listdir(dataset_dir):
    v += 1
    print(v, folder)
    convert_action_sn1 = {}
    cont = 0

    # Reset per-folder inputs so that a missing file cannot silently reuse the
    # values left over from a previous folder (or from an earlier notebook cell).
    state = goals = actions = real_goal = None
    for file in os.listdir(os.path.join(dataset_dir, folder)):
        path = os.path.join(dataset_dir, folder, file)
        if file == "template.pddl":
            state, num_direction, num_mode = initial_state(path)
        elif file == "hyps.dat":
            goals = get_goals(path)
        elif file == "obs.dat":
            actions = get_actions(path)
        elif file == "real_hyp.dat":
            real_goal = get_goals(path)

    missing = [
        name
        for name, value in (
            ("template.pddl", state),
            ("hyps.dat", goals),
            ("obs.dat", actions),
            ("real_hyp.dat", real_goal),
        )
        if value is None
    ]
    if missing:
        raise FileNotFoundError(
            f"{folder}: missing or unreadable required file(s): {missing}"
        )

    # Map the alphabetically first instrument/satellite name to the name carrying
    # the total count (e.g. "instrument<N>"). The surrounding spaces are part of the
    # keys/values.  NOTE(review): presumably this renames a 0-indexed object to fit a
    # 1..N naming scheme -- confirm against the code that consumes this mapping.
    inst, sat = estrai(state)
    inst = sorted(inst)
    sat = sorted(sat)
    convert_action_sn1[f" {inst[0]} "] = f" instrument{len(inst)} "
    convert_action_sn1[f" {sat[0]} "] = f" satellite{len(sat)} "
    convert_action_domain1["satellite"] = convert_action_sn1

    state = _canonical_facts(state)

    # Only the LAST entry of real_goal is kept (same as the original loop, which
    # overwrote r_goal on every iteration). None when real_goal is empty, in which
    # case no candidate goal is flagged as the true one.
    r_goal = None
    for real in real_goal:
        r_goal = _canonical_facts(real)

    prompts = []
    prompts_tokenized = []
    goals_sorted = []
    for goal in goals:
        goal = _canonical_facts(goal)
        goals_sorted.append(goal)
        prompt = state + " <|goals|> " + goal + " <|actions|>"
        prompts.append(prompt)
        prompt_tokenized = tokenizer(
            prompt,
            return_tensors="pt",
            truncation=True,
            max_length=args.max_length,
        )
        prompt_tokenized = prompt_tokenized["input_ids"].to(model.device)
        prompts_tokenized.append(prompt_tokenized)

    for prompt_tokenized in prompts_tokenized:

        with torch.inference_mode():
            start_time = time.time()
            generated = model.generate(
                prompt_tokenized,
                pad_token_id=tokenizer.eos_token_id,
                max_length=args.max_length,
                generation_config=generation_config,
            )
            end_time = time.time()
            delta_time = end_time - start_time
            tot_time += delta_time

        found_true = False
        plans = []
        # Holds the reward of the last decoded sequence; None if nothing was generated.
        metric_goals_satisfated = None
        for output_tokenized in generated:
            output = tokenizer.decode(output_tokenized)
            # Text before the first marker is the prompt; text between the first and
            # second marker is the plan. If truncation dropped the marker, fall back to
            # an empty plan instead of raising IndexError.
            parts = output.split("<|actions|>")
            inputs = parts[0]
            plan = parts[1] if len(parts) > 1 else ""
            # Keep only what precedes the end-of-plan token, then strip special tokens.
            plan = plan.split("<|endofplan|>")[0]
            plan = plan.replace("<|startofplan|>", "")
            plan = plan.replace("<|pad|>", "")
            p = {"input": inputs, "actions": plan}
            res = parse_problem(p, domain=domain)
            metric_goals_satisfated = calculate_reward(p, domain=domain)
            plans.append({"plan": plan, "result": res})
            if res[0] is True:
                found_true = True

        if found_true:
            counter += 1

        actions_string = " ".join(actions)

        # The candidate goal at position `cont` is the true one when its canonical
        # string equals the canonical real goal.
        true_goal = r_goal == goals_sorted[cont]

        generation_output.append(
            {
                "name": folder,
                "goal": goals[cont],
                "input": prompts[cont],
                "true_goal": true_goal,
                "actions": actions_string,
                "plans": plans,
                "value of the metric goals satisfated": metric_goals_satisfated,
                "time": format(delta_time, ".2f"),
            }
        )

        cont += 1

    total_plans += cont

    # Rewrite the cumulative outputs after each processed folder.
    with open(
        os.path.join(args.output_dir, "generation_output_100p_NewTest2.json"),
        "w",
    ) as f:
        json.dump(generation_output, f, indent=4)

    with open(
        os.path.join(args.output_dir, "generation_results_100p_NewTest2.txt"), "w"
    ) as f:
        f.write(f"Correct plans: {counter}\n")
        f.write(f"Total plans: {total_plans}\n")
        # Guard against a folder set that produced no prompts at all.
        percent = counter / total_plans * 100.0 if total_plans else 0.0
        f.write(f"Coverage: {percent}%\n")
        f.write(f"Total time: {tot_time}\n")

    # NOTE(review): this stops after the first folder returned by os.listdir, so the
    # statistics above cover a single problem. Looks like a leftover debugging aid --
    # confirm and remove to process the whole dataset.
    break
    
            
    
    

1 satellite_p01_hyp-3_full
