# Generating random GORGIAS code

The objective of this notebook is to generate an random but syntaxically correct GORGIAS code, we won't give importance to the semantics behind each code.

In [1]:
import random
import re

actions = [
    # Work-related actions
    "attend_meeting", "finish_report", "reply_emails", "give_presentation",
    
    # Social & leisure activities
    "go_to(restaurant)", "go_to(cinema)", "go_to(park)", "go_to(theater)",
    "visit_family", "attend_concert", "travel_abroad", "go_shopping",

    # Health & exercise
    "go_gym", "morning_run", "yoga_session", "visit_doctor",

    # Daily tasks
    "buy_groceries", "clean_house", "cook_dinner", "read_book",

    # Transportation
    "take_bus", "ride_bike", "drive_car", "book_flight"
]

facts = [
    # Work-related
    "urgent_deadline", "important_meeting", "boss_in_office", "team_project_due",

    # Personal situations
    "feeling_sick", "birthday_today", "wedding_anniversary", "friend_in_town", "medical_appointment",

    # Weather conditions
    "good_weather", "rainy_day", "snowstorm", "hot_day",

    # Time-based events
    "weekend", "holiday_season", "morning_rush", "night_time",

    # Social dynamics
    "invitation_from_friend", "family_gathering", "new_restaurant_to_try", "concert_nearby",

    # Financial considerations
    "low_budget", "got_bonus", "discount_on_flight", "expensive_event"
]


def convert_to_dynamic(term):
    match = re.match(r"(\w+)(\((.*?)\))?", term)
    if match:
        predicate = match.group(1)
        args = match.group(3)
        arity = len(args.split(',')) if args else 0
        return f"{predicate}/{arity}"
    return None

We will classify any GORGIAS code as beginner level as the following : 
- simple arguments with clear rules and priorities
- no recursion with minimal dependencies
- a maximum of 1 or 2 layers of preferences
- use of `complement()/2` conflict scenario, and common scenario
- no abducible or defeasible components

In [2]:
def generate_beginner_gorgias():
    action1, action2 = random.sample(actions, 2)
    shared_condition = random.choice(facts)

    remaining_facts = [fact for fact in facts if fact != shared_condition]
    p2_condition = random.choice(remaining_facts)

    remaining_facts = [fact for fact in remaining_facts if fact != p2_condition]
    c2_condition = random.choice(remaining_facts)

    # build dynamic
    if convert_to_dynamic(action1) == convert_to_dynamic(action2):
        # in case we have the same action for example : go_to(restaurant) and go_to(cinema) we should have only one dynamic
        dynamic = f":- dynamic {convert_to_dynamic(action1)}"
    else:
        dynamic = f":- dynamic {convert_to_dynamic(action1)}, {convert_to_dynamic(action2)}"
    
    isConflictScenario = random.choice([True, False])

    if isConflictScenario:  # Conflict scenario : 2 actions that are mutually exclusive
        rule1 = f"rule(r1, {action1}, []) :-."
        rule2 = f"rule(r2, {action2}, []) :-."
    else:  # common scenario : 2 actions that have a common condition
        rule1 = f"rule(r1, {action1}, []) :- {shared_condition}."
        rule2 = f"rule(r2, {action2}, []) :- {shared_condition}."
        dynamic += f", {convert_to_dynamic(shared_condition)}"

    pref1 = f"rule(p1, prefer(r1, r2), [])."

    # 1 = just p1, 2 = add p2 & c1, 3 = full depth
    depth = random.choice([1, 2, 3])

    rules = [rule1, rule2, pref1]

    if depth >= 2:
        pref2 = f"rule(p2, prefer(r2, r1), []) :- {p2_condition}."
        conflict1 = "rule(c1, prefer(p2, p1), [])."
        rules.extend([pref2, conflict1])
        dynamic += f", {convert_to_dynamic(p2_condition)}"

    if depth == 3:
        conflict2 = f"rule(c2, prefer(p1, p2), []) :- {c2_condition}."
        conflict3 = "rule(c3, prefer(c2, c1), [])."
        rules.extend([conflict2, conflict3])
        dynamic += f", {convert_to_dynamic(c2_condition)}"

    dynamic += f"."
    complement1 = f"complement({action2}, {action1})."
    complement2 = f"complement({action1}, {action2})."
    rules.extend([complement1, complement2])

    rules.insert(0, dynamic)

    return "\n".join(rules)

In [34]:
import random
num_examples = 3
gorgias_examples = [generate_beginner_gorgias() for _ in range(num_examples)]

for i, example in enumerate(gorgias_examples, 1):
    print(f"### Example {i} ###\n{example}\n")

### Example 1 ###
:- dynamic reply_emails/0, go_to/1, good_weather/0, rainy_day/0, medical_appointment/0.
rule(r1, reply_emails, []) :- good_weather.
rule(r2, go_to(theater), []) :- good_weather.
rule(p1, prefer(r1, r2), []).
rule(p2, prefer(r2, r1), []) :- rainy_day.
rule(c1, prefer(p2, p1), []).
rule(c2, prefer(p1, p2), []) :- medical_appointment.
rule(c3, prefer(c2, c1), []).
complement(go_to(theater), reply_emails).
complement(reply_emails, go_to(theater)).

### Example 2 ###
:- dynamic go_to/1, attend_concert/0, invitation_from_friend/0, urgent_deadline/0.
rule(r1, go_to(restaurant), []) :-.
rule(r2, attend_concert, []) :-.
rule(p1, prefer(r1, r2), []).
rule(p2, prefer(r2, r1), []) :- invitation_from_friend.
rule(c1, prefer(p2, p1), []).
rule(c2, prefer(p1, p2), []) :- urgent_deadline.
rule(c3, prefer(c2, c1), []).
complement(attend_concert, go_to(restaurant)).
complement(go_to(restaurant), attend_concert).

### Example 3 ###
:- dynamic morning_run/0, go_to/1.
rule(r1, morning_run

For the intermediate level we will have multi-level preference, more rules (for easy belief theories), for the preference we can add multiple conditions instead of one.

A good intermediate level should be the example of Allow/deny call :

```prolog
:- dynamic phone_call/0, at_work/0, family_member/1, at_meeting/0.
rule(r1(Call), allow(Call), []):- phone_call.
rule(r2(Call), deny(Call), []):- phone_call.
% Do we need to specify again the phone_call for p1 and p2 ???
rule(p1(Call), prefer(r1(Call), r2(Call)), []):- phone_call.
rule(p2(Call), prefer(r2(Call), r1(Call)), []):- phone_call , at_work.

rule(c1(Call), prefer(p2(Call), p1(Call)), []).
% And here too for at_work ?
rule(c2(Call), prefer(p1(Call), p2(Call)), []):- phone_call , at_work, familly_member(Call).

rule(c3(Call), prefer(c2(Call), c1(Call)), []).

rule(c4(Call), prefer(c1(Call), c2(Call)), []):- phone_call , at_work, familly_member(Call), at_meeting.

rule(c5(Call), prefer(c4(Call), c3(Call)), []).

complement(deny(Call), allow(Call)).
complement(allow(Call), deny(Call)).
```

In [3]:
# WIP
def generate_intermediate_gorgias():
    action1, action2 = random.sample(actions, 2)
    
    fact1 = random.choice(facts)
    remaining_facts = [f for f in facts if f != fact1]
    fact2 = random.choice(remaining_facts)
    remaining_facts = [f for f in remaining_facts if f != fact2]
    fact3 = random.choice(remaining_facts)
    
    rules = []
    
    rule1 = f"rule(r1, {action1}, []) :- {fact1}."
    rule2 = f"rule(r2, {action2}, []) :- {fact1}."
    
    pref1 = f"rule(p1, prefer(r1, r2), []) :- {fact1}, {fact3}."
    pref2 = f"rule(p2, prefer(r2, r1), []) :- {fact2}, {fact3}."
    
    conflict1 = "rule(c1, prefer(p2, p1), [])."
    conflict2 = f"rule(c2, prefer(p1, p2), []) :- {fact2}, neg({fact3})."
    conflict3 = "rule(c3, prefer(c2, c1), [])."
    
    complement1 = f"complement({action2}, {action1})."
    complement2 = f"complement({action1}, {action2})."
    
    rules.extend([rule1, rule2, pref1, pref2, conflict1, conflict2, conflict3, complement1, complement2])
    
    return "\n".join(rules)


In [12]:
import random
num_examples = 3
gorgias_intermediate_examples = [generate_intermediate_gorgias() for _ in range(num_examples)]

for i, example in enumerate(gorgias_intermediate_examples, 1):
    print(f"### Example {i} ###\n{example}\n")

### Example 1 ###
rule(r1, go_to(restaurant), []) :- important_meeting.
rule(r2, give_presentation, []) :- important_meeting.
rule(p1, prefer(r1, r2), []) :- important_meeting, good_weather.
rule(p2, prefer(r2, r1), []) :- weekend, good_weather.
rule(c1, prefer(p2, p1), []).
rule(c2, prefer(p1, p2), []) :- weekend, neg(good_weather).
rule(c3, prefer(c2, c1), []).
complement(give_presentation, go_to(restaurant)).
complement(go_to(restaurant), give_presentation).

### Example 2 ###
rule(r1, go_to(cinema), []) :- team_project_due.
rule(r2, finish_report, []) :- team_project_due.
rule(p1, prefer(r1, r2), []) :- team_project_due, expensive_event.
rule(p2, prefer(r2, r1), []) :- got_bonus, expensive_event.
rule(c1, prefer(p2, p1), []).
rule(c2, prefer(p1, p2), []) :- got_bonus, neg(expensive_event).
rule(c3, prefer(c2, c1), []).
complement(finish_report, go_to(cinema)).
complement(go_to(cinema), finish_report).

### Example 3 ###
rule(r1, drive_car, []) :- urgent_deadline.
rule(r2, visit_doc

The following code automates the creation of 100 Gorgias code examples and saves them in a structured CSV file.

In [13]:
import csv

num_examples = 100

gorgias_examples = [generate_beginner_gorgias() for _ in range(num_examples)]

with open("gorgias_beginner_examples.csv", "w", newline="", encoding="utf-8") as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(["Example Number", "Gorgias Code"])
    for i, example in enumerate(gorgias_examples, start=1):
        writer.writerow([i, example])


The following code reads a CSV file containing Gorgias code examples, translates each example into clear English using the OpenAI Chat API, and then writes the original code along with its translation into a new CSV file.

In [None]:
import csv
import openai
import time

openai.api_key = "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"

def translate_gorgias_to_nl(gorgias_code):

    prompt = f"Please translate the following Gorgias program into clear English, without including any additional built-in rules or extra explanations:\n\n{gorgias_code}\n"

    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt}
    ]

    try:
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=messages,
            temperature=0.5,
            max_tokens=300
        )
        translation = response.choices[0].message['content'].strip()
    except Exception as e:
        print(f"Error during API call: {e}")
        translation = "Error in translation."

    return translation

input_file = "gorgias_beginner_examples.csv"

output_file = "gorgias_beginner_nl_pairs.csv"

with open(input_file, "r", encoding="utf-8") as infile, \
     open(output_file, "w", newline="", encoding="utf-8") as outfile:

    reader = csv.DictReader(infile)
    fieldnames = ["Example Number", "Gorgias Code", "NL Translation"]
    writer = csv.DictWriter(outfile, fieldnames=fieldnames)

    writer.writeheader()

    for row in reader:
        example_number = row["Example Number"]
        gorgias_code = row["Gorgias Code"]

        print(f"Processing example {example_number}...")
        nl_translation = translate_gorgias_to_nl(gorgias_code)

        writer.writerow({
            "Example Number": example_number,
            "Gorgias Code": gorgias_code,
            "NL Translation": nl_translation
        })

        time.sleep(1)


Processing example 1...
Processing example 2...
Processing example 3...
Processing example 4...
Processing example 5...
Processing example 6...
Processing example 7...
Processing example 8...
Processing example 9...
Processing example 10...
Processing example 11...
Processing example 12...
Processing example 13...
Processing example 14...
Processing example 15...
Processing example 16...
Processing example 17...
Processing example 18...
Processing example 19...
Processing example 20...
Processing example 21...
Processing example 22...
Processing example 23...
Processing example 24...
Processing example 25...
Processing example 26...
Processing example 27...
Processing example 28...
Processing example 29...
Processing example 30...
Processing example 31...
Processing example 32...
Processing example 33...
Processing example 34...
Processing example 35...
Processing example 36...
Processing example 37...
Processing example 38...
Processing example 39...
Processing example 40...
Processin

The following code does the same thing as the previous one, but with an example in the prompt.  This results in significantly improved, more human-like, and syntactically correct responses from the OpenAI API.

In [1]:
import csv
import openai
import time

openai.api_key = "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"

def translate_gorgias_to_nl(gorgias_code):

    prompt = f"""The Gorgias program :

:- dynamic go_out/0, stay_home/0, nice_weather/0, nice_movie_tv/0, invitation_from_friend/0.
rule(r1, go_out, []) :- nice_weather.
rule(r2, stay_home, []) :- nice_weather.
rule(p1, prefer(r1,r2), []).
rule(p2, prefer(r2,r1), []) :- nice_movie_tv.
rule(c1, prefer(p2,p1), []).
rule(c2, prefer(p1,p2) :- invitation_from_friend, []).
rule(c3, prefer(c2,c1), []).
complement(go_out, stay_home).
complement(stay_home, go_out).

translates to English as "When it is nice weather I can go out or stay home. Generally, I prefer to go out but if there is a nice movie on TV I prefer to stay home. However, if I have an invitation from a friend I prefer to go out. I can't at the same time go out and stay home.".

The Gorgias program :

:- dynamic accept_call/0, deny_call/0, from_family_member/0.
rule(r1, accept_call, []) :-
rule(r2, deny_call, []) :-
rule(p1, prefer(r2, r1), []).
rule(p2, prefer(r1, r2), []) :- from_family_member.
rule(c1, prefer(p2, p1), []).
complement(accept_call, deny_call).
complement(deny_call, accept_call).

translates to English as "I can either accept or deny the call. Generally, I prefer to deny the call but if it is from a family member I prefer to accept the call. I can't at the same time accept and deny the call.".

Please translate the following Gorgias program into clear English by taking into account only the syntax, by disregarding semantic. Output only the translated text, without any labels, introductions, or explanations. :\n\n{gorgias_code}\n"""

    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt}
    ]

    try:
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=messages,
            temperature=0.5,
            max_tokens=300
        )
        translation = response.choices[0].message['content'].strip()
    except Exception as e:
        print(f"Error during API call: {e}")
        translation = "Error in translation."

    return translation

input_file = "gorgias_beginner_examples_modified.csv"

output_file = "gorgias_beginner_nl_pairs_modified_prompt29.csv"

with open(input_file, "r", encoding="utf-8") as infile, \
     open(output_file, "w", newline="", encoding="utf-8") as outfile:

    reader = csv.DictReader(infile)
    fieldnames = ["Example Number", "Gorgias Code", "NL Translation"]
    writer = csv.DictWriter(outfile, fieldnames=fieldnames)

    writer.writeheader()

    for row in reader:
        example_number = row["Example Number"]
        gorgias_code = row["Gorgias Code"]

        print(f"Processing example {example_number}...")
        nl_translation = translate_gorgias_to_nl(gorgias_code)

        writer.writerow({
            "Example Number": example_number,
            "Gorgias Code": gorgias_code,
            "NL Translation": nl_translation
        })

        time.sleep(1)


Processing example 1...
Processing example 2...
Processing example 3...
Processing example 4...
Processing example 5...
Processing example 6...
Processing example 7...
Processing example 8...
Processing example 9...
Processing example 10...
Processing example 11...
Processing example 12...
Processing example 13...
Processing example 14...
Processing example 15...
Processing example 16...
Processing example 17...
Processing example 18...
Processing example 19...
Processing example 20...


In [1]:
import pandas as pd
from bert_score import score


csv_file = "gorgias_beginner_nl_pairs_modified_prompt22.csv"

df = pd.read_csv(csv_file)

references = df["Manual NL Translation"].tolist()
candidates = df["NL Translation"].tolist()

P, R, F1 = score(candidates, references, lang="en", verbose=True)

for i, (p, r, f1) in enumerate(zip(P, R, F1)):
    print(f"Exemple {df['Example Number'][i]}:")
    print(f"  Precision : {p.item():.4f}")
    print(f"  Recall    : {r.item():.4f}")
    print(f"  F1        : {f1.item():.4f}")
    print("----------")

avg_f1 = F1.mean().item()
print(f"F1 moyen sur tous les exemples : {avg_f1:.4f}")

  from .autonotebook import tqdm as notebook_tqdm
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


100%|██████████| 1/1 [00:00<00:00,  4.15it/s]


computing greedy matching.


100%|██████████| 1/1 [00:00<00:00, 29.41it/s]

done in 0.28 seconds, 71.43 sentences/sec
Exemple 1:
  Precision : 0.9473
  Recall    : 0.9547
  F1        : 0.9510
----------
Exemple 2:
  Precision : 0.9717
  Recall    : 0.9687
  F1        : 0.9702
----------
Exemple 3:
  Precision : 0.9351
  Recall    : 0.9328
  F1        : 0.9339
----------
Exemple 4:
  Precision : 0.9830
  Recall    : 0.9839
  F1        : 0.9834
----------
Exemple 5:
  Precision : 0.9517
  Recall    : 0.9486
  F1        : 0.9502
----------
Exemple 6:
  Precision : 0.9760
  Recall    : 0.9750
  F1        : 0.9755
----------
Exemple 7:
  Precision : 0.9591
  Recall    : 0.9634
  F1        : 0.9613
----------
Exemple 8:
  Precision : 0.9790
  Recall    : 0.9803
  F1        : 0.9796
----------
Exemple 9:
  Precision : 0.9416
  Recall    : 0.9577
  F1        : 0.9496
----------
Exemple 10:
  Precision : 0.9867
  Recall    : 0.9851
  F1        : 0.9859
----------
Exemple 11:
  Precision : 0.9528
  Recall    : 0.9293
  F1        : 0.9409
----------
Exemple 12:
  Precisi




In [3]:
import pandas as pd
import nltk
from nltk.translate.bleu_score import sentence_bleu, corpus_bleu

nltk.download('punkt')

csv_file = "gorgias_beginner_nl_pairs_modified_prompt22.csv"

df = pd.read_csv(csv_file)

print("Colonnes du fichier CSV :", df.columns.tolist())

references = df["Manual NL Translation"].tolist()       # Texte de référence
candidates = df["NL Translation"].tolist()       # Traduction candidate

tokenized_references = [nltk.word_tokenize(ref) for ref in references]
tokenized_candidates = [nltk.word_tokenize(cand) for cand in candidates]

list_of_references = [[ref_tokens] for ref_tokens in tokenized_references]

print("BLEU score par exemple :")
for i, (refs, cand) in enumerate(zip(list_of_references, tokenized_candidates)):
    bleu = sentence_bleu(refs, cand)
    print(f"Exemple {df['Example Number'][i]}: {bleu:.4f}")

corpus_bleu_score = corpus_bleu(list_of_references, tokenized_candidates)
print(f"\nCorpus BLEU score: {corpus_bleu_score:.4f}")

Colonnes du fichier CSV : ['Example Number', 'Gorgias Code', 'NL Translation', 'Manual NL Translation']
BLEU score par exemple :
Exemple 1: 0.6128
Exemple 2: 0.6488
Exemple 3: 0.2893
Exemple 4: 0.7869
Exemple 5: 0.6722
Exemple 6: 0.6922
Exemple 7: 0.5812
Exemple 8: 0.8339
Exemple 9: 0.4254
Exemple 10: 0.8733
Exemple 11: 0.4874
Exemple 12: 0.7440
Exemple 13: 0.7668
Exemple 14: 0.2813
Exemple 15: 1.0000
Exemple 16: 0.7596
Exemple 17: 0.3723
Exemple 18: 0.2896
Exemple 19: 0.6729
Exemple 20: 0.6446

Corpus BLEU score: 0.6736


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\mmuzz\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [9]:
import pandas as pd
from comet import download_model, load_from_checkpoint

model_path = download_model("wmt20-comet-da")
model = load_from_checkpoint(model_path)

csv_file = "gorgias_beginner_nl_pairs_modified_prompt22.csv"
df = pd.read_csv(csv_file)

references = df["Manual NL Translation"].tolist()
candidates = df["NL Translation"].tolist()

data = []
for ref, cand in zip(references, candidates):
    data.append({
        "src": "",
        "mt": cand,
        "ref": ref
    })

output = model.predict(data, batch_size=8, gpus=0)
scores = output["scores"] if isinstance(output, dict) and "scores" in output else output

for i, score in enumerate(scores):
    print(f"Exemple {df['Example Number'][i]}: COMET score = {float(score):.4f}")

avg_score = sum(float(s) for s in scores) / len(scores)
print(f"\nCOMET score moyen sur l'ensemble des exemples : {avg_score:.4f}")

wmt20-comet-da is already in cache.
Lightning automatically upgraded your loaded checkpoint from v1.3.5 to v2.5.1. To apply the upgrade to your files permanently, run `python -m pytorch_lightning.utilities.upgrade_checkpoint C:\Users\mmuzz\.cache\torch\unbabel_comet\wmt20-comet-da\checkpoints\model.ckpt`
Encoder model frozen.
C:\Users\mmuzz\miniconda3\Lib\site-packages\pytorch_lightning\core\saving.py:195: Found keys that are not in the model state dict but in the checkpoint: ['encoder.model.embeddings.position_ids']
You are using the plain ModelCheckpoint callback. Consider using LitModelCheckpoint which with seamless uploading to Model registry.
GPU available: True (cuda), used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
C:\Users\mmuzz\miniconda3\Lib\site-packages\pytorch_lightning\trainer\setup.py:177: GPU available but not used. You can set it by doing `Trainer(accelerator='gpu')`.
Predicting DataLoader 0: 100%|██████████| 3/3 [00:03<00:00,  

Exemple 1: COMET score = 0.6903
Exemple 2: COMET score = 0.7221
Exemple 3: COMET score = 0.5315
Exemple 4: COMET score = 0.7361
Exemple 5: COMET score = 0.5841
Exemple 6: COMET score = 0.6882
Exemple 7: COMET score = 0.5743
Exemple 8: COMET score = 0.7539
Exemple 9: COMET score = 0.6205
Exemple 10: COMET score = 0.8056
Exemple 11: COMET score = 0.4532
Exemple 12: COMET score = 0.8351
Exemple 13: COMET score = 0.8690
Exemple 14: COMET score = 0.0860
Exemple 15: COMET score = 0.9535
Exemple 16: COMET score = 0.6899
Exemple 17: COMET score = 0.2492
Exemple 18: COMET score = 0.2653
Exemple 19: COMET score = 0.7713
Exemple 20: COMET score = 0.7763

COMET score moyen sur l'ensemble des exemples : 0.6328



