### **Part 1**

---
#### **1.1 Import Formula Class**

In [1]:
try:
    import torch
except:
    print("[INFO] Couldn't find torch... installing it.")
    !pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
    import torch

try:
    import torchmetrics
except:
    print("[INFO] Couldn't find torchmetrics... installing it.")
    !pip install torchmetrics
    import torchmetrics

try:
    import torchinfo
except:
    print("[INFO] Couldn't find torchinfo... installing it.")
    !pip install torchinfo
    import torchinfo

In [2]:
import sys
import os
import random
from typing import Dict, Tuple, List, Set, Union, Type, Literal
from itertools import product
from dataclasses import dataclass
from collections import Counter
import re

import pandas as pd
import numpy as np

import torch
import torch.nn as nn
from torchmetrics import Accuracy
from torch.nn.utils.rnn import pad_sequence
from torch.utils.data import Dataset, DataLoader
from torchinfo import summary

from sklearn.model_selection import train_test_split

from tqdm.auto import tqdm
from pathlib import Path

In [3]:
# --- Importing Formula Class ---
# Go two levels up: from ICTCS_notebooks → theorem_prover_core → project root
project_root = os.path.abspath(os.path.join(os.getcwd(), "..", ".."))
if project_root not in sys.path:
    sys.path.insert(0, project_root)

from theorem_prover_core.formula import (Formula, Letter, Falsity, Conjunction, Disjunction, Implication,
                                         Negation, BinaryConnectiveFormula, UnaryConnectiveFormula, bottom)

---
#### **1.2 Truth Assignment function and Normalization**

In [4]:
# --- Truth Assignment Function ---
# Generate random truth values for letters
def truth_assignment(letters: int, seed: int) -> Dict[int, bool]:
    """
    Randomly assigns True or False to each propositional letter in a formula.

    Args:
        letters (int): An integer that represents the number of propositional letters 
    
    Returns: 
        Dict[int, bool]: A dictonary where keys are integers representing the propositional 
        letters, and values are booleans (True or False), representing the truth value 
        assigned to each variable.
    """
    if seed is not None:
        random.seed(seed)
    return {i: random.choice([True, False]) for i in range(letters)}


# Example: 
n_letters = 2 
values = truth_assignment(n_letters, seed=42)
print(f"Random truth values assigned to P1 and P2: {values}")

Random truth values assigned to P1 and P2: {0: True, 1: True}


In [5]:
# --- Evaluate the truth value of a formula ---
def evaluate_formula(formula: Formula, truth_assignment: Dict[int, bool]) -> bool:
    """
    Evaluates the truth value of a formula using the given truth assignment 
    for propositional letters.

    Args: 
        formula: The propositional formula to evaluate.
        truth_assignment: A dictionary with the propositional letters values.
    
    Returns: 
        bool: A truth value.
    """
    if isinstance(formula, Letter):
        return truth_assignment[formula.n]
    elif isinstance(formula, Falsity):
        return False
    elif isinstance(formula, Negation):
        return not evaluate_formula(formula.formula, truth_assignment)
    elif isinstance(formula, Conjunction):
        return evaluate_formula(formula.left, truth_assignment) and evaluate_formula(formula.right, truth_assignment)
    elif isinstance(formula, Disjunction):
        return evaluate_formula(formula.left, truth_assignment) or evaluate_formula(formula.right, truth_assignment)
    #elif isinstance(formula, ExclusiveDisjunction):
    #    return evaluate_formula(formula.left, truth_assignment) != evaluate_formula(formula.right, truth_assignment)
    elif isinstance(formula, Implication):
        return not evaluate_formula(formula.left, truth_assignment) or evaluate_formula(formula.right, truth_assignment)
    else:
        raise Exception("Unknown formula type")

# Example: 
A1 = Letter(1)
A2 = Letter(2)
A3 = Letter(3)
conjunction = Conjunction(A1, A2)
implication = Implication(conjunction, A3)
truth_assignment = {1: True, 2: False, 3: True}

# Evaluate the formula
result = evaluate_formula(implication, truth_assignment)

values_list = list(truth_assignment.values())
print(f"Truth value of the composed formula: {implication}, where A1={values_list[0]},"
      f" A2={values_list[1]}, and A3={values_list[2]}: \n{result}")

Truth value of the composed formula: A1 ∧ A2 → A3, where A1=True, A2=False, and A3=True: 
True


In [6]:
# --- Derive letters from a given formula ---
def derive_letters(formula: Formula) -> Set[int]:
    """
    Recursively derives the set of propositional letters (by their index) in the given formula.
       
    Args: 
        formula: A propositional formula.
       
    Returns: 
        A set of letters indices. 
    """
    if isinstance(formula, Letter):
        return {formula.n}
    elif isinstance(formula, Falsity):
        return set()
    elif isinstance(formula, UnaryConnectiveFormula):
        return derive_letters(formula.formula)
    elif isinstance(formula, BinaryConnectiveFormula):
        left_letters = derive_letters(formula.left)
        right_letters = derive_letters(formula.right)
        return left_letters.union(right_letters)
    else:
        raise Exception("Unknown formula type")

print(f"Indices of propositional variables in the formula: {implication}:"
      f" {derive_letters(implication)}")

Indices of propositional variables in the formula: A1 ∧ A2 → A3: {1, 2, 3}


In [7]:
# --- Check tautology status of a given formula ---
def is_tautology(formula: Formula) -> bool:
    """
    Checks if a given formula is a tautology.

    Args: 
        formula: A propositional formula.
       
    Returns: 
        bool: The tautology status (True or False).
    """
    letters = derive_letters(formula)
    num_letters = len(letters)

    for values in product([False, True], repeat=num_letters):
        assignment = dict(zip(sorted(letters), values))
        if not evaluate_formula(formula, assignment):
            return False

    return True

# Example
print(f"Is the formula {implication} a tautology? {is_tautology(implication)}")
taut = Disjunction(A1, Negation(A1))
print(f"Is the formula {taut} a tautology? {is_tautology(taut)}")


Is the formula A1 ∧ A2 → A3 a tautology? False
Is the formula A1 ∨ ¬A1 a tautology? True


In [8]:
# --- Generator of letter sequence ---
def generate_letter_sequence(num_letters: int) -> None:
    """"
    Generate sequence of letter wrapping around when reaching 
    the max number of letters.

    Args:
        num_letters (int): Number of total propositional letters to generate.
    """
    current_letter = 0
    while True:
        yield Letter(current_letter)
        current_letter += 1
        if current_letter > num_letters:
            current_letter = 0  

# Example 
letter_generator = generate_letter_sequence(num_letters=6)
for _ in range(10):
    print(next(letter_generator))

A0
A1
A2
A3
A4
A5
A6
A0
A1
A2


In [9]:
class Normalizer:
    """
    This class is used to normalize formulas before use them as input.
    Normalization maintains letter consistency, ensuring that the same propositional 
    letter could appears with the same index throughout the formula.
    
    """

    def __init__(self):
        self.__dict_letters = {}
        self.__last_letter = 0

    def normalize(self, data: Formula) -> Formula:
        if isinstance(data, Letter):
            # If the letter has already been encountered, use the same normalized index
            n = data.n
            if n not in self.__dict_letters:
                # Assign a new letter if it hasn't been seen before
                self.__dict_letters[n] = self.__last_letter
                self.__last_letter += 1
            # Return the letter with its normalized index
            return Letter(self.__dict_letters[n])

        elif isinstance(data, Falsity):
            return Falsity()

        elif isinstance(data, UnaryConnectiveFormula):
            formula = self.normalize(data.formula)
            return data.__class__(formula)

        elif isinstance(data, BinaryConnectiveFormula):
            left = self.normalize(data.left)
            right = self.normalize(data.right)
            return data.__class__(left, right)

        else:
            raise Exception("Unknown formula type")

# Example
formula = Implication(Conjunction(Letter(3), Letter(2)), Disjunction(Letter(5), Letter(3)))
print(f"Not normalized formula: {formula}")

normalizer = Normalizer()
normalized_formula = normalizer.normalize(formula)
print(f"Normalized formula: {normalized_formula}")

Not normalized formula: A3 ∧ A2 → A5 ∨ A3
Normalized formula: A0 ∧ A1 → A2 ∨ A0


In [10]:
# --- Random Formula Generator ---
def generate_random_formula(max_depth: int = None, 
                            letter_generator = None, 
                            seed :int = None ) -> Formula:
    """
    Generates a random formula of a certain depth.
    
    Args: 
        max_depth (int): Controls the maximum depth of the generated formula's syntax tree.
        letter_generator: A generator of letter sequence. 
        seed (int): Integer for reproducibility.
       
    Returns: 
        A random propositional formula.

    """
      
    if seed is not None:
        random.seed(seed)


    if max_depth == 0 or random.random() > 0.5:
        # If max depth is 0 or by random chance, return either a letter or falsity
        return random.choices([next(letter_generator), Falsity()], weights=[0.95, 0.05])[0]  # Priority given to random letter

    formula_type = random.choice([Conjunction, Disjunction, Implication, Negation])

    if formula_type == Negation:
        subformula = generate_random_formula(max_depth - 1, letter_generator)
        return Negation(subformula)
    else:
        left_subformula = generate_random_formula(max_depth - 1, letter_generator)
        right_subformula = generate_random_formula(max_depth - 1, letter_generator)
        return formula_type(left_subformula, right_subformula)
    
# --- Generate normalized random formulas ---
def generate_normalized_random_formula(max_depth: int = None, 
                                       num_letters: int = None, 
                                       seed :int = None) -> Formula:
    """
    Generates a normalized random formula where letters are renumbered in ascending order.

    Args: 
        max_depth (int): Controls the maximum depth of the generated formula's syntax tree.
        num_letters (int): Number of propositional letter that can be used.
        seed (int): integer for reproducibility.
      
    Returns: 
        A random normalized formula.
        
    """

    if seed is not None:
        random.seed(seed)

    # Initialize a generator for propositional letters in ascending order
    letter_generator = generate_letter_sequence(num_letters)

    # Generate a random formula
    random_formula = generate_random_formula(max_depth, letter_generator, seed=seed)

    # Normalize the formula using the Normalizer class
    normalizer = Normalizer()
    normalized_formula = normalizer.normalize(random_formula)

    return normalized_formula

# Example: 
new_formula = generate_normalized_random_formula(max_depth=3, 
                                                 num_letters=2,
                                                 seed=32)
print(new_formula)

(A0 ∧ A1) ∨ A2 ∨ A0


In [11]:
# --- Checking if a formula is normalized --- 
def is_normalized(formula: Formula) -> bool:
    """
    Checks if a given formula is normalized.
    
    """
    encountered_letters = set()
    last_index = -1  # Initialize to an invalid index to check ascending order
                     # when the first letter is checked, its index will always be greater than -1

    def check_formula(f: Formula):
        nonlocal last_index
        if isinstance(f, Letter):
            # Check if the letter index is in ascending order
            if f.n in encountered_letters:
                return True  # Already seen, valid
            if f.n <= last_index:
                return False  # Not in ascending order
            encountered_letters.add(f.n)
            last_index = f.n  # Update last_index to current letter's index
            return True

        elif isinstance(f, Falsity):
            return True

        elif isinstance(f, UnaryConnectiveFormula):
            return check_formula(f.formula)

        elif isinstance(f, BinaryConnectiveFormula):
            return check_formula(f.left) and check_formula(f.right)

        else:
            raise Exception("Unknown formula type")

    return check_formula(formula)

# Example:
A0 = Letter(0)
A1 = Letter(1)
A2 = Letter(2)
A3 = Letter(3)
A4 = Letter(4)
falsity = Falsity()

formula1 = Disjunction(Letter(0), Letter(1))  # Normalized: A0 ∨ A1
formula2 = Disjunction(Letter(1), Letter(0))  # Not normalized: A1 ∨ A0
formula3 = Disjunction(Letter(0), Negation(Letter(0)))  # Normalized: A0 ∨ ¬A0

disjunction_A0_A1 = A0 | A1  # A0 ∨ A1
and_with_A3 = Conjunction(disjunction_A0_A1, A2)  # ((A0 ∨ A1) ∧ A2)
and_with_A0_A4 = Conjunction(A0, A3)  # A0 ∧ A3
formula4 = Implication(and_with_A3, and_with_A0_A4)  # ((A0 ∨ A1) ∧ A2) → (A0 ∧ A3)

# Print the final formula
print(f"Normalized status of ({formula1}) should be True and is: {is_normalized(formula1)}")  # True
print(f"Normalized status of ({formula2}) should be False and is: {is_normalized(formula2)}") # False
print(f"Normalized status of ({formula3}) should be True and is: {is_normalized(formula3)}")  # True
print(f"Normalized status of ({formula4}) should be True and is: {is_normalized(formula4)}")  # True

Normalized status of (A0 ∨ A1) should be True and is: True
Normalized status of (A1 ∨ A0) should be False and is: False
Normalized status of (A0 ∨ ¬A0) should be True and is: True
Normalized status of ((A0 ∨ A1) ∧ A2 → A0 ∧ A3) should be True and is: True


---
### **1.3 Generate Dataset**

In [12]:
# --- Normalized Dataset --- 
def generate_normalized_dataset(num_formulas: int, 
                                max_depth: int, 
                                num_letters: int) -> pd.DataFrame:
    """
    Generates a DataFrame containing random normalized formulas and their tautology status.

    Args: 
        num_formulas (int): Number of formulas to generate.
        max_depth (int): maximum depth of the generated formula's syntax tree.
        num_letters (int): Number of propositional letter that can be used.
    
    Returns: 
        A pandas DataFrame) that contains:
        - Random normalized logical formulas,
        - A Boolean label indicating whether each formula is a tautology.
    """
    data = []
    seen_formulas = set()

    while len(data) < num_formulas:
        formula = generate_normalized_random_formula(max_depth=max_depth, num_letters=num_letters)

        formula_str = str(formula)

        if formula_str in seen_formulas:
            continue

        seen_formulas.add(formula_str)

        tautology_status = is_tautology(formula)

        data.append({"formula": formula_str, "is_tautology": tautology_status})

    df = pd.DataFrame(data)
    return df

In [13]:
def save_normalized_dataset_to_csv(filename: str, 
                                   size: int, 
                                   max_depth: int = None, 
                                   num_letters: int = None, 
                                   seed: int = None):
    """
    Save the normalized dataset in a CSV file. 

    Args: 
        filename (str):
        size (str):
        max_depth (int):
        num_letters (int):
        seed (int):
        
    Returns: 
        A Dataset of random, normalized propositional logic formulas (with their tautology status),
        and saves it as a CSV file on disk under a specific filename.
    """

    if seed is not None:
        random.seed(seed)
    dataset = generate_normalized_dataset(num_formulas=size, 
                                          max_depth=max_depth, 
                                          num_letters=num_letters)
    dataset.to_csv(filename, index=False) # DataFrame’s index is not saved as a column
    print(f"[INFO] Saving Dataset to {os.path.abspath(filename)}")

In [14]:
SIZE = 10000
MAX_DEPTH = 5
NUM_LETTERS = 7

save_normalized_dataset_to_csv(filename='datasets/first_normalized_formulas_dataset.csv', 
                               size=SIZE, 
                               max_depth=MAX_DEPTH, 
                               num_letters=NUM_LETTERS, 
                               seed=42)

datapath = "datasets/first_normalized_formulas_dataset.csv"
data_set = pd.read_csv(datapath)

data_set.head(5)

[INFO] Saving Dataset to /home/labeconomia/nbalestra/theorem_prover/theorem_prover_core/ICTCS_notebooks/datasets/first_normalized_formulas_dataset.csv


Unnamed: 0,formula,is_tautology
0,A0,False
1,(A0 ∧ ¬(A1 ∧ A2 ∧ A3)) ∨ (A4 → A5),False
2,(¬(A0 → A1) → A2) ∨ A3,False
3,⊥ ∨ A0,False
4,¬A0,False


In [15]:
print(data_set.count())
count = data_set.is_tautology.value_counts()
print(f"\nNumber of True and False formulas: \n{count}\n")

total = len(data_set)
tautologies = data_set["is_tautology"].sum()
percentage = (tautologies / total) * 100
print(f"Percentage of tautologies in the dataset: {percentage:.2f}%")

formula         10000
is_tautology    10000
dtype: int64

Number of True and False formulas: 
is_tautology
False    9576
True      424
Name: count, dtype: int64

Percentage of tautologies in the dataset: 4.24%


In [16]:
# Checking if common tautologies are in the dataset

A0 = Letter(0)
A1 = Letter(1)
A2 = Letter(2)

excluded_middle = Disjunction(A0, Negation(A0))  # A0 ∨ ¬A0
not_contradiction = Negation(Conjunction(A0, Negation(A0)))  # ¬(A0 ∧ ¬A0)
de_morgan = Conjunction(
            Implication(Negation(Conjunction(A0, A1)), Disjunction(Negation(A0), Negation(A1))),
            Implication(Disjunction(Negation(A0), Negation(A1)), Negation(Conjunction(A0, A1)))    # ¬(A0 ∧ A1) ↔ (¬A0 ∨ ¬A1)
        )
distributivity = Conjunction(
                Implication(Conjunction(A0, Disjunction(A1, A2)), Disjunction(Conjunction(A0, A1), Conjunction(A0, A2))),
                Implication(Disjunction(Conjunction(A0, A1), Conjunction(A0, A2)), Conjunction(A0, Disjunction(A1, A2)))   # A0 ∨ (A1 ∧ A2) ↔ (A0 ∨ A1) ∧ (A0 ∨ A2)
        )


excluded_middle_str = str(excluded_middle)
not_contradiction_str = str(not_contradiction)
de_morgan_str = str(de_morgan)
distributivity_str = str(distributivity)


formulas_to_check = [excluded_middle_str, not_contradiction_str, de_morgan_str, distributivity_str]

for formula_str in formulas_to_check:
    exists_in_dataset = formula_str in data_set['formula'].values
    print(f"The formula '{formula_str}' is {'in' if exists_in_dataset else 'not in'} the dataset.")

The formula 'A0 ∨ ¬A0' is not in the dataset.
The formula '¬(A0 ∧ ¬A0)' is not in the dataset.
The formula '(¬(A0 ∧ A1) → ¬A0 ∨ ¬A1) ∧ (¬A0 ∨ ¬A1 → ¬(A0 ∧ A1))' is not in the dataset.
The formula '(A0 ∧ (A1 ∨ A2) → (A0 ∧ A1) ∨ (A0 ∧ A2)) ∧ ((A0 ∧ A1) ∨ (A0 ∧ A2) → A0 ∧ (A1 ∨ A2))' is not in the dataset.


---
#### **1.4 Data Augmentation With Common Tautologies Instantiation**

In [17]:
# Metavariable and Instantiation classes

Position = Literal["left", "right"]  
Associativity = Literal["left", "right"]  

@dataclass(frozen=True)
class Metavariable(Formula):
    """
    A class representing a metavariable in a formula.
    Metavariables will be replaced with actual formulas during instantiation.
    """
    __slots__ = ('name',)
    name: str

    def _make_str(self, outer_class: Union[Type[Formula], None], position: Union[Position, None]) -> str:
        return f'{self.name}'


class Instantiator:
    """
    This class handles the instantiation of metavariables within formulas.

    """
    def __init__(self, num_letters: int):
        self.num_letters = num_letters
        self.letter_generator = generate_letter_sequence(num_letters)
        self.normalizer = Normalizer()

    def instantiate(self, formula: Formula, metavariable_map: Dict[str, Formula]) -> Formula:
        """
        Recursively replaces metavariables in a formula with actual formulas from the map.

        """
        if isinstance(formula, Metavariable):
            if formula.name not in metavariable_map:
                raise Exception(f"Metavariable '{formula.name}' not found in the map.")
            return self.normalizer.normalize(metavariable_map[formula.name])

        elif isinstance(formula, Letter):
            return formula

        elif isinstance(formula, Falsity):
            return formula

        elif isinstance(formula, UnaryConnectiveFormula):
            instantiated_formula = self.instantiate(formula.formula, metavariable_map)
            return formula.__class__(instantiated_formula)

        elif isinstance(formula, BinaryConnectiveFormula):
            left_instantiated = self.instantiate(formula.left, metavariable_map)
            right_instantiated = self.instantiate(formula.right, metavariable_map)
            return formula.__class__(left_instantiated, right_instantiated)

        else:
            raise Exception("Unknown formula type")


# Example:
meta_A = Metavariable("A")
meta_B = Metavariable("B")
formula_with_metavariables = Conjunction(meta_A, meta_B)

num_letters = 6
max_depth = 3
instantiator = Instantiator(num_letters=num_letters)

form_1 = generate_normalized_random_formula(max_depth=max_depth, num_letters=num_letters, seed=43)
form_2 = generate_normalized_random_formula(max_depth=max_depth, num_letters=num_letters, seed=44)

metavariable_map = {
        "A": form_1,
        "B": form_2
        }

instantiated_formula = instantiator.instantiate(formula_with_metavariables, metavariable_map)


print(f"Original formula with metavariables: {formula_with_metavariables}\n")
print(f"Metavariable A will be instantiated with: {form_1} and"
      f"\nMetavariable B will be instantiated with: {form_2}\n")
print(f"Instantiated formula: {instantiated_formula}")

Original formula with metavariables: A ∧ B

Metavariable A will be instantiated with: A0 ∨ A1 and
Metavariable B will be instantiated with: (A0 ∨ A1 ∨ (A2 → A3)) ∧ ((A4 → A5) → A6 ∧ A0)

Instantiated formula: (A0 ∨ A1) ∧ (A0 ∨ A1 ∨ (A2 → A3)) ∧ ((A4 → A5) → A6 ∧ A0)


In [18]:
# --- Creating Common Tautologies --- 
A = Metavariable("A")
B = Metavariable("B")
C = Metavariable("C")

# List of common tautologies
tautologies = [

    Disjunction(A, Negation(A)),

    Negation(Conjunction(A, Negation(A))),

    Conjunction(
        Implication(Negation(Conjunction(A, B)), Disjunction(Negation(A), Negation(B))),
        Implication(Disjunction(Negation(A), Negation(B)), Negation(Conjunction(A, B)))
    ),

    Conjunction(
        Implication(Negation(Disjunction(A, B)), Conjunction(Negation(A), Negation(B))),
        Implication(Conjunction(Negation(A), Negation(B)), Negation(Disjunction(A, B)))
    ),

    Conjunction(
        Implication(Conjunction(A, Disjunction(B, C)), Disjunction(Conjunction(A, B), Conjunction(A, C))),
        Implication(Disjunction(Conjunction(A, B), Conjunction(A, C)), Conjunction(A, Disjunction(B, C)))
    ),

    Conjunction(
        Implication(Disjunction(A, Conjunction(B, C)), Conjunction(Disjunction(A, B), Disjunction(A, C))),
        Implication(Conjunction(Disjunction(A, B), Disjunction(A, C)), Disjunction(A, Conjunction(B, C)))
    )
]

for tautology in tautologies:
    print(tautology)

A ∨ ¬A
¬(A ∧ ¬A)
(¬(A ∧ B) → ¬A ∨ ¬B) ∧ (¬A ∨ ¬B → ¬(A ∧ B))
(¬(A ∨ B) → ¬A ∧ ¬B) ∧ (¬A ∧ ¬B → ¬(A ∨ B))
(A ∧ (B ∨ C) → (A ∧ B) ∨ (A ∧ C)) ∧ ((A ∧ B) ∨ (A ∧ C) → A ∧ (B ∨ C))
(A ∨ (B ∧ C) → (A ∨ B) ∧ (A ∨ C)) ∧ ((A ∨ B) ∧ (A ∨ C) → A ∨ (B ∧ C))


In [19]:
# --- Intantiating Common Tautologies --- 
def instantiate_random_formulas(num_samples: int, 
                                tautologies: List[Formula], 
                                seed: int = None) -> List[Formula]:
    """
    Samples and instantiates tautologies,
    re-sampling if necessary to reach the number of unique samples.

    Args: 
        num_samples (int): Number of tautologies to instantiate.
        tautologies: List of tautologies. 
        seed (int): Integer for reproducibility.
    
    Returns:
        A list of istantiated tautologies. 
    """
    if seed is not None:
        random.seed(seed)

    instantiated_tautologies = []
    seen_formulas = set()
    instantiator = Instantiator(num_letters=num_samples)

    attempts = 0  # Track the number of sampling attempts
    max_attempts = num_samples * 2  # Limit to avoid infinite loops

    # Continue generating until we have the required number of unique instantiations
    while len(instantiated_tautologies) < num_samples and attempts < max_attempts:
        random_formulas = [
            generate_normalized_random_formula(max_depth=MAX_DEPTH, 
                                               num_letters=NUM_LETTERS, 
                                               seed=(seed + attempts + i))
            for i in range(3)
        ]

        metavariable_map = {
            "A": random_formulas[0],
            "B": random_formulas[1],
            "C": random_formulas[2]
        }

        tautology = tautologies[attempts % len(tautologies)]  # Wrap around the tautologies list

        if isinstance(tautology, Formula):
            try:
                instantiated = instantiator.instantiate(tautology, metavariable_map)
                instantiated_str = str(instantiated)

                if instantiated_str not in seen_formulas:
                    instantiated_tautologies.append(instantiated)
                    seen_formulas.add(instantiated_str)

            except Exception as e:
                pass

        attempts += 1

    # Warning if the number of unique instantiations is less than required
    if len(instantiated_tautologies) < num_samples:
        print(f"Warning: Only {len(instantiated_tautologies)} unique formulas generated out of {num_samples} requested.")

    return instantiated_tautologies

In [20]:
seed_value = 42
num_samples = 3000 # (30% of the dataset formulas)

instantiated_tautologies = instantiate_random_formulas(num_samples, tautologies, seed_value)

print(f"Number of total instantiated tautologies: {len(instantiated_tautologies)}")

for i, instantiated in enumerate(instantiated_tautologies):
    if i % 200 == 0:
        print(f"Instantiated Tautology {i}: {instantiated}")

Number of total instantiated tautologies: 3000
Instantiated Tautology 0: A0 ∨ ¬A0
Instantiated Tautology 200: (¬A0 ∨ (((A0 → ¬A1) ∨ ¬¬A2) ∧ (A0 → ¬¬A1)) → (¬A0 ∨ (A0 → ¬A1) ∨ ¬¬A2) ∧ (¬A0 ∨ (A0 → ¬¬A1))) ∧ ((¬A0 ∨ (A0 → ¬A1) ∨ ¬¬A2) ∧ (¬A0 ∨ (A0 → ¬¬A1)) → ¬A0 ∨ (((A0 → ¬A1) ∨ ¬¬A2) ∧ (A0 → ¬¬A1)))
Instantiated Tautology 400: (A0 ∧ (⊥ ∨ A0) → (A0 ∧ ⊥) ∨ (A0 ∧ A0)) ∧ ((A0 ∧ ⊥) ∨ (A0 ∧ A0) → A0 ∧ (⊥ ∨ A0))
Instantiated Tautology 600: ¬((¬(¬A0 ∨ A1 ∨ ¬A2) ∨ ¬(¬A3 ∧ ¬¬A4)) ∧ ¬(¬(¬A0 ∨ A1 ∨ ¬A2) ∨ ¬(¬A3 ∧ ¬¬A4)))
Instantiated Tautology 800: (¬(A0 ∧ A1) ∧ (A0 ∨ A0) → (¬(A0 ∧ A1) ∧ A0) ∨ (¬(A0 ∧ A1) ∧ A0)) ∧ ((¬(A0 ∧ A1) ∧ A0) ∨ (¬(A0 ∧ A1) ∧ A0) → ¬(A0 ∧ A1) ∧ (A0 ∨ A0))
Instantiated Tautology 1000: ¬((A0 ∨ A1) ∧ A2 ∧ ¬((A0 ∨ A1) ∧ A2))
Instantiated Tautology 1200: ¬((⊥ → A0 ∧ A1) ∧ ¬(⊥ → A0 ∧ A1))
Instantiated Tautology 1400: (¬((¬A0 → A1) ∨ A0) → ¬(¬A0 → A1) ∧ ¬A0) ∧ (¬(¬A0 → A1) ∧ ¬A0 → ¬((¬A0 → A1) ∨ A0))
Instantiated Tautology 1600: (¬((A0 → ⊥) ∨ ¬A0) → ¬(A0 → ⊥) ∧ ¬¬A0) ∧ (¬(A0 → ⊥) ∧ 

In [21]:
# --- Check if istantated tautologies are unique ---
# Occurrences of each instantiated tautology
tautology_counter = Counter(instantiated_tautologies)

duplicates = {tautology: count for tautology, count in tautology_counter.items() if count > 1}
print(f"Duplicate instantiated tautologies: {duplicates}")

# Number of distinct tautologies
distinct_tautologies = set(instantiated_tautologies)
num_distinct_tautologies = len(distinct_tautologies)
print(f"Number of distinct instantiated tautologies: {num_distinct_tautologies}")

Duplicate instantiated tautologies: {}
Number of distinct instantiated tautologies: 3000


In [22]:
# --- Adding 3,000 (30% of dataaset) new tatologies to the dataset ---

def add_new_tautologies_to_dataset(dataset: pd.DataFrame, 
                                   tautologies: List[Formula],
                                   num_samples: int,
                                   seed: int = None) -> pd.DataFrame:
    """
    Add a fixed number of unique tautologies to the dataset.

    Args: 
        dataset (pd.DataFrame): Existing dataset.
        tautologies (List[Formula]): Template tautologies.
        num_samples (int): Number of new tautologies to add.
        seed (int): Random seed for reproducibility.

    Returns: 
        Updated DataFrame with added tautologies.
    """
    existing_formulas = set(dataset['formula'].tolist())
    new_data = []
    attempts = 0
    batch_size = 500  # generate in chunks
    seed_base = seed if seed is not None else random.randint(0, 10000)

    while len(new_data) < num_samples and attempts < num_samples * 5:
        # Instantiate new formulas in batches
        fresh = instantiate_random_formulas(batch_size, tautologies, seed_base + attempts)

        for formula in fresh:
            formula_str = str(formula)
            if formula_str not in existing_formulas:
                new_data.append({
                    'formula': formula_str,
                    'is_tautology': True
                })
                existing_formulas.add(formula_str)
                if len(new_data) >= num_samples:
                    break
        attempts += 1

    if len(new_data) < num_samples:
        print(f"[Warning] Only {len(new_data)} unique tautologies added out of {num_samples} requested.")

    
    # Concatenating and shuffling dataset
    new_df = pd.DataFrame(new_data)
    updated_dataset = pd.concat([dataset, new_df], ignore_index=True)
    return updated_dataset.sample(frac=1, random_state=seed).reset_index(drop=True) # frac=1 means shuffle all rows
                                                                                    # reset_index(drop=True) removes the old index


num_samples = 3000
seed_value = 42

dataset = add_new_tautologies_to_dataset(dataset=data_set,
                                         tautologies=tautologies,
                                         num_samples=num_samples,
                                         seed=seed_value)
dataset.to_csv('datasets/first_extended_dataset.cvs', index=False)
print(f"[INFO] Saving Dataset to {os.path.abspath('datasets/first_extended_dataset.csv')}")
dataset[:5]

[INFO] Saving Dataset to /home/labeconomia/nbalestra/theorem_prover/theorem_prover_core/ICTCS_notebooks/datasets/first_extended_dataset.csv


Unnamed: 0,formula,is_tautology
0,¬⊥ ∨ ¬A0,True
1,(A0 → A1) ∨ ⊥ → ¬A2,False
2,A0 ∨ A1 ∨ ¬(⊥ → A2) ∨ A3 ∨ A4,False
3,((A0 ∨ A1 → A2) ∧ ((⊥ → A3) ∨ (A4 → A5))) ∨ (A...,True
4,A0 ∧ A1 ∧ (A2 ∨ A3 ∨ A4 ∨ A5) ∧ ¬¬A6 ∧ (¬A7 ∨ ...,False


In [23]:
# --- Get Dataset Info ---
print(dataset.count())
count = dataset.is_tautology.value_counts()
print(f"\nNumber of True and False formulas: \n{count}\n")

total = len(dataset)
tautologies = dataset["is_tautology"].sum()
percentage = (tautologies / total) * 100
print(f"Percentage of tautologies in the dataset: {percentage:.2f}%")


formula         13000
is_tautology    13000
dtype: int64

Number of True and False formulas: 
is_tautology
False    9576
True     3424
Name: count, dtype: int64

Percentage of tautologies in the dataset: 26.34%


--- 
#### **1.5 Preparing the Data**

In [24]:
# --- Parser Module ---
# Parses a string representation of a formula

# Operator symbol to class map
OPERATOR_CLASSES = {
    '¬': Negation,
    '∧': Conjunction,
    '∨': Disjunction,
    #'⊻': ExclusiveDisjunction,
    '→': Implication
}

def tokenize(formula: str) -> List[str]:
    """
    Splits a logical formula string into a list of tokens.

    Supported tokens:
        - Propositional letters: A0, A1, ...
        - Connectives: ¬, ∧, ∨, ⊻, →
        - Falsity: ⊥
        - Parentheses: (,)

    Args:
        formula (str): A logical formula in string format.

    Returns:
        List[str]: A list of string tokens (e.g. ['¬', '(', 'A0', '∧', 'A1', ')'])

    Raises:
        ValueError: If the formula contains an unrecognized character.  
    """
    tokens = []
    i = 0
    # Iterate over each character
    while i < len(formula):
        c = formula[i]
        # Skip whitespace
        if c.isspace():
            i += 1
        elif c in '()¬∧∨⊻':
            tokens.append(c)
            i += 1
        elif formula[i:i+1] == '→':  
            tokens.append('→')
            i += 1
        elif c == 'A':
            j = i + 1
            while j < len(formula) and formula[j].isdigit():
                j += 1
            tokens.append(formula[i:j])
            i = j
        elif c == '⊥':
            tokens.append('⊥')
            i += 1
        else:
            raise ValueError(f"Unexpected character: {c}")
    return tokens


def parse_formula_string(formula_string: str) -> Formula:
    """
    Parses a string representation of a propositional logic formula into a
    structured Formula object, respecting operator precedence and associativity.

    Precedence and associativity are extracted directly from the formula classes.

    Args:
        formula_string (str): The input logical formula in string form.

    Returns:
        Formula: The parsed Formula object (e.g., Conjunction, Implication, etc.).

    Raises:
        ValueError: If the formula contains invalid or unexpected tokens.
    """
    tokens = tokenize(formula_string)

    def parse_expr(tokens: List[str], min_prec: int = 0) -> Formula:
        """
        Recursively parses an expression using a precedence climbing strategy.

        Args:
            tokens (List[str]): List of tokens.
            min_prec (int): Minimum precedence required to continue parsing.

        Returns:
            Formula: A Formula object representing the parsed structure.

        Raises:
            ValueError: For invalid token sequences or unexpected syntax.
        """
        if not tokens:
            raise ValueError("Empty expression")
        
        # Pick the first element of the list
        token = tokens.pop(0)

        # Base cases: atomic formulas
        if token == '(':
            # Handle parentheses by recursing on the subexpression
            sub_tokens = []
            depth = 1
            while tokens:
                t = tokens.pop(0)
                if t == '(':
                    depth += 1
                elif t == ')':
                    depth -= 1
                    if depth == 0:
                        break
                sub_tokens.append(t)
            node = parse_expr(sub_tokens)

        elif token == '⊥':
            node = Falsity()

        elif token.startswith('A') and token[1:].isdigit():
            node = Letter(int(token[1:]))

        elif token == '¬':
            # Unary operator
            cls = Negation
            right = parse_expr(tokens, cls.priority)
            node = cls(right)

        else:
            raise ValueError(f"Unexpected token: {token}")

        # After parsing an atomic or unary expression, handle binary connectives
        while tokens and tokens[0] in OPERATOR_CLASSES:
            op = tokens[0]
            cls = OPERATOR_CLASSES[op]
            prec = cls.priority
            assoc = cls.associativity

            if prec < min_prec:
                break

            tokens.pop(0)  # consume the operator
            
            # Recursively parse the right-hand expression, and combine it with the 
            # left node into a full binary Formula
            # Adjust min_prec depending on associativity
            next_min_prec = prec + 1 if assoc == 'left' else prec

            right = parse_expr(tokens, next_min_prec)
            node = cls(node, right)

        return node

    return parse_expr(tokens)


# Example 1: 
f1 = parse_formula_string("¬A0 ∨ A1")
f2 = parse_formula_string("¬(A0 ∨ A1)")

print(f1)  # (¬A0 ∨ A1)
print(f2)  # ¬(A0 ∨ A1)
print(f"{f1} == {f2} shoud be false and is: {f1 == f2}\n")  # False — correct

# Example 2:
list_of_str = [
    "¬(A0 ∧ A1) → (¬A0 ∨ ¬A1)",
    "¬(A0 ∨ A1) → (¬A0 ∧ ¬A1)",
    "(A0 ∧ (A1 ∨ A2)) → ((A0 ∧ A1) ∨ (A0 ∧ A2))"
]

for formula_str in list_of_str:
    parsed_formulas = parse_formula_string(formula_str)
    print(parsed_formulas)

¬A0 ∨ A1
¬(A0 ∨ A1)
¬A0 ∨ A1 == ¬(A0 ∨ A1) shoud be false and is: False

¬(A0 ∧ A1) → ¬A0 ∨ ¬A1
¬(A0 ∨ A1) → ¬A0 ∧ ¬A1
A0 ∧ (A1 ∨ A2) → (A0 ∧ A1) ∨ (A0 ∧ A2)


In [25]:
# --- Formulas and Truth Values lists ---
Formulas = dataset['formula'].tolist()
Truth_values = dataset['is_tautology'].tolist()

len(Formulas), len(Truth_values)

(13000, 13000)

In [26]:
formula_set = set(Formulas)
len(formula_set)

13000

In [27]:
# --- Parse Dataset's formulas ---
parsed_formulas = [parse_formula_string(f) for f in Formulas]

print(f"Parsed second formula: {parsed_formulas[1]}")
print(f"Total parsed formulas: {len(parsed_formulas)}")

distinct_formulas = set(parsed_formulas)
print(f"Number of distinct parsed formulas: {len(distinct_formulas)}")

Parsed second formula: (A0 → A1) ∨ ⊥ → ¬A2
Total parsed formulas: 13000
Number of distinct parsed formulas: 13000


In [28]:
# Data splitting: 80% of the data is reserved for training the model. 
# Test Sets: 20% of the data is used for testing the model.

X_train, X_test, y_train, y_test = train_test_split(parsed_formulas, 
                                                    Truth_values, 
                                                    test_size=0.2, 
                                                    random_state=42)

print(f"Training set: {len(X_train)} samples")
print(f"Test set: {len(X_test)} samples")

Training set: 10400 samples
Test set: 2600 samples


In [29]:
# --- Convert symbols in numbers ---
class CustomTokenizer:
    """
    Custom tokenizer class for logical formulas.
    This class converts formulas into tokenized integer representations,
    and supports detokenizing back into Formula objects.
    """

    def __init__(self):
        self.token_to_formula: Dict[int, Formula] = {}
        self.formula_to_token: Dict[Formula, int] = {}

        self.connective_map = {
            'Conjunction': 100,
            'Disjunction': 101,
            'Negation': 102,
            'Implication': 103,
            #'Exclusive Disjunction': 104
        }

        self.special_map = {
            '(': 106,
            ')': 107
        }

        self.falsity_token = 105

    def fit(self, formulas: List[Formula]):
        """
        Fit the tokenizer on a list of formulas, deriving tokens for each formula.
        """
        for formula in formulas:
            self._derive_tokens(formula)

    def _derive_tokens(self, formula: Formula):
        """
        Recursively derive tokens for all subcomponents of a formula.
        """
        if isinstance(formula, Falsity):
            if formula not in self.formula_to_token:
                self.formula_to_token[formula] = self.falsity_token
                self.token_to_formula[self.falsity_token] = formula

        elif isinstance(formula, Letter):
            letter_index = formula.n
            if formula not in self.formula_to_token:
                token = letter_index + 1  # Start letters from 1
                self.formula_to_token[formula] = token
                self.token_to_formula[token] = formula

        elif isinstance(formula, UnaryConnectiveFormula):
            self._derive_tokens(formula.formula)

        elif isinstance(formula, BinaryConnectiveFormula):
            self._derive_tokens(formula.left)
            self._derive_tokens(formula.right)

    def tokenize(self, formula: Formula) -> List[int]:
        """
        Convert a formula into a list of integer tokens.
        """
        tokens = []
        self._tokenize_helper(formula, tokens)
        return tokens

    def _tokenize_helper(self, formula: Formula, tokens: List[int]):
        """
        Helper method for recursive token generation.
        """
        if formula in self.formula_to_token:
            tokens.append(self.formula_to_token[formula])
            return

        if isinstance(formula, BinaryConnectiveFormula):
            tokens.append(self.special_map['('])
            self._tokenize_helper(formula.left, tokens)
            tokens.append(self.connective_map[type(formula).__name__])
            self._tokenize_helper(formula.right, tokens)
            tokens.append(self.special_map[')'])

        elif isinstance(formula, UnaryConnectiveFormula):
            tokens.append(self.special_map['('])
            tokens.append(self.connective_map[type(formula).__name__])
            tokens.append(self.special_map['('])
            self._tokenize_helper(formula.formula, tokens)
            tokens.append(self.special_map[')'])
            tokens.append(self.special_map[')'])

        elif isinstance(formula, Falsity):
            tokens.append(self.falsity_token)

        elif isinstance(formula, Letter):
            tokens.append(self.formula_to_token[formula])

    def detokenize(self, tokens: List[int]) -> Formula:
        """
        Convert a list of tokens (possibly padded) back into a Formula object.
        """
        # Remove trailing padding
        tokens = [t for t in tokens if t != 0]

        def parse_expr(pos: int) -> Tuple[Formula, int]:
            token = tokens[pos]

            if token == self.falsity_token:
                return Falsity(), pos + 1

            elif token in self.token_to_formula:
                return self.token_to_formula[token], pos + 1

            elif token == self.special_map['(']:
                next_token = tokens[pos + 1]

                # Handle unary connective
                if next_token in self.connective_map.values() and tokens[pos + 2] == self.special_map['(']:
                    op_token = next_token
                    inner_formula, new_pos = parse_expr(pos + 3)
                    assert tokens[new_pos] == self.special_map[')']
                    assert tokens[new_pos + 1] == self.special_map[')']
                    connective_class = self._connective_class(op_token)
                    return connective_class(inner_formula), new_pos + 2

                # Handle binary connective
                else:
                    left_formula, pos_left = parse_expr(pos + 1)
                    op_token = tokens[pos_left]
                    right_formula, pos_right = parse_expr(pos_left + 1)
                    assert tokens[pos_right] == self.special_map[')']
                    connective_class = self._connective_class(op_token)
                    return connective_class(left_formula, right_formula), pos_right + 1

            raise ValueError(f"Unexpected token at position {pos}: {tokens[pos:]}")

        return parse_expr(0)[0]

    def _connective_class(self, token: int):
        """
        Resolve token ID back to the appropriate connective class.
        """
        for name, code in self.connective_map.items():
            if token == code:
                return {
                    'Conjunction': Conjunction,
                    'Disjunction': Disjunction,
                    'Negation': Negation,
                    'Implication': Implication,
                    #'Exclusive Disjunction': ExclusiveDisjunction
                }[name]
        raise ValueError(f"Unknown connective token: {token}")


In [30]:
# Inspect unique formula's token 
tokenizer = CustomTokenizer()
tokenizer.fit(parsed_formulas)

def print_token_mappings(tokenizer):
    for token, formula in tokenizer.token_to_formula.items():
        print(f"Token: {token}, Formula: {formula}")

print_token_mappings(tokenizer)

# Example
example_formula = Conjunction(Letter(0), Negation(Letter(1)))
tokens = tokenizer.tokenize(example_formula)

print(f"\nTokenized representation of ({example_formula}): {tokens}")

Token: 105, Formula: ⊥
Token: 1, Formula: A0
Token: 2, Formula: A1
Token: 3, Formula: A2
Token: 4, Formula: A3
Token: 5, Formula: A4
Token: 6, Formula: A5
Token: 7, Formula: A6
Token: 8, Formula: A7

Tokenized representation of (A0 ∧ ¬A1): [106, 1, 100, 106, 102, 106, 2, 107, 107, 107]


In [31]:
# --- Tokenize Train Test ---
tokenizer = CustomTokenizer()
tokenizer.fit(X_train)

In [32]:
# Test function
def test_tokenizer(tokenizer, example_formulas):
    for formula in example_formulas:
        tokens = tokenizer.tokenize(formula)
        print(f"Original Formula: {formula}")
        print(f"Tokenized Version: {tokens}\n")

if __name__ == "__main__":
    tokenizer = CustomTokenizer()
    example_formulas = [X_train[2], X_train[3], X_train[4]]
    tokenizer.fit(example_formulas)

    test_tokenizer(tokenizer, example_formulas)

Original Formula: (¬A0 ∨ A1) ∧ (A2 ∨ (¬A3 → A4))
Tokenized Version: [106, 106, 106, 102, 106, 1, 107, 107, 101, 2, 107, 100, 106, 3, 101, 106, 106, 102, 106, 4, 107, 107, 103, 5, 107, 107, 107]

Original Formula: (¬A0 → ¬(A1 ∧ A2 → A3 → A4)) → A5
Tokenized Version: [106, 106, 106, 102, 106, 1, 107, 107, 103, 106, 102, 106, 106, 106, 2, 100, 3, 107, 103, 106, 4, 103, 5, 107, 107, 107, 107, 107, 103, 6, 107]

Original Formula: ¬⊥ ∧ (A0 ∨ A1)
Tokenized Version: [106, 106, 102, 106, 105, 107, 107, 100, 106, 1, 101, 2, 107, 107]



In [33]:
tokenizer = CustomTokenizer()
tokenizer.fit(X_train)

num_letters = sum(1 for formula in tokenizer.formula_to_token if isinstance(formula, Letter))
num_connectives = len(tokenizer.connective_map)
num_parenthesis = sum(1 for formula in tokenizer.special_map)

print(f"Number of unique letters: {num_letters}")
print(f"Number of unique connectives: {num_connectives}")
print(f"Number of spacial tokens: {num_parenthesis}")

Number of unique letters: 8
Number of unique connectives: 4
Number of spacial tokens: 2


In [34]:
# --- Tokenize Train and Test formulas ---
tokenizer = CustomTokenizer()
tokenizer.fit(X_train)

X_train_seq = [tokenizer.tokenize(formula) for formula in X_train]
X_test_seq = [tokenizer.tokenize(formula) for formula in X_test]

In [35]:
# --- Convert lists of int into tensors ---
X_train_tensors = [torch.tensor(seq, dtype=torch.long) for seq in X_train_seq]
X_test_tensors = [torch.tensor(seq, dtype=torch.long) for seq in X_test_seq]

# --- Pad sequences (fill with 0s) ---
X_train_padded = pad_sequence(X_train_tensors, batch_first=True, padding_value=0) # With batch_first = True, Shape: (batch_size, seq_len, features)
X_test_padded = pad_sequence(X_test_tensors, batch_first=True, padding_value=0)

# --- Convert labels to tensors --- 
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32)

In [36]:
print(f"X_train_padded shape: {X_train_padded.shape} -> [num_of_tokenized_formulas, num_of_tokens_per_formula])")
print(f"y_train_tensor shape: {y_train_tensor.shape} -> [num_of_labels]")

print(f"\nX_test_padded shape: {X_test_padded.shape} -> [num_of_tokenized_formulas, num_of_tokens_per_formula])")
print(f"y_test_tensor shape: {y_train_tensor.shape} -> [num_of_labels]")

X_train_padded shape: torch.Size([10400, 603]) -> [num_of_tokenized_formulas, num_of_tokens_per_formula])
y_train_tensor shape: torch.Size([10400]) -> [num_of_labels]

X_test_padded shape: torch.Size([2600, 545]) -> [num_of_tokenized_formulas, num_of_tokens_per_formula])
y_test_tensor shape: torch.Size([10400]) -> [num_of_labels]


---
#### **1.6 Getting a PyTorch Dataset**

In [37]:
# --- Create a PyTorch Dataset ---
class FormulaDataset(Dataset):
    def __init__(self, X, y):
        self.X = X
        self.y = y

        # Define class label info
        self.classes = ['non-tautology', 'tautology']
        self.class_to_idx = {label: i for i, label in enumerate(self.classes)}

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

    def __len__(self):
        return len(self.X)

    def __repr__(self):
        return (
            f"Dataset FormulaDataset\n"
            f"  Number of datapoints: {len(self)}\n"
            f"  Input shape: {self.X[0].shape if len(self.X) > 0 else 'N/A'}\n"
            f"  Target type: {self.y.dtype}\n"
        )


train_data = FormulaDataset(X_train_padded, y_train_tensor)
test_data = FormulaDataset(X_test_padded, y_test_tensor)

print(f"{train_data} \n{test_data}")

Dataset FormulaDataset
  Number of datapoints: 10400
  Input shape: torch.Size([603])
  Target type: torch.float32
 
Dataset FormulaDataset
  Number of datapoints: 2600
  Input shape: torch.Size([545])
  Target type: torch.float32



In [38]:
import collections

train_class_counts = collections.Counter(y_train)
test_class_counts = collections.Counter(y_test)

print(f"Train class counts: {train_class_counts}")
print(f"Test class counts: {test_class_counts}")

Train class counts: Counter({False: 7662, True: 2738})
Test class counts: Counter({False: 1914, True: 686})


In [39]:
print(f"Train set (47998 samples): False = {(3671/4799)*100:.2f} % and True = {(1128/4799)* 100:.2f} %")
print(f"Test set (240samples): False = {(924/1200)*100:.2f}% and True = {(276/1200)*100:.2f}%")

Train set (47998 samples): False = 76.50 % and True = 23.50 %
Test set (240samples): False = 77.00% and True = 23.00%


In [40]:
# --- Convert lists of int into tensors ---
X_train_tensors = [torch.tensor(seq, dtype=torch.long) for seq in X_train_seq]
X_test_tensors = [torch.tensor(seq, dtype=torch.long) for seq in X_test_seq]

# --- Pad sequences (fill with 0s) ---
X_train_padded = pad_sequence(X_train_tensors, batch_first=True, padding_value=0) # With batch_first = True, Shape: (batch_size, seq_len, features)
X_test_padded = pad_sequence(X_test_tensors, batch_first=True, padding_value=0)

# --- Convert labels to tensors --- 
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32)

In [41]:
# Classes Names and Labels Map
class_names = train_data.classes
class_to_idx = train_data.class_to_idx
print(class_names)
print(class_to_idx)

['non-tautology', 'tautology']
{'non-tautology': 0, 'tautology': 1}


In [42]:
# Visualizing a random sampled formula
torch.manual_seed(42)
random_idx = torch.randint(0, len(train_data), size=[1]).item()

random_formula, label = train_data[random_idx]
print(f"Random formula: {random_formula}, \n\nLabel: {label}")

# Convert tensor to list of token IDs (needed by detokenize)
token_list = random_formula.tolist()
# Reconstruct the original Formula
reconstructed_formula = tokenizer.detokenize(token_list)
print(f"Reconstructed formula: {reconstructed_formula}")
print(f"\nTautology status: {class_names[int(label.item())]}")

Random formula: tensor([106, 106,   1, 100, 106, 106, 106,   2, 103,   3, 107, 101, 106, 102,
        106,   4, 107, 107, 107, 103, 106,   5, 101, 106, 102, 106,   6, 107,
        107, 107, 107, 107, 103,   7, 107,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0, 

----
#### **1.7 DataLoader**

In [None]:
# Setup the batch size hyperparameter
BATCH_SIZE = 16

# Turn datasets into iterables (batches)
train_dataloader = DataLoader(train_data, # dataset to turn into iterable
                              batch_size=BATCH_SIZE, # how many samples per batch? 
                              shuffle=True # shuffle data every epoch?
                                           # This removes the data order, so the model does not learn it 
)

test_dataloader = DataLoader(test_data,
                             batch_size=BATCH_SIZE,
                             shuffle=False # don't necessarily have to shuffle the testing data
)

# Let's check out what we've created
print(f"Dataloaders: {train_dataloader, test_dataloader}") 
print(f"Length of train dataloader: {len(train_dataloader)} batches of {BATCH_SIZE}")
print(f"Length of test dataloader: {len(test_dataloader)} batches of {BATCH_SIZE}")

In [44]:
# Check out what's inside the training dataloader
train_features_batch, train_labels_batch = next(iter(train_dataloader)) # next() grabs the first batch from the iterator
print(f"{train_features_batch.shape, train_labels_batch.shape} -> [batch_size, num_of_tokens_per_formula], [bach_size]")

(torch.Size([16, 603]), torch.Size([16])) -> [batch_size, num_of_tokens_per_formula], [bach_size]


---
#### **1.8 Set up device agnostic-code**

In [45]:
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cuda'

--- 
#### **1.9 Reproducibility**

In [46]:
def set_seeds(seed: int=42):
    """Sets random sets for torch operations.

    Args:
        seed (int, optional): Random seed to set. Defaults to 42.
    """
    # Set the seed for general torch operations
    torch.manual_seed(seed)
    # Set the seed for CUDA torch operations (ones that happen on the GPU)
    #torch.mps.manual_seed(seed)
    torch.cuda.manual_seed(seed)

--- 
#### **1.10 Create `train_step()` and `test_step()` functions, and `test()` function to combne them**.

In [47]:
# --- Functions for training and testing a PyTorch model ---

def train_step(model: torch.nn.Module, 
               dataloader: torch.utils.data.DataLoader, 
               loss_fn: torch.nn.Module, 
               optimizer: torch.optim.Optimizer,
               device: torch.device) -> Tuple[float, float]:
  """Trains a PyTorch model for a single epoch.

  Turns a target PyTorch model to training mode and then
  runs through all of the required training steps (forward
  pass, loss calculation, optimizer step).

  Args:
    model: A PyTorch model to be trained.
    dataloader: A DataLoader instance for the model to be trained on.
    loss_fn: A PyTorch loss function to minimize.
    optimizer: A PyTorch optimizer to help minimize the loss function.
    device: A target device to compute on.

  Returns:
    A tuple of training loss and training accuracy metrics.
    In the form (train_loss, train_accuracy).
    
  """
  # Put model in train mode
  model.train()
  
  # Setup train loss and train accuracy values
  train_loss, train_acc = 0, 0
  
  # Loop through data loader data batches
  for batch, (X, y) in enumerate(dataloader):
      # Send data to target device
      X, y = X.to(device), y.to(device)

      # 1. Forward pass
      y_logits = model(X).squeeze(dim=1)
      y_preds = torch.round(torch.sigmoid(y_logits))

      # 2. Calculate  and accumulate loss
      loss = loss_fn(y_logits, y)
      train_loss += loss.item() 

      # 3. Optimizer zero grad
      optimizer.zero_grad()

      # 4. Loss backward
      loss.backward()

      # 5. Optimizer step
      optimizer.step()

      # Calculate and accumulate accuracy metric across all batches
      train_acc += (y_preds == y).sum().item()/len(y_preds)

  # Adjust metrics to get average loss and accuracy per batch 
  train_loss = train_loss / len(dataloader)
  train_acc = train_acc / len(dataloader)
  return train_loss, train_acc

def test_step(model: torch.nn.Module, 
              dataloader: torch.utils.data.DataLoader, 
              loss_fn: torch.nn.Module,
              device: torch.device) -> Tuple[float, float]:
  """Tests a PyTorch model for a single epoch.

  Turns a target PyTorch model to "eval" mode and then performs
  a forward pass on a testing dataset.

  Args:
    model: A PyTorch model to be tested.
    dataloader: A DataLoader instance for the model to be tested on.
    loss_fn: A PyTorch loss function to calculate loss on the test data.
    device: A target device to compute on.

  Returns:
    A tuple of testing loss and testing accuracy metrics.
    In the form (test_loss, test_accuracy). 
    
  """
  # Put model in eval mode
  model.eval() 
  
  # Setup test loss and test accuracy values
  test_loss, test_acc = 0, 0
  
  # Turn on inference context manager
  with torch.inference_mode():
      # Loop through DataLoader batches
      for batch, (X, y) in enumerate(dataloader):
          # Send data to target device
          X, y = X.to(device), y.to(device)
  
          # 1. Forward pass
          test_pred_logits = model(X).squeeze(dim=1)
          preds = torch.round(torch.sigmoid(test_pred_logits))

          # 2. Calculate and accumulate loss
          loss = loss_fn(test_pred_logits, y)
          test_loss += loss.item()
          
          # Calculate and accumulate accuracy
          test_pred_labels = preds
          test_acc += ((test_pred_labels == y).sum().item()/len(test_pred_labels))
          
  # Adjust metrics to get average loss and accuracy per batch 
  test_loss = test_loss / len(dataloader)
  test_acc = test_acc / len(dataloader)
  return test_loss, test_acc

def train(model: torch.nn.Module, 
          train_dataloader: torch.utils.data.DataLoader, 
          test_dataloader: torch.utils.data.DataLoader, 
          optimizer: torch.optim.Optimizer,
          loss_fn: torch.nn.Module,
          epochs: int,
          device: torch.device) -> Dict[str, List]:
  """Trains and tests a PyTorch model.

  Passes a target PyTorch models through train_step() and test_step()
  functions for a number of epochs, training and testing the model
  in the same epoch loop.

  Calculates, prints and stores evaluation metrics throughout.

  Args:
    model: A PyTorch model to be trained and tested.
    train_dataloader: A DataLoader instance for the model to be trained on.
    test_dataloader: A DataLoader instance for the model to be tested on.
    optimizer: A PyTorch optimizer to help minimize the loss function.
    loss_fn: A PyTorch loss function to calculate loss on both datasets.
    epochs: An integer indicating how many epochs to train for.
    device: A target device to compute on.

  Returns:
    A dictionary of training and testing loss as well as training and
    testing accuracy metrics. Each metric has a value in a list for 
    each epoch.
    In the form: {train_loss: [...],
                  train_acc: [...],
                  test_loss: [...],
                  test_acc: [...]} 

  """
  # Create empty results dictionary
  results = {"train_loss": [],
             "train_acc": [],
             "test_loss": [],
             "test_acc": []
  }
  
  # Loop through training and testing steps for a number of epochs
  for epoch in tqdm(range(epochs)):
      train_loss, train_acc = train_step(model=model,
                                          dataloader=train_dataloader,
                                          loss_fn=loss_fn,
                                          optimizer=optimizer,
                                          device=device)
      test_loss, test_acc = test_step(model=model,
          dataloader=test_dataloader,
          loss_fn=loss_fn,
          device=device)
      
      # Print out what's happening
      print(
          f"Epoch: {epoch+1} | "
          f"train_loss: {train_loss:.4f} | "
          f"train_acc: {train_acc:.4f} | "
          f"test_loss: {test_loss:.4f} | "
          f"test_acc: {test_acc:.4f}"
      )

      # Update results dictionary
      results["train_loss"].append(train_loss)
      results["train_acc"].append(train_acc)
      results["test_loss"].append(test_loss)
      results["test_acc"].append(test_acc)

  # Return the filled results at the end of the epochs
  return results

---
#### **1.11 Function to save a model**

In [48]:
def save_model(model: torch.nn.Module,
               target_dir: str,
               model_name: str):
  """Saves a PyTorch model to a target directory.

  Args:
    model: A target PyTorch model to save.
    target_dir: A directory for saving the model to.
    model_name: A filename for the saved model. Should include
      either ".pth" or ".pt" as the file extension.
  
  Example usage:
    save_model(model=model_0,
               target_dir="models",
               model_name="05_going_modular_tingvgg_model.pth")
  """
  # Create target directory
  target_dir_path = Path(target_dir)
  target_dir_path.mkdir(parents=True,
                        exist_ok=True)
  
  # Create model save path
  assert model_name.endswith(".pth") or model_name.endswith(".pt"), "model_name should end with '.pt' or '.pth'"
  model_save_path = target_dir_path / model_name

  # Save the model state_dict()
  print(f"[INFO] Saving model to: {model_save_path}")
  torch.save(obj=model.state_dict(),
             f=model_save_path)

---
#### **1.12 Hyperparameters**

In [49]:
# --- Hyperparameters --- 
# Determine the vocabulary size for the embedding layer and add 1 for padding index (0)
all_token_indices = (
    list(tokenizer.formula_to_token.values()) +
    list(tokenizer.connective_map.values()) +
    list(tokenizer.special_map.values()) +
    [tokenizer.falsity_token]
)

VOCAB_SIZE = max(all_token_indices) + 1  
print(f"Vocabulary size (including padding token): {VOCAB_SIZE}")
print("Max index in batch:", train_features_batch.max().item())
assert train_features_batch.max().item() < VOCAB_SIZE, "Some token indices exceed the embedding size!"

EMBEDDING_DIM = 32
LR = 0.001

Vocabulary size (including padding token): 108
Max index in batch: 107


---
#### **Build Model 1**

In [50]:
# --- Embedding layer ---
# This layer converts each token (represented as an integer ID) into a dense, trainable vector.
# Using an embedding is essential because raw integer token IDs have no semantic meaning to the model.
# The embedding allows the model to learn useful representations of logical symbols (e.g., A0, ∧, ¬, etc.)
# based on how they are used in formulas — similar symbols can develop similar vector representations.
# Without this layer, feeding raw integers into the model would mislead it into thinking larger token IDs are "more" important,
# which is not true for symbolic data.

embedding = nn.Embedding(num_embeddings=VOCAB_SIZE, embedding_dim=EMBEDDING_DIM)

x = train_features_batch
output_1 = embedding(x)
print(f"First batch of formula sequences shape: {train_features_batch.shape} -> [batch_size, seq_len]")
print(f"Embedding Layer output shape: {output_1.shape} -> [batch_size, seq_len, embed_dim]")

First batch of formula sequences shape: torch.Size([16, 603]) -> [batch_size, seq_len]
Embedding Layer output shape: torch.Size([16, 603, 32]) -> [batch_size, seq_len, embed_dim]


In [51]:
# --- Hyperparameters --- 
# Determine the vocabulary size for the embedding layer and add 1 for padding index (0)
all_token_indices = (
    list(tokenizer.formula_to_token.values()) +
    list(tokenizer.connective_map.values()) +
    list(tokenizer.special_map.values()) +
    [tokenizer.falsity_token]
)

VOCAB_SIZE = max(all_token_indices) + 1  
print(f"Vocabulary size (including padding token): {VOCAB_SIZE}")
print("Max index in batch:", train_features_batch.max().item())
assert train_features_batch.max().item() < VOCAB_SIZE, "Some token indices exceed the embedding size!"

EMBEDDING_DIM = 32
HIDDEN_DIM = 10
LR = 0.001

Vocabulary size (including padding token): 108
Max index in batch: 107


In [52]:
# --- Simple RNN layer ---
# - input_size=32: Each input vector (e.g., from an embedding) has 32 features
# - hidden_size=10: The RNN will maintain a hidden state with 10 dimensions
# - batch_first=True: Input and output tensors will have shape (batch, seq, feature)
rnn = nn.RNN(input_size=32, hidden_size=10, batch_first=True)

# Pass a sequence of embedded inputs to the RNN
# output_1 should have shape (batch_size, sequence_length, input_size=32)
# The RNN will process this input sequence and return:
#   - hidden_states: The hidden state at each time step -> shape: (batch, seq_len, hidden_size)
#   - h_n: The final hidden state for the last time step -> shape (num_layers, batch, hidden_size)
output_2 = rnn(output_1)
hidden_states, h_n = output_2
print(f"rnn output shape: {hidden_states.shape} -> [batch_size, seq_len, hidden_size]")
print(f"h_n shape: {h_n.shape} -> [num_layers, batch_size, hidden_size]")

rnn output shape: torch.Size([16, 603, 10]) -> [batch_size, seq_len, hidden_size]
h_n shape: torch.Size([1, 16, 10]) -> [num_layers, batch_size, hidden_size]


In [53]:
# --- Linear layer  ---
# This layer maps from the hidden state (size 10) to a single output (e.g., binary classification).
# in_features=10: the dimension of the RNN's final hidden state.
# out_features=1: we want a single output (e.g., a logit for binary classification).
linear = nn.Linear(in_features=10, out_features=1)

# h_n has shape (num_layers, batch_size, hidden_size)
# Since we’re using 1 RNN layer, h_n.shape == (1, batch_size, 10)
# We remove the first dimension using squeeze(0) -> (batch_size, 10)
# This gives us one 10-dimensional vector per sequence in the batch.
output_3 = linear(h_n.squeeze(0))

# The result is a tensor of shape (batch_size, 1)
# Each value is a prediction logit for the corresponding sequence
print(f"Linear layer output shape: {output_3.shape} -> [batch_size, 1]")

Linear layer output shape: torch.Size([16, 1]) -> [batch_size, 1]


In [54]:
class RNN_V1(nn.Module):
    def __init__(self, vocab_size :int, embedding_dim :int, hidden_units: int, output_size :int):
        super().__init__()
        self.embedding = nn.Embedding(num_embeddings=vocab_size, embedding_dim=embedding_dim)
        self.rnn = nn.RNN(input_size=embedding_dim, hidden_size=hidden_units, batch_first=True)
        self.linear = nn.Linear(in_features=hidden_units, out_features=output_size)
    
    def forward(self, x: torch.Tensor):
        x = self.embedding(x)               # [batch_size, seq_len, embed_dim]
        _, h_n = self.rnn(x)                # [num_layers, batch_size, hidden_dim]
        last_hidden = h_n.squeeze(0)        # remove the first dimension, which is num_layers=1 
        output = self.linear(last_hidden)   # [batch_size, output_size] == [32, 1]
        output = output                     # Reshape output to match label shape [32]
        return output

In [55]:
torch.manual_seed(42)
model_1 = RNN_V1(vocab_size=VOCAB_SIZE, 
                 embedding_dim=EMBEDDING_DIM,
                 hidden_units=HIDDEN_DIM,
                 output_size=1
).to(device) 

print(f"Model_1 is on the model device: {next(model_1.parameters()).device}")
model_1

Model_1 is on the model device: cuda:0


RNN_V1(
  (embedding): Embedding(108, 32)
  (rnn): RNN(32, 10, batch_first=True)
  (linear): Linear(in_features=10, out_features=1, bias=True)
)

In [56]:
# Get a summary of Model_1 
summary(model_1, 
         input_size=train_features_batch.shape,
         dtypes=[torch.long],
         verbose=0,
         col_names=["input_size", "output_size", "num_params", "trainable"],
         col_width=20,
         row_settings=["var_names"],
         device=device
)

Layer (type (var_name))                  Input Shape          Output Shape         Param #              Trainable
RNN_V1 (RNN_V1)                          [16, 603]            [16, 1]              --                   True
├─Embedding (embedding)                  [16, 603]            [16, 603, 32]        3,456                True
├─RNN (rnn)                              [16, 603, 32]        [16, 603, 10]        440                  True
├─Linear (linear)                        [16, 10]             [16, 1]              11                   True
Total params: 3,907
Trainable params: 3,907
Non-trainable params: 0
Total mult-adds (M): 4.30
Input size (MB): 0.08
Forward/backward pass size (MB): 3.24
Params size (MB): 0.02
Estimated Total Size (MB): 3.33

In [57]:
# --- Loss and Optimizer Functions ---
loss_fn = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(params=model_1.parameters(), 
                            lr=LR)

In [58]:
# --- Train and Test Model_1 ---
set_seeds()
results = train(model=model_1,
                train_dataloader=train_dataloader,
                test_dataloader=test_dataloader,
                optimizer=optimizer,
                loss_fn=loss_fn,
                epochs=5,
                device=device)

  0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 0.5859 | train_acc: 0.7237 | test_loss: 0.5770 | test_acc: 0.7362
Epoch: 2 | train_loss: 0.5772 | train_acc: 0.7367 | test_loss: 0.5773 | test_acc: 0.7362
Epoch: 3 | train_loss: 0.5770 | train_acc: 0.7367 | test_loss: 0.5768 | test_acc: 0.7362
Epoch: 4 | train_loss: 0.5770 | train_acc: 0.7367 | test_loss: 0.5768 | test_acc: 0.7362
Epoch: 5 | train_loss: 0.5767 | train_acc: 0.7367 | test_loss: 0.5766 | test_acc: 0.7362


In [59]:
import collections

train_class_counts = collections.Counter(y_train)
test_class_counts = collections.Counter(y_test)

print(f"Train class counts: {train_class_counts}")
print(f"Test class counts: {test_class_counts}")

Train class counts: Counter({False: 7662, True: 2738})
Test class counts: Counter({False: 1914, True: 686})


In [60]:
#print(f"Train set (960 samples): False = {(3671/4799)*100:.2f} % and True = {(1128/4799)* 100:.2f} %")
#print(f"Test set (240samples): False = {(924/1200)*100:.2f}% and True = {(276/1200)*100:.2f}%")
print(f"\nGiven this distribution and Model performances, model_1 that opredicts False every time — and that would still be right 77% of the time.")


Given this distribution and Model performances, model_1 that opredicts False every time — and that would still be right 77% of the time.


---
#### **Buils Model 2**

In [61]:
# --- Asymmetric Focal Loss for Binary Classification ---

# Due to the class imbalance in our dataset (~26% tautologies), the model may bias toward 
# predicting the majority class (non-tautologies). This leads to high accuracy but poor recall 
# on the minority class, which is undesirable in many reasoning or safety-critical settings.

# To address this, we use an Asymmetric Focal Loss, a refined version of the standard focal loss.
# The core idea is to:
# - Assign higher weight (α) to the minority class (tautologies) to penalize false negatives more.
# - Apply a modulating factor (1 - p)^γ to focus learning on hard examples.
# - Use separate α and γ values for each class for better control.

# Loss formula:
# L(y, ŷ) = 
#   - α_pos * y * (1 - ŷ)^γ_pos * log(ŷ)
#   - α_neg * (1 - y)^γ_neg * log(1 - ŷ)
# where:
#   - y is the true label (0 or 1)
#   - ŷ is the predicted probability (after sigmoid)
#   - α_pos/neg control class weighting
#   - γ_pos/neg control the focus on hard examples


class AsymmetricFocalLoss(nn.Module):
    def __init__(self, alpha_pos=1.0, alpha_neg=1.0, gamma_pos=2.0, gamma_neg=2.0, reduction='mean'):
        super().__init__()
        self.alpha_pos = alpha_pos
        self.alpha_neg = alpha_neg
        self.gamma_pos = gamma_pos
        self.gamma_neg = gamma_neg
        self.reduction = reduction

    def forward(self, logits, targets):
        probs = torch.sigmoid(logits)
        probs = torch.clamp(probs, 1e-6, 1 - 1e-6)  # Avoid log(0)

        # Loss for positive (tautology)
        pos_loss = self.alpha_pos * (1 - probs) ** self.gamma_pos * torch.log(probs)
        # Loss for negative (non-tautology)
        neg_loss = self.alpha_neg * (probs) ** self.gamma_neg * torch.log(1 - probs)

        # Full loss
        loss = -targets * pos_loss - (1 - targets) * neg_loss

        if self.reduction == 'mean':
            return loss.mean()
        elif self.reduction == 'sum':
            return loss.sum()
        return loss

In [62]:
class LSTM(nn.Module):
    def __init__(self, vocab_size: int, embedding_dim: int, hidden_dim: int, output_size: int):
        super().__init__()
        self.embedding = nn.Embedding(num_embeddings=vocab_size, embedding_dim=embedding_dim, padding_idx=0)
        self.lstm = nn.LSTM(input_size=embedding_dim, hidden_size=hidden_dim, batch_first=True)
        self.linear = nn.Linear(hidden_dim, output_size)

    def forward(self, x: torch.Tensor):
        x = self.embedding(x)                  # [batch_size, seq_len, embed_dim]
        _, (h_n, _) = self.lstm(x)             # h_n: [num_layers, batch_size, hidden_dim]
        last_hidden = h_n.squeeze(0)           # [batch_size, hidden_dim]
        output = self.linear(last_hidden)      # [batch_size, output_size]
        return output

In [63]:
HIDDEN_DIM=64

model_2 = LSTM(vocab_size=VOCAB_SIZE,
               embedding_dim=EMBEDDING_DIM,
               hidden_dim=HIDDEN_DIM,
               output_size=1).to(device)
model_2

LSTM(
  (embedding): Embedding(108, 32, padding_idx=0)
  (lstm): LSTM(32, 64, batch_first=True)
  (linear): Linear(in_features=64, out_features=1, bias=True)
)

In [64]:
# --- Loss and Optimizer Functions ---
loss_fn = AsymmetricFocalLoss(
    alpha_pos=0.75,   # emphasize tautologies
    alpha_neg=0.25,   # downweight non-tautologies
    gamma_pos=2.0,    # focus more on hard positives
    gamma_neg=1.0     # still soften easy negatives
)
optimizer = torch.optim.Adam(params=model_2.parameters(), 
                            lr=LR)

In [65]:
# --- Train and Test Model_2 ---
set_seeds()
results = train(model=model_2,
                train_dataloader=train_dataloader,
                test_dataloader=test_dataloader,
                optimizer=optimizer,
                loss_fn=loss_fn,
                epochs=5,
                device=device)

  0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 0.0969 | train_acc: 0.7369 | test_loss: 0.0968 | test_acc: 0.7370
Epoch: 2 | train_loss: 0.0968 | train_acc: 0.7369 | test_loss: 0.0969 | test_acc: 0.7370
Epoch: 3 | train_loss: 0.0969 | train_acc: 0.7369 | test_loss: 0.0968 | test_acc: 0.7370
Epoch: 4 | train_loss: 0.0969 | train_acc: 0.7369 | test_loss: 0.0968 | test_acc: 0.7370
Epoch: 5 | train_loss: 0.0970 | train_acc: 0.7358 | test_loss: 0.0968 | test_acc: 0.7370


--- 
### **Build Model 3**

In [66]:
class GRU(nn.Module):
    def __init__(self, vocab_size, embedding_dim=32):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=0)

        self.gru1 = nn.GRU(
            input_size=embedding_dim,
            hidden_size=128,
            batch_first=True,
            bidirectional=True
        )

        self.gru2 = nn.GRU(
            input_size=128 * 2,  # Because bidirectional doubles output size
            hidden_size=64,
            batch_first=True,
            bidirectional=True
        )

        self.fc1 = nn.Linear(64 * 2, 32)  # 64*2 because second GRU is bidirectional
        self.relu = nn.ReLU()

        self.fc2 = nn.Linear(32, 1)  # Binary classification output (logit)

    def forward(self, x):
        # x shape: [batch_size, seq_len]
        x = self.embedding(x)              # [batch_size, seq_len, embed_dim]

        out1, _ = self.gru1(x)             # [batch_size, seq_len, 256]
        out2, _ = self.gru2(out1)          # [batch_size, seq_len, 128]

        out2_last = out2[:, -1, :]         # Use the last timestep's features
        x = self.relu(self.fc1(out2_last)) # [batch_size, 32]
        output = self.fc2(x)               # [batch_size, 1]

        return output  

In [67]:
model_3 = GRU(vocab_size=VOCAB_SIZE, embedding_dim=EMBEDDING_DIM).to(device)
model_3

GRU(
  (embedding): Embedding(108, 32, padding_idx=0)
  (gru1): GRU(32, 128, batch_first=True, bidirectional=True)
  (gru2): GRU(256, 64, batch_first=True, bidirectional=True)
  (fc1): Linear(in_features=128, out_features=32, bias=True)
  (relu): ReLU()
  (fc2): Linear(in_features=32, out_features=1, bias=True)
)

In [68]:
# --- Loss and Optimizer Functions ---
loss_fn = AsymmetricFocalLoss(
    alpha_pos=0.3,  # minority (tautology)
    alpha_neg=0.7,  # majority
    gamma_pos=3.0,
    gamma_neg=1.5
)
optimizer = torch.optim.Adam(params=model_3.parameters(), 
                            lr=0.0005)

In [69]:
# Get a summary of Model_3 
summary(model_3, 
         input_size=train_features_batch.shape,
         dtypes=[torch.long],
         verbose=0,
         col_names=["input_size", "output_size", "num_params", "trainable"],
         col_width=20,
         row_settings=["var_names"],
         device=device
)

Layer (type (var_name))                  Input Shape          Output Shape         Param #              Trainable
GRU (GRU)                                [16, 603]            [16, 1]              --                   True
├─Embedding (embedding)                  [16, 603]            [16, 603, 32]        3,456                True
├─GRU (gru1)                             [16, 603, 32]        [16, 603, 256]       124,416              True
├─GRU (gru2)                             [16, 603, 256]       [16, 603, 128]       123,648              True
├─Linear (fc1)                           [16, 128]            [16, 32]             4,128                True
├─ReLU (relu)                            [16, 32]             [16, 32]             --                   --
├─Linear (fc2)                           [16, 32]             [16, 1]              33                   True
Total params: 255,681
Trainable params: 255,681
Non-trainable params: 0
Total mult-adds (G): 2.39
Input size (MB): 0.08
Forwa

In [70]:
# --- Train and Test Model_3 ---
set_seeds()
results = train(model=model_3,
                train_dataloader=train_dataloader,
                test_dataloader=test_dataloader,
                optimizer=optimizer,
                loss_fn=loss_fn,
                epochs=20,
                device=device)

  0%|          | 0/20 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 0.0429 | train_acc: 0.8403 | test_loss: 0.0213 | test_acc: 0.9463
Epoch: 2 | train_loss: 0.0214 | train_acc: 0.9428 | test_loss: 0.0197 | test_acc: 0.9548
Epoch: 3 | train_loss: 0.0188 | train_acc: 0.9512 | test_loss: 0.0157 | test_acc: 0.9594
Epoch: 4 | train_loss: 0.0153 | train_acc: 0.9587 | test_loss: 0.0153 | test_acc: 0.9486
Epoch: 5 | train_loss: 0.0133 | train_acc: 0.9608 | test_loss: 0.0129 | test_acc: 0.9640
Epoch: 6 | train_loss: 0.0121 | train_acc: 0.9624 | test_loss: 0.0130 | test_acc: 0.9666
Epoch: 7 | train_loss: 0.0109 | train_acc: 0.9655 | test_loss: 0.0117 | test_acc: 0.9655
Epoch: 8 | train_loss: 0.0096 | train_acc: 0.9674 | test_loss: 0.0102 | test_acc: 0.9689
Epoch: 9 | train_loss: 0.0090 | train_acc: 0.9674 | test_loss: 0.0092 | test_acc: 0.9689
Epoch: 10 | train_loss: 0.0083 | train_acc: 0.9705 | test_loss: 0.0091 | test_acc: 0.9670
Epoch: 11 | train_loss: 0.0078 | train_acc: 0.9688 | test_loss: 0.0118 | test_acc: 0.9578
Epoch: 12 | train_l

In [71]:
save_model(model=model_3,
           target_dir="models",
           model_name="Bidirectional_GRU_1.pth")

[INFO] Saving model to: models/Bidirectional_GRU_1.pth


---
### **Part 2**
> **Note:** Needs to execute all cells before Preparing the Data.

#### **2.1 Extending Dataset** with [SATLIB - Benchmark Problems](https://www.cs.ubc.ca/~hoos/SATLIB/benchm.html), using propositional formulas in Dimacs format.

Formulas Downloaded from SATLIB: 
- uf20-91: 20 variables, 91 clauses - 1000 instances, all satisfiable
- uf50-218 / uuf50-218: 50 variables, 218 clauses - 1000 instances, all sat/unsat

In [24]:
def parse_dimacs_files(base_dir: str):
    """
    Parses DIMACS CNF files from a directory and returns a labeled dataset of formulas.

    This function scans subdirectories under the specified base directory to identify 
    and process CNF files representing propositional logic formulas in DIMACS format. 
    It labels the formulas as tautology or not tautology based on directory naming 
    conventions (`uf*` = satisfiable, `uuf*` = unsatisfiable).

    Args:
        base_dir (str): The path to the top-level directory containing `uf*` and `uuf*` folders
                        with .cnf files inside.

    Returns:
        pd.DataFrame: A DataFrame containing two columns:
                      - 'formula': the parsed `Formula` object
                      - 'is_tautology': a boolean indicating whether the formula is satisfiable.
    """
    formulas_data = []
    base_path = Path(base_dir)

    for folder in base_path.iterdir():
        if folder.is_dir():
            folder_name = folder.name.lower()
            if folder_name.startswith("uf") and not folder_name.startswith("uuf"):
                label = True  # satisfiable
            elif folder_name.startswith("uuf"):
                label = False  # unsatisfiable
            else:
                print(f"Skipping unknown folder: {folder_name}")
                continue

            for file_path in folder.glob("*.cnf"):
                with open(file_path, 'r') as file:
                    dimacs_lines = file.readlines()
                try:
                    formula = Formula.from_dimacs(dimacs_lines)
                    formulas_data.append({
                        'formula': formula,
                        'is_tautology': label
                    })
                except Exception as e:
                    print(f"Error parsing {file_path.name}: {e}")

    return pd.DataFrame(formulas_data)

# base_dir = "/Users/nicolabalestra/Desktop/DeepSAT/theorem_prover/theorem_prover_core/ICTCS_notebooks/dimacs_formulas_datasets"
base_dir = "/home/labeconomia/nbalestra/theorem_prover/theorem_prover_core/ICTCS_notebooks/dimacs_formulas_datasets"
df_satlib = parse_dimacs_files(base_dir=base_dir)

In [25]:
df_satlib.is_tautology.value_counts()

is_tautology
True     2000
False    1000
Name: count, dtype: int64

In [26]:
# Inspecting some sampled unsatisfable formulas
df_satlib[df_satlib['is_tautology'] == False].sample(5)

Unnamed: 0,formula,is_tautology
2531,(A42 ∨ ¬A12 ∨ A27) ∧ (A24 ∨ A9 ∨ ¬A42) ∧ (¬A38...,False
2532,(¬A9 ∨ ¬A46 ∨ ¬A19) ∧ (¬A22 ∨ A17 ∨ A48) ∧ (¬A...,False
2669,(¬A22 ∨ A1 ∨ A43) ∧ (A6 ∨ ¬A15 ∨ ¬A4) ∧ (A39 ∨...,False
2202,(A46 ∨ A32 ∨ A37) ∧ (A36 ∨ ¬A1 ∨ ¬A46) ∧ (A25 ...,False
2608,(A18 ∨ A27 ∨ A30) ∧ (A35 ∨ ¬A12 ∨ ¬A23) ∧ (A17...,False


In [27]:
# Inspecting some sampled satisfable formulas 
df_satlib[df_satlib['is_tautology'] == True].sample(5)

Unnamed: 0,formula,is_tautology
836,(A26 ∨ ¬A27 ∨ ¬A46) ∧ (A19 ∨ A6 ∨ A25) ∧ (¬A36...,True
1173,(A9 ∨ A1 ∨ A7) ∧ (¬A16 ∨ A0 ∨ A11) ∧ (A11 ∨ ¬A...,True
1302,(¬A1 ∨ A11 ∨ ¬A15) ∧ (¬A3 ∨ A11 ∨ A5) ∧ (A9 ∨ ...,True
1703,(¬A3 ∨ A6 ∨ ¬A7) ∧ (¬A14 ∨ ¬A2 ∨ ¬A0) ∧ (¬A17 ...,True
30,(¬A24 ∨ ¬A31 ∨ A3) ∧ (¬A16 ∨ A41 ∨ A35) ∧ (A2 ...,True


In [28]:
random.seed(42)
random_idx = random.choice(range(len(df_satlib)))
print(df_satlib.iloc[random_idx]['formula'])

(¬A27 ∨ A42 ∨ A28) ∧ (¬A49 ∨ A37 ∨ A9) ∧ (¬A38 ∨ ¬A26 ∨ A40) ∧ (¬A40 ∨ ¬A13 ∨ A35) ∧ (A45 ∨ ¬A23 ∨ ¬A28) ∧ (A31 ∨ A12 ∨ A39) ∧ (¬A35 ∨ A7 ∨ A46) ∧ (¬A8 ∨ ¬A42 ∨ ¬A29) ∧ (¬A11 ∨ A29 ∨ ¬A42) ∧ (¬A14 ∨ ¬A18 ∨ ¬A5) ∧ (¬A40 ∨ ¬A16 ∨ A4) ∧ (¬A38 ∨ A12 ∨ ¬A20) ∧ (A36 ∨ A15 ∨ A17) ∧ (¬A44 ∨ A25 ∨ A36) ∧ (¬A19 ∨ ¬A37 ∨ A27) ∧ (A16 ∨ A20 ∨ A29) ∧ (A34 ∨ A38 ∨ ¬A19) ∧ (¬A29 ∨ A8 ∨ A41) ∧ (¬A4 ∨ A18 ∨ A32) ∧ (¬A0 ∨ ¬A24 ∨ A42) ∧ (A41 ∨ ¬A16 ∨ ¬A44) ∧ (¬A26 ∨ ¬A21 ∨ A32) ∧ (A21 ∨ A6 ∨ ¬A13) ∧ (A19 ∨ ¬A8 ∨ A7) ∧ (¬A7 ∨ ¬A0 ∨ ¬A37) ∧ (A23 ∨ ¬A19 ∨ ¬A3) ∧ (¬A11 ∨ A16 ∨ ¬A12) ∧ (A48 ∨ ¬A34 ∨ ¬A44) ∧ (A0 ∨ A18 ∨ A7) ∧ (A19 ∨ ¬A28 ∨ ¬A7) ∧ (¬A43 ∨ A1 ∨ ¬A6) ∧ (¬A37 ∨ A30 ∨ ¬A13) ∧ (¬A37 ∨ ¬A22 ∨ A10) ∧ (¬A31 ∨ ¬A34 ∨ ¬A27) ∧ (A46 ∨ A11 ∨ A26) ∧ (¬A39 ∨ A11 ∨ A5) ∧ (A42 ∨ ¬A45 ∨ ¬A4) ∧ (¬A31 ∨ A32 ∨ ¬A0) ∧ (¬A1 ∨ ¬A45 ∨ A19) ∧ (A28 ∨ ¬A30 ∨ ¬A23) ∧ (A9 ∨ A24 ∨ A3) ∧ (¬A40 ∨ A12 ∨ A6) ∧ (A23 ∨ ¬A48 ∨ ¬A33) ∧ (A9 ∨ ¬A43 ∨ ¬A29) ∧ (A1 ∨ ¬A9 ∨ A14) ∧ (¬A17 ∨ ¬A18 ∨ A8) ∧ (A23 ∨ A0 ∨ ¬A27) ∧ (A42 ∨ A16 ∨ A12) 

In [29]:
random.seed(7)
random_idx = random.choice(range(len(dataset)))
print(dataset.iloc[random_idx]['formula'])

(¬((A0 → A1) → ¬A2 ∧ (A3 → A4) ∧ ¬¬A5) ∨ ((¬(A0 ∨ A1 → A2) ∨ A3) ∧ (A4 ∨ A5) ∧ A6 ∧ A7 ∧ A0) → (¬((A0 → A1) → ¬A2 ∧ (A3 → A4) ∧ ¬¬A5) ∨ ((¬(A0 ∨ A1 → A2) ∨ A3) ∧ (A4 ∨ A5) ∧ A6 ∧ A7)) ∧ (¬((A0 → A1) → ¬A2 ∧ (A3 → A4) ∧ ¬¬A5) ∨ A0)) ∧ ((¬((A0 → A1) → ¬A2 ∧ (A3 → A4) ∧ ¬¬A5) ∨ ((¬(A0 ∨ A1 → A2) ∨ A3) ∧ (A4 ∨ A5) ∧ A6 ∧ A7)) ∧ (¬((A0 → A1) → ¬A2 ∧ (A3 → A4) ∧ ¬¬A5) ∨ A0) → ¬((A0 → A1) → ¬A2 ∧ (A3 → A4) ∧ ¬¬A5) ∨ ((¬(A0 ∨ A1 → A2) ∨ A3) ∧ (A4 ∨ A5) ∧ A6 ∧ A7 ∧ A0))


In [30]:
# --- Normalizing new formulas ---
df_satlib['formula'] = df_satlib['formula'].apply(lambda f: str(Normalizer().normalize(f)))
df_satlib[:5]

Unnamed: 0,formula,is_tautology
0,(¬A0 ∨ ¬A1 ∨ ¬A2) ∧ (A3 ∨ A4 ∨ ¬A5) ∧ (¬A6 ∨ ¬...,True
1,(¬A0 ∨ ¬A1 ∨ ¬A2) ∧ (¬A3 ∨ A4 ∨ ¬A5) ∧ (¬A6 ∨ ...,True
2,(A0 ∨ A1 ∨ A2) ∧ (¬A3 ∨ ¬A4 ∨ A5) ∧ (¬A6 ∨ A7 ...,True
3,(A0 ∨ A1 ∨ A2) ∧ (¬A3 ∨ A4 ∨ ¬A5) ∧ (¬A6 ∨ ¬A7...,True
4,(A0 ∨ ¬A1 ∨ ¬A2) ∧ (A0 ∨ ¬A3 ∨ A4) ∧ (A5 ∨ A6 ...,True


In [31]:
# --- Concateating datasets and shuffling ---
dataset_composed = pd.concat([dataset, df_satlib], ignore_index=True)
dataset_composed = dataset_composed.sample(frac=1, random_state=42).reset_index(drop=True) # frac=1 means shuffle all rows
                                                                                           # reset_index(drop=True) removes the old index

# Saving new dataset in cvs format 
dataset_composed.to_csv('datasets/combined_with_dimacs_formulas_dataset.csv', index=False)
print(f"[INFO] Dataset saved to: {os.path.abspath('datasets/combined_with_dimacs_formulas_dataset.csv')}")

[INFO] Dataset saved to: /home/labeconomia/nbalestra/theorem_prover/theorem_prover_core/ICTCS_notebooks/datasets/combined_with-dimacs_formulas_dataset.csv


In [32]:
print(dataset_composed.count())
count = dataset_composed.is_tautology.value_counts()
print(f"\nNumber of True and False formulas: \n{count}\n")

total = len(dataset_composed)
tautologies = dataset_composed["is_tautology"].sum()
percentage = (tautologies / total) * 100
print(f"Percentage of tautologies in the dataset: {percentage:.2f}%")

formula         16000
is_tautology    16000
dtype: int64

Number of True and False formulas: 
is_tautology
False    10576
True      5424
Name: count, dtype: int64

Percentage of tautologies in the dataset: 33.90%


--- 
#### **2.2 Preparing Data**

In [33]:
# --- Parser Module ---
# Parses a string representation of a formula

# Operator symbol to class map
OPERATOR_CLASSES = {
    '¬': Negation,
    '∧': Conjunction,
    '∨': Disjunction,
    #'⊻': ExclusiveDisjunction,
    '→': Implication
}

def tokenize(formula: str) -> List[str]:
    """
    Splits a logical formula string into a list of tokens.

    Supported tokens:
        - Propositional letters: A0, A1, ...
        - Connectives: ¬, ∧, ∨, ⊻, →
        - Falsity: ⊥
        - Parentheses: (,)

    Args:
        formula (str): A logical formula in string format.

    Returns:
        List[str]: A list of string tokens (e.g. ['¬', '(', 'A0', '∧', 'A1', ')'])

    Raises:
        ValueError: If the formula contains an unrecognized character.  
    """
    tokens = []
    i = 0
    # Iterate over each character
    while i < len(formula):
        c = formula[i]
        # Skip whitespace
        if c.isspace():
            i += 1
        elif c in '()¬∧∨⊻':
            tokens.append(c)
            i += 1
        elif formula[i:i+1] == '→':  
            tokens.append('→')
            i += 1
        elif c == 'A':
            j = i + 1
            while j < len(formula) and formula[j].isdigit():
                j += 1
            tokens.append(formula[i:j])
            i = j
        elif c == '⊥':
            tokens.append('⊥')
            i += 1
        else:
            raise ValueError(f"Unexpected character: {c}")
    return tokens


def parse_formula_string(formula_string: str) -> Formula:
    """
    Parses a string representation of a propositional logic formula into a
    structured Formula object, respecting operator precedence and associativity.

    Precedence and associativity are extracted directly from the formula classes.

    Args:
        formula_string (str): The input logical formula in string form.

    Returns:
        Formula: The parsed Formula object (e.g., Conjunction, Implication, etc.).

    Raises:
        ValueError: If the formula contains invalid or unexpected tokens.
    """
    tokens = tokenize(formula_string)

    def parse_expr(tokens: List[str], min_prec: int = 0) -> Formula:
        """
        Recursively parses an expression using a precedence climbing strategy.

        Args:
            tokens (List[str]): List of tokens.
            min_prec (int): Minimum precedence required to continue parsing.

        Returns:
            Formula: A Formula object representing the parsed structure.

        Raises:
            ValueError: For invalid token sequences or unexpected syntax.
        """
        if not tokens:
            raise ValueError("Empty expression")
        
        # Pick the first element of the list
        token = tokens.pop(0)

        # Base cases: atomic formulas
        if token == '(':
            # Handle parentheses by recursing on the subexpression
            sub_tokens = []
            depth = 1
            while tokens:
                t = tokens.pop(0)
                if t == '(':
                    depth += 1
                elif t == ')':
                    depth -= 1
                    if depth == 0:
                        break
                sub_tokens.append(t)
            node = parse_expr(sub_tokens)

        elif token == '⊥':
            node = Falsity()

        elif token.startswith('A') and token[1:].isdigit():
            node = Letter(int(token[1:]))

        elif token == '¬':
            # Unary operator
            cls = Negation
            right = parse_expr(tokens, cls.priority)
            node = cls(right)

        else:
            raise ValueError(f"Unexpected token: {token}")

        # After parsing an atomic or unary expression, handle binary connectives
        while tokens and tokens[0] in OPERATOR_CLASSES:
            op = tokens[0]
            cls = OPERATOR_CLASSES[op]
            prec = cls.priority
            assoc = cls.associativity

            if prec < min_prec:
                break

            tokens.pop(0)  # consume the operator
            
            # Recursively parse the right-hand expression, and combine it with the 
            # left node into a full binary Formula
            # Adjust min_prec depending on associativity
            next_min_prec = prec + 1 if assoc == 'left' else prec

            right = parse_expr(tokens, next_min_prec)
            node = cls(node, right)

        return node

    return parse_expr(tokens)


# Example 1: 
f1 = parse_formula_string("¬A0 ∨ A1")
f2 = parse_formula_string("¬(A0 ∨ A1)")

print(f1)  # (¬A0 ∨ A1)
print(f2)  # ¬(A0 ∨ A1)
print(f"{f1} == {f2} shoud be false and is: {f1 == f2}\n")  # False — correct

# Example 2:
list_of_str = [
    "¬(A0 ∧ A1) → (¬A0 ∨ ¬A1)",
    "¬(A0 ∨ A1) → (¬A0 ∧ ¬A1)",
    "(A0 ∧ (A1 ∨ A2)) → ((A0 ∧ A1) ∨ (A0 ∧ A2))"
]

for formula_str in list_of_str:
    parsed_formulas = parse_formula_string(formula_str)
    print(parsed_formulas)

¬A0 ∨ A1
¬(A0 ∨ A1)
¬A0 ∨ A1 == ¬(A0 ∨ A1) shoud be false and is: False

¬(A0 ∧ A1) → ¬A0 ∨ ¬A1
¬(A0 ∨ A1) → ¬A0 ∧ ¬A1
A0 ∧ (A1 ∨ A2) → (A0 ∧ A1) ∨ (A0 ∧ A2)


In [34]:
# --- Formulas and Truth Values lists ---
Formulas = dataset_composed['formula'].tolist()
Truth_values = dataset_composed['is_tautology'].tolist()

len(Formulas), len(Truth_values)

(16000, 16000)

In [35]:
formula_set = set(Formulas)
len(formula_set)

16000

In [36]:
# --- Parse Dataset's formulas ---
parsed_formulas = [parse_formula_string(f) for f in Formulas]

print(f"Parsed second formula: {parsed_formulas[1]}")
print(f"Total parsed formulas: {len(parsed_formulas)}")

distinct_formulas = set(parsed_formulas)
print(f"Number of distinct parsed formulas: {len(distinct_formulas)}")

Parsed second formula: (¬((A0 → A1 ∨ A2 ∨ A3) ∨ A0) → ¬(A0 → A1 ∨ A2 ∨ A3) ∧ ¬A0) ∧ (¬(A0 → A1 ∨ A2 ∨ A3) ∧ ¬A0 → ¬((A0 → A1 ∨ A2 ∨ A3) ∨ A0))
Total parsed formulas: 16000
Number of distinct parsed formulas: 16000


In [37]:
# Data splitting: 80% of the data is reserved for training the model. 
# Test Sets: 20% of the data is used for testing the model.

X_train, X_test, y_train, y_test = train_test_split(parsed_formulas, 
                                                    Truth_values, 
                                                    test_size=0.2, 
                                                    random_state=42)

print(f"Training set: {len(X_train)} samples")
print(f"Test set: {len(X_test)} samples")

Training set: 12800 samples
Test set: 3200 samples


In [38]:
# --- Convert symbols in numbers ---
class CustomTokenizer:
    """
    Custom tokenizer class for logical formulas.
    This class converts formulas into tokenized integer representations,
    and supports detokenizing back into Formula objects.
    """

    def __init__(self):
        self.token_to_formula: Dict[int, Formula] = {}
        self.formula_to_token: Dict[Formula, int] = {}

        self.connective_map = {
            'Conjunction': 100,
            'Disjunction': 101,
            'Negation': 102,
            'Implication': 103,
            #'Exclusive Disjunction': 104
        }

        self.special_map = {
            '(': 106,
            ')': 107
        }

        self.falsity_token = 105

    def fit(self, formulas: List[Formula]):
        """
        Fit the tokenizer on a list of formulas, deriving tokens for each formula.
        """
        for formula in formulas:
            self._derive_tokens(formula)

    def _derive_tokens(self, formula: Formula):
        """
        Recursively derive tokens for all subcomponents of a formula.
        """
        if isinstance(formula, Falsity):
            if formula not in self.formula_to_token:
                self.formula_to_token[formula] = self.falsity_token
                self.token_to_formula[self.falsity_token] = formula

        elif isinstance(formula, Letter):
            letter_index = formula.n
            if formula not in self.formula_to_token:
                token = letter_index + 1  # Start letters from 1
                self.formula_to_token[formula] = token
                self.token_to_formula[token] = formula

        elif isinstance(formula, UnaryConnectiveFormula):
            self._derive_tokens(formula.formula)

        elif isinstance(formula, BinaryConnectiveFormula):
            self._derive_tokens(formula.left)
            self._derive_tokens(formula.right)

    def tokenize(self, formula: Formula) -> List[int]:
        """
        Convert a formula into a list of integer tokens.
        """
        tokens = []
        self._tokenize_helper(formula, tokens)
        return tokens

    def _tokenize_helper(self, formula: Formula, tokens: List[int]):
        """
        Helper method for recursive token generation.
        """
        if formula in self.formula_to_token:
            tokens.append(self.formula_to_token[formula])
            return

        if isinstance(formula, BinaryConnectiveFormula):
            tokens.append(self.special_map['('])
            self._tokenize_helper(formula.left, tokens)
            tokens.append(self.connective_map[type(formula).__name__])
            self._tokenize_helper(formula.right, tokens)
            tokens.append(self.special_map[')'])

        elif isinstance(formula, UnaryConnectiveFormula):
            tokens.append(self.special_map['('])
            tokens.append(self.connective_map[type(formula).__name__])
            tokens.append(self.special_map['('])
            self._tokenize_helper(formula.formula, tokens)
            tokens.append(self.special_map[')'])
            tokens.append(self.special_map[')'])

        elif isinstance(formula, Falsity):
            tokens.append(self.falsity_token)

        elif isinstance(formula, Letter):
            tokens.append(self.formula_to_token[formula])

    def detokenize(self, tokens: List[int]) -> Formula:
        """
        Convert a list of tokens (possibly padded) back into a Formula object.
        """
        # Remove trailing padding
        tokens = [t for t in tokens if t != 0]

        def parse_expr(pos: int) -> Tuple[Formula, int]:
            token = tokens[pos]

            if token == self.falsity_token:
                return Falsity(), pos + 1

            elif token in self.token_to_formula:
                return self.token_to_formula[token], pos + 1

            elif token == self.special_map['(']:
                next_token = tokens[pos + 1]

                # Handle unary connective
                if next_token in self.connective_map.values() and tokens[pos + 2] == self.special_map['(']:
                    op_token = next_token
                    inner_formula, new_pos = parse_expr(pos + 3)
                    assert tokens[new_pos] == self.special_map[')']
                    assert tokens[new_pos + 1] == self.special_map[')']
                    connective_class = self._connective_class(op_token)
                    return connective_class(inner_formula), new_pos + 2

                # Handle binary connective
                else:
                    left_formula, pos_left = parse_expr(pos + 1)
                    op_token = tokens[pos_left]
                    right_formula, pos_right = parse_expr(pos_left + 1)
                    assert tokens[pos_right] == self.special_map[')']
                    connective_class = self._connective_class(op_token)
                    return connective_class(left_formula, right_formula), pos_right + 1

            raise ValueError(f"Unexpected token at position {pos}: {tokens[pos:]}")

        return parse_expr(0)[0]

    def _connective_class(self, token: int):
        """
        Resolve token ID back to the appropriate connective class.
        """
        for name, code in self.connective_map.items():
            if token == code:
                return {
                    'Conjunction': Conjunction,
                    'Disjunction': Disjunction,
                    'Negation': Negation,
                    'Implication': Implication,
                    #'Exclusive Disjunction': ExclusiveDisjunction
                }[name]
        raise ValueError(f"Unknown connective token: {token}")

In [39]:
# Inspecting unique formula's token 
tokenizer = CustomTokenizer()
tokenizer.fit(parsed_formulas)

def print_token_mappings(tokenizer):
    for token, formula in tokenizer.token_to_formula.items():
        print(f"Token: {token}, Formula: {formula}")

print_token_mappings(tokenizer)

# Example
example_formula = Conjunction(Letter(0), Negation(Letter(1)))
tokens = tokenizer.tokenize(example_formula)

print(f"\nTokenized representation of ({example_formula}): {tokens}")

Token: 1, Formula: A0
Token: 2, Formula: A1
Token: 105, Formula: ⊥
Token: 3, Formula: A2
Token: 4, Formula: A3
Token: 5, Formula: A4
Token: 6, Formula: A5
Token: 7, Formula: A6
Token: 8, Formula: A7
Token: 9, Formula: A8
Token: 10, Formula: A9
Token: 11, Formula: A10
Token: 12, Formula: A11
Token: 13, Formula: A12
Token: 14, Formula: A13
Token: 15, Formula: A14
Token: 16, Formula: A15
Token: 17, Formula: A16
Token: 18, Formula: A17
Token: 19, Formula: A18
Token: 20, Formula: A19
Token: 21, Formula: A20
Token: 22, Formula: A21
Token: 23, Formula: A22
Token: 24, Formula: A23
Token: 25, Formula: A24
Token: 26, Formula: A25
Token: 27, Formula: A26
Token: 28, Formula: A27
Token: 29, Formula: A28
Token: 30, Formula: A29
Token: 31, Formula: A30
Token: 32, Formula: A31
Token: 33, Formula: A32
Token: 34, Formula: A33
Token: 35, Formula: A34
Token: 36, Formula: A35
Token: 37, Formula: A36
Token: 38, Formula: A37
Token: 39, Formula: A38
Token: 40, Formula: A39
Token: 41, Formula: A40
Token: 42, F

In [40]:
# --- Tokenize Train Test ---
tokenizer = CustomTokenizer()
tokenizer.fit(X_train)

In [41]:
# Test function
def test_tokenizer(tokenizer, example_formulas):
    for formula in example_formulas:
        tokens = tokenizer.tokenize(formula)
        print(f"Original Formula: {formula}")
        print(f"Tokenized Version: {tokens}\n")

if __name__ == "__main__":
    tokenizer = CustomTokenizer()
    example_formulas = [X_train[2], X_train[3], X_train[4]]
    tokenizer.fit(example_formulas)

    test_tokenizer(tokenizer, example_formulas)

Original Formula: (¬((¬A0 → ⊥) ∨ A0) → ¬(¬A0 → ⊥) ∧ ¬A0) ∧ (¬(¬A0 → ⊥) ∧ ¬A0 → ¬((¬A0 → ⊥) ∨ A0))
Tokenized Version: [106, 106, 106, 102, 106, 106, 106, 106, 102, 106, 1, 107, 107, 103, 105, 107, 101, 1, 107, 107, 107, 103, 106, 106, 102, 106, 106, 106, 102, 106, 1, 107, 107, 103, 105, 107, 107, 107, 100, 106, 102, 106, 1, 107, 107, 107, 107, 100, 106, 106, 106, 102, 106, 106, 106, 102, 106, 1, 107, 107, 103, 105, 107, 107, 107, 100, 106, 102, 106, 1, 107, 107, 107, 103, 106, 102, 106, 106, 106, 106, 102, 106, 1, 107, 107, 103, 105, 107, 101, 1, 107, 107, 107, 107, 107]

Original Formula: ¬((A0 ∧ (A1 → A2)) ∨ (A3 → A4) ∨ ⊥ ∨ A5)
Tokenized Version: [106, 102, 106, 106, 106, 106, 106, 1, 100, 106, 2, 103, 3, 107, 107, 101, 106, 4, 103, 5, 107, 107, 101, 105, 107, 101, 6, 107, 107, 107]

Original Formula: ¬⊥ ∨ A0 ∨ A1
Tokenized Version: [106, 106, 106, 102, 106, 105, 107, 107, 101, 1, 107, 101, 2, 107]



In [42]:
tokenizer = CustomTokenizer()
tokenizer.fit(X_train)

num_letters = sum(1 for formula in tokenizer.formula_to_token if isinstance(formula, Letter))
num_connectives = len(tokenizer.connective_map)
num_parenthesis = sum(1 for formula in tokenizer.special_map)

print(f"Number of unique letters: {num_letters}")
print(f"Number of unique connectives: {num_connectives}")
print(f"Number of spacial tokens: {num_parenthesis}")

Number of unique letters: 50
Number of unique connectives: 4
Number of spacial tokens: 2


In [43]:
# --- Tokenize Train and Test formulas ---
tokenizer = CustomTokenizer()
tokenizer.fit(X_train)

X_train_seq = [tokenizer.tokenize(formula) for formula in X_train]
X_test_seq = [tokenizer.tokenize(formula) for formula in X_test]

In [44]:
# --- Convert lists of int into tensors ---
X_train_tensors = [torch.tensor(seq, dtype=torch.long) for seq in X_train_seq]
X_test_tensors = [torch.tensor(seq, dtype=torch.long) for seq in X_test_seq]

# --- Pad sequences (fill with 0s) ---
X_train_padded = pad_sequence(X_train_tensors, batch_first=True, padding_value=0) # With batch_first = True, Shape: (batch_size, seq_len, features)
X_test_padded = pad_sequence(X_test_tensors, batch_first=True, padding_value=0)

# --- Convert labels to tensors --- 
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32)

In [45]:
print(f"X_train_padded shape: {X_train_padded.shape} -> [num_of_tokenized_formulas, num_of_tokens_per_formula])")
print(f"y_train_tensor shape: {y_train_tensor.shape} -> [num_of_labels]")

print(f"\nX_test_padded shape: {X_test_padded.shape} -> [num_of_tokenized_formulas, num_of_tokens_per_formula])")
print(f"y_test_tensor shape: {y_train_tensor.shape} -> [num_of_labels]")

X_train_padded shape: torch.Size([12800, 4468]) -> [num_of_tokenized_formulas, num_of_tokens_per_formula])
y_train_tensor shape: torch.Size([12800]) -> [num_of_labels]

X_test_padded shape: torch.Size([3200, 4433]) -> [num_of_tokenized_formulas, num_of_tokens_per_formula])
y_test_tensor shape: torch.Size([12800]) -> [num_of_labels]


--- 
#### **2.3 Getting a PyTorch Dataset**

In [46]:
# --- Create a PyTorch Dataset ---
class FormulaDataset(Dataset):
    def __init__(self, X, y):
        self.X = X
        self.y = y

        # Define class label info
        self.classes = ['non-tautology', 'tautology']
        self.class_to_idx = {label: i for i, label in enumerate(self.classes)}

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

    def __len__(self):
        return len(self.X)

    def __repr__(self):
        return (
            f"Dataset FormulaDataset\n"
            f"  Number of datapoints: {len(self)}\n"
            f"  Input shape: {self.X[0].shape if len(self.X) > 0 else 'N/A'}\n"
            f"  Target type: {self.y.dtype}\n"
        )


train_data = FormulaDataset(X_train_padded, y_train_tensor)
test_data = FormulaDataset(X_test_padded, y_test_tensor)

print(f"{train_data} \n{test_data}")

Dataset FormulaDataset
  Number of datapoints: 12800
  Input shape: torch.Size([4468])
  Target type: torch.float32
 
Dataset FormulaDataset
  Number of datapoints: 3200
  Input shape: torch.Size([4433])
  Target type: torch.float32



In [47]:
import collections

train_class_counts = collections.Counter(y_train)
test_class_counts = collections.Counter(y_test)

print(f"Train class counts: {train_class_counts}")
print(f"Test class counts: {test_class_counts}")

Train class counts: Counter({False: 8484, True: 4316})
Test class counts: Counter({False: 2092, True: 1108})


In [48]:
print(f"Train set (15360 samples): False = {(8484/12800)*100:.2f} % and True = {(4316/12800)* 100:.2f} %")
print(f"Test set (240samples): False = {(2092/3200)*100:.2f}% and True = {(1108/3200)*100:.2f}%")

Train set (15360 samples): False = 66.28 % and True = 33.72 %
Test set (240samples): False = 65.38% and True = 34.62%


In [49]:
# Classes Names and Labels Map
class_names = train_data.classes
class_to_idx = train_data.class_to_idx
print(class_names)
print(class_to_idx)

['non-tautology', 'tautology']
{'non-tautology': 0, 'tautology': 1}


In [50]:
# Visualize a random sampled formula
torch.manual_seed(41)
random_idx = torch.randint(0, len(train_data), size=[1]).item()

random_formula, label = train_data[random_idx]
print(f"Random formula: {random_formula}, \n\nLabel: {label}")

# Convert tensor to list of token IDs (needed by detokenize)
token_list = random_formula.tolist()
# Reconstruct the original Formula
reconstructed_formula = tokenizer.detokenize(token_list)
print(f"Reconstructed formula: {reconstructed_formula}")
print(f"\nTautology status: {class_names[int(label.item())]}")

Random formula: tensor([106, 106, 106,  ...,   0,   0,   0]), 

Label: 1.0
Reconstructed formula: (A0 ∧ ((A0 ∧ (A1 ∧ ⊥ → A2 ∨ ⊥ → ¬A3) → A4 ∧ A5 ∧ A0) ∨ A0) → (A0 ∧ (A0 ∧ (A1 ∧ ⊥ → A2 ∨ ⊥ → ¬A3) → A4 ∧ A5 ∧ A0)) ∨ (A0 ∧ A0)) ∧ ((A0 ∧ (A0 ∧ (A1 ∧ ⊥ → A2 ∨ ⊥ → ¬A3) → A4 ∧ A5 ∧ A0)) ∨ (A0 ∧ A0) → A0 ∧ ((A0 ∧ (A1 ∧ ⊥ → A2 ∨ ⊥ → ¬A3) → A4 ∧ A5 ∧ A0) ∨ A0))

Tautology status: tautology


---
#### **2.4 DataLoader**

In [51]:
# Setup the batch size hyperparameter
BATCH_SIZE = 16

# Turn datasets into iterables (batches)
train_dataloader = DataLoader(train_data, # dataset to turn into iterable
                              batch_size=BATCH_SIZE, # how many samples per batch? 
                              shuffle=True # shuffle data every epoch?
                                           # This removes the data order, so the model does not learn it 
)

test_dataloader = DataLoader(test_data,
                             batch_size=BATCH_SIZE,
                             shuffle=False # don't necessarily have to shuffle the testing data
)

# Let's check out what we've created
print(f"Dataloaders: {train_dataloader, test_dataloader}") 
print(f"Length of train dataloader: {len(train_dataloader)} batches of {BATCH_SIZE}")
print(f"Length of test dataloader: {len(test_dataloader)} batches of {BATCH_SIZE}")

Dataloaders: (<torch.utils.data.dataloader.DataLoader object at 0x7fed58b565f0>, <torch.utils.data.dataloader.DataLoader object at 0x7fed58b56f80>)
Length of train dataloader: 800 batches of 16
Length of test dataloader: 200 batches of 16


In [52]:
# Checking out what's inside the training dataloader
train_features_batch, train_labels_batch = next(iter(train_dataloader)) # next() grabs the first batch from the iterator
print(f"{train_features_batch.shape, train_labels_batch.shape} -> [batch_size, num_of_tokens_per_formula], [bach_size]")

(torch.Size([16, 4468]), torch.Size([16])) -> [batch_size, num_of_tokens_per_formula], [bach_size]


--- 
#### **2.5 Set up device agnostic-code**

In [53]:
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cuda'

--- 
#### **2.5 Reproducibility**

In [54]:
def set_seeds(seed: int=42):
    """Sets random sets for torch operations.

    Args:
        seed (int, optional): Random seed to set. Defaults to 42.
    """
    # Set the seed for general torch operations
    torch.manual_seed(seed)
    # Set the seed for CUDA torch operations (ones that happen on the GPU)
    #torch.mps.manual_seed(seed)
    torch.cuda.manual_seed(42)

--- 
#### **2.6 `train_step()`, `test_test()`, and `train_()` functions.**

In [55]:
# --- Functions for training and testing a PyTorch model ---

def train_step(model: torch.nn.Module, 
               dataloader: torch.utils.data.DataLoader, 
               loss_fn: torch.nn.Module, 
               optimizer: torch.optim.Optimizer,
               device: torch.device) -> Tuple[float, float]:
  """Trains a PyTorch model for a single epoch.

  Turns a target PyTorch model to training mode and then
  runs through all of the required training steps (forward
  pass, loss calculation, optimizer step).

  Args:
    model: A PyTorch model to be trained.
    dataloader: A DataLoader instance for the model to be trained on.
    loss_fn: A PyTorch loss function to minimize.
    optimizer: A PyTorch optimizer to help minimize the loss function.
    device: A target device to compute on.

  Returns:
    A tuple of training loss and training accuracy metrics.
    In the form (train_loss, train_accuracy).
    
  """
  # Put model in train mode
  model.train()
  
  # Setup train loss and train accuracy values
  train_loss, train_acc = 0, 0
  
  # Loop through data loader data batches
  for batch, (X, y) in enumerate(dataloader):
      # Send data to target device
      X, y = X.to(device), y.to(device)

      # 1. Forward pass
      y_logits = model(X).squeeze(dim=1)
      y_preds = torch.round(torch.sigmoid(y_logits))

      # 2. Calculate  and accumulate loss
      loss = loss_fn(y_logits, y)
      train_loss += loss.item() 

      # 3. Optimizer zero grad
      optimizer.zero_grad()

      # 4. Loss backward
      loss.backward()

      # 5. Optimizer step
      optimizer.step()

      # Calculate and accumulate accuracy metric across all batches
      train_acc += (y_preds == y).sum().item()/len(y_preds)

  # Adjust metrics to get average loss and accuracy per batch 
  train_loss = train_loss / len(dataloader)
  train_acc = train_acc / len(dataloader)
  return train_loss, train_acc

def test_step(model: torch.nn.Module, 
              dataloader: torch.utils.data.DataLoader, 
              loss_fn: torch.nn.Module,
              device: torch.device) -> Tuple[float, float]:
  """Tests a PyTorch model for a single epoch.

  Turns a target PyTorch model to "eval" mode and then performs
  a forward pass on a testing dataset.

  Args:
    model: A PyTorch model to be tested.
    dataloader: A DataLoader instance for the model to be tested on.
    loss_fn: A PyTorch loss function to calculate loss on the test data.
    device: A target device to compute on.

  Returns:
    A tuple of testing loss and testing accuracy metrics.
    In the form (test_loss, test_accuracy). 
    
  """
  # Put model in eval mode
  model.eval() 
  
  # Setup test loss and test accuracy values
  test_loss, test_acc = 0, 0
  
  # Turn on inference context manager
  with torch.inference_mode():
      # Loop through DataLoader batches
      for batch, (X, y) in enumerate(dataloader):
          # Send data to target device
          X, y = X.to(device), y.to(device)
  
          # 1. Forward pass
          test_pred_logits = model(X).squeeze(dim=1)
          preds = torch.round(torch.sigmoid(test_pred_logits))

          # 2. Calculate and accumulate loss
          loss = loss_fn(test_pred_logits, y)
          test_loss += loss.item()
          
          # Calculate and accumulate accuracy
          test_pred_labels = preds
          test_acc += ((test_pred_labels == y).sum().item()/len(test_pred_labels))
          
  # Adjust metrics to get average loss and accuracy per batch 
  test_loss = test_loss / len(dataloader)
  test_acc = test_acc / len(dataloader)
  return test_loss, test_acc

def train(model: torch.nn.Module, 
          train_dataloader: torch.utils.data.DataLoader, 
          test_dataloader: torch.utils.data.DataLoader, 
          optimizer: torch.optim.Optimizer,
          loss_fn: torch.nn.Module,
          epochs: int,
          device: torch.device) -> Dict[str, List]:
  """Trains and tests a PyTorch model.

  Passes a target PyTorch models through train_step() and test_step()
  functions for a number of epochs, training and testing the model
  in the same epoch loop.

  Calculates, prints and stores evaluation metrics throughout.

  Args:
    model: A PyTorch model to be trained and tested.
    train_dataloader: A DataLoader instance for the model to be trained on.
    test_dataloader: A DataLoader instance for the model to be tested on.
    optimizer: A PyTorch optimizer to help minimize the loss function.
    loss_fn: A PyTorch loss function to calculate loss on both datasets.
    epochs: An integer indicating how many epochs to train for.
    device: A target device to compute on.

  Returns:
    A dictionary of training and testing loss as well as training and
    testing accuracy metrics. Each metric has a value in a list for 
    each epoch.
    In the form: {train_loss: [...],
                  train_acc: [...],
                  test_loss: [...],
                  test_acc: [...]} 

  """
  # Create empty results dictionary
  results = {"train_loss": [],
             "train_acc": [],
             "test_loss": [],
             "test_acc": []
  }
  
  # Loop through training and testing steps for a number of epochs
  for epoch in tqdm(range(epochs)):
      train_loss, train_acc = train_step(model=model,
                                          dataloader=train_dataloader,
                                          loss_fn=loss_fn,
                                          optimizer=optimizer,
                                          device=device)
      test_loss, test_acc = test_step(model=model,
          dataloader=test_dataloader,
          loss_fn=loss_fn,
          device=device)
      
      # Print out what's happening
      print(
          f"Epoch: {epoch+1} | "
          f"train_loss: {train_loss:.4f} | "
          f"train_acc: {train_acc:.4f} | "
          f"test_loss: {test_loss:.4f} | "
          f"test_acc: {test_acc:.4f}"
      )

      # Update results dictionary
      results["train_loss"].append(train_loss)
      results["train_acc"].append(train_acc)
      results["test_loss"].append(test_loss)
      results["test_acc"].append(test_acc)

  # Return the filled results at the end of the epochs
  return results

--- 
#### **2.7 Function to save a model**

In [56]:
def save_model(model: torch.nn.Module,
               target_dir: str,
               model_name: str):
  """Saves a PyTorch model to a target directory.

  Args:
    model: A target PyTorch model to save.
    target_dir: A directory for saving the model to.
    model_name: A filename for the saved model. Should include
      either ".pth" or ".pt" as the file extension.
  
  Example usage:
    save_model(model=model_0,
               target_dir="models",
               model_name="05_going_modular_tingvgg_model.pth")
  """
  # Create target directory
  target_dir_path = Path(target_dir)
  target_dir_path.mkdir(parents=True,
                        exist_ok=True)
  
  # Create model save path
  assert model_name.endswith(".pth") or model_name.endswith(".pt"), "model_name should end with '.pt' or '.pth'"
  model_save_path = target_dir_path / model_name

  # Save the model state_dict()
  print(f"[INFO] Saving model to: {model_save_path}")
  torch.save(obj=model.state_dict(),
             f=model_save_path)

---
#### **2.8 Hyperparameters**

In [57]:
# --- Hyperparameters --- 
# Determine the vocabulary size for the embedding layer and add 1 for padding index (0)
all_token_indices = (
    list(tokenizer.formula_to_token.values()) +
    list(tokenizer.connective_map.values()) +
    list(tokenizer.special_map.values()) +
    [tokenizer.falsity_token]
)

VOCAB_SIZE = max(all_token_indices) + 1  
print(f"Vocabulary size (including padding token): {VOCAB_SIZE}")
print("Max index in batch:", train_features_batch.max().item())
assert train_features_batch.max().item() < VOCAB_SIZE, "Some token indices exceed the embedding size!"

EMBEDDING_DIM = 32
LR = 0.0005

Vocabulary size (including padding token): 108
Max index in batch: 107


---
#### **2.9 Build Model 4**

In [58]:
class BidirectionalGRU(nn.Module):
    def __init__(self, vocab_size, embedding_dim=32):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=0)

        self.gru1 = nn.GRU(
            input_size=embedding_dim,
            hidden_size=128,
            batch_first=True,
            bidirectional=True
        )

        self.gru2 = nn.GRU(
            input_size=128 * 2,  # Because bidirectional doubles output size
            hidden_size=64,
            batch_first=True,
            bidirectional=True
        )

        self.fc1 = nn.Linear(64 * 2, 32)  # 64*2 because second GRU is bidirectional
        self.relu = nn.ReLU()

        self.fc2 = nn.Linear(32, 1)  # Binary classification output (logit)

    def forward(self, x):
        # x shape: [batch_size, seq_len]
        x = self.embedding(x)              # [batch_size, seq_len, embed_dim]

        out1, _ = self.gru1(x)             # [batch_size, seq_len, 256]
        out2, _ = self.gru2(out1)          # [batch_size, seq_len, 128]

        out2_last = out2[:, -1, :]         # Use the last timestep's features
        x = self.relu(self.fc1(out2_last)) # [batch_size, 32]
        output = self.fc2(x)               # [batch_size, 1]

        return output  

In [107]:
model_4 = BidirectionalGRU(vocab_size=VOCAB_SIZE, embedding_dim=EMBEDDING_DIM).to(device)
model_4

BidirectionalGRU(
  (embedding): Embedding(108, 32, padding_idx=0)
  (gru1): GRU(32, 128, batch_first=True, bidirectional=True)
  (gru2): GRU(256, 64, batch_first=True, bidirectional=True)
  (fc1): Linear(in_features=128, out_features=32, bias=True)
  (relu): ReLU()
  (fc2): Linear(in_features=32, out_features=1, bias=True)
)

In [108]:
# --- Checking correct device ---
next(model_4.parameters()).device

device(type='cuda', index=0)

In [109]:
# --- Asymmetric Focal Loss for Binary Classification ---

# Due to the class imbalance in our dataset (~24% tautologies), the model may bias toward 
# predicting the majority class (non-tautologies). This leads to high accuracy but poor recall 
# on the minority class, which is undesirable in many reasoning or safety-critical settings.

# To address this, we use an Asymmetric Focal Loss, a refined version of the standard focal loss.
# The core idea is to:
# - Assign higher weight (α) to the minority class (tautologies) to penalize false negatives more.
# - Apply a modulating factor (1 - p)^γ to focus learning on hard examples.
# - Use separate α and γ values for each class for better control.

# Loss formula:
# L(y, ŷ) = 
#   - α_pos * y * (1 - ŷ)^γ_pos * log(ŷ)
#   - α_neg * (1 - y)^γ_neg * log(1 - ŷ)
# where:
#   - y is the true label (0 or 1)
#   - ŷ is the predicted probability (after sigmoid)
#   - α_pos/neg control class weighting
#   - γ_pos/neg control the focus on hard examples


class AsymmetricFocalLoss(nn.Module):
    def __init__(self, alpha_pos=1.0, alpha_neg=1.0, gamma_pos=2.0, gamma_neg=2.0, reduction='mean'):
        super().__init__()
        self.alpha_pos = alpha_pos
        self.alpha_neg = alpha_neg
        self.gamma_pos = gamma_pos
        self.gamma_neg = gamma_neg
        self.reduction = reduction

    def forward(self, logits, targets):
        probs = torch.sigmoid(logits)
        probs = torch.clamp(probs, 1e-6, 1 - 1e-6)  # Avoid log(0)

        # Loss for positive (tautology)
        pos_loss = self.alpha_pos * (1 - probs) ** self.gamma_pos * torch.log(probs)
        # Loss for negative (non-tautology)
        neg_loss = self.alpha_neg * (probs) ** self.gamma_neg * torch.log(1 - probs)

        # Full loss
        loss = -targets * pos_loss - (1 - targets) * neg_loss

        if self.reduction == 'mean':
            return loss.mean()
        elif self.reduction == 'sum':
            return loss.sum()
        return loss

In [110]:
# --- Loss and Optimizer Functions ---
loss_fn = AsymmetricFocalLoss(
    alpha_pos=0.3,  # minority (tautology)
    alpha_neg=0.7,  # majority
    gamma_pos=3.0,
    gamma_neg=1.5
)
optimizer = torch.optim.Adam(params=model_4.parameters(), 
                            lr=LR)

In [111]:
# Get a summary of Model_4
summary(model_4, 
         input_size=train_features_batch.shape, # [batch_size, seq_len]
         dtypes=[torch.long],
         verbose=0,
         col_names=["input_size", "output_size", "num_params", "trainable"],
         col_width=20,
         row_settings=["var_names"],
         device=device
)

Layer (type (var_name))                  Input Shape          Output Shape         Param #              Trainable
BidirectionalGRU (BidirectionalGRU)      [16, 4468]           [16, 1]              --                   True
├─Embedding (embedding)                  [16, 4468]           [16, 4468, 32]       3,456                True
├─GRU (gru1)                             [16, 4468, 32]       [16, 4468, 256]      124,416              True
├─GRU (gru2)                             [16, 4468, 256]      [16, 4468, 128]      123,648              True
├─Linear (fc1)                           [16, 128]            [16, 32]             4,128                True
├─ReLU (relu)                            [16, 32]             [16, 32]             --                   --
├─Linear (fc2)                           [16, 32]             [16, 1]              33                   True
Total params: 255,681
Trainable params: 255,681
Non-trainable params: 0
Total mult-adds (G): 17.73
Input size (MB): 0.57
Forw

In [114]:
# --- Train and Test Model_4 ---
set_seeds()
results = train(model=model_4,
                train_dataloader=train_dataloader,
                test_dataloader=test_dataloader,
                optimizer=optimizer,
                loss_fn=loss_fn,
                epochs=20,
                device=device)

  0%|          | 0/20 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 0.0689 | train_acc: 0.6562 | test_loss: 0.0609 | test_acc: 0.6538
Epoch: 2 | train_loss: 0.0364 | train_acc: 0.8113 | test_loss: 0.0248 | test_acc: 0.9078
Epoch: 3 | train_loss: 0.0249 | train_acc: 0.9049 | test_loss: 0.0233 | test_acc: 0.9078
Epoch: 4 | train_loss: 0.0274 | train_acc: 0.8899 | test_loss: 0.0234 | test_acc: 0.8819
Epoch: 5 | train_loss: 0.0244 | train_acc: 0.8959 | test_loss: 0.0214 | test_acc: 0.9087
Epoch: 6 | train_loss: 0.0220 | train_acc: 0.9065 | test_loss: 0.0202 | test_acc: 0.9100
Epoch: 7 | train_loss: 0.0212 | train_acc: 0.9075 | test_loss: 0.0207 | test_acc: 0.9116
Epoch: 8 | train_loss: 0.0205 | train_acc: 0.9094 | test_loss: 0.0194 | test_acc: 0.9116
Epoch: 9 | train_loss: 0.0202 | train_acc: 0.9088 | test_loss: 0.0189 | test_acc: 0.9128
Epoch: 10 | train_loss: 0.0192 | train_acc: 0.9113 | test_loss: 0.0175 | test_acc: 0.9116
Epoch: 11 | train_loss: 0.0182 | train_acc: 0.9110 | test_loss: 0.0167 | test_acc: 0.9141
Epoch: 12 | train_l

In [115]:
save_model(model=model_4,
           target_dir="models",
           model_name="Bidirectional_GRU_trained_with_dimacs_formulas_1.pth")

[INFO] Saving model to: models/Bidirectional_GRU_trained_with_dimacs_formulas_1.pth


---
#### **2.10 Build Model 5**

In [116]:
# Adding more hidden layers
class BidirectionalGRU_V2(nn.Module):
    def __init__(self, vocab_size, embedding_dim=32):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=0)

        self.gru1 = nn.GRU(
            input_size=embedding_dim,
            hidden_size=128,
            batch_first=True,
            bidirectional=True
        )

        self.gru2 = nn.GRU(
            input_size=128 * 2,  # Because bidirectional doubles output size
            hidden_size=128,
            batch_first=True,
            bidirectional=True
        )

        self.fc1 = nn.Linear(128 * 2, 64)  # 128*2 because second GRU is bidirectional
        self.relu = nn.ReLU()

        self.fc2 = nn.Linear(64, 1)  # Binary classification output (logit)

    def forward(self, x):
        # x shape: [batch_size, seq_len]
        x = self.embedding(x)              # [batch_size, seq_len, embed_dim]

        out1, _ = self.gru1(x)             # [batch_size, seq_len, 256]
        out2, _ = self.gru2(out1)          # [batch_size, seq_len, 128]

        out2_last = out2[:, -1, :]         # Use the last timestep's features
        x = self.relu(self.fc1(out2_last)) # [batch_size, 32]
        output = self.fc2(x)               # [batch_size, 1]

        return output  

In [117]:
model_5 = BidirectionalGRU_V2(vocab_size=VOCAB_SIZE, embedding_dim=EMBEDDING_DIM).to(device)
model_5

BidirectionalGRU_V2(
  (embedding): Embedding(108, 32, padding_idx=0)
  (gru1): GRU(32, 128, batch_first=True, bidirectional=True)
  (gru2): GRU(256, 128, batch_first=True, bidirectional=True)
  (fc1): Linear(in_features=256, out_features=64, bias=True)
  (relu): ReLU()
  (fc2): Linear(in_features=64, out_features=1, bias=True)
)

In [118]:
# --- Loss and Optimizer Functions ---
loss_fn = AsymmetricFocalLoss(
    alpha_pos=0.3,  # minority (tautology)
    alpha_neg=0.7,  # majority
    gamma_pos=3.0,
    gamma_neg=1.5
)
optimizer = torch.optim.Adam(params=model_5.parameters(), 
                            lr=LR)

In [119]:
# Get a summary of Model_5
summary(model_5, 
         input_size=train_features_batch.shape, # [batch_size, seq_len]
         dtypes=[torch.long],
         verbose=0,
         col_names=["input_size", "output_size", "num_params", "trainable"],
         col_width=20,
         row_settings=["var_names"],
         device=device
)

Layer (type (var_name))                       Input Shape          Output Shape         Param #              Trainable
BidirectionalGRU_V2 (BidirectionalGRU_V2)     [16, 4468]           [16, 1]              --                   True
├─Embedding (embedding)                       [16, 4468]           [16, 4468, 32]       3,456                True
├─GRU (gru1)                                  [16, 4468, 32]       [16, 4468, 256]      124,416              True
├─GRU (gru2)                                  [16, 4468, 256]      [16, 4468, 256]      296,448              True
├─Linear (fc1)                                [16, 256]            [16, 64]             16,448               True
├─ReLU (relu)                                 [16, 64]             [16, 64]             --                   --
├─Linear (fc2)                                [16, 64]             [16, 1]              65                   True
Total params: 440,833
Trainable params: 440,833
Non-trainable params: 0
Total mult-ad

In [120]:
# --- Train and Test Model_5 ---
set_seeds()
results = train(model=model_5,
                train_dataloader=train_dataloader,
                test_dataloader=test_dataloader,
                optimizer=optimizer,
                loss_fn=loss_fn,
                epochs=20,
                device=device)

  0%|          | 0/20 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 0.0701 | train_acc: 0.6606 | test_loss: 0.0692 | test_acc: 0.6538
Epoch: 2 | train_loss: 0.0690 | train_acc: 0.6629 | test_loss: 0.0692 | test_acc: 0.6534
Epoch: 3 | train_loss: 0.0689 | train_acc: 0.6629 | test_loss: 0.0696 | test_acc: 0.6534
Epoch: 4 | train_loss: 0.0689 | train_acc: 0.6629 | test_loss: 0.0697 | test_acc: 0.6534
Epoch: 5 | train_loss: 0.0688 | train_acc: 0.6629 | test_loss: 0.0694 | test_acc: 0.6534
Epoch: 6 | train_loss: 0.0681 | train_acc: 0.6628 | test_loss: 0.0680 | test_acc: 0.6534
Epoch: 7 | train_loss: 0.0400 | train_acc: 0.8049 | test_loss: 0.0229 | test_acc: 0.9094
Epoch: 8 | train_loss: 0.0231 | train_acc: 0.9054 | test_loss: 0.0215 | test_acc: 0.9053
Epoch: 9 | train_loss: 0.0219 | train_acc: 0.9061 | test_loss: 0.0210 | test_acc: 0.9087
Epoch: 10 | train_loss: 0.0201 | train_acc: 0.9085 | test_loss: 0.0193 | test_acc: 0.9056
Epoch: 11 | train_loss: 0.0194 | train_acc: 0.9084 | test_loss: 0.0187 | test_acc: 0.9084
Epoch: 12 | train_l

In [121]:
save_model(model=model_5,
           target_dir="models",
           model_name="Bidirectional_GRU_trained_with_dimacs_formulas_2.pth")

[INFO] Saving model to: models/Bidirectional_GRU_trained_with_dimacs_formulas_2.pth


**Possible reasons why the More Complex Model performs worst:**

- *Potential Overfitting*:
An increased number of hidden units results in higher model capacity, which in turn raises the risk of overfitting to the training data.
Evidence of overfitting is observable in the decline of test accuracy during the final epochs (epochs 17–19).

- *Insufficient or Insufficiently Complex Training Data*:
The dataset employed (synthetic data combined with selected SATLIB instances) may not be sufficiently rich or complex to fully exploit the added capacity of the second model. In scenarios where the data is relatively simple or homogeneous, a less complex model could generalizes more effectively.

- *Slow Initial Convergence*:
The second model exhibits slow learning progress during the initial epochs (1–6).
This behavior may suggest that the choice of activation function or optimization algorithm is not well suited to the increased model depth or capacity.

- *Suboptimal GRU Configuration*:
Employing a 128 to 128 GRU stack can lead to highly redundant output representations. In the absence of mechanisms designed to emphasize relevant components of the input, the model may struggle to maintain focus, resulting in reduced effectiveness.

---
#### **2.11 Best Model's prformance analysis**

In [62]:
# --- Loading Model 4 ---
# Instantiate a fresh instance of BidirectionalGRU()
model = BidirectionalGRU(vocab_size=VOCAB_SIZE, embedding_dim=EMBEDDING_DIM)

# Load model state dict 
MODEL_SAVE_PATH = 'models/Bidirectional_GRU_trained_with_dimacs_formulas_1.pth'
model.load_state_dict(torch.load(MODEL_SAVE_PATH))

# Put model to target device (if your data is on GPU, model will have to be on GPU to make predictions)
model.to(device)

print(f"Loaded model:\n{model}")
print(f"Model on device:\n{next(model.parameters()).device}")

Loaded model:
BidirectionalGRU(
  (embedding): Embedding(108, 32, padding_idx=0)
  (gru1): GRU(32, 128, batch_first=True, bidirectional=True)
  (gru2): GRU(256, 64, batch_first=True, bidirectional=True)
  (fc1): Linear(in_features=128, out_features=32, bias=True)
  (relu): ReLU()
  (fc2): Linear(in_features=32, out_features=1, bias=True)
)
Model on device:
cuda:0


In [84]:
# --- Adding a column 'source' to combined dataset to distinguish between synthetic formulas and dimacs formulas ---
dataset['source'] = 'synthetic'
df_satlib['source'] = 'satlib'

df_full = pd.concat([dataset, df_satlib], ignore_index=True)

# --- Adding indices to dataset---
df_full['index'] = df_full.index

In [85]:
print(f"{df_full[df_full['source'] == 'synthetic'].sample(1)}")
print(f"\n{df_full[df_full['source'] == 'satlib'].sample(1)}")

                                                formula  is_tautology  \
9791  (¬((¬A0 → ¬(A1 ∨ A2)) ∨ A3 ∨ A4 ∨ A0) → ¬((¬A0...          True   

         source  index  
9791  synthetic   9791  

                                                 formula  is_tautology  \
14138  (¬A0 ∨ A1 ∨ A2) ∧ (A3 ∨ ¬A4 ∨ A0) ∧ (¬A2 ∨ A5 ...          True   

       source  index  
14138  satlib  14138  


In [86]:
# --- Preparing Data ---
# Functionalize pipeline
def prepare_formula_dataset(dataset: pd.DataFrame,
                             test_size: float,
                             batch_size: int,
                             seed: int = 42
                            ) -> Tuple[DataLoader, DataLoader, CustomTokenizer, List[Formula], List[Formula]]:
    """
    General pipeline to parse, tokenize, and convert a dataset of formulas
    into PyTorch dataloaders for training/testing.

    Args:
        dataset (pd.DataFrame): DataFrame with columns 'formula' and 'is_tautology'.
        test_size (float): Proportion for test split.
        batch_size (int): Batch size for DataLoaders.
        seed (int): Random seed for reproducibility.

    Returns:
        Tuple containing:
            - train_dataloader
            - test_dataloader
            - tokenizer (CustomTokenizer)
            - X_train (parsed formulas)
            - X_test (parsed formulas)
    """
    # Parse formulas
    parsed_formulas = [parse_formula_string(f) for f in dataset['formula']]
    truth_values = dataset['is_tautology'].tolist()

    if 'original_index' in dataset.columns:
        original_indices = dataset['original_index'].tolist()
    else:
        original_indices = list(range(len(dataset)))

    # Train/test split
    X_train, X_test, y_train, y_test, idx_train, idx_test = train_test_split(
        parsed_formulas, truth_values, original_indices,
        test_size=test_size,
        random_state=seed
    )

    # Tokenizer
    tokenizer = CustomTokenizer()
    tokenizer.fit(X_train)

    X_train_seq = [tokenizer.tokenize(f) for f in X_train]
    X_test_seq = [tokenizer.tokenize(f) for f in X_test]

    X_train_tensors = [torch.tensor(seq, dtype=torch.long) for seq in X_train_seq]
    X_test_tensors = [torch.tensor(seq, dtype=torch.long) for seq in X_test_seq]

    X_train_padded = pad_sequence(X_train_tensors, batch_first=True, padding_value=0)
    X_test_padded = pad_sequence(X_test_tensors, batch_first=True, padding_value=0)

    y_train_tensor = torch.tensor(y_train, dtype=torch.float32)
    y_test_tensor = torch.tensor(y_test, dtype=torch.float32)

    train_data = FormulaDataset(X_train_padded, y_train_tensor)
    test_data = FormulaDataset(X_test_padded, y_test_tensor)

    train_dataloader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
    test_dataloader = DataLoader(test_data, batch_size=batch_size, shuffle=False)

    return train_dataloader, test_dataloader, X_train, X_test, y_train, y_test, idx_test


In [87]:
exp_train_dataloader, exp_test_dataloader, exp_X_train, exp_X_test, exp_y_train, exp_y_test, exp_idx_test = prepare_formula_dataset(
    dataset = df_full,
    test_size = 0.2,
    batch_size = BATCH_SIZE,
    seed = 42)

In [89]:
# Let's check out what we've created
print(f"Dataloaders: {exp_train_dataloader, exp_test_dataloader}") 
print(f"Length of train dataloader: {len(exp_train_dataloader)} batches of {BATCH_SIZE}")
print(f"Length of test dataloader: {len(exp_test_dataloader)} batches of {BATCH_SIZE}")

Dataloaders: (<torch.utils.data.dataloader.DataLoader object at 0x7f5fb60c1090>, <torch.utils.data.dataloader.DataLoader object at 0x7f5fb60c1030>)
Length of train dataloader: 800 batches of 16
Length of test dataloader: 200 batches of 16


In [90]:
train_data, test_data

(Dataset FormulaDataset
   Number of datapoints: 12800
   Input shape: torch.Size([4468])
   Target type: torch.float32,
 Dataset FormulaDataset
   Number of datapoints: 3200
   Input shape: torch.Size([4433])
   Target type: torch.float32)

In [93]:
# --- Model's predictions on Test set tracking indices ---
model.eval()
all_preds = []
all_targets = []
all_sources = []

with torch.inference_mode():
    for i, (x_batch, y_batch) in enumerate(exp_test_dataloader):
        x_batch = x_batch.to(device)
        y_batch = y_batch.to(device)

        logits = model(x_batch)
        probs = torch.sigmoid(logits).squeeze()
        preds = (probs > 0.5).float()

        all_preds.extend(preds.cpu().numpy())
        all_targets.extend(y_batch.cpu().numpy())

# Taking original 'sources' using idx_test
source_series = df_full.loc[exp_idx_test, 'source']
all_sources = source_series.tolist()

# Create a DataFrame
eval_df = pd.DataFrame({
    'source': all_sources,
    'y_true': all_targets,
    'y_pred': all_preds
})

In [106]:
sampled_pred_synt = eval_df[eval_df['source'] == 'synthetic'].sample(n=1, random_state=42)
sampled_pred_sat = eval_df[eval_df['source'] == 'satlib'].sample(n=1, random_state=41)
print(sampled_pred_synt)
print(sampled_pred_sat)

         source  y_true  y_pred
1002  synthetic     1.0     1.0
      source  y_true  y_pred
2378  satlib     1.0     0.0


In [109]:
# --- Analyzing Accuracy per source --- 
accuracy_per_source = eval_df.groupby('source')[['y_true', 'y_pred']].apply(
    lambda x: (x['y_true'] == x['y_pred']).mean()
)
print(f"Accuracy per source: \n{accuracy_per_source}")

Accuracy per source: 
source
satlib       0.677064
synthetic    0.970998
dtype: float64


From the previous results we can observe that the model is significantly more accurate on synthetic formulas (97%) compared to those from SATLIB (67%). This suggests a generalization gap when dealing with structurally different formulas.

In [110]:
# --- False negatives and False positives analysis ---
false_positives = eval_df[(eval_df['y_pred'] == 1.0) & (eval_df['y_true'] == 0.0)]
false_negatives = eval_df[(eval_df['y_pred'] == 0.0) & (eval_df['y_true'] == 1.0)]

print(f"Number of false positives: {len(false_positives)}")
print(f"Number of false negatives: {len(false_negatives)}")


Number of false positives: 3
Number of false negatives: 250


These results indicate a highly conservative model that prefers to classify a formula as a non-tautology rather than risk a false positive:
The model struggles to correctly identify many tautologies, likely due to one or more of the following reasons:

- The representation of tautologies is less frequent in the data (~ 34% of tautologies in the dataset).
- The model has learned a bias toward the majority class (non-tautologies).
- The tautologies in SATLIB (synthetic or DIMACS) differ significantly from synthetic ones, which are the majority.

#### **2.12 Build Model 6**
Since Model 4 performed bettera than Model 5, we try to improve its performances addind a Multihead Attention Layer and Positional Encoding to try to focus on the more relevant parts of input formulas.

In [58]:
import math 

class PositionalEncoding(nn.Module):
    def __init__(self, d_model, max_len=5000):
        super().__init__()
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(1)  # [max_len, 1, d_model]
        self.register_buffer('pe', pe)

    def forward(self, x):
        x = x + self.pe[:x.size(0), :]
        return x

class BidirectionalGRU_Multi_Att(nn.Module):
    def __init__(self, vocab_size, embedding_dim, n_heads): 
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=0)

        self.gru = nn.GRU(
            input_size=embedding_dim,
            hidden_size=128,
            batch_first=True,
            bidirectional=True
        )

        self.pos_encoder = PositionalEncoding(d_model=128 * 2)

        self.attn = nn.MultiheadAttention(embed_dim=128 * 2, num_heads=n_heads, batch_first=False)

        self.fc1 = nn.Linear(128 * 2, 32)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(32, 1)

    def forward(self, x):
        x = self.embedding(x)              # [batch_size, seq_len, embedding_dim]
        gru_out, _ = self.gru(x)           # [batch_size, seq_len, hidden_dim*2]
        gru_out = gru_out.transpose(0, 1)  # [seq_len, batch_size, hidden_dim*2]

        # Add positional encoding
        gru_out = self.pos_encoder(gru_out)

        # Self-attention
        attn_output, _ = self.attn(gru_out, gru_out, gru_out)
        attn_output = attn_output.mean(dim=0)  # [batch_size, hidden_dim*2]
        attn_output[0]

        x = self.relu(self.fc1(attn_output))
        output = self.fc2(x)
        return output

In [59]:
model_6 = BidirectionalGRU_Multi_Att(vocab_size=VOCAB_SIZE, 
                                     embedding_dim=EMBEDDING_DIM, 
                                     n_heads=4  
                                    ).to(device)

model_6

BidirectionalGRU_Multi_Att(
  (embedding): Embedding(108, 32, padding_idx=0)
  (gru): GRU(32, 128, batch_first=True, bidirectional=True)
  (pos_encoder): PositionalEncoding()
  (attn): MultiheadAttention(
    (out_proj): NonDynamicallyQuantizableLinear(in_features=256, out_features=256, bias=True)
  )
  (fc1): Linear(in_features=256, out_features=32, bias=True)
  (relu): ReLU()
  (fc2): Linear(in_features=32, out_features=1, bias=True)
)

In [60]:
# --- Asymmetric Focal Loss for Binary Classification ---

# Due to the class imbalance in our dataset (~24% tautologies), the model may bias toward 
# predicting the majority class (non-tautologies). This leads to high accuracy but poor recall 
# on the minority class, which is undesirable in many reasoning or safety-critical settings.

# To address this, we use an Asymmetric Focal Loss, a refined version of the standard focal loss.
# The core idea is to:
# - Assign higher weight (α) to the minority class (tautologies) to penalize false negatives more.
# - Apply a modulating factor (1 - p)^γ to focus learning on hard examples.
# - Use separate α and γ values for each class for better control.

# Loss formula:
# L(y, ŷ) = 
#   - α_pos * y * (1 - ŷ)^γ_pos * log(ŷ)
#   - α_neg * (1 - y)^γ_neg * log(1 - ŷ)
# where:
#   - y is the true label (0 or 1)
#   - ŷ is the predicted probability (after sigmoid)
#   - α_pos/neg control class weighting
#   - γ_pos/neg control the focus on hard examples


class AsymmetricFocalLoss(nn.Module):
    def __init__(self, alpha_pos=1.0, alpha_neg=1.0, gamma_pos=2.0, gamma_neg=2.0, reduction='mean'):
        super().__init__()
        self.alpha_pos = alpha_pos
        self.alpha_neg = alpha_neg
        self.gamma_pos = gamma_pos
        self.gamma_neg = gamma_neg
        self.reduction = reduction

    def forward(self, logits, targets):
        probs = torch.sigmoid(logits)
        probs = torch.clamp(probs, 1e-6, 1 - 1e-6)  # Avoid log(0)

        # Loss for positive (tautology)
        pos_loss = self.alpha_pos * (1 - probs) ** self.gamma_pos * torch.log(probs)
        # Loss for negative (non-tautology)
        neg_loss = self.alpha_neg * (probs) ** self.gamma_neg * torch.log(1 - probs)

        # Full loss
        loss = -targets * pos_loss - (1 - targets) * neg_loss

        if self.reduction == 'mean':
            return loss.mean()
        elif self.reduction == 'sum':
            return loss.sum()
        return loss

In [61]:
# --- Loss and Optimizer Functions ---
loss_fn = AsymmetricFocalLoss(
    alpha_pos=0.3,  # minority (tautology)
    alpha_neg=0.7,  # majority
    gamma_pos=3.0,
    gamma_neg=1.5
)
optimizer = torch.optim.Adam(params=model_6.parameters(), 
                            lr=LR)

In [62]:
summary(model_6, 
         input_size=train_features_batch.shape, # [batch_size, seq_len]
         dtypes=[torch.long],
         verbose=0,
         col_names=["input_size", "output_size", "num_params", "trainable"],
         col_width=20,
         row_settings=["var_names"],
         device=device
)

Layer (type (var_name))                                      Input Shape          Output Shape         Param #              Trainable
BidirectionalGRU_Multi_Att (BidirectionalGRU_Multi_Att)      [16, 4468]           [16, 1]              --                   True
├─Embedding (embedding)                                      [16, 4468]           [16, 4468, 32]       3,456                True
├─GRU (gru)                                                  [16, 4468, 32]       [16, 4468, 256]      124,416              True
├─PositionalEncoding (pos_encoder)                           [4468, 16, 256]      [4468, 16, 256]      --                   --
├─MultiheadAttention (attn)                                  [4468, 16, 256]      [4468, 16, 256]      263,168              True
├─Linear (fc1)                                               [16, 256]            [16, 32]             8,224                True
├─ReLU (relu)                                                [16, 32]             [16, 32]    

In [63]:
# --- Modified BATCH_SIZE parameter to avoid OutOfMemoryError ---

# Setup the batch size hyperparameter
BATCH_SIZE = 4

# Turn datasets into iterables (batches)
train_dataloader = DataLoader(train_data, # dataset to turn into iterable
                              batch_size=BATCH_SIZE, # how many samples per batch? 
                              shuffle=True # shuffle data every epoch?
                                           # This removes the data order, so the model does not learn it 
)

test_dataloader = DataLoader(test_data,
                             batch_size=BATCH_SIZE,
                             shuffle=False # don't necessarily have to shuffle the testing data
)

# Let's check out what we've created
print(f"Dataloaders: {train_dataloader, test_dataloader}") 
print(f"Length of train dataloader: {len(train_dataloader)} batches of {BATCH_SIZE}")
print(f"Length of test dataloader: {len(test_dataloader)} batches of {BATCH_SIZE}")

Dataloaders: (<torch.utils.data.dataloader.DataLoader object at 0x7fed58b55750>, <torch.utils.data.dataloader.DataLoader object at 0x7fed58b54910>)
Length of train dataloader: 3200 batches of 4
Length of test dataloader: 800 batches of 4


In [64]:
# --- Train and Test Model_6 ---
set_seeds()
results_model_6 = train(model=model_6,
                  train_dataloader=train_dataloader,
                  test_dataloader=test_dataloader,
                  optimizer=optimizer,
                  loss_fn=loss_fn,
                  epochs=20,
                  device=device)

  0%|          | 0/20 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 0.0310 | train_acc: 0.8752 | test_loss: 0.0366 | test_acc: 0.8906
Epoch: 2 | train_loss: 0.0255 | train_acc: 0.8862 | test_loss: 0.0241 | test_acc: 0.8909
Epoch: 3 | train_loss: 0.0232 | train_acc: 0.8918 | test_loss: 0.0218 | test_acc: 0.8988
Epoch: 4 | train_loss: 0.0207 | train_acc: 0.8967 | test_loss: 0.0192 | test_acc: 0.9075
Epoch: 5 | train_loss: 0.0189 | train_acc: 0.9035 | test_loss: 0.0229 | test_acc: 0.8869
Epoch: 6 | train_loss: 0.0183 | train_acc: 0.9047 | test_loss: 0.0194 | test_acc: 0.9069
Epoch: 7 | train_loss: 0.0177 | train_acc: 0.9061 | test_loss: 0.0168 | test_acc: 0.9106
Epoch: 8 | train_loss: 0.0167 | train_acc: 0.9079 | test_loss: 0.0168 | test_acc: 0.9094
Epoch: 9 | train_loss: 0.0158 | train_acc: 0.9104 | test_loss: 0.0176 | test_acc: 0.9103
Epoch: 10 | train_loss: 0.0155 | train_acc: 0.9093 | test_loss: 0.0161 | test_acc: 0.9178
Epoch: 11 | train_loss: 0.0148 | train_acc: 0.9120 | test_loss: 0.0155 | test_acc: 0.9147
Epoch: 12 | train_l

KeyboardInterrupt: 

In [46]:
truth_values = dataset['is_tautology'].tolist()

In [47]:
# --- Tree Node representation for Tree-LSTM ---
from typing import Optional, List, Tuple
from torch.utils.data import Dataset
import torch
import torch.nn as nn

class TreeNode:
    def __init__(self, label: int, left: Optional['TreeNode'] = None, right: Optional['TreeNode'] = None):
        self.label = label
        self.left = left
        self.right = right

# --- Convert logical Formula into TreeNode structure ---
def formula_to_tree(formula, tokenizer) -> TreeNode:
    if isinstance(formula, Letter):
        return TreeNode(tokenizer.formula_to_token[formula])
    elif isinstance(formula, Falsity):
        return TreeNode(tokenizer.falsity_token)
    elif isinstance(formula, UnaryConnectiveFormula):
        child = formula_to_tree(formula.formula, tokenizer)
        return TreeNode(tokenizer.connective_map[type(formula).__name__], left=child)
    elif isinstance(formula, BinaryConnectiveFormula):
        left = formula_to_tree(formula.left, tokenizer)
        right = formula_to_tree(formula.right, tokenizer)
        return TreeNode(tokenizer.connective_map[type(formula).__name__], left=left, right=right)
    else:
        raise ValueError("Unknown formula type")

# --- Custom Dataset ---
class TreeFormulaDataset(Dataset):
    def __init__(self, formulas, labels, tokenizer):
        self.trees = [formula_to_tree(f, tokenizer) for f in formulas]
        self.labels = labels

    def __len__(self):
        return len(self.trees)

    def __getitem__(self, idx):
        return self.trees[idx], torch.tensor(self.labels[idx], dtype=torch.float32)

# --- Binary Tree-LSTM Cell ---
class BinaryTreeLSTMCell(nn.Module):
    def __init__(self, in_dim, hidden_dim):
        super().__init__()
        self.in_dim = in_dim
        self.hidden_dim = hidden_dim

        self.W_iou = nn.Linear(in_dim, 3 * hidden_dim)
        self.U_iou = nn.Linear(2 * hidden_dim, 3 * hidden_dim)
        self.W_f = nn.Linear(in_dim, 2 * hidden_dim)
        self.U_f = nn.Linear(2 * hidden_dim, 2 * hidden_dim)

    def forward(self, x, left, right):
        h_l, c_l = left
        h_r, c_r = right

        h_cat = torch.cat([h_l, h_r], dim=-1)

        iou = self.W_iou(x) + self.U_iou(h_cat)
        i, o, u = torch.chunk(torch.sigmoid(iou), 3, dim=-1)

        f = torch.sigmoid(self.W_f(x) + self.U_f(h_cat))
        f_l, f_r = torch.chunk(f, 2, dim=-1)

        c = i * torch.tanh(u) + f_l * c_l + f_r * c_r
        h = o * torch.tanh(c)

        return h, c

# --- TreeLSTM Classifier ---
class TreeLSTMClassifier(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=0)
        self.tree_lstm_cell = BinaryTreeLSTMCell(embedding_dim, hidden_dim)
        self.fc1 = nn.Linear(hidden_dim, 32)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(32, 1)

    def forward(self, tree: TreeNode):
        def recurse(node: TreeNode):
            x = self.embedding(torch.tensor(node.label).to(next(self.parameters()).device))
            if node.left and node.right:
                h_l, c_l = recurse(node.left)
                h_r, c_r = recurse(node.right)
                return self.tree_lstm_cell(x, (h_l, c_l), (h_r, c_r))
            elif node.left:
                h_l, c_l = recurse(node.left)
                zero = torch.zeros_like(h_l)
                return self.tree_lstm_cell(x, (h_l, c_l), (zero, zero))
            else:
                zero = torch.zeros((self.tree_lstm_cell.hidden_dim,), device=x.device)
                return self.tree_lstm_cell(x, (zero, zero), (zero, zero))

        h_root, _ = recurse(tree)
        x = self.relu(self.fc1(h_root))
        return self.fc2(x).squeeze()

In [48]:
# Re-import necessary components due to state reset
import torch
from torch.utils.data import DataLoader, Dataset
import torch.nn as nn
from sklearn.model_selection import train_test_split
from typing import Optional, List

# Define TreeNode class
class TreeNode:
    def __init__(self, label: int, left: Optional['TreeNode'] = None, right: Optional['TreeNode'] = None):
        self.label = label
        self.left = left
        self.right = right

# Define TreeFormulaDataset
class TreeFormulaDataset(Dataset):
    def __init__(self, formulas, labels, tokenizer):
        self.trees = [self.formula_to_tree(f, tokenizer) for f in formulas]
        self.labels = labels

    def __len__(self):
        return len(self.trees)

    def __getitem__(self, idx):
        return self.trees[idx], torch.tensor(self.labels[idx], dtype=torch.float32)

    def formula_to_tree(self, formula, tokenizer) -> TreeNode:
        if isinstance(formula, str):
            formula = eval(formula)  # Assumes string eval is safe here
        if formula.__class__.__name__ == 'Letter':
            return TreeNode(tokenizer.formula_to_token[formula])
        elif formula.__class__.__name__ == 'Falsity':
            return TreeNode(tokenizer.falsity_token)
        elif 'UnaryConnectiveFormula' in [base.__name__ for base in formula.__class__.__bases__]:
            child = self.formula_to_tree(formula.formula, tokenizer)
            return TreeNode(tokenizer.connective_map[formula.__class__.__name__], left=child)
        elif 'BinaryConnectiveFormula' in [base.__name__ for base in formula.__class__.__bases__]:
            left = self.formula_to_tree(formula.left, tokenizer)
            right = self.formula_to_tree(formula.right, tokenizer)
            return TreeNode(tokenizer.connective_map[formula.__class__.__name__], left=left, right=right)
        else:
            raise ValueError("Unknown formula type")

# Prepare data loaders
def prepare_tree_dataset(parsed_formulas, truth_values, tokenizer, test_size=0.2, batch_size=32, seed=42):
    X_train, X_test, y_train, y_test = train_test_split(
        parsed_formulas, truth_values, test_size=test_size, random_state=seed)

    train_dataset = TreeFormulaDataset(X_train, y_train, tokenizer)
    test_dataset = TreeFormulaDataset(X_test, y_test, tokenizer)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, collate_fn=lambda x: x)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, collate_fn=lambda x: x)

    return train_loader, test_loader, X_train, X_test, y_train, y_test

# Training function
def train_tree_lstm(model, train_loader, test_loader, optimizer, loss_fn, epochs, device):
    model.to(device)
    results = {"train_loss": [], "train_acc": [], "test_loss": [], "test_acc": []}

    for epoch in range(epochs):
        model.train()
        train_loss, train_correct = 0, 0

        for batch in train_loader:
            trees, labels = zip(*batch)
            optimizer.zero_grad()
            preds = torch.stack([model(tree) for tree in trees])
            labels_tensor = torch.tensor(labels, dtype=torch.float32, device=device)
            loss = loss_fn(preds, labels_tensor)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            train_correct += (torch.round(torch.sigmoid(preds)) == labels_tensor).sum().item()

        train_loss /= len(train_loader)
        train_acc = train_correct / len(train_loader.dataset)

        model.eval()
        test_loss, test_correct = 0, 0
        with torch.no_grad():
            for batch in test_loader:
                trees, labels = zip(*batch)
                preds = torch.stack([model(tree) for tree in trees])
                labels_tensor = torch.tensor(labels, dtype=torch.float32, device=device)
                loss = loss_fn(preds, labels_tensor)
                test_loss += loss.item()
                test_correct += (torch.round(torch.sigmoid(preds)) == labels_tensor).sum().item()

        test_loss /= len(test_loader)
        test_acc = test_correct / len(test_loader.dataset)

        results["train_loss"].append(train_loss)
        results["train_acc"].append(train_acc)
        results["test_loss"].append(test_loss)
        results["test_acc"].append(test_acc)

        print(f"Epoch {epoch+1} | Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}, "
              f"Test Loss: {test_loss:.4f}, Test Acc: {test_acc:.4f}")
    
    return results


In [49]:
# 1. Prepara i dati
train_loader, test_loader, X_train, X_test, y_train, y_test = prepare_tree_dataset(
    parsed_formulas, truth_values, tokenizer, test_size=0.2, batch_size=32)

# 2. Inizializza il modello
model = TreeLSTMClassifier(
    vocab_size=len(tokenizer.formula_to_token) + 10,  # aggiungi margine
    embedding_dim=32,
    hidden_dim=64
)

# 3. Imposta optimizer e loss
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
loss_fn = torch.nn.BCEWithLogitsLoss()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# 4. Addestra il modello
results = train_tree_lstm(
    model=model,
    train_loader=train_loader,
    test_loader=test_loader,
    optimizer=optimizer,
    loss_fn=loss_fn,
    epochs=10,
    device=device
)

/pytorch/aten/src/ATen/native/cuda/Indexing.cu:1500: indexSelectSmallIndex: block: [0,0,0], thread: [0,0,0] Assertion `srcIndex < srcSelectDimSize` failed.
/pytorch/aten/src/ATen/native/cuda/Indexing.cu:1500: indexSelectSmallIndex: block: [0,0,0], thread: [1,0,0] Assertion `srcIndex < srcSelectDimSize` failed.
/pytorch/aten/src/ATen/native/cuda/Indexing.cu:1500: indexSelectSmallIndex: block: [0,0,0], thread: [2,0,0] Assertion `srcIndex < srcSelectDimSize` failed.
/pytorch/aten/src/ATen/native/cuda/Indexing.cu:1500: indexSelectSmallIndex: block: [0,0,0], thread: [3,0,0] Assertion `srcIndex < srcSelectDimSize` failed.
/pytorch/aten/src/ATen/native/cuda/Indexing.cu:1500: indexSelectSmallIndex: block: [0,0,0], thread: [4,0,0] Assertion `srcIndex < srcSelectDimSize` failed.
/pytorch/aten/src/ATen/native/cuda/Indexing.cu:1500: indexSelectSmallIndex: block: [0,0,0], thread: [5,0,0] Assertion `srcIndex < srcSelectDimSize` failed.
/pytorch/aten/src/ATen/native/cuda/Indexing.cu:1500: indexSelect

RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.
