# Apply Perturbations

In [1]:
import random
import pandas as pd
import re
import string

In [2]:
# Run load_dataset.py for this data
df = pd.read_json("sensitivity.json")

## Toggle quantifier

In [3]:
def toggle_quantifier(formula):
    # The negation symbol to look for
    all_symbol = "∀"
    every_symbol = "∃"

    # Check if the formula starts with the negation symbol
    if all_symbol in formula:
        return formula.replace(all_symbol, every_symbol)
    else:
        return formula.replace(every_symbol, all_symbol)

# swap quantifiers if they are available
df['quantifier'] = df['premisesFOL'].apply(toggle_quantifier)

## Remove Negations

In [4]:
def toggle_negation(formula):
    # Define regex patterns for detecting negation
    negation_pattern = '¬'

    if negation_pattern in formula:
        formula = formula.replace(negation_pattern, '!')

    pred_pattern = r'\b[A-Za-z][A-Za-z0-9]*\([^()]*\)'

    def add_negation(match):
        return '¬' + match.group(0)

    # Substitute the match with the negated match
    modified_sentence = re.sub(pred_pattern, add_negation, formula)
    final = modified_sentence.replace('!¬','').replace('!(¬','(').replace('!','')

    return final

# remove negation if available
df['negation'] = df['premisesFOL'].apply(toggle_negation)

## Toggle Operators

In [5]:
def toggle_operators(formula):
    # The negation symbol to look for
    and_symbol = "∧"
    or_symbol = "∨"

    if and_symbol in formula and or_symbol in formula:
      return formula.replace(and_symbol, ';').replace(or_symbol, and_symbol).replace(';',or_symbol)

    elif and_symbol in formula:
      return formula.replace(and_symbol, or_symbol)
    elif or_symbol in formula:
      return formula.replace(or_symbol, and_symbol)
    else:
      return formula

# Swap and and or symbold where available
df['AndOr'] = df['premisesFOL'].apply(toggle_operators)

In [6]:
def toggle_xor(formula):
    # The negation symbol to look for
    and_symbol = "⊕"
    or_symbol = "∨"

    if and_symbol in formula and or_symbol in formula:
      return formula.replace(and_symbol, ';').replace(or_symbol, and_symbol).replace(';',or_symbol)

    elif and_symbol in formula:
      return formula.replace(and_symbol, or_symbol)
    elif or_symbol in formula:
      return formula.replace(or_symbol, and_symbol)
    else:
      return formula

# Swap or and xor symbols where available
df['OrXor'] = df['premisesFOL'].apply(toggle_xor)

## Get predicates

In [7]:
def extract_predicates(fol_statement):
    # Regular expression pattern to match predicates
    pattern = r'\b[A-Za-z][A-Za-z0-9]*\([^()]*\)'

    # Find all predicate matches in the statement
    predicates = re.findall(pattern, fol_statement)
    generalized_predicates = []

    for predicate in predicates:
        # Split arguments on commas and strip spaces
        args = predicate.split('(')[-1].split(')')[0].split(',')
        args = [arg.strip() for arg in args]

        # Replace each argument with a placeholder variable
        if len(args)>3:
          new_args = [chr(110 + i) for i in range(len(args))]
        else:
          new_args = [chr(120 + i) for i in range(len(args))]  # 120 is ASCII for 'x'

        # Get predicate name (assuming predicate format is "Name(args)")
        predicate_name = predicate.split('(')[0]

        # Rebuild the predicate with new variable placeholders
        new_predicate = f"{predicate_name}({', '.join(new_args)})"
        generalized_predicates.append(new_predicate)

    # Remove duplicates by converting the list to a set and then back to a list
    unique_predicates = list(set(generalized_predicates))

    if len(unique_predicates)==0:
      return fol_statement

    return f"{'∨ '.join(unique_predicates)}"

# Get predicate values for each FOL nad join them using and and operator
df['operator'] = df['premisesFOL'].apply(extract_predicates)

## Change Predicates

In [8]:
import re

def change_predicates(formula):
    # Define the pattern to match predicates with negation
    pred_pattern = r'¬\b[A-Za-z][A-Za-z0-9]*\('

    def replace_negation_with_not(match):
        # Replace the negation symbol '¬' with 'Not'
        return 'Not' + match.group(0)[1:]  # Skip the first character (¬) and prepend 'Not'

    # Substitute the match with 'Not' + predicate
    modified_sentence = re.sub(pred_pattern, replace_negation_with_not, formula)

    return modified_sentence

# change predicate to "Not"predicate where the not condition is present
df['predicate'] = df['premisesFOL'].apply(change_predicates)

## Remove all words

In [9]:
def convert_to_unique_alphabets(expression):
    # Create a list of unique alphabets
    unique_alphabets = list(string.ascii_uppercase)

    # Function to generate unique names
    def unique_name(index):
        return unique_alphabets[index]

    # Define a regex pattern to identify variables and constants
    pattern = r'\b[a-zA-Z_]\w*\b'  # Matches variables and constants

    # Find all variables and constants in the expression
    matches = set(re.findall(pattern, expression))

    # Remove any common variable names to avoid conflicts
    if 'x' in matches:
        matches.remove('x')
    if 'y' in matches:
        matches.remove('y')

    # Generate a mapping of each variable/constant to a unique alphabet
    name_mapping = {}
    for i, item in enumerate(matches):
        if i >= len(unique_alphabets):
            raise ValueError("Too many unique identifiers needed.")
        name_mapping[item] = unique_name(i)

    # Function to replace variables/constants with unique alphabet
    def replace_match(match):
        return name_mapping.get(match.group(0), match.group(0))

    # Replace variables/constants in the expression
    pattern = r'\b[a-zA-Z_]\w*\b'
    new_expression = re.sub(pattern, replace_match, expression)

    return new_expression

# Example usage
original_expression = '∀x (Apartment(x) ∧ LiveIn(ava, x) ∧ ¬Cover(x, water) ∧ ¬Cover(x, electricity) ∧ ¬Cover(x, gas) ∧ ¬Cover(x, heating))'
new_expression = convert_to_unique_alphabets(original_expression)
print(new_expression)


∀x (D(x) ∧ H(G, x) ∧ ¬F(x, C) ∧ ¬F(x, E) ∧ ¬F(x, B) ∧ ¬F(x, A))


In [10]:
# Use generic predicate names
df['variable'] = df['premisesFOL'].apply(convert_to_unique_alphabets)

In [11]:
# Save the data
df.to_json("perturbations.json", orient='records', indent=4)

## Get Stats

In [12]:
df.columns

Index(['premisesFOL', 'premisesNL', 'operators', 'count', 'quantifier',
       'negation', 'AndOr', 'OrXor', 'operator', 'predicate', 'variable'],
      dtype='object')

In [13]:
for column in ['quantifier', 'negation', 'AndOr', 'OrXor', 'operator', 'predicate', 'variable']:
    match_count = df.apply(lambda x: 1 if x['premisesFOL'] == x[column] else 0, axis=1).sum()
    match_percentage = (1-match_count / len(df)) * 100
    print(f'{column}: {round(match_percentage,2)}')


quantifier: 69.9
negation: 99.03
AndOr: 59.22
OrXor: 32.04
operator: 98.06
predicate: 22.33
variable: 100.0
