In [None]:
!pip install spacy nltk pandas tqdm
!python -m spacy download en_core_web_sm
import nltk
nltk.download("wordnet")
nltk.download("omw-1.4")

Collecting en-core-web-sm==3.8.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m59.8 MB/s[0m eta [36m0:00:00[0m
[?25h[38;5;2m Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')
[38;5;3m Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


True

In [None]:
import os
import json
import random
import re
from pathlib import Path
import pandas as pd
import spacy
from nltk.corpus import wordnet as wn
import nltk
from tqdm import tqdm
nlp = spacy.load("en_core_web_sm")
import inflect

In [None]:
# Shared inflect engine. The helper functions defined below call its
# singular_noun(), plural_noun() and an() methods through this module-level name.
p = inflect.engine()

In [None]:
# Paraphrase templates for three-term syllogism sentences.
#
# "premises" maps a statement type ("all", "some", "no", "some_not") to a list of
# entries. Each entry holds:
#   "tpl"    - a str.format template using the placeholders
#              {a_sg}/{a_pl}/{a_sg_art} (subject) and {b_sg}/{b_pl}/{b_sg_art} (predicate)
#   "a_type" - which subject form the template uses: "singular", "plural" or "singular_art"
#   "b_type" - the same for the predicate
# Every premise template ends with a period.
#
# "conclusions" is a list of wrapper sentences. Each one ends with "{conclusion}."
# so that a rendered premise template can be dropped in at the very end of the
# sentence (a placeholder in the middle would leave a stray period mid-sentence).
COMPLEX_PATTERNS = {
    "premises": {
        "all": [
            # 1. Plural subject (A_pl) -> plural predicate (B_pl)
            {"tpl": "All {a_pl} are {b_pl}.", "a_type": "plural", "b_type": "plural"},
            {"tpl": "All {a_pl} are, in fact, {b_pl}.", "a_type": "plural", "b_type": "plural"},
            {"tpl": "All {a_pl} belong to the class of {b_pl}.", "a_type": "plural", "b_type": "plural"},
            {"tpl": "All {a_pl} belong to the category of {b_pl}.", "a_type": "plural", "b_type": "plural"},
            {"tpl": "All {a_pl} are, by definition, {b_pl}.", "a_type": "plural", "b_type": "plural"},
            {"tpl": "Without exception, all {a_pl} are {b_pl}.", "a_type": "plural", "b_type": "plural"},
            {"tpl": "{a_pl} are, without exception, {b_pl}.", "a_type": "plural", "b_type": "plural"},
            {"tpl": "Any and all {a_pl} are {b_pl}.", "a_type": "plural", "b_type": "plural"},
            # "No A that is not B" is a universal affirmative, so it belongs here
            # rather than in the "some" list.
            {"tpl": "There are no {a_pl} that are not {b_pl}.", "a_type": "plural", "b_type": "plural"},

            # 2. Plural subject (each of the A_pl) -> singular predicate with article (B_sg_art)
            # {b_sg_art} keeps the grammar correct (e.g. "...is an operator", not "...is operator").
            {"tpl": "Each of the {a_pl} is, in fact, {b_sg_art}.", "a_type": "plural", "b_type": "singular_art"},
            {"tpl": "Every one of the {a_pl} is {b_sg_art}.", "a_type": "plural", "b_type": "singular_art"},

            # 3. Singular subject (A_sg) -> singular predicate (B_sg_art or B_sg)
            {"tpl": "Every {a_sg} is {b_sg_art}.", "a_type": "singular", "b_type": "singular_art"},
            {"tpl": "Every {a_sg} belongs to the category of {b_sg_art}.", "a_type": "singular", "b_type": "singular_art"},
            {"tpl": "Every {a_sg} is classified as a type of {b_sg}.", "a_type": "singular", "b_type": "singular"},
            {"tpl": "Each and every {a_sg} is a type of {b_sg}.", "a_type": "singular", "b_type": "singular"},
            {"tpl": "Every {a_sg} is also {b_sg_art}.", "a_type": "singular", "b_type": "singular_art"},
            {"tpl": "Every single {a_sg} falls into the classification of {b_sg}.", "a_type": "singular", "b_type": "singular"},
            {"tpl": "Every single {a_sg} is inherently {b_sg_art}.", "a_type": "singular", "b_type": "singular_art"},
            {"tpl": "Each and every {a_sg} is {b_sg_art}.", "a_type": "singular", "b_type": "singular_art"},
            {"tpl": "Anything that is {a_sg_art} is {b_sg_art}.", "a_type": "singular_art", "b_type": "singular_art"},
            {"tpl": "It is true that every {a_sg} is {b_sg_art}.", "a_type": "singular", "b_type": "singular_art"},
            {"tpl": "Everything that is {a_sg_art} is {b_sg_art}.", "a_type": "singular_art", "b_type": "singular_art"},
            {"tpl": "It is a fact that every {a_sg} is {b_sg_art}.", "a_type": "singular", "b_type": "singular_art"},
            {"tpl": "Every single {a_sg} is {b_sg_art}.", "a_type": "singular", "b_type": "singular_art"},
            {"tpl": "Each {a_sg}, without exception, is {b_sg_art}.", "a_type": "singular", "b_type": "singular_art"},
            {"tpl": "Any {a_sg} is {b_sg_art}.", "a_type": "singular", "b_type": "singular_art"},
            {"tpl": "Every item classified as {a_sg} is inherently {b_sg_art}.", "a_type": "singular", "b_type": "singular_art"},
            {"tpl": "Anything that is {a_sg_art} invariably falls into the category of {b_sg_art}.", "a_type": "singular_art", "b_type": "singular_art"},
            {"tpl": "Anything that can be called {a_sg} is, without exception, {b_sg_art}.", "a_type": "singular", "b_type": "singular_art"},
            {"tpl": "{a_sg_art} is {b_sg_art}.", "a_type": "singular_art", "b_type": "singular_art"},
            {"tpl": "{a_sg} is, without exception, {b_sg_art}.", "a_type": "singular", "b_type": "singular_art"},
            {"tpl": "Every single {a_sg} is, in fact, {b_sg_art}.", "a_type": "singular", "b_type": "singular_art"},
            {"tpl": "Every {a_sg} can be described as {b_sg_art}.", "a_type": "singular", "b_type": "singular_art"},
            {"tpl": "It is the case that all {a_pl} are {b_pl}.", "a_type": "plural", "b_type": "plural"},
            {"tpl": "There is nothing that is {a_sg_art} that is not also {b_sg_art}.", "a_type": "singular_art", "b_type": "singular_art"},
            {"tpl": "It is the case that every {a_sg} is {b_sg_art}.", "a_type": "singular", "b_type": "singular_art"},

            # 4. Singular subject (A_sg) -> plural predicate (B_pl)
            {"tpl": "Every single {a_sg} belongs to {b_pl}.", "a_type": "singular", "b_type": "plural"}
        ],
        "some": [
            # 1. Plural subject (A_pl) -> plural predicate (B_pl)
            {"tpl": "Some {a_pl} are {b_pl}.", "a_type": "plural", "b_type": "plural"},
            {"tpl": "There are some {a_pl} that are considered {b_pl}.", "a_type": "plural", "b_type": "plural"},
            {"tpl": "There are certain {a_pl} that can be classified as {b_pl}.", "a_type": "plural", "b_type": "plural"},
            {"tpl": "Some {a_pl} are known to be {b_pl}.", "a_type": "plural", "b_type": "plural"},
            {"tpl": "Certain {a_pl} belong to the category of {b_pl}.", "a_type": "plural", "b_type": "plural"},
            {"tpl": "A portion of {a_pl} are considered {b_pl}.", "a_type": "plural", "b_type": "plural"},
            {"tpl": "A fraction of all {a_pl} are classified as {b_pl}.", "a_type": "plural", "b_type": "plural"},
            {"tpl": "A subset of {a_pl} are considered {b_pl}.", "a_type": "plural", "b_type": "plural"},
            {"tpl": "There exist {a_pl} that are also {b_pl}.", "a_type": "plural", "b_type": "plural"},
            {"tpl": "Among the {a_pl}, a certain number of them are {b_pl}.", "a_type": "plural", "b_type": "plural"},
            {"tpl": "A subset of {a_pl} includes {b_pl}.", "a_type": "plural", "b_type": "plural"},
            {"tpl": "It is true that some {a_pl} are {b_pl}.", "a_type": "plural", "b_type": "plural"},
            {"tpl": "There are some {a_pl} that are {b_pl}.", "a_type": "plural", "b_type": "plural"},
            {"tpl": "Among the group of {a_pl}, some individuals are also {b_pl}.", "a_type": "plural", "b_type": "plural"},
            {"tpl": "There are {a_pl} that are also {b_pl}.", "a_type": "plural", "b_type": "plural"},
            {"tpl": "A subset of {a_pl} constitutes {b_pl}.", "a_type": "plural", "b_type": "plural"},
            {"tpl": "A certain number of {a_pl} are {b_pl}.", "a_type": "plural", "b_type": "plural"},
            {"tpl": "A few {a_pl} are {b_pl}.", "a_type": "plural", "b_type": "plural"},
            {"tpl": "A portion of {a_pl} are {b_pl}.", "a_type": "plural", "b_type": "plural"},
            {"tpl": "Among the {a_pl}, a few are {b_pl}.", "a_type": "plural", "b_type": "plural"},
            {"tpl": "There are {a_pl}, and some of them are {b_pl}.", "a_type": "plural", "b_type": "plural"},
            {"tpl": "A number of {a_pl} are {b_pl}.", "a_type": "plural", "b_type": "plural"},
            {"tpl": "A subset of items that are {a_pl} are {b_pl}.", "a_type": "plural", "b_type": "plural"},
            {"tpl": "A few {a_pl} are, in fact, {b_pl}.", "a_type": "plural", "b_type": "plural"},
            {"tpl": "Some {a_pl} are, without a doubt, {b_pl}.", "a_type": "plural", "b_type": "plural"},
            {"tpl": "A group of {a_pl} can be described as {b_pl}.", "a_type": "plural", "b_type": "plural"},
            {"tpl": "A group of {a_pl} are {b_pl}.", "a_type": "plural", "b_type": "plural"},
            {"tpl": "Some {a_pl} happen to be {b_pl}.", "a_type": "plural", "b_type": "plural"},
            {"tpl": "Among {a_pl}, there are some that are {b_pl}.", "a_type": "plural", "b_type": "plural"},
            {"tpl": "Certain {a_pl} exist that are also {b_pl}.", "a_type": "plural", "b_type": "plural"},
            {"tpl": "The group of {a_pl} includes some that are {b_pl}.", "a_type": "plural", "b_type": "plural"},
            {"tpl": "A small number of {a_pl} are {b_pl}.", "a_type": "plural", "b_type": "plural"},
            {"tpl": "The class of {a_pl} and the class of {b_pl} have elements in common.", "a_type": "plural", "b_type": "plural"},
            {"tpl": "Of the items that are {a_pl}, a portion of them are {b_pl}.", "a_type": "plural", "b_type": "plural"},

            # 2. Singular forms and negated-negative phrasings
            {"tpl": "Something that is {a_sg_art} is {b_sg_art}.", "a_type": "singular_art", "b_type": "singular_art"},
            # Negation of the "no" phrasing "{a_pl} are non-{b_pl}."
            {"tpl": "It is not the case that {a_pl} are non-{b_pl}.", "a_type": "plural", "b_type": "plural"},
            {"tpl": "At least one quantity of {a_sg} is {b_sg}.", "a_type": "singular", "b_type": "singular"}
        ],
        "no": [
            # 1. Plural subject (A_pl) -> plural predicate (B_pl)
            # The most common and grammatically flexible form (e.g. "No drivers are operators").
            {"tpl": "No {a_pl} are {b_pl}.", "a_type": "plural", "b_type": "plural"},
            {"tpl": "It is certain that no {a_pl} belong to the class of {b_pl}.", "a_type": "plural", "b_type": "plural"},
            {"tpl": "No {a_pl} belong to the category of {b_pl}.", "a_type": "plural", "b_type": "plural"},
            {"tpl": "{a_pl} are never {b_pl}.", "a_type": "plural", "b_type": "plural"},
            {"tpl": "No {a_pl} are indeed {b_pl}.", "a_type": "plural", "b_type": "plural"},
            {"tpl": "{a_pl} are in no way {b_pl}.", "a_type": "plural", "b_type": "plural"},
            {"tpl": "There are no {a_pl} which are {b_pl}.", "a_type": "plural", "b_type": "plural"},
            {"tpl": "There are no {a_pl} that are {b_pl}.", "a_type": "plural", "b_type": "plural"},
            {"tpl": "It is true that no {a_pl} are {b_pl}.", "a_type": "plural", "b_type": "plural"},
            {"tpl": "Absolutely no {a_pl} are considered {b_pl}.", "a_type": "plural", "b_type": "plural"},
            {"tpl": "{a_pl} are not {b_pl}.", "a_type": "plural", "b_type": "plural"},
            {"tpl": "{a_pl} are non-{b_pl}.", "a_type": "plural", "b_type": "plural"},
            {"tpl": "Absolutely no {a_pl} constitute {b_pl}.", "a_type": "plural", "b_type": "plural"},
            {"tpl": "{a_pl} are entirely distinct from {b_pl}.", "a_type": "plural", "b_type": "plural"},
            {"tpl": "{a_pl} include no {b_pl} at all.", "a_type": "plural", "b_type": "plural"},
            {"tpl": "The set of {a_pl} contains no {b_pl}.", "a_type": "plural", "b_type": "plural"},
            {"tpl": "{a_pl} are completely separate from {b_pl}.", "a_type": "plural", "b_type": "plural"},
            {"tpl": "{a_pl} and {b_pl} do not overlap.", "a_type": "plural", "b_type": "plural"},
            {"tpl": "{a_pl} are not, in any way, {b_pl}.", "a_type": "plural", "b_type": "plural"},
            {"tpl": "{a_pl} and {b_pl} have no overlap whatsoever.", "a_type": "plural", "b_type": "plural"},
            {"tpl": "None of the {a_pl} are {b_pl}.", "a_type": "plural", "b_type": "plural"},
            {"tpl": "The class of {a_pl} and the class of {b_pl} do not overlap.", "a_type": "plural", "b_type": "plural"},
            {"tpl": "The set of {a_pl} and the set of {b_pl} are mutually exclusive.", "a_type": "plural", "b_type": "plural"},
            {"tpl": "The category of {a_pl} and the category of {b_pl} have no members in common.", "a_type": "plural", "b_type": "plural"},

            # 2. Singular subject (A_sg) -> singular predicate with article (B_sg_art)
            # Emphasises the individual (e.g. "Not a single driver is an operator").
            {"tpl": "Not a single {a_sg} can be considered {b_sg_art}.", "a_type": "singular", "b_type": "singular_art"},
            {"tpl": "It is impossible for any {a_sg} to be {b_sg_art}.", "a_type": "singular", "b_type": "singular_art"},
            {"tpl": "No {a_sg} is a type of {b_sg}.", "a_type": "singular", "b_type": "singular"},
            {"tpl": "There are no instances where {a_sg_art} is also {b_sg_art}.", "a_type": "singular_art", "b_type": "singular_art"},
            {"tpl": "{a_sg_art} cannot be {b_sg_art}.", "a_type": "singular_art", "b_type": "singular_art"},
            {"tpl": "There is no {a_sg} that is {b_sg_art}.", "a_type": "singular", "b_type": "singular_art"},
            {"tpl": "Not one single {a_sg} is {b_sg_art}.", "a_type": "singular", "b_type": "singular_art"},
            {"tpl": "There is not a single {a_sg} that is {b_sg_art}.", "a_type": "singular", "b_type": "singular_art"},
            {"tpl": "{a_sg_art} is never {b_sg_art}.", "a_type": "singular_art", "b_type": "singular_art"},
            {"tpl": "It is impossible for {a_sg_art} to be {b_sg_art}.", "a_type": "singular_art", "b_type": "singular_art"},
            {"tpl": "It is a fact that no creature classified as {a_sg} is also {b_sg_art}.", "a_type": "singular", "b_type": "singular_art"},
            {"tpl": "Nothing that is {a_sg_art} is {b_sg_art}.", "a_type": "singular_art", "b_type": "singular_art"},
            # Uses {a_sg_art} instead of a hard-coded "A" so vowel-initial terms get "An".
            {"tpl": "{a_sg_art} can never be {b_sg_art}.", "a_type": "singular_art", "b_type": "singular_art"},
            {"tpl": "No {a_sg} can be called {b_sg_art}.", "a_type": "singular", "b_type": "singular_art"},
            {"tpl": "Not a single {a_sg} is {b_sg_art}.", "a_type": "singular", "b_type": "singular_art"},
            {"tpl": "It is the case that no {a_sg} is {b_sg_art}.", "a_type": "singular", "b_type": "singular_art"},
            {"tpl": "It is true that no {a_sg} is {b_sg_art}.", "a_type": "singular", "b_type": "singular_art"},
            {"tpl": "It is not the case that some {a_pl} are {b_pl}.", "a_type": "plural", "b_type": "plural"},
            {"tpl": "Absolutely no {a_sg} is also {b_sg_art}.", "a_type": "singular", "b_type": "singular_art"},
            {"tpl": "It is not true that any {a_sg} is {b_sg_art}.", "a_type": "singular", "b_type": "singular_art"},
            {"tpl": "Every single {a_sg} is not {b_sg_art}.", "a_type": "singular", "b_type": "singular_art"},
            {"tpl": "Things that are {a_pl} are not {b_pl}.", "a_type": "plural", "b_type": "plural"},
            {"tpl": "Every {a_sg} is not {b_sg_art}.", "a_type": "singular", "b_type": "singular_art"}
        ],
        "some_not": [
            # 1. Plural subject (A_pl) -> plural predicate (B_pl)
            # The most direct phrasing (e.g. "Some drivers are not operators").
            {"tpl": "Some {a_pl} are not considered {b_pl}.", "a_type": "plural", "b_type": "plural"},
            {"tpl": "There are {a_pl} that are not categorized as {b_pl}.", "a_type": "plural", "b_type": "plural"},
            {"tpl": "There are certain {a_pl} that are not {b_pl}.", "a_type": "plural", "b_type": "plural"},
            {"tpl": "There are certain {a_pl} that are non-{b_pl}.", "a_type": "plural", "b_type": "plural"},
            {"tpl": "Among instances of {a_pl}, some do not possess the attribute of being {b_pl}.", "a_type": "plural", "b_type": "plural"},
            {"tpl": "A subset of {a_pl} exists that are not {b_pl}.", "a_type": "plural", "b_type": "plural"},
            {"tpl": "A subset of {a_pl} exists that are non-{b_pl}.", "a_type": "plural", "b_type": "plural"},
            {"tpl": "There are some {a_pl} which are not {b_pl}.", "a_type": "plural", "b_type": "plural"},
            {"tpl": "There are some {a_pl} which are non-{b_pl}.", "a_type": "plural", "b_type": "plural"},
            {"tpl": "There exist some things which are {a_pl} that are not {b_pl}.", "a_type": "plural", "b_type": "plural"},
            {"tpl": "There exist some things which are {a_pl} that are non-{b_pl}.", "a_type": "plural", "b_type": "plural"},
            {"tpl": "Some {a_pl} do not fall under the classification of {b_pl}.", "a_type": "plural", "b_type": "plural"},

            # 2. Singular subject (A_sg) -> singular predicate (B_sg_art or B_sg)
            # Equivalent "Not every A is B" structures (e.g. "Not every driver is an operator").
            {"tpl": "Not every {a_sg} is {b_sg_art}.", "a_type": "singular", "b_type": "singular_art"},
            {"tpl": "It is not true that every {a_sg} is {b_sg_art}.", "a_type": "singular", "b_type": "singular_art"},
            {"tpl": "At least one {a_sg} is not {b_sg_art}.", "a_type": "singular", "b_type": "singular_art"},
            {"tpl": "At least one {a_sg} is non-{b_sg}.", "a_type": "singular", "b_type": "singular"},
            {"tpl": "There is at least one {a_sg} that is not {b_sg_art}.", "a_type": "singular", "b_type": "singular_art"},
            {"tpl": "There is at least one {a_sg} that is non-{b_sg}.", "a_type": "singular", "b_type": "singular"},
            {"tpl": "Of the items that are {a_pl}, some are non-{b_pl}.", "a_type": "plural", "b_type": "plural"}
        ]
    },

    "conclusions": [
        "Therefore, {conclusion}.",
        "It is true that {conclusion}.",
        "Consequently, {conclusion}.",
        "Consequently, it can be said that {conclusion}.",
        "Consequently, it follows that {conclusion}.",
        "Consequently, it must be true that {conclusion}.",
        "Consequently, we can conclude that {conclusion}.",
        "Consequently, it is possible to infer that {conclusion}.",
        "Consequently, it can be concluded that {conclusion}.",
        "As a result, {conclusion}.",
        "As a result, it must be that {conclusion}.",
        "As a result, we can conclude that {conclusion}.",
        "It logically follows that {conclusion}.",
        "Thus, {conclusion}.",
        "Thus, it logically follows that {conclusion}.",
        "Thus, it can be concluded that {conclusion}.",
        "Thus, it is concluded that {conclusion}.",
        "Thus, it must be true that {conclusion}.",
        "Thus, we can validly conclude that {conclusion}.",
        "Thus, it is the case that {conclusion}.",
        "It follows that {conclusion}.",
        "From this, it follows that {conclusion}.",
        "It follows, then, that {conclusion}.",
        "Based on this, it must be the case that {conclusion}.",
        "This means that {conclusion}.",
        "It is therefore the case that {conclusion}.",
        "From this, it is concluded that {conclusion}.",
        "This implies that {conclusion}.",
        "As a consequence, {conclusion}.",
        "It follows from this that {conclusion}.",
        "It can be deduced that {conclusion}.",
        "The only conclusion is that {conclusion}.",
        "Hence, it can be concluded that {conclusion}.",
        "One must conclude that {conclusion}.",
        "This proves that {conclusion}.",
        "This leads to the conclusion that {conclusion}.",
        "From this, it can be concluded that {conclusion}.",
        "It is necessarily concluded that {conclusion}.",
        "It is concluded that {conclusion}.",
        "As such, it is necessarily true that {conclusion}.",
        "The result of this is that {conclusion}.",
        "It is a necessary conclusion that {conclusion}.",
        "It is necessarily true that {conclusion}.",
        "This has led to the conclusion that {conclusion}.",
        "It is thus the case that {conclusion}.",
        "A conclusion that can be drawn from this is that {conclusion}.",
        "It must be the case that {conclusion}.",
        "From these facts, it is clear that {conclusion}.",
        "It is therefore true that {conclusion}.",
        "From this, it can be claimed that {conclusion}.",
        "This logically means that {conclusion}.",
        "This is why {conclusion}.",
        "It is therefore a fact that {conclusion}.",
        "From this, it can be stated that {conclusion}.",
        "It can be concluded that {conclusion}.",
        "It must follow that {conclusion}.",
        "It necessarily follows that {conclusion}.",
        "This necessitates the conclusion that {conclusion}.",
        "It is the case that {conclusion}.",
        "A valid inference is that {conclusion}.",
        "From this, it is suggested that {conclusion}.",
        "It therefore stands that {conclusion}.",
        "So, {conclusion}.",
        "For this reason, {conclusion}.",
        "This necessitates that {conclusion}.",
        "On this basis, it has been stated that {conclusion}.",
        "Hence, {conclusion}.",
        "It has been said that {conclusion}.",
        "We can conclude from this that {conclusion}.",
        "It can be logically concluded that {conclusion}.",
        "Based on this, {conclusion}.",
        "This suggests that {conclusion}.",
        "From these statements, it can be concluded that {conclusion}.",
        "It is a logical necessity that {conclusion}.",
        "We can conclude that {conclusion}.",
        "It follows directly that {conclusion}.",
        "It must be true that {conclusion}.",
        "One might conclude that {conclusion}.",
        "One can therefore conclude that {conclusion}.",
        "It must therefore be true that {conclusion}.",
        "This means {conclusion}.",
        "It is therefore suggested that {conclusion}.",
        "It has been proposed that {conclusion}.",
        "One can conclude that {conclusion}.",
        "It's the case that {conclusion}.",
        "It is a logical consequence that {conclusion}.",
        "We must conclude that {conclusion}.",
        "It is therefore a sound deduction that {conclusion}.",
        "A consequence is that {conclusion}.",
        "This makes it true that {conclusion}.",
        "It therefore follows that {conclusion}.",
        "From this, {conclusion}.",
        "The consequence is that {conclusion}.",
        "It is an inescapable conclusion that {conclusion}.",
        "Therefore, it is implied that {conclusion}."
    ]
}

In [None]:
# Paraphrase templates for sentences that carry a fourth term D in addition to
# the subject A and predicate B (e.g. "All D that are A are B.").
#
# Same layout as the three-term premise templates: "premises" maps a statement
# type to a list of entries whose "tpl" is a str.format template. Besides the
# {a_*} / {b_*} placeholders, these templates use {d_sg}, {d_pl} and {d_sg_art};
# "a_type" / "b_type" / "d_type" name the form each template uses.
COMPLEX_PATTERNS_WITH_FOURTH_VAR = {
    "premises": {
        "all": [
            {"tpl": "Every {d_sg} that is {a_sg_art} is also {b_sg_art}.", "a_type": "singular_art", "b_type": "singular_art", "d_type": "singular"},
            {"tpl": "If {d_sg_art} is {a_sg_art}, then it must be {b_sg_art}.", "a_type": "singular_art", "b_type": "singular_art", "d_type": "singular_art"},
            {"tpl": "There is no {d_sg} that is {a_sg_art} which is not also {b_sg_art}.", "a_type": "singular_art", "b_type": "singular_art", "d_type": "singular"},
            {"tpl": "Whatever is both {d_sg_art} and {a_sg_art} is necessarily {b_sg_art}.", "a_type": "singular_art", "b_type": "singular_art", "d_type": "singular_art"},
        ],
        "some": [
            {"tpl": "There exists at least one {d_sg} that is both {a_sg_art} and {b_sg_art}.", "a_type": "singular_art", "b_type": "singular_art", "d_type": "singular"},
            {"tpl": "At least one {d_sg} that is {a_sg_art} is {b_sg_art}.", "a_type": "singular_art", "b_type": "singular_art", "d_type": "singular"},
            # Plural subject "Some things are ..." takes plural complements.
            {"tpl": "Some things are simultaneously {d_pl}, {a_pl}, and {b_pl}.", "a_type": "plural", "b_type": "plural", "d_type": "plural"},
            {"tpl": "In some cases, {d_sg_art} that is {a_sg_art} is also {b_sg_art}.", "a_type": "singular_art", "b_type": "singular_art", "d_type": "singular_art"},
            {"tpl": "It is not the case that no {d_sg} that is {a_sg_art} is {b_sg_art}.", "a_type": "singular_art", "b_type": "singular_art", "d_type": "singular"},
            {"tpl": "A portion of the {d_pl} that are {a_pl} are also {b_pl}.", "a_type": "plural", "b_type": "plural", "d_type": "plural"}
        ],
        "no": [
            {"tpl": "None of the {d_pl} that are {a_pl} are {b_pl}.", "a_type": "plural", "b_type": "plural", "d_type": "plural"},
            {"tpl": "Being {b_sg_art} and being {d_sg_art} that is {a_sg_art} are mutually exclusive.", "a_type": "singular_art", "b_type": "singular_art", "d_type": "singular_art"},
            {"tpl": "If {d_sg_art} is {a_sg_art}, it cannot be {b_sg_art}.", "a_type": "singular_art", "b_type": "singular_art", "d_type": "singular_art"},
            {"tpl": "The class of {d_pl} that are {a_pl} contains zero members of {b_pl}.", "a_type": "plural", "b_type": "plural", "d_type": "plural"},
            {"tpl": "Nothing is at once {d_sg_art}, {a_sg_art}, and {b_sg_art}.", "a_type": "singular_art", "b_type": "singular_art", "d_type": "singular_art"},
            # Plural subject {d_pl} with "are" takes plural complements.
            {"tpl": "There are no {d_pl} that are {a_pl} and also {b_pl}.", "a_type": "plural", "b_type": "plural", "d_type": "plural"}
        ],
        "some_not": [
            {"tpl": "Not every {d_sg} that is {a_sg_art} is {b_sg_art}.", "a_type": "singular_art", "b_type": "singular_art", "d_type": "singular"},
            {"tpl": "There is at least one {d_sg} that is {a_sg_art} but is not {b_sg_art}.", "a_type": "singular_art", "b_type": "singular_art", "d_type": "singular"},
            {"tpl": "Some members of the '{d_pl} and {a_pl}' group are excluded from {b_pl}.", "a_type": "plural", "b_type": "plural", "d_type": "plural"},
            {"tpl": "A few {d_pl} that are {a_pl} fail to be {b_pl}.", "a_type": "plural", "b_type": "plural", "d_type": "plural"},
            {"tpl": "While some {d_pl} that are {a_pl} might be {b_pl}, others are definitely not.", "a_type": "plural", "b_type": "plural", "d_type": "plural"},
        ]
    }
}

In [None]:
def get_singular_and_plural(term, inflect_forms=False):
    """
    Return a ``(singular, plural)`` pair for ``term``.

    By default the stripped term is returned unchanged in both positions, which
    is what this helper has been returning so far (the inflected result was
    computed but then discarded). The inflect lookups are now skipped entirely
    in that case.

    Args:
        term: The noun (or noun phrase) as it appears in the simple sentence.
        inflect_forms: When True, derive the two forms with the shared inflect
            engine ``p`` instead of echoing the input.

    Returns:
        A 2-tuple ``(singular, plural)`` of strings.

    NOTE(review): with the default, templates that use ``{a_sg}`` and ``{a_pl}``
    receive the same text for both; confirm that this is intended before
    relying on number agreement in the generated sentences.
    """
    term = term.strip()

    if not inflect_forms:
        return term, term

    singular_attempt = p.singular_noun(term)
    if singular_attempt is False:
        # singular_noun() answers False when it does not treat `term` as a
        # plural, so the input is taken to be the singular form already.
        return term, p.plural_noun(term)

    # The input was a plural (e.g. 'bottles'); keep it as the plural form.
    return singular_attempt, term

In [None]:
def get_article(term):
    """Strip ``term`` and return it prefixed with its indefinite article ('a' or 'an')."""
    cleaned = term.strip()
    # The shared inflect engine chooses between "a" and "an".
    with_article = p.an(cleaned)
    return with_article

In [None]:
def get_basic_syllogism_parts(syllogism):
    """
    Break a simple syllogism (e.g. 'All A are B. All B are C. All A are C.')
    into its sentences.

    The text is cut at every '.' character; each piece is whitespace-stripped
    and empty pieces are dropped. The returned sentences carry no trailing period.
    """
    stripped_pieces = map(str.strip, syllogism.split('.'))
    # filter(None, ...) discards the empty strings left by trailing/double periods.
    return list(filter(None, stripped_pieces))

In [None]:
# Ordered (statement type, pattern) table used by parse_sentence_for_type_and_terms.
# Order matters:
#   * the four-term forms ("... that are ... are ...") come before the
#     three-term forms, which would otherwise swallow them;
#   * within each group, "Some ... are not ..." comes before "Some ... are ...".
# Patterns are applied with fullmatch() to the sentence WITHOUT its final period.
_SENTENCE_PATTERNS = (
    ("all", re.compile(r"All (.*) that are (.*) are (.*)")),
    ("some_not", re.compile(r"Some (.*) that are (.*) are not (.*)")),
    ("some", re.compile(r"Some (.*) that are (.*) are (.*)")),
    ("no", re.compile(r"No (.*) that are (.*) are (.*)")),
    ("all", re.compile(r"All (.*) are (.*)")),
    ("some_not", re.compile(r"Some (.*) are not (.*)")),
    ("some", re.compile(r"Some (.*) are (.*)")),
    ("no", re.compile(r"No (.*) are (.*)")),
)


def parse_sentence_for_type_and_terms(sentence):
    """
    Analyse a simple sentence (e.g. 'All A are B.') and return its type and terms.

    Terms are returned exactly as written (plural/singular state untouched);
    converting them for the complex templates is the caller's job.

    Args:
        sentence: One sentence such as 'All A are B.', 'Some A are not B.',
            'No D that are A are B.'. A single trailing period is optional;
            surrounding whitespace is ignored.

    Returns:
        A 4-tuple ``(type, fourth, subject, predicate)`` where ``type`` is one
        of 'all', 'some', 'no', 'some_not' and ``fourth`` is the D term of a
        '... D that are A are B' sentence, or None for a plain three-term
        sentence.

    Raises:
        ValueError: If the sentence matches none of the known forms.
    """
    sentence = sentence.strip()

    # Drop exactly one trailing period so it never ends up inside the predicate
    # and so a sentence without a period keeps its final character.
    body = sentence[:-1] if sentence.endswith(".") else sentence

    for statement_type, pattern in _SENTENCE_PATTERNS:
        match = pattern.fullmatch(body)
        if match is None:
            continue
        terms = match.groups()
        if len(terms) == 3:
            fourth, subject, predicate = terms
        else:
            fourth = None
            subject, predicate = terms
        return statement_type, fourth, subject, predicate

    # Note: sentences that use articles (e.g. "All the dogs are pets") are
    # matched with the article left inside the captured term.
    raise ValueError(f"Could not parse basic syllogism type from sentence: {sentence}")

In [None]:
def split_syllogism_into_sentences(syllogism):
    """
    Separate a syllogism into its premises and its conclusion.

    The text is cut at every '.'; blank pieces are discarded and each remaining
    piece gets its period back. The LAST sentence is treated as the conclusion
    and everything before it as the premises.

    Returns:
        ``(premises, conclusion)`` - a list of premise sentences and the
        conclusion sentence, each ending in '.'.

    Raises:
        ValueError: If fewer than two sentences are found.
    """
    terminated = []
    for fragment in syllogism.split('.'):
        fragment = fragment.strip()
        if fragment:
            # Put the period back so the sentence parser sees a full sentence.
            terminated.append(fragment + '.')

    if len(terminated) < 2:
        raise ValueError(f"Syllogism must contain at least one premise and one conclusion: {syllogism}")

    *premises, conclusion = terminated
    return premises, conclusion

In [None]:
def create_sub_dict(term_original_subject, term_original_predicate, term_original_fourth, synonym_dict=None):
    """
    Build the placeholder -> text map used to fill a sentence template.

    For the subject (prefix 'a'), the predicate (prefix 'b') and, when one is
    given, the fourth term (prefix 'd'), three keys are produced:
    '<prefix>_sg', '<prefix>_pl' and '<prefix>_sg_art' (the singular form with
    its article). The 'd_*' keys are absent when ``term_original_fourth`` is falsy.

    ``synonym_dict`` is accepted for interface compatibility and is not used.
    """
    labelled_terms = [
        ("a", term_original_subject),
        ("b", term_original_predicate),
    ]
    if term_original_fourth:
        labelled_terms.append(("d", term_original_fourth))

    substitutions = {}
    for prefix, raw_term in labelled_terms:
        singular, plural = get_singular_and_plural(raw_term)
        substitutions[f"{prefix}_sg"] = singular
        substitutions[f"{prefix}_pl"] = plural
        substitutions[f"{prefix}_sg_art"] = get_article(singular)

    return substitutions

In [None]:
def safe_lowercase_first(s, sub_map):
    """
    Lowercase the first character of `s` unless `s` opens with a substituted term.

    A sentence that begins with one of the values in `sub_map` is returned
    unchanged so that the term keeps its original casing. The term must match
    as a whole word: the character right after it has to be non-alphanumeric
    (or the end of the string). This prevents a short term such as "A" from
    being mistaken for the start of an ordinary word like "All". Empty values
    in `sub_map` are ignored, since every string "starts with" the empty string.

    Args:
        s: The sentence to adjust.
        sub_map: Mapping whose values are the substituted term strings.

    Returns:
        `s` itself when it starts with a term, otherwise `s` with its first
        character lowercased. An empty `s` is returned as an empty string.
    """
    for val in sub_map.values():
        if not val or not s.startswith(val):
            continue
        # Character immediately following the matched term ('' at end of string).
        following = s[len(val):len(val) + 1]
        if not following.isalnum():
            return s  # do NOT lowercase
    return s[:1].lower() + s[1:]


In [None]:
def _render_statement(sentence, complex_patterns_data, complex_patterns_data_with_fourth_var):
    """Parse one simple sentence and rewrite it with a randomly chosen premise template.

    Returns a tuple (text, sub_map): the formatted sentence and the
    substitution map that was used to fill the template.
    """
    s_type, s_fourth, s_subject, s_predicate = parse_sentence_for_type_and_terms(sentence)
    # Note: Synonym dict is not used here for simplicity. If added, pass it in create_sub_dict
    sub_map = create_sub_dict(s_subject, s_predicate, s_fourth)

    # Sentences that carry a fourth term draw from the fourth-variable pattern set.
    patterns = complex_patterns_data_with_fourth_var if s_fourth else complex_patterns_data
    template = random.choice(patterns['premises'][s_type])
    return template['tpl'].format(**sub_map), sub_map


def convert_syllogism_to_complex(syllogism_data, complex_patterns_data, complex_patterns_data_with_fourth_var):
    """
    Converts a single simple syllogism object into a complex one, sentence-by-sentence.

    Every premise is parsed and re-expressed with a random template from the
    'premises' section of the matching pattern set. The conclusion is rendered
    the same way, lowercased at its start (unless it begins with a term) and
    embedded in a random wrapper from complex_patterns_data['conclusions'].

    Args:
        syllogism_data: dict with at least a 'syllogism' string; it is not
            modified.
        complex_patterns_data: pattern set with 'premises' (keyed by sentence
            type) and 'conclusions' (wrapper templates with a {conclusion} slot).
        complex_patterns_data_with_fourth_var: pattern set whose 'premises'
            are used for sentences that have a fourth term.

    Returns:
        A shallow copy of `syllogism_data` whose 'syllogism' value is the
        rewritten text.

    Raises:
        ValueError: propagated from sentence splitting / parsing.
        KeyError: if a sentence type or template placeholder has no match.
    """
    simple_syllogism = syllogism_data['syllogism']

    premises_simple, conclusion_simple = split_syllogism_into_sentences(simple_syllogism)

    # 1. Process Premises
    complex_premises = []
    for premise in premises_simple:
        p_complex, _ = _render_statement(
            premise, complex_patterns_data, complex_patterns_data_with_fourth_var
        )
        # Slicing (rather than indexing) keeps this safe for an empty template.
        complex_premises.append(p_complex[:1].upper() + p_complex[1:])

    # 2. Process Conclusion
    c_premise_complex, c_sub_map = _render_statement(
        conclusion_simple, complex_patterns_data, complex_patterns_data_with_fourth_var
    )
    c_premise_complex = safe_lowercase_first(c_premise_complex, c_sub_map)

    c_wrapper_template = random.choice(complex_patterns_data['conclusions'])
    c_complex = c_wrapper_template.format(conclusion=c_premise_complex)

    # The inner sentence and the wrapper may each contribute a period. Collapse
    # any run of trailing periods to exactly one instead of blindly dropping
    # the last character, which would eat real text if only one side had a period.
    c_trimmed = c_complex.rstrip('.')
    if len(c_trimmed) != len(c_complex):
        c_complex = c_trimmed + '.'
    c_complex = c_complex[:1].upper() + c_complex[1:]

    # 3. Reconstruct the final complex syllogism string
    final_complex_syllogism = " ".join(complex_premises) + " " + c_complex

    # 4. Create the new data object
    new_data = syllogism_data.copy()
    new_data['syllogism'] = final_complex_syllogism

    return new_data

In [None]:
def process_dataset(input_data, complex_patterns_data, complex_patterns_data_with_fourth_var):
    """
    Convert every syllogism record in `input_data` to its complex form.

    Each record is passed to convert_syllogism_to_complex together with the
    two pattern sets. A record whose conversion raises ValueError is reported
    on stdout and left out of the result; any other exception propagates.

    Returns:
        A list with the converted records, in input order.
    """
    converted = []
    for record in input_data:
        try:
            rewritten = convert_syllogism_to_complex(
                record, complex_patterns_data, complex_patterns_data_with_fourth_var
            )
        except ValueError as err:
            # Unparseable sentence: log it and move on to the next record.
            print(f" Skipping item ID {record.get('id', 'N/A')} due to parsing error: {err}")
        else:
            converted.append(rewritten)

    return converted

In [None]:
# Small hand-written sample used to smoke-test the conversion pipeline.
# Each record carries an id, the simple syllogism text, and two boolean labels
# that the conversion copies through unchanged.
sample_data = [
    {
        "id": "d41eddc7-5ad8-4468-87cf-229f7ae8260e",
        "syllogism": "All animals that are non-aquatic are fish. Some mammals are non-aquatic. Some mammals are not fish.",
        "validity": True,
        "plausibility": True
    },
    {
        "id": "033c1941-2d6e-4342-af63-d97ce2ece488",
        "syllogism": "All υ are Π. All Π are Ε. Some Ε are υ.",
        "validity": True,
        "plausibility": True
    },
    {
        "id": "1a2b3c4d-9e8f-7g6h-5i4j-3k2l1m0n9o8p",
        "syllogism": "No fish are mammals. Some mammals are whales. Some whales are not fish.",
        "validity": True,
        "plausibility": True
    }
]

# Run the conversion on the sample data with COMPLEX_PATTERNS and
# COMPLEX_PATTERNS_WITH_FOURTH_VAR. Templates are picked with random.choice,
# so the generated wording differs from run to run.
complex_syllogism_data = process_dataset(sample_data, COMPLEX_PATTERNS, COMPLEX_PATTERNS_WITH_FOURTH_VAR)

# Print the results; ensure_ascii=False keeps the non-ASCII symbols readable
# instead of emitting \u escapes.
print("\n--- Converted Complex Syllogisms ---")
print(json.dumps(complex_syllogism_data, indent=4, ensure_ascii = False))


--- Converted Complex Syllogisms ---
[
    {
        "id": "d41eddc7-5ad8-4468-87cf-229f7ae8260e",
        "syllogism": "Whatever is both an animals and a non-aquatic is necessarily a fish. A small number of mammals are non-aquatic. We can conclude from this that there exist some things which are mammals that are non-fish.",
        "validity": true,
        "plausibility": true
    },
    {
        "id": "033c1941-2d6e-4342-af63-d97ce2ece488",
        "syllogism": "Any and all υ are Π. There is nothing that is a Π that is not also a Ε. This has led to the conclusion that there are Ε, and some of them are υ.",
        "validity": true,
        "plausibility": true
    },
    {
        "id": "1a2b3c4d-9e8f-7g6h-5i4j-3k2l1m0n9o8p",
        "syllogism": "None of the fish are mammals. A subset of mammals includes whales. It logically follows that there exist some things which are whales that are non-fish.",
        "validity": true,
        "plausibility": true
    }
]


In [None]:
# Labels of the runs to perform. `lang` is only used in the log messages
# below; it does not influence which files are read or written.
languages = ["english"]
# languages = [""]

# Folder that receives the generated JSON file.
OUTPUT_FOLDER = '/content/complex_syllogisms_output'

for lang in languages:
    # NOTE(review): the input path is hard-coded and independent of `lang`, so
    # every entry in `languages` would read this same file — confirm before
    # adding more entries.
    INPUT_FILE = f"/content/st4_precomplex_symbolic.json"

    # The output file name is hard-coded as well (it is not derived from
    # INPUT_FILE), so several languages would overwrite the same output.
    OUTPUT_FILE_NAME = f"st4_complex_symbolic.json"

    # Define the full output path
    OUTPUT_PATH = os.path.join(OUTPUT_FOLDER, OUTPUT_FILE_NAME)

    # 1. Load Data
    print(f"\n--- Processing {lang.upper()} ---")
    print(f"1. Loading data from: {INPUT_FILE}")
    try:
        with open(INPUT_FILE, 'r', encoding = 'utf-8') as f:
            input_data = json.load(f)
    except Exception as e:
        print(f" Error loading data for {lang}: {e}. Skipping this language.")
        # Move on to the next language rather than stopping the whole notebook.
        continue

    # 2. Process Data
    print(f"2. Processing {len(input_data)} syllogisms with process_dataset...")

    # COMPLEX_PATTERNS and COMPLEX_PATTERNS_WITH_FOURTH_VAR must already be
    # defined by earlier cells. process_dataset itself skips items that raise
    # ValueError; any other exception lands here and aborts this language.
    try:
        complex_dataset = process_dataset(input_data, COMPLEX_PATTERNS, COMPLEX_PATTERNS_WITH_FOURTH_VAR)
        print(f" Conversion complete. Generated {len(complex_dataset)} items.")
    except Exception as e:
        print(f" Error processing data for {lang}: {e}. Skipping save.")
        continue


    # 3. Create directory if it doesn't exist
    print(f"3. Ensuring directory {OUTPUT_FOLDER} exists...")
    try:
        os.makedirs(OUTPUT_FOLDER, exist_ok=True)
        print(" Directory created or already exists.")
    except Exception as e:
        print(f" Error creating directory: {e}.")
        continue

    # 4. Save Data
    # ensure_ascii=False writes non-ASCII symbols as-is (file is opened as UTF-8).
    # Only IOError is handled here; other exceptions from json.dump propagate.
    print(f"4. Saving data to: {OUTPUT_PATH}")
    try:
        with open(OUTPUT_PATH, 'w', encoding = 'utf-8') as f:
            json.dump(complex_dataset, f, indent=4, ensure_ascii = False)
        print(f" Success! Complex dataset saved to {OUTPUT_PATH}")
    except IOError as e:
        print(f" Error saving output file: {e}. Check permissions.")


--- Processing ENGLISH ---
1. Loading data from: /content/st4_precomplex_symbolic.json
2. Processing 9016 syllogisms with process_dataset...
 Conversion complete. Generated 9016 items.
3. Ensuring directory /content/complex_syllogisms_output exists...
 Directory created or already exists.
4. Saving data to: /content/complex_syllogisms_output/st4_complex_symbolic.json
 Success! Complex dataset saved to /content/complex_syllogisms_output/st4_complex_symbolic.json


In [None]:
# !unzip colab_send.zip

In [None]:
# Bundle the generated output folder into translated.zip. The folder path is
# relative, so this presumably relies on the working directory being /content
# (where OUTPUT_FOLDER lives) — TODO confirm.
!zip -r translated.zip complex_syllogisms_output/

  adding: complex_syllogisms_output/ (stored 0%)
  adding: complex_syllogisms_output/st4_complex.json (deflated 79%)
  adding: complex_syllogisms_output/st4_complex_symbolic.json (deflated 80%)
