In [None]:
!pip install spacy nltk pandas tqdm
# Try to download a lightweight Swahili model if available (no-op if missing)
!python -m spacy download xx_ent_wiki_sm || true
import nltk
nltk.download("wordnet")
nltk.download("omw-1.4")

In [None]:
import os
import json
import random
import re
from pathlib import Path
import pandas as pd
import spacy
from nltk.corpus import wordnet as wn
import nltk
from tqdm import tqdm
# Prefer spaCy's small multi-language pipeline ("xx_ent_wiki_sm"). If loading it fails
# for any reason (for example the model was never downloaded), fall back to an empty
# multi-language ("xx") pipeline so that `nlp` is always defined.
try:
    nlp = spacy.load("xx_ent_wiki_sm")
except Exception:
    nlp = spacy.blank("xx")
import inflect

In [None]:
# Module-level inflect engine. The template helpers visible in this notebook do not
# call it: Swahili has no 'a'/'an' articles, so terms are inserted unchanged.
# NOTE(review): presumably kept for parity with an English version of this pipeline — confirm.
p = inflect.engine()

In [None]:
def _plural_entries(*templates):
    """Wrap each template string in the entry dict stored in COMPLEX_PATTERNS.

    Every two-term entry carries the same "plural" markers for both terms.
    """
    return [
        {"tpl": template, "a_type": "plural", "b_type": "plural"}
        for template in templates
    ]


# Swahili sentence templates for two-term statements, grouped by quantifier.
# {a_pl} is filled with the subject term and {b_pl} with the predicate term.
# "conclusions" holds wrappers whose {conclusion} slot receives an already rendered
# statement. The a_type/b_type fields are not read by the conversion code in this
# section of the notebook; only "tpl" is.
COMPLEX_PATTERNS = {
    "premises": {
        "all": _plural_entries(
            "Wote {a_pl} ni {b_pl}.",
            "Kila {a_pl} ni aina ya {b_pl}.",
            "Hakuna toleo, wote {a_pl} ni {b_pl}.",
            "{a_pl} kwa ujumla hujumuishwa ndani ya {b_pl}.",
            "Vyote {a_pl} vinahesabiwa kuwa {b_pl}.",
            "Kila mmoja wa {a_pl} ni mwanachama wa {b_pl}.",
            "Bila ubaguzi, {a_pl} wote ni {b_pl}.",
            "Ni kweli kwamba {a_pl} wote ni {b_pl}.",
            "Kundi zima la {a_pl} ni {b_pl}.",
            "Yeyote aliye {a_pl} lazima awe {b_pl}.",
            "Sifa ya kuwa {a_pl} inamaanisha kuwa {b_pl}.",
            "{a_pl} wote bila shaka ni {b_pl}.",
            "Kila kipengele cha {a_pl} kiko ndani ya {b_pl}.",
            "Hakika, {a_pl} wote ni {b_pl}.",
            "Inajulikana kuwa {a_pl} wote ni {b_pl}.",
        ),
        "some": _plural_entries(
            "Baadhi ya {a_pl} ni {b_pl}.",
            "Kuna baadhi ya {a_pl} ambazo ni {b_pl}.",
            "Sehemu ya {a_pl} inaweza kupatikana kama {b_pl}.",
            "Mmoja kati ya {a_pl} ni {b_pl}.",
            "Kuna kadhaa ya {a_pl} ambazo ni {b_pl}.",
            "Angalau {a_pl} mmoja ni {b_pl}.",
            "Kuna uwezekano wa {a_pl} fulani kuwa {b_pl}.",
            "Sio wote, lakini baadhi ya {a_pl} ni {b_pl}.",
            "Kuna mifano ya {a_pl} ambao ni {b_pl}.",
            "Baadhi ya washiriki wa {a_pl} ni {b_pl}.",
            "Inatokea kwamba baadhi ya {a_pl} ni {b_pl}.",
            "Kuna vitu katika {a_pl} ambavyo ni {b_pl}.",
            "Kiasi fulani cha {a_pl} ni {b_pl}.",
            "Wapo {a_pl} ambao pia ni {b_pl}.",
            "Baadhi ya vitu vikiwa {a_pl} basi ni {b_pl}.",
        ),
        "no": _plural_entries(
            "Hakuna {a_pl} ni {b_pl}.",
            "Hakuna mmoja kati ya {a_pl} anayeweza kuwa {b_pl}.",
            "{a_pl} haina uhusiano wa kuwa {b_pl}.",
            "Hakuna mfano wa {a_pl} kuwa {b_pl}.",
            "{a_pl} kabisa sio {b_pl}.",
            "Haiwezekani kwa {a_pl} kuwa {b_pl}.",
            "Kundi la {a_pl} halina mawasiliano na {b_pl}.",
            "Hakuna mshiriki hata mmoja wa {a_pl} aliye {b_pl}.",
            "Kwa vyovyote vile, hakuna {a_pl} ni {b_pl}.",
            "Sifa za {a_pl} na {b_pl} hazikutani.",
            "Ni kosa kusema kwamba {a_pl} ni {b_pl}.",
            "Kila {a_pl} amejitenga na kuwa {b_pl}.",
            "Hakuna mmoja wa {a_pl} aliye katika kundi la {b_pl}.",
            "{a_pl} na {b_pl} ni vitu tofauti kabisa.",
            "Huwezi kupata {a_pl} yeyote aliye {b_pl}.",
        ),
        "some_not": _plural_entries(
            "Baadhi ya {a_pl} si {b_pl}.",
            "Sehemu ya {a_pl} haijumuishi {b_pl}.",
            "Kuna {a_pl} ambao sio {b_pl}.",
            "Si wote {a_pl} ni {b_pl}.",
            "Sehemu fulani ya {a_pl} hawezi kuhesabiwa kuwa {b_pl}.",
            "Kuna angalau mmoja kati ya {a_pl} asiye {b_pl}.",
            "Kuna washiriki wa {a_pl} ambao hawako ndani ya {b_pl}.",
            "Sio kila {a_pl} ni {b_pl}.",
            "Kuna {a_pl} fulani wasio na sifa ya {b_pl}.",
            "Baadhi ya vitu vilivyo {a_pl} havina uhusiano na {b_pl}.",
            "Inatokea kwamba baadhi ya {a_pl} hawawi {b_pl}.",
            "Sio kweli kwamba {a_pl} wote ni {b_pl}.",
            "Kuna upungufu wa {b_pl} miongoni mwa baadhi ya {a_pl}.",
            "Baadhi ya {a_pl} wako nje ya kundi la {b_pl}.",
            "Kuna wale katika {a_pl} ambao si {b_pl}.",
        ),
    },
    "conclusions": [
        "Kwa hiyo, {conclusion}.",
        "Hivyo basi, {conclusion}.",
        "Kwa hivyo, {conclusion}.",
        "Kutokana na hayo, {conclusion}.",
        "Matokeo yake, {conclusion}.",
        "Hii inamaanisha kuwa, {conclusion}.",
        "Tunaweza kuhitimisha kwamba, {conclusion}.",
        "Ni wazi kuwa, {conclusion}.",
        "Kwa kuzingatia hayo, {conclusion}.",
        "Hivyo, ni dhahiri kwamba, {conclusion}.",
    ],
}

In [None]:
def _four_term_entries(*specs):
    """Build the entry dicts for COMPLEX_PATTERNS_WITH_FOURTH_VAR.

    Each spec is a ``(number, template)`` pair; the same grammatical-number marker is
    stored for all three terms (a, b and d) of the entry.
    """
    return [
        {"tpl": template, "a_type": number, "b_type": number, "d_type": number}
        for number, template in specs
    ]


_PL = "plural"
_SG = "singular"

# Swahili sentence templates for statements with an extra qualifying term, grouped by
# quantifier. Templates marked singular use the {d_sg}/{a_sg}/{b_sg} slots, templates
# marked plural use {d_pl}/{a_pl}/{b_pl}.
# NOTE(review): some wordings (e.g. "Kuna hakuna ...") look unidiomatic — have a
# Swahili speaker confirm before relying on these for evaluation data.
COMPLEX_PATTERNS_WITH_FOURTH_VAR = {
    "premises": {
        "all": _four_term_entries(
            (_PL, "Wote {d_pl} ambao ni {a_pl} ni {b_pl}."),
            (_SG, "Kila {d_sg} ambayo ni {a_sg} ni pia {b_sg}."),
            (_SG, "Hakuna {d_sg} ambaye ni {a_sg} na si {b_sg}."),
            (_SG, "Kila {d_sg} anayehusishwa na {a_sg} lazima awe {b_sg}."),
            (_PL, "{d_pl} zote zenye sifa ya {a_pl} ziko kwenye {b_pl}."),
            (_SG, "Ikiwa {d_sg} ni {a_sg}, basi ni lazima awe {b_sg}."),
            (_PL, "Mtu yeyote katika {d_pl} akiwa {a_pl} basi ni {b_pl}."),
            (_PL, "Washiriki wote wa {d_pl} wenye sifa ya {a_pl} ni {b_pl}."),
            (_PL, "Ni kweli kwamba {d_pl} wote walio {a_pl} ni pia {b_pl}."),
            (_PL, "Kila kipengele cha {d_pl} ambacho ni {a_pl} kinahesabiwa kuwa {b_pl}."),
            (_PL, "Vyote katika {d_pl} vilivyo {a_pl} ni {b_pl}."),
            (_SG, "Kila mara {d_sg} akiwa {a_sg}, yeye pia ni {b_sg}."),
            (_PL, "Mwanachama yeyote wa {d_pl} ambaye ni {a_pl} ni {b_pl}."),
            (_SG, "Ikiwa kitu ni {d_sg} na ni {a_sg}, basi ni {b_sg}."),
            (_PL, "Kwa vyovyote vile, {d_pl} walio {a_pl} ni {b_pl}."),
        ),
        "some": _four_term_entries(
            (_SG, "Kuna angalau {d_sg} mmoja ambaye ni {a_sg} na ni {b_sg}."),
            (_PL, "Baadhi ya {d_pl} ni {a_pl} na pia ni {b_pl}."),
            (_SG, "Katika baadhi ya kesi, {d_sg} ni {a_sg} na ni {b_sg}."),
            (_PL, "Sehemu ya {d_pl} zina sifa za {a_pl} na {b_pl}."),
            (_SG, "Kuna kadhaa ya {d_sg} zilizo {a_sg} na pia {b_sg}."),
            (_PL, "Inatokea kwamba baadhi ya {d_pl} ni {a_pl} na {b_pl} kwa pamoja."),
            (_PL, "Angalau mwanachama mmoja wa {d_pl} ni {a_pl} na {b_pl}."),
            (_PL, "Kuna mifano ya {d_pl} ambao ni washiriki wa {a_pl} na {b_pl}."),
            (_PL, "Baadhi ya {d_pl} hupatikana wakiwa na sifa za {a_pl} na {b_pl}."),
            (_PL, "Kuna vitu katika {d_pl} ambavyo ni {a_pl} na pia ni {b_pl}."),
            (_PL, "Wapo {d_pl} fulani ambao ni {a_pl} na {b_pl}."),
            (_PL, "Si nadra kupata {d_pl} aliyekuwa {a_pl} na {b_pl}."),
            (_PL, "Katika kundi la {d_pl}, wapo baadhi walio {a_pl} na {b_pl}."),
            (_PL, "Sehemu fulani ya {d_pl} ni {a_pl} na {b_pl}."),
            (_PL, "Baadhi ya wale walio {d_pl} wana sifa za {a_pl} na {b_pl}."),
        ),
        "no": _four_term_entries(
            (_PL, "Hakuna {d_pl} ambayo ni {a_pl} na pia {b_pl}."),
            (_SG, "Hakuna {d_sg} aliyekuwa {a_sg} na kuwa {b_sg}."),
            (_PL, "Kuna hakuna {d_pl} zinazojumuisha {a_pl} na {b_pl}."),
            (_SG, "Ikiwa {d_sg} ni {a_sg}, hawezi kuwa {b_sg}."),
            (_SG, "Hakuna mfano wa {d_sg} kuwa {a_sg} na kuwa {b_sg}."),
            (_PL, "Washiriki wa {d_pl} hawawezi kuwa {a_pl} na {b_pl} kwa wakati mmoja."),
            (_PL, "Huwezi kupata {d_pl} yeyote aliyekuwa {a_pl} na {b_pl}."),
            (_PL, "Katika kundi la {d_pl}, hakuna mmoja aliye {a_pl} na {b_pl}."),
            (_PL, "Ni kosa kusema kwamba {d_pl} yeyote ni {a_pl} na pia {b_pl}."),
            (_PL, "Hakuna sifa ya {a_pl} inayopatikana pamoja na {b_pl} miongoni mwa {d_pl}."),
            (_PL, "Kundi la {d_pl} haliruhusu mchanganyiko wa {a_pl} na {b_pl}."),
            (_PL, "Haiwezekani kwa washiriki wa {d_pl} kuwa {a_pl} na pia {b_pl}."),
            (_PL, "Hakuna hata mmoja katika {d_pl} aliye na sifa zote mbili za {a_pl} na {b_pl}."),
            (_SG, "Ikiwa kitu ni {d_sg} na ni {a_sg}, basi hakiwezi kuwa {b_sg}."),
            (_PL, "Hakuna mmoja aliye {d_pl} anayeweza kudai kuwa {a_pl} na {b_pl}."),
        ),
        "some_not": _four_term_entries(
            (_SG, "Sio kila {d_sg} ambaye ni {a_sg} ni {b_sg}."),
            (_SG, "Angalau {d_sg} mojawapo ni {a_sg} lakini sio {b_sg}."),
            (_PL, "Baadhi ya {d_pl} zinakuwa {a_pl} lakini si {b_pl}."),
            (_SG, "Kuna {d_sg} ambazo ni {a_sg} lakini haziko kwenye {b_sg}."),
            (_PL, "Sehemu fulani ya {d_pl} zilizo {a_pl} hazipo kama {b_pl}."),
            (_PL, "Kuna washiriki wa {d_pl} walio {a_pl} lakini si {b_pl}."),
            (_PL, "Washiriki fulani wa {d_pl} ni {a_pl} lakini hawajumuishwi katika {b_pl}."),
            (_PL, "Ni kweli kwamba baadhi ya {d_pl} ambao ni {a_pl} sio {b_pl}."),
            (_PL, "Sio wote katika kundi la {d_pl} walio {a_pl} ni washiriki wa {b_pl}."),
            (_PL, "Baadhi ya wale walio {d_pl} na wenye sifa ya {a_pl} wanakosa sifa ya {b_pl}."),
            (_PL, "Kuna {d_pl} fulani wenye sifa ya {a_pl} wasio na sifa ya {b_pl}."),
            (_SG, "Ikitokea {d_sg} ni {a_sg}, inawezekana asiwe {b_sg}."),
            (_PL, "Washiriki wa {d_pl} walio katika {a_pl} wengine si {b_pl}."),
            (_PL, "Kuna upungufu wa sifa ya {b_pl} miongoni mwa baadhi ya {d_pl} walio {a_pl}."),
            (_PL, "Inafahamika kwamba baadhi ya {d_pl} walio {a_pl} si {b_pl}."),
        ),
    }
}

In [None]:
def get_singular_and_plural(term):
    """Return the ``(singular, plural)`` pair used to fill templates for ``term``.

    No number inflection is applied: the term is stripped of surrounding whitespace
    and that same string is returned in both positions.
    """
    cleaned = term.strip()
    return (cleaned, cleaned)

In [None]:
def get_article(term):
    """Return ``term`` without surrounding whitespace.

    No article is prepended (Swahili has no 'a'/'an'), so the value can be dropped
    into a template as-is.
    """
    bare_term = term.strip()
    return bare_term

In [None]:
def get_basic_syllogism_parts(syllogism):
    """Split ``syllogism`` on '.' and return the non-empty, whitespace-trimmed pieces.

    The pieces are returned in order and without their trailing period.
    """
    parts = []
    for chunk in syllogism.split('.'):
        trimmed = chunk.strip()
        if trimmed:
            parts.append(trimmed)
    return parts

In [None]:
def parse_sentence_for_type_and_terms(sentence):
    """Classify a simple English syllogism sentence and extract its terms.

    Recognised shapes (case-sensitive), with or without one trailing period:

    * ``All|Some|No <D> that are <A> are [not] <B>``  -> fourth term ``D`` present
    * ``All|Some|No <A> are [not] <B>``               -> fourth term is ``None``

    Args:
        sentence: One sentence of the syllogism.

    Returns:
        A 4-tuple ``(type, fourth, subject, predicate)`` where ``type`` is one of
        ``"all"``, ``"some"``, ``"no"`` or ``"some_not"`` and ``fourth`` is ``None``
        for the two-term shapes.

    Raises:
        ValueError: If the sentence matches none of the shapes.
    """
    sentence = sentence.strip()
    # Drop a single optional trailing period up front. The previous patterns ended in
    # an unescaped '.', which swallowed the last character of the predicate whenever
    # the sentence had no final period.
    body = sentence[:-1] if sentence.endswith('.') else sentence

    # Order matters: the "that are" shapes must be tried before the two-term shapes,
    # and "are not" before plain "are", otherwise the broader pattern wins.
    rules = (
        ("all", r"All (.*) that are (.*) are (.*)"),
        ("some_not", r"Some (.*) that are (.*) are not (.*)"),
        ("some", r"Some (.*) that are (.*) are (.*)"),
        ("no", r"No (.*) that are (.*) are (.*)"),
        ("all", r"All (.*) are (.*)"),
        ("some_not", r"Some (.*) are not (.*)"),
        ("some", r"Some (.*) are (.*)"),
        ("no", r"No (.*) are (.*)"),
    )
    for kind, pattern in rules:
        match = re.fullmatch(pattern, body)
        if match is None:
            continue
        terms = match.groups()
        if len(terms) == 2:
            # Two-term shape: there is no qualifying fourth term.
            terms = (None,) + terms
        return (kind,) + terms
    raise ValueError(f"Could not parse basic syllogism type from sentence: {sentence}")

In [None]:
def split_syllogism_into_sentences(syllogism):
    """Split a syllogism into its premises and its conclusion.

    The text is cut on '.', empty pieces are discarded, and each remaining piece is
    trimmed and given back its final period. The last sentence is the conclusion.

    Returns:
        ``(premises, conclusion)`` — a list of premise sentences and one string.

    Raises:
        ValueError: If fewer than two sentences are found.
    """
    terminated = []
    for fragment in syllogism.split('.'):
        trimmed = fragment.strip()
        if trimmed:
            terminated.append(trimmed + '.')
    if len(terminated) < 2:
        raise ValueError(f"Syllogism must contain at least one premise and one conclusion: {syllogism}")
    *premises, conclusion = terminated
    return premises, conclusion

In [None]:
def create_sub_dict(term_original_subject, term_original_predicate, term_original_fourth, synonym_dict=None):
    """Build the placeholder -> text mapping used to fill a sentence template.

    For the subject (prefix ``a``), the predicate (prefix ``b``) and, when it is
    truthy, the fourth term (prefix ``d``), three keys are produced:
    ``<prefix>_sg``, ``<prefix>_pl`` and ``<prefix>_sg_art``.

    ``synonym_dict`` is accepted but not used by this function.
    """
    labelled_terms = [("a", term_original_subject), ("b", term_original_predicate)]
    if term_original_fourth:
        labelled_terms.append(("d", term_original_fourth))

    sub_map = {}
    for prefix, term in labelled_terms:
        singular, plural = get_singular_and_plural(term)
        sub_map[f"{prefix}_sg"] = singular
        sub_map[f"{prefix}_pl"] = plural
        sub_map[f"{prefix}_sg_art"] = get_article(singular)
    return sub_map

In [None]:
def safe_lowercase_first(s, sub_map):
    """Lower-case the first character of ``s`` unless ``s`` begins with a term.

    A sentence that starts with one of the substituted terms is returned unchanged so
    the term keeps its own casing (e.g. a symbol such as "Π").

    Args:
        s: The rendered sentence.
        sub_map: Placeholder -> term mapping; only its values are inspected.

    Returns:
        ``s`` itself when it starts with a non-empty term, otherwise ``s`` with its
        first character lower-cased. An empty ``s`` is returned as ``""``.
    """
    # Empty values are ignored: every string "starts with" "", which previously
    # switched the lower-casing off for all inputs.
    protected_prefixes = tuple(val for val in sub_map.values() if val)
    if s.startswith(protected_prefixes):
        return s
    return s[:1].lower() + s[1:]

In [None]:
def _render_random_template(statement, complex_patterns_data, complex_patterns_data_with_fourth_var):
    """Parse one simple sentence and fill a randomly chosen template for it.

    Returns the rendered text together with the substitution map that produced it.
    """
    kind, fourth, subject, predicate = parse_sentence_for_type_and_terms(statement)
    substitutions = create_sub_dict(subject, predicate, fourth)
    # Sentences with a qualifying fourth term draw from the three-term table.
    source = complex_patterns_data_with_fourth_var if fourth else complex_patterns_data
    chosen = random.choice(source['premises'][kind])
    return chosen['tpl'].format(**substitutions), substitutions


def convert_syllogism_to_complex(syllogism_data, complex_patterns_data, complex_patterns_data_with_fourth_var):
    """Rewrite the simple syllogism in ``syllogism_data`` using random templates.

    Each premise is parsed, rendered through a randomly chosen template and
    capitalised. The conclusion is rendered the same way, has its first letter
    lower-cased (unless it starts with a term), and is embedded in a randomly chosen
    conclusion wrapper from ``complex_patterns_data['conclusions']``.

    Args:
        syllogism_data: Mapping with a ``'syllogism'`` string; other keys are kept.
        complex_patterns_data: Two-term templates plus the conclusion wrappers.
        complex_patterns_data_with_fourth_var: Templates for sentences with a fourth term.

    Returns:
        A shallow copy of ``syllogism_data`` whose ``'syllogism'`` is the rewritten text.

    Raises:
        ValueError: Propagated from sentence splitting/parsing.
    """
    premise_sentences, conclusion_sentence = split_syllogism_into_sentences(syllogism_data['syllogism'])

    rendered_premises = []
    for premise_sentence in premise_sentences:
        text, _ = _render_random_template(
            premise_sentence, complex_patterns_data, complex_patterns_data_with_fourth_var
        )
        # Slicing capitalises the first character and leaves an empty string untouched.
        rendered_premises.append(text[:1].upper() + text[1:])

    inner, conclusion_terms = _render_random_template(
        conclusion_sentence, complex_patterns_data, complex_patterns_data_with_fourth_var
    )
    inner = safe_lowercase_first(inner, conclusion_terms)
    wrapper = random.choice(complex_patterns_data['conclusions'])
    # With the tables defined in this notebook both the inner template and the wrapper
    # end with '.', so the last character is dropped to avoid a doubled period.
    wrapped = wrapper.format(conclusion=inner)[:-1]
    wrapped = wrapped[:1].upper() + wrapped[1:]

    new_data = syllogism_data.copy()
    new_data['syllogism'] = " ".join(rendered_premises) + " " + wrapped
    return new_data

In [None]:
def process_dataset(input_data, complex_patterns_data, complex_patterns_data_with_fourth_var):
    """Convert every item of ``input_data`` and collect the successful results.

    Items whose conversion raises ``ValueError`` are reported on stdout (using the
    item's ``'id'`` or ``'N/A'``) and skipped; any other exception propagates.

    Returns:
        A list with the converted items, in input order.
    """
    converted = []
    for record in input_data:
        try:
            rewritten = convert_syllogism_to_complex(record, complex_patterns_data, complex_patterns_data_with_fourth_var)
        except ValueError as err:
            print(f" Skipping item ID {record.get('id', 'N/A')} due to parsing error: {err}")
        else:
            converted.append(rewritten)
    return converted

In [None]:
# Example data for a quick end-to-end run. The input stays in the simple English form
# ("All X are Y." / "Some X that are Y are not Z.") that the parser recognises; terms
# may be words or symbols.
sample_data = [
  {"id": "sw-1", "syllogism": "All animals that are non-aquatic are fish. Some mammals are non-aquatic. Some mammals are not fish.", "validity": True, "plausibility": True},
  {"id": "sw-2", "syllogism": "All υ are Π. All Π are Ε. Some Ε are υ.", "validity": True, "plausibility": True},
  {"id": "sw-3", "syllogism": "No fish are mammals. Some mammals are whales. Some whales are not fish.", "validity": True, "plausibility": True},
]
complex_syllogism_data = process_dataset(sample_data, COMPLEX_PATTERNS, COMPLEX_PATTERNS_WITH_FOURTH_VAR)
# The header starts with an escaped newline; a raw line break inside the quotes is a
# syntax error.
print("\n--- Matokeo ya mabadiliko ya mifano ya silojizimu (Swahili) ---")
# ensure_ascii=False keeps the Swahili text and Greek symbols readable in the output.
print(json.dumps(complex_syllogism_data, indent=4, ensure_ascii=False))