In [None]:
!pip install spacy nltk pandas tqdm
# optional: try to download a small Bengali model (may be unavailable)
!python -m spacy download xx_ent_wiki_sm || true
import nltk
nltk.download("wordnet")
nltk.download("omw-1.4")

In [None]:
import os
import json
import random
import re
from pathlib import Path
import pandas as pd
import spacy
from nltk.corpus import wordnet as wn
import nltk
from tqdm import tqdm
# Prefer the pretrained "xx_ent_wiki_sm" pipeline when it can be loaded.
try:
    nlp = spacy.load("xx_ent_wiki_sm")
except Exception:
    # Any load failure (e.g. the model was never downloaded) falls back to a
    # blank "xx" pipeline so the rest of the notebook can still run.
    nlp = spacy.blank("xx")
import inflect

In [None]:
# Keep inflect for API compatibility; Bengali templates don't use English articles.
# NOTE(review): `p` is not referenced by any code visible in this notebook --
# confirm nothing else relies on it before removing.
p = inflect.engine()

In [None]:
# Bengali surface templates for two-term statements.
#
# "premises" maps a statement type ("all", "some", "no", "some_not" -- the same
# labels returned by parse_sentence_for_type_and_terms) to a list of template
# records. Each record's "tpl" is a str.format template using the placeholders
# {a_sg}/{a_pl} (subject) and {b_sg}/{b_pl} (predicate), which are the keys
# produced by create_sub_dict. "a_type"/"b_type" record whether the template
# uses the singular or plural placeholder; no code visible in this notebook
# reads those two fields.
#
# "conclusions" holds wrapper templates with a single {conclusion} placeholder.
# Every wrapper and every premise template ends with the danda "।", so the code
# that wraps a statement must make sure only one terminator remains.
COMPLEX_PATTERNS = {
  "premises": {
    # Universal affirmative: "All A are B".
    "all": [
      {"tpl": "সমস্ত {a_pl} হল {b_pl}।", "a_type": "plural", "b_type": "plural"},
      {"tpl": "সবই {a_pl} {b_pl}-এর অন্তর্গত।", "a_type": "plural", "b_type": "plural"},
      {"tpl": "কোনও ব্যতিক্রম ছাড়া, সব {a_pl} {b_pl}।", "a_type": "plural", "b_type": "plural"},
      {"tpl": "{a_pl} সাধারণভাবে {b_pl} হিসেবে শ্রেণীবদ্ধ।", "a_type": "plural", "b_type": "plural"},
      {"tpl": "{a_pl} কে {b_pl} হিসেবে গণ্য করা হয়।", "a_type": "plural", "b_type": "plural"},
      {"tpl": "প্রতিটি {a_sg} নিশ্চিতভাবেই একটি {b_sg}।", "a_type": "singular", "b_type": "singular"},
      {"tpl": "সব ধরণের {a_pl} {b_pl} শ্রেণীর অন্তর্ভুক্ত।", "a_type": "plural", "b_type": "plural"},
      {"tpl": "যদি কিছু {a_sg} হয়, তবে সেটি অবশ্যই {b_sg} হবে।", "a_type": "singular", "b_type": "singular"},
      {"tpl": "এমন কোনো {a_sg} নেই যা {b_sg} নয়।", "a_type": "singular", "b_type": "singular"},
      {"tpl": "যেকোনো {a_sg} একটি {b_sg} হিসেবে বিবেচিত।", "a_type": "singular", "b_type": "singular"},
      {"tpl": "{a_pl} এর পুরো গোষ্ঠীটি {b_pl} এর অধীনে আসে।", "a_type": "plural", "b_type": "plural"},
      {"tpl": "নিঃসন্দেহে প্রতিটি {a_sg} হল {b_sg}।", "a_type": "singular", "b_type": "singular"},
      {"tpl": "{a_pl} বলতে অপরিহার্যভাবে {b_pl} বোঝায়।", "a_type": "plural", "b_type": "plural"},
      {"tpl": "সব {a_pl}-ই আসলে {b_pl}।", "a_type": "plural", "b_type": "plural"},
      {"tpl": "যা কিছুই {a_sg}, তা-ই {b_sg}।", "a_type": "singular", "b_type": "singular"}
    ],
    # Particular affirmative: "Some A are B".
    "some": [
      {"tpl": "কিছু {a_pl} হল {b_pl}।", "a_type": "plural", "b_type": "plural"},
      {"tpl": "কয়েকটি {a_pl} {b_pl} হিসেবে পাওয়া যায়।", "a_type": "plural", "b_type": "plural"},
      {"tpl": "{a_pl} এর মধ্যে কিছু {b_pl}।", "a_type": "plural", "b_type": "plural"},
      {"tpl": "আংশিকভাবে {a_pl} {b_pl}।", "a_type": "plural", "b_type": "plural"},
      {"tpl": "কয়েকটি {a_pl} নিশ্চিতভাবে {b_pl}।", "a_type": "plural", "b_type": "plural"},
      {"tpl": "এমন কিছু {a_pl} আছে যারা {b_pl}।", "a_type": "plural", "b_type": "plural"},
      {"tpl": "কমপক্ষে একটি {a_sg} আছে যা {b_sg}।", "a_type": "singular", "b_type": "singular"},
      {"tpl": "সব নয়, তবে কিছু {a_pl} {b_pl}।", "a_type": "plural", "b_type": "plural"},
      {"tpl": "মাঝে মাঝে {a_sg} {b_sg} হিসেবে দেখা যায়।", "a_type": "singular", "b_type": "singular"},
      {"tpl": "{a_pl} শ্রেণীর একটি অংশ {b_pl}।", "a_type": "plural", "b_type": "plural"},
      {"tpl": "বেশ কিছু {a_pl} হল {b_pl}।", "a_type": "plural", "b_type": "plural"},
      {"tpl": "এটি সম্ভব যে কিছু {a_pl} {b_pl}।", "a_type": "plural", "b_type": "plural"},
      {"tpl": "{a_pl} সমূহের মাঝে {b_pl} বিদ্যমান।", "a_type": "plural", "b_type": "plural"},
      {"tpl": "নির্দিষ্ট কিছু {a_pl} {b_pl} এর বৈশিষ্ট্যযুক্ত।", "a_type": "plural", "b_type": "plural"},
      {"tpl": "অল্প কিছু {a_pl} {b_pl} শ্রেণীতে পড়ে।", "a_type": "plural", "b_type": "plural"}
    ],
    # Universal negative: "No A are B".
    # NOTE(review): the "{a_pl} সম্পূর্ণরূপে {b_pl} নয়।" template may read as
    # "not entirely B" rather than "no A is B" -- confirm with a native speaker.
    "no": [
      {"tpl": "কোনও {a_pl} {b_pl} নয়।", "a_type": "plural", "b_type": "plural"},
      {"tpl": "সর্বশেষে, কোনো {a_pl}ই {b_pl} নয়।", "a_type": "plural", "b_type": "plural"},
      {"tpl": "{a_pl} এবং {b_pl} এর কোনো সাদৃশ্য নেই।", "a_type": "plural", "b_type": "plural"},
      {"tpl": "প্রকৃতপক্ষে, নেই এমন {a_pl} যা {b_pl}।", "a_type": "plural", "b_type": "plural"},
      {"tpl": "{a_pl} সম্পূর্ণরূপে {b_pl} নয়।", "a_type": "plural", "b_type": "plural"},
      {"tpl": "এমন কোনো {a_sg} থাকা অসম্ভব যা {b_sg}।", "a_type": "singular", "b_type": "singular"},
      {"tpl": "কোনো অবস্থাতেই {a_pl} {b_pl} হতে পারে না।", "a_type": "plural", "b_type": "plural"},
      {"tpl": "{a_pl} এবং {b_pl} সম্পূর্ণ আলাদা শ্রেণী।", "a_type": "plural", "b_type": "plural"},
      {"tpl": "একটি {a_sg}-ও {b_sg} নয়।", "a_type": "singular", "b_type": "singular"},
      {"tpl": "একেবারেই কোনো {a_pl} {b_pl}-এর তালিকায় নেই।", "a_type": "plural", "b_type": "plural"},
      {"tpl": "যদি কিছু {a_sg} হয়, তবে সেটি {b_sg} হতে পারে না।", "a_type": "singular", "b_type": "singular"},
      {"tpl": "{a_pl} এবং {b_pl} এর মধ্যে কোনো সাধারণ অংশ নেই।", "a_type": "plural", "b_type": "plural"},
      {"tpl": "এক জন {a_sg}-ও {b_sg} শ্রেণীর নয়।", "a_type": "singular", "b_type": "singular"},
      {"tpl": "পুরোপুরিভাবে কোনো {a_pl} {b_pl} নয়।", "a_type": "plural", "b_type": "plural"},
      {"tpl": "{a_sg} কখনও {b_sg} হতে পারে না।", "a_type": "singular", "b_type": "singular"}
    ],
    # Particular negative: "Some A are not B".
    "some_not": [
      {"tpl": "কিছু {a_pl} নয় {b_pl}।", "a_type": "plural", "b_type": "plural"},
      {"tpl": "{a_pl} এর মধ্যে কিছু অংশ {b_pl} নয়।", "a_type": "plural", "b_type": "plural"},
      {"tpl": "আংশিক {a_pl} যা {b_pl} নয়।", "a_type": "plural", "b_type": "plural"},
      {"tpl": "কয়েকটি {a_pl} অন্তর্ভুক্ত নয় {b_pl}।", "a_type": "plural", "b_type": "plural"},
      {"tpl": "সব {a_pl} {b_pl} নয়।", "a_type": "plural", "b_type": "plural"},
      {"tpl": "প্রতিটি {a_sg} {b_sg} হতে হবে এমন নয়।", "a_type": "singular", "b_type": "singular"},
      {"tpl": "এমন কিছু {a_pl} আছে যারা {b_pl} এর বাইরে।", "a_type": "plural", "b_type": "plural"},
      {"tpl": "কমপক্ষে একটি {a_sg} আছে যা {b_sg} নয়।", "a_type": "singular", "b_type": "singular"},
      {"tpl": "{a_pl} সমূহের একটি অংশ {b_pl} শ্রেণী বহির্ভূত।", "a_type": "plural", "b_type": "plural"},
      {"tpl": "এটি সত্যি যে সব {a_pl} {b_pl} নয়।", "a_type": "plural", "b_type": "plural"},
      {"tpl": "কিছু {a_pl} {b_pl} এর বৈশিষ্ট্য ধারণ করে না।", "a_type": "plural", "b_type": "plural"},
      {"tpl": "সব ধরণের {a_pl} {b_pl} নয়।", "a_type": "plural", "b_type": "plural"},
      {"tpl": "{a_pl} এর মাঝে এমন কিছু আছে যা {b_pl} নয়।", "a_type": "plural", "b_type": "plural"},
      {"tpl": "নির্দিষ্ট কিছু {a_pl} {b_pl} হতে বিরত থাকে।", "a_type": "plural", "b_type": "plural"},
      {"tpl": "অল্প কিছু {a_pl} আছে যারা {b_pl} নয়।", "a_type": "plural", "b_type": "plural"}
    ]
  },
  # Wrappers that introduce the rendered conclusion ("Therefore, ...").
  "conclusions": [
    "অতএব, {conclusion}।",
    "এ থেকে বোঝা যায়, {conclusion}।",
    "সুতরাং, {conclusion}।",
    "এনিয়ে বলা যায়, {conclusion}।",
    "পরিসমাপ্তিতে, {conclusion}।",
    "ফলস্বরূপ, {conclusion}।",
    "এর থেকে আমরা সিদ্ধান্তে আসতে পারি যে, {conclusion}।",
    "এর মানে দাঁড়ায়, {conclusion}।",
    "সংক্ষেপে বললে, {conclusion}।",
    "নিশ্চিতভাবেই, {conclusion}।"
  ]
}

In [None]:
# Bengali surface templates for statements with an extra restricting term,
# i.e. inputs of the form "<Quantifier> D that are A are (not) B".
#
# Same layout as the two-term pattern table: "premises" maps a statement type
# ("all", "some", "no", "some_not") to template records. Besides the
# {a_*}/{b_*} placeholders, every template also uses {d_sg}/{d_pl} for the
# restricting term; "d_type" records which form is used. The "*_type" fields
# are not read by any code visible in this notebook.
#
# This table has no "conclusions" entry; conclusion wrappers are taken from
# the two-term pattern table by convert_syllogism_to_complex.
COMPLEX_PATTERNS_WITH_FOURTH_VAR = {
  "premises": {
    # "All D that are A are B".
    "all": [
      {"tpl": "সমস্ত {d_pl} যারা {a_pl}, তারা {b_pl}।", "a_type": "plural", "b_type": "plural", "d_type": "plural"},
      {"tpl": "যদি {d_sg} হয় {a_sg}, তবে এটি {b_sg}।", "a_type": "singular", "b_type": "singular", "d_type": "singular"},
      {"tpl": "কোনও {d_sg} নেই যা {a_sg} এবং নয় {b_sg}।", "a_type": "singular", "b_type": "singular", "d_type": "singular"},
      {"tpl": "যারা {d_sg} ও {a_sg}, তারা অবশ্যই {b_sg}।", "a_type": "singular", "b_type": "singular", "d_type": "singular"},
      {"tpl": "{d_pl} এর মধ্যে সব যে {a_pl}, তারা {b_pl}।", "a_type": "plural", "b_type": "plural", "d_type": "plural"},
      {"tpl": "যেকোনো {d_sg} যা {a_sg} হিসেবে বিবেচিত, সেটি {b_sg}।", "a_type": "singular", "b_type": "singular", "d_type": "singular"},
      {"tpl": "প্রতিটি {d_sg} যা একটি {a_sg}, তা আসলে একটি {b_sg}।", "a_type": "singular", "b_type": "singular", "d_type": "singular"},
      {"tpl": "{d_pl} এর অন্তর্গত সব {a_pl} হল {b_pl}।", "a_type": "plural", "b_type": "plural", "d_type": "plural"},
      {"tpl": "নিঃসন্দেহে {d_pl} শ্রেণীর যারা {a_pl}, তারা {b_pl}।", "a_type": "plural", "b_type": "plural", "d_type": "plural"},
      {"tpl": "যদি কোনো ব্যক্তি {d_sg} এবং {a_sg} হয়, তবে সে {b_sg} হবে।", "a_type": "singular", "b_type": "singular", "d_type": "singular"},
      {"tpl": "সমস্ত {d_pl} যারা একই সাথে {a_pl}, তারা {b_pl} শ্রেণীর।", "a_type": "plural", "b_type": "plural", "d_type": "plural"},
      {"tpl": "এমন কোনো {d_sg} পাওয়া যাবে না যা {a_sg} কিন্তু {b_sg} নয়।", "a_type": "singular", "b_type": "singular", "d_type": "singular"},
      {"tpl": "সব {d_pl} যারা {a_pl} হিসেবে গণ্য, তারা {b_pl}।", "a_type": "plural", "b_type": "plural", "d_type": "plural"},
      {"tpl": "যা কিছুই {d_sg} ও {a_sg}, তা অবশ্যই {b_sg}।", "a_type": "singular", "b_type": "singular", "d_type": "singular"},
      {"tpl": "{d_pl} এর মধ্যে সব {a_pl}-ই মূলত {b_pl}।", "a_type": "plural", "b_type": "plural", "d_type": "plural"}
    ],
    # "Some D that are A are B".
    "some": [
      {"tpl": "কমপক্ষে একটি {d_sg} আছে যা {a_sg} এবং {b_sg} উভয়ই।", "a_type": "singular", "b_type": "singular", "d_type": "singular"},
      {"tpl": "কিছু {d_pl} একই সাথে {a_pl} এবং {b_pl}।", "a_type": "plural", "b_type": "plural", "d_type": "plural"},
      {"tpl": "কিছু ক্ষেত্রে, {d_sg} হচ্ছে {a_sg} এবং {b_sg}।", "a_type": "singular", "b_type": "singular", "d_type": "singular"},
      {"tpl": "আংশিকভাবে {d_pl} {a_pl} ও {b_pl} উভয়ই।", "a_type": "plural", "b_type": "plural", "d_type": "plural"},
      {"tpl": "কয়েকটি {d_sg} একই সঙ্গে {a_sg} ও {b_sg}।", "a_type": "singular", "b_type": "singular", "d_type": "singular"},
      {"tpl": "{d_pl} এর মাঝে এমন কিছু আছে যারা {a_pl} এবং {b_pl}।", "a_type": "plural", "b_type": "plural", "d_type": "plural"},
      {"tpl": "মাঝে মাঝে একটি {d_sg} {a_sg} ও {b_sg} হিসেবে দেখা যায়।", "a_type": "singular", "b_type": "singular", "d_type": "singular"},
      {"tpl": "বেশ কিছু {d_pl} আছে যারা {a_pl} ও {b_pl}।", "a_type": "plural", "b_type": "plural", "d_type": "plural"},
      {"tpl": "এটি সম্ভব যে কোনো {d_sg} {a_sg} এবং {b_sg}।", "a_type": "singular", "b_type": "singular", "d_type": "singular"},
      {"tpl": "{d_pl} সমূহের একটি অংশ {a_pl} এবং {b_pl}।", "a_type": "plural", "b_type": "plural", "d_type": "plural"},
      {"tpl": "নির্দিষ্ট কিছু {d_pl} একই সাথে {a_pl} ও {b_pl} হিসেবে পাওয়া যায়।", "a_type": "plural", "b_type": "plural", "d_type": "plural"},
      {"tpl": "অল্প কিছু {d_pl} আছে যারা {a_pl} ও {b_pl} শ্রেণীর।", "a_type": "plural", "b_type": "plural", "d_type": "plural"},
      {"tpl": "দেখা যায় যে কিছু {d_pl} {a_pl} এবং {b_pl} উভয়ই।", "a_type": "plural", "b_type": "plural", "d_type": "plural"},
      {"tpl": "কমপক্ষে কয়েকটি {d_pl} {a_pl} ও {b_pl} এর বৈশিষ্ট্য ধারণ করে।", "a_type": "plural", "b_type": "plural", "d_type": "plural"},
      {"tpl": "কিছু {d_pl} আছে যারা একই সাথে {a_pl} এবং {b_pl} হিসেবে পরিচিত।", "a_type": "plural", "b_type": "plural", "d_type": "plural"}
    ],
    # "No D that are A are B".
    "no": [
      {"tpl": "{d_pl} এর মধ্যে কেউই {a_pl} এবং {b_pl} উভয় নয়।", "a_type": "plural", "b_type": "plural", "d_type": "plural"},
      {"tpl": "কোনও {d_sg} নেই যা {a_sg} ও {b_sg} উভয়ই।", "a_type": "singular", "b_type": "singular", "d_type": "singular"},
      {"tpl": "যদি {d_sg} হয় {a_sg}, তবে এটি {b_sg} হতে পারে না।", "a_type": "singular", "b_type": "singular", "d_type": "singular"},
      {"tpl": "কোনও {d_pl} নেই যা একই সাথে {a_pl} ও {b_pl}।", "a_type": "plural", "b_type": "plural", "d_type": "plural"},
      {"tpl": "গভীরভাবে বলতে গেলে, {d_sg} যা {a_sg} তা {b_sg} নয়।", "a_type": "singular", "b_type": "singular", "d_type": "singular"},
      {"tpl": "{d_pl} শ্রেণীর মধ্যে {a_pl} ও {b_pl} এর কোনো সাধারণ মিল নেই।", "a_type": "plural", "b_type": "plural", "d_type": "plural"},
      {"tpl": "এমন কোনো {d_sg} থাকা অসম্ভব যা {a_sg} ও {b_sg} উভয়ই।", "a_type": "singular", "b_type": "singular", "d_type": "singular"},
      {"tpl": "কোনো অবস্থাতেই {d_pl} একই সাথে {a_pl} এবং {b_pl} হতে পারে না।", "a_type": "plural", "b_type": "plural", "d_type": "plural"},
      {"tpl": "{d_pl} এর মাঝে এমন কেউই নেই যে {a_pl} এবং {b_pl} উভয়ই।", "a_type": "plural", "b_type": "plural", "d_type": "plural"},
      {"tpl": "যদি কিছু {d_sg} এবং {a_sg} হয়, তবে সেটি {b_sg} নয়।", "a_type": "singular", "b_type": "singular", "d_type": "singular"},
      {"tpl": "পুরোপুরিভাবে কোনো {d_pl} একই সাথে {a_pl} ও {b_pl} নয়।", "a_type": "plural", "b_type": "plural", "d_type": "plural"},
      {"tpl": "এক জন {d_sg}-ও নেই যে {a_sg} এবং {b_sg}।", "a_type": "singular", "b_type": "singular", "d_type": "singular"},
      {"tpl": "{d_pl} এর ক্ষেত্রে {a_pl} বৈশিষ্ট্যটি {b_pl} বৈশিষ্ট্যকে বর্জন করে।", "a_type": "plural", "b_type": "plural", "d_type": "plural"},
      {"tpl": "যেকোনো {d_sg} যা {a_sg}, সেটি কখনোই {b_sg} নয়।", "a_type": "singular", "b_type": "singular", "d_type": "singular"},
      {"tpl": "একেবারেই কোনো {d_pl} {a_pl} ও {b_pl} এর সংমিশ্রণ নয়।", "a_type": "plural", "b_type": "plural", "d_type": "plural"}
    ],
    # "Some D that are A are not B".
    # NOTE(review): templates of the form "সব ... তারা ... নয়" could be read as
    # either "not all" or "none" -- confirm the intended reading with a native speaker.
    "some_not": [
      {"tpl": "সব {d_sg} যারা {a_sg} তারা {b_sg} নয়।", "a_type": "singular", "b_type": "singular", "d_type": "singular"},
      {"tpl": "কমপক্ষে একটি {d_sg} আছে যা {a_sg} কিন্তু {b_sg} নয়।", "a_type": "singular", "b_type": "singular", "d_type": "singular"},
      {"tpl": "কিছু {d_pl} আছে যারা {a_pl} কিন্তু {b_pl} নয়।", "a_type": "plural", "b_type": "plural", "d_type": "plural"},
      {"tpl": "কয়েকটি {d_sg} যা {a_sg} কিন্তু {b_sg} নয়।", "a_type": "singular", "b_type": "singular", "d_type": "singular"},
      {"tpl": "আংশিক {d_pl} যা {a_pl} কিন্তু {b_pl} নয়।", "a_type": "plural", "b_type": "plural", "d_type": "plural"},
      {"tpl": "{d_pl} এর মাঝে এমন কিছু আছে যারা {a_pl} হলেও {b_pl} নয়।", "a_type": "plural", "b_type": "plural", "d_type": "plural"},
      {"tpl": "প্রতিটি {d_sg} যা {a_sg}, সেটি {b_sg} নাও হতে পারে।", "a_type": "singular", "b_type": "singular", "d_type": "singular"},
      {"tpl": "মাঝে মাঝে {d_sg} যা {a_sg}, তা {b_sg} শ্রেণীর বাইরে থাকে।", "a_type": "singular", "b_type": "singular", "d_type": "singular"},
      {"tpl": "বেশ কিছু {d_pl} আছে যারা {a_pl} কিন্তু {b_pl} শ্রেণী বহির্ভূত।", "a_type": "plural", "b_type": "plural", "d_type": "plural"},
      {"tpl": "এটি সত্যি যে সব {d_pl} যারা {a_pl}, তারা {b_pl} নয়।", "a_type": "plural", "b_type": "plural", "d_type": "plural"},
      {"tpl": "নির্দিষ্ট কিছু {d_pl} {a_pl} হয়েও {b_pl} এর বৈশিষ্ট্য ধারণ করে না।", "a_type": "plural", "b_type": "plural", "d_type": "plural"},
      {"tpl": "অল্প কিছু {d_pl} আছে যারা {a_pl} কিন্তু {b_pl} নয়।", "a_type": "plural", "b_type": "plural", "d_type": "plural"},
      {"tpl": "দেখা যায় যে কিছু {d_pl} {a_pl} হলেও মূলত {b_pl} নয়।", "a_type": "plural", "b_type": "plural", "d_type": "plural"},
      {"tpl": "কমপক্ষে কয়েকটি {d_pl} যারা {a_pl}, তারা {b_pl} এর আওতায় পড়ে না।", "a_type": "plural", "b_type": "plural", "d_type": "plural"},
      {"tpl": "কিছু {d_pl} আছে যারা {a_pl} হিসেবে পরিচিত হলেও {b_pl} নয়।", "a_type": "plural", "b_type": "plural", "d_type": "plural"}
    ]
  }
}

In [None]:
def get_singular_and_plural(term):
    """Return a ``(singular, plural)`` pair for ``term``.

    No inflection is performed: both members of the pair are the input with
    surrounding whitespace removed, because the Bengali templates use the same
    token in singular and plural slots.
    """
    cleaned = term.strip()
    return (cleaned, cleaned)

In [None]:
def get_article(term):
    """Return ``term`` with surrounding whitespace removed.

    No article is prepended: Bengali has no counterpart of English 'a'/'an'
    here, so the "with article" form is just the trimmed term.
    """
    trimmed = term.strip()
    return trimmed

In [None]:
def get_basic_syllogism_parts(syllogism):
    """Split ``syllogism`` on '.' and return the non-empty, trimmed pieces.

    The returned sentences do not carry their trailing period.
    """
    parts = []
    for fragment in syllogism.split('.'):
        trimmed = fragment.strip()
        if trimmed:
            parts.append(trimmed)
    return parts

In [None]:
# Ordered (compiled pattern, statement type) rules. Order matters:
#   * three-term forms ("... that are ...") are tried before two-term forms,
#   * within each arity "are not" is tried before plain "are", otherwise
#     "Some A are not B" would be captured as "some" with predicate "not B".
# Patterns are applied with fullmatch to the sentence body, i.e. the sentence
# with its optional trailing period already removed.
_SENTENCE_RULES = (
    (re.compile(r"All (.+) that are (.+) are (.+)"), "all"),
    (re.compile(r"Some (.+) that are (.+) are not (.+)"), "some_not"),
    (re.compile(r"Some (.+) that are (.+) are (.+)"), "some"),
    (re.compile(r"No (.+) that are (.+) are (.+)"), "no"),
    (re.compile(r"All (.+) are (.+)"), "all"),
    (re.compile(r"Some (.+) are not (.+)"), "some_not"),
    (re.compile(r"Some (.+) are (.+)"), "some"),
    (re.compile(r"No (.+) are (.+)"), "no"),
)


def parse_sentence_for_type_and_terms(sentence):
    """Classify a simple English categorical sentence and extract its terms.

    Args:
        sentence: A sentence such as ``"All A are B."`` or
            ``"Some D that are A are not B."``. Surrounding whitespace is
            ignored and the trailing period is optional.

    Returns:
        A 4-tuple ``(type, fourth, subject, predicate)`` where ``type`` is one
        of ``"all"``, ``"some"``, ``"no"``, ``"some_not"``. ``fourth`` is the
        restricting term ``D`` for the "D that are A" forms and ``None`` for
        the two-term forms.

    Raises:
        ValueError: If the sentence matches none of the supported forms
            (matching is case-sensitive and every term must be non-empty).
    """
    sentence = sentence.strip()
    # Drop the sentence terminator explicitly. The previous patterns ended in an
    # unescaped '.', which matched any character and therefore swallowed the
    # last letter of the predicate whenever the period was missing.
    body = sentence[:-1].rstrip() if sentence.endswith('.') else sentence
    for pattern, statement_type in _SENTENCE_RULES:
        match = pattern.fullmatch(body)
        if match is None:
            continue
        terms = match.groups()
        if len(terms) == 3:
            # (fourth, subject, predicate)
            return (statement_type,) + terms
        # Two-term form: no restricting term.
        return (statement_type, None) + terms
    raise ValueError(f"Could not parse basic syllogism type from sentence: {sentence}")

In [None]:
def split_syllogism_into_sentences(syllogism):
    """Split a syllogism into its premises and its conclusion.

    The text is split on '.', empty pieces are discarded, and each remaining
    piece is trimmed and given back its trailing period. The last sentence is
    the conclusion; everything before it is a premise.

    Returns:
        ``(premises, conclusion)`` -- a list of premise sentences and the
        conclusion sentence.

    Raises:
        ValueError: If fewer than two sentences are found.
    """
    terminated = []
    for chunk in syllogism.split('.'):
        chunk = chunk.strip()
        if chunk:
            terminated.append(chunk + '.')
    if len(terminated) < 2:
        raise ValueError(f"Syllogism must contain at least one premise and one conclusion: {syllogism}")
    *premises, conclusion = terminated
    return premises, conclusion

In [None]:
def create_sub_dict(term_original_subject, term_original_predicate, term_original_fourth, synonym_dict=None):
    """Build the placeholder -> text mapping used to fill a template.

    For the subject (prefix ``a``), the predicate (prefix ``b``) and, when a
    truthy fourth term is given, that term (prefix ``d``), the mapping holds
    ``<prefix>_sg``, ``<prefix>_pl`` and ``<prefix>_sg_art``.

    ``synonym_dict`` is accepted but not used by this function.
    """
    roles = [('a', term_original_subject), ('b', term_original_predicate)]
    if term_original_fourth:
        roles.append(('d', term_original_fourth))

    sub_map = {}
    for prefix, term in roles:
        singular, plural = get_singular_and_plural(term)
        sub_map[prefix + '_sg'] = singular
        sub_map[prefix + '_pl'] = plural
        sub_map[prefix + '_sg_art'] = get_article(singular)
    return sub_map

In [None]:
def safe_lowercase_first(s, sub_map):
    """Lower-case the first character of ``s`` unless ``s`` begins with a term.

    If ``s`` starts with any value of ``sub_map`` it is returned unchanged, so
    the casing of a substituted term is preserved; otherwise only the first
    character is lower-cased.
    """
    begins_with_term = any(s.startswith(term) for term in sub_map.values())
    if begins_with_term:
        return s
    head, tail = s[:1], s[1:]
    return head.lower() + tail

In [None]:
def _render_statement(sentence, complex_patterns_data, complex_patterns_data_with_fourth_var):
    """Parse one simple sentence and render it with a randomly chosen template.

    Returns ``(text, sub_map)`` where ``sub_map`` is the placeholder mapping
    that was used to fill the template.
    """
    s_type, fourth, subject, predicate = parse_sentence_for_type_and_terms(sentence)
    sub_map = create_sub_dict(subject, predicate, fourth)
    # Sentences with a restricting term need the templates that have {d_*} slots.
    pattern_source = complex_patterns_data_with_fourth_var if fourth else complex_patterns_data
    template = random.choice(pattern_source['premises'][s_type])
    return template['tpl'].format(**sub_map), sub_map


def _capitalize_first(text):
    """Upper-case the first character of ``text``; an empty string is returned as is."""
    return text[:1].upper() + text[1:]


def convert_syllogism_to_complex(syllogism_data, complex_patterns_data, complex_patterns_data_with_fourth_var):
    """Rewrite the simple syllogism in ``syllogism_data`` using random templates.

    Args:
        syllogism_data: Mapping with a ``'syllogism'`` entry holding the simple
            English syllogism (premises followed by the conclusion).
        complex_patterns_data: Pattern table with ``'premises'`` templates for
            two-term statements and the ``'conclusions'`` wrappers.
        complex_patterns_data_with_fourth_var: Pattern table with ``'premises'``
            templates for statements that carry a restricting fourth term.

    Returns:
        A shallow copy of ``syllogism_data`` whose ``'syllogism'`` entry is the
        rendered text; the input mapping is not modified.

    Raises:
        ValueError: Propagated from sentence splitting/parsing when the input
            is not in a supported form.
    """
    simple_syllogism = syllogism_data['syllogism']
    premises_simple, conclusion_simple = split_syllogism_into_sentences(simple_syllogism)

    complex_premises = []
    for premise in premises_simple:
        premise_text, _ = _render_statement(
            premise, complex_patterns_data, complex_patterns_data_with_fourth_var
        )
        complex_premises.append(_capitalize_first(premise_text))

    conclusion_text, c_sub_map = _render_statement(
        conclusion_simple, complex_patterns_data, complex_patterns_data_with_fourth_var
    )
    # The statement is embedded mid-sentence, so drop its leading capital
    # unless it starts with one of the substituted terms.
    conclusion_text = safe_lowercase_first(conclusion_text, c_sub_map)
    c_wrapper_template = random.choice(complex_patterns_data['conclusions'])
    c_complex = c_wrapper_template.format(conclusion=conclusion_text)
    # Both the embedded statement and the wrapper end with a terminator, which
    # yields a doubled mark (e.g. "।।"). Remove one only when it really is a
    # duplicated punctuation mark, instead of blindly chopping the last char.
    if len(c_complex) > 1 and c_complex[-1] == c_complex[-2] and not c_complex[-1].isalnum():
        c_complex = c_complex[:-1]
    c_complex = _capitalize_first(c_complex)

    final_complex_syllogism = " ".join(complex_premises) + " " + c_complex
    new_data = syllogism_data.copy()
    new_data['syllogism'] = final_complex_syllogism
    return new_data

In [None]:
def process_dataset(input_data, complex_patterns_data, complex_patterns_data_with_fourth_var):
    """Convert every item of ``input_data`` and return the converted items.

    Items whose conversion raises ``ValueError`` are reported on stdout and
    left out of the result; any other exception propagates to the caller.
    """
    converted = []
    for record in input_data:
        try:
            result = convert_syllogism_to_complex(
                record, complex_patterns_data, complex_patterns_data_with_fourth_var
            )
        except ValueError as e:
            print(f" Skipping item ID {record.get('id', 'N/A')} due to parsing error: {e}")
        else:
            converted.append(result)
    return converted

In [None]:
# Example data for testing (input remains in symbolic/simple English form).
sample_data = [
  {"id": "bn-1", "syllogism": "All animals that are non-aquatic are fish. Some mammals are non-aquatic. Some mammals are not fish.", "validity": True, "plausibility": True},
  {"id": "bn-2", "syllogism": "All υ are Π. All Π are Ε. Some Ε are υ.", "validity": True, "plausibility": True},
  {"id": "bn-3", "syllogism": "No fish are mammals. Some mammals are whales. Some whales are not fish.", "validity": True, "plausibility": True},
]
complex_syllogism_data = process_dataset(sample_data, COMPLEX_PATTERNS, COMPLEX_PATTERNS_WITH_FOURTH_VAR)
# The heading literal was split across two physical lines (an unterminated
# string, i.e. a SyntaxError); the line break is written as an escape instead.
print("\n--- রূপান্তরিত জটিল সিলোজিজম (Bengali) ---")
# ensure_ascii=False keeps the Bengali text readable instead of \u escapes.
print(json.dumps(complex_syllogism_data, indent=4, ensure_ascii=False))