In [10]:
import nltk

# Download the NLTK data packages before tokenizing and tagging.
nltk.download("punkt")
nltk.download("punkt_tab")
nltk.download("averaged_perceptron_tagger_eng")

from nltk.tokenize import word_tokenize
from nltk import pos_tag

# The two example claims analysed in this notebook.
A = "Heavier bodies fall faster."
B = "All bodies fall at the same speed."

tokens_A = word_tokenize(A)   # English by default
tokens_B = word_tokenize(B)

tags_A = pos_tag(tokens_A)    # uses "eng" by default
tags_B = pos_tag(tokens_B)

# Show the (token, tag) pairs obtained for each sentence.
print("A:", tags_A)
print("B:", tags_B)


A: [('Heavier', 'NNP'), ('bodies', 'NNS'), ('fall', 'VBP'), ('faster', 'RB'), ('.', '.')]
B: [('All', 'DT'), ('bodies', 'NNS'), ('fall', 'VBP'), ('at', 'IN'), ('the', 'DT'), ('same', 'JJ'), ('speed', 'NN'), ('.', '.')]


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Biempi01\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt_tab to
[nltk_data]     C:\Users\Biempi01\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger_eng to
[nltk_data]     C:\Users\Biempi01\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger_eng is already up-to-
[nltk_data]       date!


In [13]:
# Natural Language Toolkit (NLTK) package: https://www.nltk.org/
# On GitHub: https://github.com/nltk/nltk

import nltk
sentence = """Heavier bodies fall faster"""
tokens = nltk.word_tokenize(sentence)
# Bare last expression: the notebook displays the token list.
tokens

['Heavier', 'bodies', 'fall', 'faster']

In [14]:
import nltk

# Request the same three packages as before, plus the "stopwords" package.
nltk.download("punkt")
nltk.download("punkt_tab")
nltk.download("averaged_perceptron_tagger_eng")
# Last expression of the cell: its return value is what the notebook displays.
nltk.download("stopwords")


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Biempi01\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt_tab to
[nltk_data]     C:\Users\Biempi01\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger_eng to
[nltk_data]     C:\Users\Biempi01\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger_eng is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Biempi01\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [15]:
from nltk.tokenize import sent_tokenize, word_tokenize

# Sentence-level split, then word-level split, of sentence A.
print(sent_tokenize(A))
print(word_tokenize(A))

# The same two views of sentence B.
print(sent_tokenize(B))
print(word_tokenize(B))


['Heavier bodies fall faster.']
['Heavier', 'bodies', 'fall', 'faster', '.']
['All bodies fall at the same speed.']
['All', 'bodies', 'fall', 'at', 'the', 'same', 'speed', '.']


In [16]:
from nltk import pos_tag

# Tokenize each sentence and print its (token, tag) pairs.
print(pos_tag(word_tokenize(A)))
print(pos_tag(word_tokenize(B)))


[('Heavier', 'NNP'), ('bodies', 'NNS'), ('fall', 'VBP'), ('faster', 'RB'), ('.', '.')]
[('All', 'DT'), ('bodies', 'NNS'), ('fall', 'VBP'), ('at', 'IN'), ('the', 'DT'), ('same', 'JJ'), ('speed', 'NN'), ('.', '.')]


In [17]:
from nltk.corpus import stopwords

# English stop-word list, stored as a set; used by remove_stopwords() for membership tests.
sw = set(stopwords.words("english"))

def remove_stopwords(text):
    """Lowercase *text*, tokenize it, and drop non-alphabetic tokens and stop words.

    Args:
        text: Input string.

    Returns:
        List of lowercase tokens, in their original order, for which
        ``str.isalpha()`` is true and which are not members of ``sw``.
    """
    kept = []
    for token in word_tokenize(text.lower()):
        if not token.isalpha():
            # Punctuation, numbers and mixed tokens are discarded.
            continue
        if token in sw:
            continue
        kept.append(token)
    return kept

# Content words left in each sentence after stop-word removal.
print(remove_stopwords(A))
print(remove_stopwords(B))


['heavier', 'bodies', 'fall', 'faster']
['bodies', 'fall', 'speed']


In [18]:
from nltk.stem import PorterStemmer

# Single shared stemmer instance, used by stem().
stemmer = PorterStemmer()

def stem(text):
    """Return the stems of the content words of *text*.

    The text is first reduced with ``remove_stopwords``; each surviving token
    is then passed through ``stemmer.stem``.

    Args:
        text: Input string.

    Returns:
        List of stemmed tokens, in order.
    """
    content_words = remove_stopwords(text)
    return list(map(stemmer.stem, content_words))

# Stems of the content words of each sentence.
print(stem(A))
print(stem(B))


['heavier', 'bodi', 'fall', 'faster']
['bodi', 'fall', 'speed']


In [19]:
from nltk.stem import WordNetLemmatizer
# Download the "wordnet" and "omw-1.4" data packages before creating the lemmatizer.
nltk.download("wordnet")
nltk.download("omw-1.4")

# Single shared lemmatizer instance, used by lemmatize().
lemmatizer = WordNetLemmatizer()

def lemmatize(text):
    """Return the lemmas of the content words of *text*.

    Each token kept by ``remove_stopwords`` is passed to
    ``lemmatizer.lemmatize`` with no part-of-speech argument.

    Args:
        text: Input string.

    Returns:
        List of lemmatized tokens, in order.
    """
    lemmas = []
    for token in remove_stopwords(text):
        lemmas.append(lemmatizer.lemmatize(token))
    return lemmas

# Lemmas of the content words of each sentence.
print(lemmatize(A))
print(lemmatize(B))


[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\Biempi01\AppData\Roaming\nltk_data...
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     C:\Users\Biempi01\AppData\Roaming\nltk_data...


['heavier', 'body', 'fall', 'faster']
['body', 'fall', 'speed']


In [20]:
from nltk import FreqDist

# Frequency distribution over the stems of each sentence.
# NOTE: printing a FreqDist shows only its summary line (sample/outcome counts),
# not the individual counts.
print(FreqDist(stem(A)))
print(FreqDist(stem(B)))


<FreqDist with 4 samples and 4 outcomes>
<FreqDist with 3 samples and 3 outcomes>


In [21]:
from nltk import bigrams, trigrams

# bigrams()/trigrams() results are wrapped in list() so the tuples are printed.
print(list(bigrams(word_tokenize(A))))
print(list(trigrams(word_tokenize(A))))


[('Heavier', 'bodies'), ('bodies', 'fall'), ('fall', 'faster'), ('faster', '.')]
[('Heavier', 'bodies', 'fall'), ('bodies', 'fall', 'faster'), ('fall', 'faster', '.')]


In [22]:
# Chunk grammar for RegexpParser, written as patterns over POS tags:
#   NP: an optional DT tag, any number of JJ* tags, then one or more NN* tags
#   VP: one VB* tag followed by any number of RB* tags
grammar = r"""
NP: {<DT>?<JJ.*>*<NN.*>+}
VP: {<VB.*><RB.*>*}
"""

from nltk import RegexpParser

chunker = RegexpParser(grammar)

# Tokenize and tag each sentence, then group the tagged tokens into NP/VP chunks.
tree_A = chunker.parse(pos_tag(word_tokenize(A)))
tree_B = chunker.parse(pos_tag(word_tokenize(B)))

print(tree_A)
print(tree_B)


(S (NP Heavier/NNP bodies/NNS) (VP fall/VBP faster/RB) ./.)
(S
  (NP All/DT bodies/NNS)
  (VP fall/VBP)
  at/IN
  (NP the/DT same/JJ speed/NN)
  ./.)


In [24]:
from nltk.corpus import wordnet as wn

def synsets(word):
    """Look up *word* in WordNet.

    Args:
        word: Word to look up.

    Returns:
        Whatever ``wn.synsets(word)`` returns for that word.
    """
    matches = wn.synsets(word)
    return matches

# WordNet lookups for one word from each example sentence.
print(synsets("heavier"))
print(synsets("speed"))


[Synset('heavy.a.01'), Synset('heavy.a.02'), Synset('heavy.a.03'), Synset('heavy.a.04'), Synset('fleshy.s.01'), Synset('clayey.s.02'), Synset('heavy.s.07'), Synset('heavy.a.08'), Synset('heavy.a.09'), Synset('heavy.s.10'), Synset('dense.s.01'), Synset('heavy.s.12'), Synset('heavy.s.13'), Synset('big.s.06'), Synset('heavy.s.15'), Synset('intemperate.s.03'), Synset('grave.s.03'), Synset('heavy.s.18'), Synset('heavy.s.19'), Synset('heavy.s.20'), Synset('heavy.s.21'), Synset('heavy.s.22'), Synset('heavy.s.23'), Synset('arduous.s.01'), Synset('heavy.s.25'), Synset('heavy.s.26'), Synset('big.s.13')]
[Synset('speed.n.01'), Synset('speed.n.02'), Synset('speed.n.03'), Synset('focal_ratio.n.01'), Synset('amphetamine.n.01'), Synset('rush.v.01'), Synset('accelerate.v.01'), Synset('travel_rapidly.v.01'), Synset('speed.v.04'), Synset('accelerate.v.02')]


In [25]:
def jaccard(a, b):
    """Jaccard similarity between two collections of hashable items.

    Both arguments are converted to sets, so duplicates and order are ignored.

    Args:
        a: Iterable of hashable items (e.g. a list of stems).
        b: Iterable of hashable items.

    Returns:
        ``len(A & B) / len(A | B)`` as a float in ``[0.0, 1.0]``.
        When both inputs are empty the union is empty too; in that case
        ``1.0`` is returned (two empty sets are identical) instead of raising
        ``ZeroDivisionError``.
    """
    set_a, set_b = set(a), set(b)
    union = set_a | set_b
    if not union:
        # Both inputs empty, e.g. two sentences made only of stop words.
        return 1.0
    return len(set_a & set_b) / len(union)

# Overlap between the stem sets of the two sentences.
print(jaccard(stem(A), stem(B)))


0.4


In [26]:
def classify(sentence):
    """Assign a coarse claim category to *sentence*.

    Rules, checked in order:

    1. Any token tagged ``JJR`` or ``RBR`` by ``pos_tag`` -> ``"COMPARATIVE_CLAIM"``.
    2. The sentence contains both the word "all" and the word "same"
       (case-insensitive) -> ``"UNIVERSAL_EQUALITY"``.
    3. Otherwise -> ``"OTHER"``.

    Rule 2 compares whole tokens. A substring test would let "fall", "small"
    or "ball" satisfy the "all" check.

    NOTE: rule 1 depends entirely on the tagger's output. In this notebook's
    recorded run "Heavier" was tagged NNP and "faster" RB, so sentence A fell
    through to "OTHER".

    Args:
        sentence: Input sentence.

    Returns:
        One of ``"COMPARATIVE_CLAIM"``, ``"UNIVERSAL_EQUALITY"``, ``"OTHER"``.
    """
    tokens = word_tokenize(sentence)
    tags = pos_tag(tokens)
    if any(tag in ("JJR", "RBR") for _, tag in tags):
        return "COMPARATIVE_CLAIM"
    # Whole-word membership instead of substring search on the raw text.
    words = {token.lower() for token in tokens}
    if "all" in words and "same" in words:
        return "UNIVERSAL_EQUALITY"
    return "OTHER"

# Category assigned to each example sentence.
print(classify(A))
print(classify(B))


OTHER
UNIVERSAL_EQUALITY


In [28]:

import nltk

def ensure(resource_path: str, package: str):
    """Make sure an NLTK resource is available, downloading its package if not.

    Args:
        resource_path: Path handed to ``nltk.data.find``.
        package: Package name handed to ``nltk.download`` when the lookup fails.

    Returns:
        None.

    Raises:
        LookupError: If the resource still cannot be found after the download.
    """
    try:
        nltk.data.find(resource_path)
    except LookupError:
        # First lookup failed: announce, download, and look again.
        print(f"[INFO] Instalando: {package}")
        nltk.download(package)
        # If this second lookup fails, the error surfaces immediately.
        nltk.data.find(resource_path)
    return None

# Requirements for your pipeline (based on your error history)
ensure("tokenizers/punkt", "punkt")
ensure("tokenizers/punkt_tab/english/", "punkt_tab")
ensure("taggers/averaged_perceptron_tagger_eng/", "averaged_perceptron_tagger_eng")

# Requirements for NER chunking in your NLTK
ensure("chunkers/maxent_ne_chunker_tab/english_ace_multiclass/", "maxent_ne_chunker_tab")
ensure("corpora/words", "words")

from nltk import ne_chunk, pos_tag
from nltk.tokenize import word_tokenize

A = "Heavier bodies fall faster."
B = "All bodies fall at the same speed."

# Tokenize, tag, then run the named-entity chunker on each sentence.
print(ne_chunk(pos_tag(word_tokenize(A))))
print(ne_chunk(pos_tag(word_tokenize(B))))


[INFO] Instalando: maxent_ne_chunker_tab


[nltk_data] Downloading package maxent_ne_chunker_tab to
[nltk_data]     C:\Users\Biempi01\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping chunkers\maxent_ne_chunker_tab.zip.


(S (GPE Heavier/NNP) bodies/NNS fall/VBP faster/RB ./.)
(S All/DT bodies/NNS fall/VBP at/IN the/DT same/JJ speed/NN ./.)


In [29]:
# Ejemplos de introducción de heurísticas

def physical_heuristic(sentence: str):
    """Label a sentence with a hand-written physics heuristic.

    Matching is case-insensitive and uses word boundaries, so a phrase such as
    "small bodies" does not count as "all bodies".

    Rules, checked in order:

    1. Contains the words "heavier" and "faster" -> ``PHYSICALLY_SUSPECT``.
    2. Contains the phrases "all bodies" and "same speed" -> ``PHYSICALLY_INCOMPLETE``.
    3. Otherwise -> ``UNKNOWN``.

    Args:
        sentence: Claim to inspect.

    Returns:
        A dict with ``"status"`` and ``"reason"``; the two matched cases also
        carry a ``"model"`` entry.
    """
    import re  # local import: the cell defining this function does not import `re`

    s = sentence.lower()

    if re.search(r"\bheavier\b", s) and re.search(r"\bfaster\b", s):
        return {
            "status": "PHYSICALLY_SUSPECT",
            "reason": "Assumes mass alone determines falling speed.",
            "model": "Contradicted by classical mechanics."
        }

    if re.search(r"\ball bodies\b", s) and re.search(r"\bsame speed\b", s):
        return {
            "status": "PHYSICALLY_INCOMPLETE",
            "reason": "True only under restricted conditions (vacuum).",
            "model": "Condition-dependent truth."
        }

    return {
        "status": "UNKNOWN",
        "reason": "No heuristic rule matched."
    }

A = "Heavier bodies fall faster."
B = "All bodies fall at the same speed."

# Heuristic verdict for each example sentence.
print("A:", physical_heuristic(A))
print("B:", physical_heuristic(B))

# The heuristic does not decide truth; it decides consistency with a model and the degree of specification




A: {'status': 'PHYSICALLY_SUSPECT', 'reason': 'Assumes mass alone determines falling speed.', 'model': 'Contradicted by classical mechanics.'}
B: {'status': 'PHYSICALLY_INCOMPLETE', 'reason': 'True only under restricted conditions (vacuum).', 'model': 'Condition-dependent truth.'}


In [32]:
# Example: introducing heuristics with a decision tree (no semantics, LLMs included)

import re

# Example claims, redefined here so this cell does not rely on earlier cells.
A = "Heavier bodies fall faster."
B = "All bodies fall at the same speed."

def detect_claim_type(s: str) -> str:
    """Classify a sentence by whole-word regex matching.

    The input is lowercased and stripped before matching.

    Args:
        s: Sentence to classify.

    Returns:
        ``"MASS_IMPLIES_FASTER"`` if "heavier", "faster" and "fall" all occur;
        ``"UNIVERSAL_SAME_SPEED"`` if "all", "same speed" and "fall" all occur;
        ``"UNKNOWN"`` otherwise. The first rule takes precedence.
    """
    text = s.lower().strip()

    def mentions(*patterns):
        # True only when every pattern is found somewhere in the text.
        return all(re.search(pattern, text) for pattern in patterns)

    if mentions(r"\bheavier\b", r"\bfaster\b", r"\bfall\b"):
        return "MASS_IMPLIES_FASTER"
    if mentions(r"\ball\b", r"\bsame speed\b", r"\bfall\b"):
        return "UNIVERSAL_SAME_SPEED"
    return "UNKNOWN"

def needs_context(claim_type: str) -> bool:
    """Tell whether a claim type must be evaluated against explicit conditions.

    Both recognised claim types usually need conditions before they can be
    judged "with respect to reality".

    Args:
        claim_type: Label such as those returned by ``detect_claim_type``.

    Returns:
        True for ``"MASS_IMPLIES_FASTER"`` and ``"UNIVERSAL_SAME_SPEED"``,
        False for anything else.
    """
    context_dependent = {"UNIVERSAL_SAME_SPEED", "MASS_IMPLIES_FASTER"}
    return claim_type in context_dependent

def ask_questions(claim_type: str):
    """
    Return the minimal list of disambiguation questions.

    The same three questions are returned whatever ``claim_type`` is, always
    in this order: medium, shape, measure.
    """
    # Medium (vacuum vs air) is the most decisive condition.
    medium_q = "¿Estamos en vacío (vacuum) o en aire (air)? Responde: vacuum/air"
    # In air, shape/area matters because of drag.
    shape_q = "¿Los cuerpos tienen la misma forma y área frontal? Responde: same_shape/different_shape"
    # Optional extra precision.
    measure_q = "¿Comparas 'velocidad' instantánea o 'aceleración' (acceleration)? Responde: speed/acceleration"
    return [medium_q, shape_q, measure_q]

def evaluate_with_context(sentence: str, ctx: dict):
    """
    Give a conditional verdict based on a simplified physical model:
    - In vacuum: same acceleration g (ignoring relativity).
    - In air: the outcome depends strongly on drag (shape/area), not mass alone.

    Args:
        sentence: Claim text; its type is obtained from ``detect_claim_type``.
        ctx: Mapping read with ``.get`` for the keys ``"medium"``
            ('vacuum'/'air'), ``"shape"`` ('same_shape'/'different_shape')
            and ``"measure"`` ('speed'/'acceleration').

    Returns:
        A dict whose ``"status"`` is one of:

        - ``NEED_MORE_CONTEXT`` (with ``"missing"``) when any of the three
          context keys is absent or None;
        - ``INVALID_CONTEXT`` (with ``"invalid"``) when a context value the
          verdict depends on is not one of the recognised options. This
          replaces falling through to the "sentence did not match" result, or
          silently treating the value as 'different_shape' / 'speed';
        - a verdict status (``FALSE_UNDER_CTX``, ``TRUE_UNDER_CTX``,
          ``CONDITIONALLY_PLAUSIBLE_BUT_MISATTRIBUTED``,
          ``UNDETERMINED_UNDER_CTX``, ``MISSTATED_BUT_RELATED``);
        - ``UNKNOWN`` (with ``"claim_type"``) when the sentence matches no
          known claim pattern.
    """
    claim_type = detect_claim_type(sentence)

    medium = ctx.get("medium")              # 'vacuum'/'air'
    shape = ctx.get("shape")                # 'same_shape'/'different_shape'
    measure = ctx.get("measure")            # 'speed'/'acceleration'

    def invalid_context(field, value):
        # Report the offending field instead of blaming the sentence.
        return {
            "status": "INVALID_CONTEXT",
            "invalid": {field: value},
            "note": "Valor de contexto no reconocido."
        }

    # If something is missing, we do not make it up.
    missing = [k for k in ("medium", "shape", "measure") if ctx.get(k) is None]
    if missing:
        return {
            "status": "NEED_MORE_CONTEXT",
            "missing": missing,
            "note": "No se puede evaluar sin fijar condiciones."
        }

    # Every known claim branches on the medium first, so validate it once here.
    known_claim = claim_type in ("MASS_IMPLIES_FASTER", "UNIVERSAL_SAME_SPEED")
    if known_claim and medium not in ("vacuum", "air"):
        return invalid_context("medium", medium)

    # Evaluation
    if claim_type == "MASS_IMPLIES_FASTER":
        if medium == "vacuum":
            return {
                "status": "FALSE_UNDER_CTX",
                "because": "En vacío, la aceleración no depende de la masa.",
                "detail": "Bajo el modelo clásico (Galileo/Newton), masa ≠ caída más rápida en vacío."
            }
        # medium == "air": a heavier object may fall faster, but NOT because
        # it is heavier as such; the weight/drag ratio and the shape decide.
        if shape == "same_shape":
            return {
                "status": "CONDITIONALLY_PLAUSIBLE_BUT_MISATTRIBUTED",
                "because": "En aire, con misma forma, el más pesado puede alcanzar mayor velocidad terminal.",
                "detail": "Pero la causa relevante es el arrastre (drag) y la relación peso/área, no 'masa por sí sola'."
            }
        if shape == "different_shape":
            return {
                "status": "UNDETERMINED_UNDER_CTX",
                "because": "Con formas distintas, el arrastre domina; la masa por sí sola no predice el resultado.",
                "detail": "Necesitarías parámetros de drag/área/densidad para decidir."
            }
        return invalid_context("shape", shape)

    if claim_type == "UNIVERSAL_SAME_SPEED":
        if medium == "vacuum":
            # "same speed" is less accurate than "same acceleration"
            if measure == "acceleration":
                return {
                    "status": "TRUE_UNDER_CTX",
                    "because": "En vacío, todos comparten la misma aceleración g.",
                    "detail": "Es la formulación correcta del principio galileano."
                }
            if measure == "speed":
                return {
                    "status": "MISSTATED_BUT_RELATED",
                    "because": "En vacío, lo igual es la aceleración, no la velocidad en todo instante.",
                    "detail": "La velocidad depende del tiempo y condiciones iniciales; con mismas condiciones iniciales, sí coinciden."
                }
            return invalid_context("measure", measure)
        # medium == "air"
        return {
            "status": "FALSE_UNDER_CTX",
            "because": "En aire, distintas masas/formas tienen distinta velocidad terminal y dinámica por arrastre.",
            "detail": "La igualdad universal de velocidad no se sostiene sin restricciones fuertes."
        }

    return {
        "status": "UNKNOWN",
        "because": "La frase no coincide con patrones del agente.",
        "claim_type": claim_type
    }

def run_agent(sentence: str):
    """Interactively evaluate *sentence*: detect its claim type, ask for context, print a verdict.

    Prints the sentence and its detected type. If ``needs_context`` rejects the
    type, prints a notice and returns. Otherwise every question from
    ``ask_questions`` is put to the user via ``input``; a recognised answer
    (after stripping and lowercasing) fills the context field it belongs to,
    and an unrecognised one leaves the field as None. The filled context is
    passed to ``evaluate_with_context`` and the resulting dict is printed one
    entry per line.

    Args:
        sentence: Claim to evaluate.

    Returns:
        None.
    """
    claim_type = detect_claim_type(sentence)
    print(f"Frase: {sentence}")
    print(f"Tipo detectado: {claim_type}")

    if not needs_context(claim_type):
        print("No tengo reglas para esta frase.")
        return

    # Each accepted answer maps to the context field it sets.
    field_for_answer = {
        "vacuum": "medium",
        "air": "medium",
        "same_shape": "shape",
        "different_shape": "shape",
        "speed": "measure",
        "acceleration": "measure",
    }

    # Minimal questions
    ctx = dict.fromkeys(("medium", "shape", "measure"))
    for question in ask_questions(claim_type):
        reply = input(question + "\n> ").strip().lower()
        field = field_for_answer.get(reply)
        if field is None:
            print("Respuesta no reconocida. Dejo ese campo como desconocido.")
        else:
            ctx[field] = reply

    verdict = evaluate_with_context(sentence, ctx)
    print("\nDICTAMEN:")
    for key, value in verdict.items():
        print("- {}: {}".format(key, value))

# Ejemplos de ejecución:
# run_agent(A)
# run_agent(B)


In [31]:
# Interactive run on sentence A; the answers are typed at the prompts.
run_agent(A)

Frase: Heavier bodies fall faster.
Tipo detectado: MASS_IMPLIES_FASTER


¿Estamos en vacío (vacuum) o en aire (air)? Responde: vacuum/air
>  air
¿Los cuerpos tienen la misma forma y área frontal? Responde: same_shape/different_shape
>  different_shape
¿Comparas 'velocidad' instantánea o 'aceleración' (acceleration)? Responde: speed/acceleration
>  speed



DICTAMEN:
- status: UNDETERMINED_UNDER_CTX
- because: Con formas distintas, el arrastre domina; la masa por sí sola no predice el resultado.
- detail: Necesitarías parámetros de drag/área/densidad para decidir.


In [33]:
pip install sentence-transformers


Note: you may need to restart the kernel to use updated packages.


In [1]:
from sentence_transformers import SentenceTransformer, util

A = "Heavier bodies fall faster."
B = "All bodies fall at the same speed."

# Mini "knowledge base" (templates): a list of (label, text) pairs
KB = [
    ("VACUUM_EQUAL_ACCEL", "In a vacuum, all objects fall with the same acceleration (ignoring relativity)."),
    ("AIR_DRAG_MATTERS", "In air, drag depends on shape and area; falling speed is not determined by mass alone."),
    ("SPEED_VS_ACCEL", "Equal acceleration is not the same as equal speed at all times unless initial conditions match."),
    ("ARISTOTLE_STYLE", "Heavier objects fall faster (an Aristotelian claim)."),
]

model = SentenceTransformer("all-MiniLM-L6-v2")

# Encode the KB texts once here; analyze() compares each sentence against kb_emb.
kb_texts = [t for _, t in KB]
kb_emb = model.encode(kb_texts, convert_to_tensor=True)

def analyze(sentence: str, topk=3):
    """Rank the knowledge-base entries by similarity to *sentence*.

    The sentence is encoded with ``model``, compared against ``kb_emb`` with
    ``util.cos_sim``, and the ``topk`` highest-scoring ``KB`` entries are kept.
    Follow-up questions are then derived from the labels of those entries.

    Args:
        sentence: Text to analyse.
        topk: Number of best matches to keep (default 3).

    Returns:
        Dict with ``"sentence"``, ``"top_matches"`` (a list of
        ``(label, text, score)`` tuples, best first, score as a float) and
        ``"questions"`` (a list of strings, possibly empty).
    """
    query_vec = model.encode(sentence, convert_to_tensor=True)
    scores = util.cos_sim(query_vec, kb_emb)[0]

    # Pair each KB index with its score and order from most to least similar.
    by_score = sorted(enumerate(scores), key=lambda pair: float(pair[1]), reverse=True)

    top_matches = []
    for idx, score in by_score[:topk]:
        entry = KB[idx]
        top_matches.append((entry[0], entry[1], float(score)))

    # Minimal heuristic driven by which labels made the cut.
    labels = [match[0] for match in top_matches]
    questions = []
    if any(tag in labels for tag in ("VACUUM_EQUAL_ACCEL", "AIR_DRAG_MATTERS")):
        questions.append("Context needed: vacuum or air?")
    if "SPEED_VS_ACCEL" in labels:
        questions.append("Clarify: are you claiming same speed or same acceleration?")

    return {
        "sentence": sentence,
        "top_matches": top_matches,
        "questions": questions
    }

# Top KB matches and follow-up questions for each example sentence.
print(analyze(A))
print(analyze(B))


{'sentence': 'Heavier bodies fall faster.', 'top_matches': [('ARISTOTLE_STYLE', 'Heavier objects fall faster (an Aristotelian claim).', 0.7986726760864258), ('VACUUM_EQUAL_ACCEL', 'In a vacuum, all objects fall with the same acceleration (ignoring relativity).', 0.5380131006240845), ('AIR_DRAG_MATTERS', 'In air, drag depends on shape and area; falling speed is not determined by mass alone.', 0.5199270248413086)], 'questions': ['Context needed: vacuum or air?']}
{'sentence': 'All bodies fall at the same speed.', 'top_matches': [('VACUUM_EQUAL_ACCEL', 'In a vacuum, all objects fall with the same acceleration (ignoring relativity).', 0.7063049077987671), ('ARISTOTLE_STYLE', 'Heavier objects fall faster (an Aristotelian claim).', 0.6049437522888184), ('AIR_DRAG_MATTERS', 'In air, drag depends on shape and area; falling speed is not determined by mass alone.', 0.5472099184989929)], 'questions': ['Context needed: vacuum or air?']}
