In [1]:
import logging

import instructor
import pandas as pd
from dotenv import load_dotenv
from openai import OpenAI
from pydantic import BaseModel, Field
from tqdm.auto import tqdm

In [2]:
load_dotenv()

True

In [3]:
logging.basicConfig(
    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
)

In [4]:
class Premise(BaseModel):
    text: str = Field(..., description="The premise of an argument.")
    sound: bool = Field(..., description="The premise is sound or not.")
    truthful: bool = Field(..., description="The premise is truthful or not.")


class Conclusion(BaseModel):
    text: str = Field(..., description="The conclusion of an argument.")
    logical: bool = Field(..., description="The conclusion is logical or not.")
    contradiction: str = Field(..., description="The contradiction in the conclusion.")


class TextDecomposition(BaseModel):
    premises: list[Premise] = Field(..., description="The premises of the argument.")
    conclusions: list[Conclusion] = Field(
        ..., description="The conclusions of the argument."
    )


class LogicalReasoner:
    def __init__(self, model: str = "gpt-4o"):
        self.model = model
        self.llm = instructor.patch(OpenAI())

    def _decompose_text(self, text: str):
        try:
            return self.llm.chat.completions.create(
                model=self.model,
                response_model=TextDecomposition,
                messages=[
                    {
                        "role": "system",
                        "content": "You are a logical reasoner. Decompose the given text into premises and conclusions.",
                    },
                    {"role": "user", "content": text},
                ],
            )
        except Exception as e:
            logging.error(f"Error in decomposing text: {e}")
            raise

    def check_premises(self, premises: list[Premise]):
        for premise in premises:
            logging.info(f"Checking premise: {premise.text}")
            try:
                check = self.llm.chat.completions.create(
                    model=self.model,
                    response_model=Premise,
                    messages=[
                        {
                            "role": "system",
                            "content": "You are a logical reasoner. Check if the given premise is sound and truthful.",
                        },
                        {
                            "role": "user",
                            "content": f"Check if the premise: {premise.text} is sound and truthful.",
                        },
                    ],
                )
                premise.sound = check.sound
                premise.truthful = check.truthful
                logging.info(
                    f"Soundness: {premise.sound}, Truthfulness: {premise.truthful}"
                )
            except Exception as e:
                logging.error(f"Error in checking premise: {e}")
                premise.sound = False
                premise.truthful = False

    def check_conclusions(self, conclusions: list[Conclusion]):
        for conclusion in conclusions:
            logging.info(f"Checking conclusion: {conclusion.text}")
            try:
                check = self.llm.chat.completions.create(
                    model=self.model,
                    response_model=Conclusion,
                    messages=[
                        {
                            "role": "system",
                            "content": "You are a logical reasoner. Check if the given conclusion is logical or contradicts general knowledge.",
                        },
                        {
                            "role": "user",
                            "content": f"Check if the conclusion: {conclusion.text} is logical or contradicts some general knowledge.",
                        },
                    ],
                )
                conclusion.logical = check.logical
                conclusion.contradiction = check.contradiction
                logging.info(
                    f"Logical: {conclusion.logical}, Contradiction: {conclusion.contradiction}"
                )
            except Exception as e:
                logging.error(f"Error in checking conclusion: {e}")
                conclusion.logical = False
                conclusion.contradiction = "Error in analysis"

    def analyze_text(self, text: str):
        try:
            decomposition = self._decompose_text(text)
            self.check_premises(decomposition.premises)
            self.check_conclusions(decomposition.conclusions)

            premises_valid = all(
                [
                    premise.sound and premise.truthful
                    for premise in decomposition.premises
                ]
            )
            conclusions_valid = all(
                [conclusion.logical for conclusion in decomposition.conclusions]
            )

            if not premises_valid:
                result = "Invalid: The premises are not all sound or truthful."
            elif not conclusions_valid:
                result = "Invalid: The conclusions are not all logical."
            else:
                result = "Valid: The argument is logically sound."

            return {
                "text": text,
                "premises": [p.text for p in decomposition.premises],
                "premises_valid": premises_valid,
                "conclusions": [c.text for c in decomposition.conclusions],
                "conclusions_valid": conclusions_valid,
                "result": result,
            }
        except Exception as e:
            logging.error(f"Error in analyzing text: {e}")
            return {
                "text": text,
                "premises": [],
                "premises_valid": False,
                "conclusions": [],
                "conclusions_valid": False,
                "result": f"Error in analysis: {str(e)}",
            }

In [5]:
texts = [
    # Simple valid argument (Modus Ponens)
    "If it is raining, then the grass is wet. It is raining. Therefore, the grass is wet.",
    # Simple invalid argument (Affirming the Consequent)
    "If it is raining, then the grass is wet. The grass is wet. Therefore, it is raining.",
    # Valid syllogism
    "All men are mortal. Socrates is a man. Therefore, Socrates is mortal.",
    # Valid argument with multiple premises
    "If the sun is shining, it's daytime. If it's daytime, it's not nighttime. The sun is shining. Therefore, it's not nighttime.",
    # Invalid argument (Hasty Generalization)
    "I've seen three crows, and they were all black. Therefore, all crows are black.",
    # Complex valid argument
    "All mammals are warm-blooded. All warm-blooded animals have hearts. Whales are mammals. Therefore, whales have hearts.",
    # Invalid argument with true premises and conclusion (Non Sequitur)
    "Paris is in France. France is in Europe. Therefore, the Eiffel Tower is tall.",
    # Valid argument with false premise
    "All plants can walk. Trees are plants. Therefore, trees can walk.",
    # Circular reasoning
    "The Bible is true because it says so in the Bible.",
    # Valid contrapositive argument
    "If it's raining, the ground is wet. The ground is not wet. Therefore, it's not raining.",
    # Complex invalid argument (Fallacy of the Undistributed Middle)
    "All roses are flowers. Some flowers fade quickly. Therefore, some roses fade quickly.",
    # Valid argument with multiple conclusions
    "If it's sunny, I'll go to the beach. If I go to the beach, I'll swim. It's sunny. Therefore, I'll go to the beach and I'll swim.",
    # Inductive reasoning (not deductively valid)
    "Every swan I've ever seen is white. Therefore, all swans are white.",
    # False dilemma
    "Either you're with us, or you're against us. You're not with us. Therefore, you're against us.",
    # Ad hominem fallacy
    "My opponent argues for stricter gun control laws, but he has never owned a gun. Therefore, his argument is invalid.",
]

In [6]:
reasoner = LogicalReasoner()
results = []

for text in tqdm(texts):
    result = reasoner.analyze_text(text)
    results.append(result)

df = pd.DataFrame(results)
pd.set_option("display.max_colwidth", None)

  0%|          | 0/15 [00:00<?, ?it/s]

In [7]:
df

Unnamed: 0,text,premises,premises_valid,conclusions,conclusions_valid,result
0,"If it is raining, then the grass is wet. It is raining. Therefore, the grass is wet.","[If it is raining, then the grass is wet., It is raining.]",False,"[Therefore, the grass is wet.]",False,Invalid: The premises are not all sound or truthful.
1,"If it is raining, then the grass is wet. The grass is wet. Therefore, it is raining.","[If it is raining, then the grass is wet., The grass is wet.]",False,"[Therefore, it is raining.]",False,Invalid: The premises are not all sound or truthful.
2,"All men are mortal. Socrates is a man. Therefore, Socrates is mortal.","[All men are mortal., Socrates is a man.]",True,"[Therefore, Socrates is mortal.]",True,Valid: The argument is logically sound.
3,"If the sun is shining, it's daytime. If it's daytime, it's not nighttime. The sun is shining. Therefore, it's not nighttime.","[If the sun is shining, it's daytime., If it's daytime, it's not nighttime., The sun is shining.]",True,"[Therefore, it's not nighttime.]",False,Invalid: The conclusions are not all logical.
4,"I've seen three crows, and they were all black. Therefore, all crows are black.","[I've seen three crows, and they were all black.]",True,"[Therefore, all crows are black.]",False,Invalid: The conclusions are not all logical.
5,"All mammals are warm-blooded. All warm-blooded animals have hearts. Whales are mammals. Therefore, whales have hearts.","[All mammals are warm-blooded., All warm-blooded animals have hearts., Whales are mammals.]",True,"[Therefore, whales have hearts.]",True,Valid: The argument is logically sound.
6,"Paris is in France. France is in Europe. Therefore, the Eiffel Tower is tall.","[Paris is in France., France is in Europe.]",True,"[Therefore, the Eiffel Tower is tall.]",True,Valid: The argument is logically sound.
7,"All plants can walk. Trees are plants. Therefore, trees can walk.","[All plants can walk., Trees are plants.]",False,"[Therefore, trees can walk.]",False,Invalid: The premises are not all sound or truthful.
8,The Bible is true because it says so in the Bible.,[The Bible says the Bible is true.],False,[The Bible is true.],False,Invalid: The premises are not all sound or truthful.
9,"If it's raining, the ground is wet. The ground is not wet. Therefore, it's not raining.","[If it's raining, the ground is wet., The ground is not wet.]",False,"[Therefore, it's not raining.]",False,Invalid: The premises are not all sound or truthful.


Here is Claude Sonnet 3.5 review on the above output:


| Index | Argument Type | Correct Assessment | Reasoner's Assessment | Analysis |
|-------|---------------|---------------------|------------------------|----------|
| 0 | Modus Ponens | Valid | Invalid | Incorrect. This is a valid argument form. |
| 1 | Affirming the Consequent | Invalid | Invalid | Correct, but for the wrong reason. It's logically invalid, not unsound. |
| 2 | Syllogism | Valid | Valid | Correct. |
| 3 | Complex Modus Ponens | Valid | Invalid | Incorrect. This is a valid argument form. |
| 4 | Hasty Generalization | Invalid | Invalid | Correct, but for the wrong reason. It's a weak inductive argument, not deductively invalid. |
| 5 | Complex Syllogism | Valid | Valid | Correct. |
| 6 | Non Sequitur | Invalid | Valid | Incorrect. The conclusion doesn't follow from the premises. |
| 7 | Valid with False Premise | Invalid | Invalid | Correct. The argument is logically valid but unsound due to a false premise. |
| 8 | Circular Reasoning | Invalid | Invalid | Correct, but the premise is oversimplified. |
| 9 | Modus Tollens | Valid | Invalid | Incorrect. This is a valid argument form. |
| 10 | Fallacy of the Undistributed Middle | Invalid | Valid | Incorrect. This argument form is not valid. |
| 11 | Complex Modus Ponens | Valid | Invalid | Incorrect. This is a valid argument form. |
| 12 | Inductive Reasoning | Invalid (deductively) | Invalid | Correct, but it's important to note this is an inductive argument. |
| 13 | False Dilemma | Invalid | Invalid | Correct, but for the wrong reason. The issue is the false dichotomy, not unsound premises. |
| 14 | Ad Hominem | Invalid | Invalid | Correct, but for the wrong reason. It's a logical fallacy, not unsound premises. |

Based on this analysis, we can conclude:

1. The code is not working correctly in many cases.
2. It often misclassifies valid arguments as invalid, especially for more complex forms like Modus Ponens and Modus Tollens.
3. It sometimes reaches the right conclusion (invalid argument) but for the wrong reasons, classifying logical fallacies or weak inductive arguments as having unsound premises.
4. It incorrectly validates some invalid arguments, such as the Non Sequitur and the Fallacy of the Undistributed Middle.
5. The reasoner seems to have difficulty distinguishing between soundness (truth of premises) and validity (logical structure).

To improve the logical reasoner, you should:

1. Refine the logic for recognizing valid argument forms, especially Modus Ponens and Modus Tollens.
2. Implement better distinction between logical validity and soundness of premises.
3. Add capability to recognize common logical fallacies.
4. Improve handling of inductive vs. deductive reasoning.
5. Enhance the system's ability to analyze more complex, multi-step arguments.

These changes would significantly improve the accuracy and usefulness of the logical reasoner.