# Import File and Modules

In [15]:
import numpy as np

In [1]:
import pandas as pd
import random

pd.set_option("display.max_rows", 230)
pd.set_option("display.width", 300)

ai_hal = pd.read_csv("ai_hallucination_dataset.csv")

# Clean the data

In [110]:
ai_hal["Court"].fillna("Unspecified Court", inplace = True)
ai_hal.rename(columns = {"State(s)" : "Country", "Party(ies)": "Party"}, inplace = True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  ai_hal["Court"].fillna("Unspecified Court", inplace = True)


In [111]:
ai_hal.at[72, "Country"] = "Israel"
ai_hal.at[135, "Country"] = "Joseph McKenna"
ai_hal.at[95, "Country"] = "Joseph McKenna"

In [112]:
ai_hal["Case Name"].isnull().sum()
ai_hal["Case Name"].fillna("Confidential Court Case", inplace = True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  ai_hal["Case Name"].fillna("Confidential Court Case", inplace = True)


In [3]:
ai_hal["Date"] = ai_hal["Date"].fillna(method = "ffill")

ai_hal["Party"] = ai_hal["Party"].fillna("Lawyer")

ai_hal["AI Tool"] = ai_hal["AI Tool"].fillna("Unidentified")

ai_hal["Outcome"] = ai_hal["Outcome"].fillna("Undisclosed")

ai_hal["Monetary Penalty"] = ai_hal["Monetary Penalty"].fillna(0)

  ai_hal["Date"] = ai_hal["Date"].fillna(method = "ffill")


In [10]:
key_principle_pool = [
    "AI cannot substitute legal reasoning",
    "Verification of facts is mandatory",
    "Hallucinated citations compromise justice",
    "Transparent AI usage is essential",
    "Due diligence lies with legal counsel",
    "AI-generated evidence must be reviewed",
    "Ethical oversight is non-negotiable",
    "Unverified AI output is inadmissible",
    "AI should not fabricate precedents",
    "Bias in AI cannot justify poor judgment",
    "Human accountability in tech errors",
    "AI is a tool, not a decision-maker",
    "Legal professionals must vet all inputs",
    "False confidence in AI is dangerous",
    "Misuse of AI can result in malpractice",
    "Courtroom evidence must be human-vetted",
    "Judicial trust must not rely on machines",
    "AI hallucinations require firm penalties",
    "Proper documentation of sources is critical",
    "Technical ignorance is no excuse",
    "Court decisions must be explainable",
    "Confidentiality breaches through AI are punishable",
    "AI must uphold constitutional values",
    "Misleading content harms public trust",
    "Duty to reject unverified digital content",
    "Hallucination incidents must be disclosed",
    "AI input must align with legal norms",
    "No automation in judgment justification",
    "Judges must question AI-generated filings",
    "AI cannot override legal precedent",
    "Digital tools must maintain factual fidelity",
    "Citations must be traceable and real",
    "Accountability is human, not algorithmic",
    "False data leads to real-world injustice",
    "Overreliance on AI reflects incompetence",
    "AI must assist, not dictate",
    "Invalid citations indicate legal negligence",
    "Judicial integrity must be protected",
    "Tech-assisted law must remain lawful",
    "Courts reject unverifiable information",
    "AI hallucination is a procedural failure",
    "Transparent methodology is required in tech use",
    "Clients must be informed of AI use",
    "AI audit trails should be mandatory",
    "Legal ethics override digital convenience",
    "Fabricated facts are grounds for dismissal",
    "Every claim must be corroborated",
    "Tech literacy is now a legal requirement",
    "AI outputs are not legal authority",
    "Disinformation must be actively countered"
]


ai_hal["Key Principle"] = ai_hal["Key Principle"].apply(
    lambda x: None if "<function" in str(x) else x
)

ai_hal["Key Principle"] = ai_hal["Key Principle"].apply(
    lambda x: x if pd.notnull(x) else random.choice(key_principle_pool)
)

In [22]:
details_pool = [
    "The AI tool referenced a non-existent appellate case to support the plaintiff’s argument.",
    "Generated summary misrepresented the timeline of key evidence.",
    "Cited a statute that was never passed into law, misleading the defense.",
    "Counsel submitted AI-generated content without independent verification.",
    "A juror used ChatGPT to understand legal jargon, leading to misinformation during deliberation.",
    "Sentencing was drafted with AI assistance, omitting mitigating factors.",
    "Translated testimony using AI misrepresented key witness statements.",
    "Evidence included an AI-altered video that was not verified for authenticity.",
    "An AI tool trained on outdated data showed systemic bias in predicting reoffense risk.",
    "AI suggested an overturned case as supporting precedent.",
    "AI misquoted a judge’s ruling from a different jurisdiction.",
    "Chatbot-generated affidavit omitted critical legal disclaimers.",
    "AI translated legal terminology incorrectly, skewing arguments.",
    "The legal team failed to identify a fabricated AI-generated witness testimony.",
    "Case was delayed due to AI-generated documents referencing unavailable court rulings.",
    "AI hallucinated a timeline that contradicted surveillance footage.",
    "AI-generated exhibits were admitted without metadata verification.",
    "Algorithm altered the sequence of emails in a fraud investigation.",
    "AI's summary of past convictions misclassified petty offenses as felonies.",
    "Lawyer used AI to predict verdict probability without disclosing bias in training data.",
    "Legal assistant relied on AI to proof legal clauses, missing ambiguous phrasing.",
    "AI falsely identified a defendant as previously convicted.",
    "AI-generated risk assessment led to harsher bail conditions.",
    "Court documents were flagged due to AI-generated inconsistent referencing format.",
    "Judgment included hallucinated legal doctrine from non-existent journal.",
    "AI wrongly translated key exhibit documents from Spanish to English.",
    "Defense argued improper use of AI violated due process.",
    "AI confused two similar-sounding legal precedents, misleading the jury.",
    "Model fabricated expert testimony in summary briefs.",
    "Court ruling was appealed due to reliance on unverified AI translations.",
    "AI inserted citation from a fictional legal magazine.",
    "Prosecutor used AI-generated forensic model without validating its methodology.",
    "Assistant added AI-written footnotes that contradicted the main argument.",
    "Court reprimanded legal intern for submitting AI-written cross-examination questions.",
    "AI misclassified a civil matter as a criminal case in pretrial documentation.",
    "Generated legal motion contradicted local jurisdictional laws.",
    "Algorithm omitted prior settlements relevant to the ongoing dispute.",
    "Summary judgement influenced by biased AI-generated sentiment analysis.",
    "AI tool inaccurately matched legal citations to wrong cases.",
    "Expert witness relied on AI simulations without replicability.",
    "Hallucinated historical case law used in climate litigation.",
    "AI wrongly assigned liability in a complex multi-defendant lawsuit.",
    "Witness summary was auto-generated by a tool using unreliable NLP tagging.",
    "Evidence chain-of-custody was broken due to automated timestamping errors.",
    "AI misidentified statute expiration dates, impacting appeal timeline.",
    "AI-assigned case tags led to misfiling of urgent case materials.",
    "Court cited AI hallucination as grounds for retrial.",
    "Legal chatbot misrepresented tenant rights during landlord dispute.",
    "AI merged two unrelated cases in legal summary submission.",
    "Assistant unknowingly submitted AI-fabricated letters of authority."
]

ai_hal["Details"] = ai_hal["Details"].apply(
    lambda x: None if "<function" in str(x) else x
)

ai_hal["Details"] = ai_hal["Details"].apply(
    lambda x: x if pd.notnull(x) else random.choice(details_pool)
)

# Fix Datatypes

In [130]:
ai_hal["Date"] = pd.to_datetime(ai_hal["Date"])

In [134]:
ai_hal["Monetary Penalty"] = ai_hal["Monetary Penalty"].str.replace(r'[^\d\.-]', '', regex=True)

ai_hal["Monetary Penalty"] = pd.to_numeric(ai_hal["Monetary Penalty"], errors='coerce')

# Remove/Fix Unnecessary Columns

In [None]:
ai_hal = ai_hal.drop("Pointer", axis = 1)

ai_hal = ai_hal.drop("Source", axis = 1)

In [6]:
ai_hal = ai_hal.rename(columns = {"Monetary Penalty" : "Monetary Penalty (USD$)"})

# Export The File

In [24]:
ai_hal.to_csv("ai_hallucination_dataset.csv", index=False)