In [1]:
import numpy as np
import random
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Legal reference corpus: 54 plain-text entries, one string per statute section.
# Every entry follows the same pattern - "Section <number>, <area of law>: ..."
# followed by two numbered subsections. An entry's 0-based position in this list
# is the document index that the retrieval code prints as "Document <idx>".
corpus = [
    "Section 101, Contract Law: Governs contract formation requirements, including offer, acceptance, and consideration. Subsection 101.1 details enforceable contract types. Subsection 101.2 addresses remedies for breach, specifying compensatory, punitive, and nominal damages.",
    "Section 102, Contract Law: This section covers implied contracts and quasi-contracts, focusing on obligations created without explicit agreements. Subsection 102.1 explains situations of unjust enrichment where courts may enforce restitution.",
    "Section 201, Tort Law: Provides framework for civil wrongs like negligence, defamation, and nuisance. Subsection 201.1 outlines the 'duty of care' in negligence cases. Subsection 201.2 addresses defenses, including consent and contributory negligence.",
    "Section 202, Tort Law: Discusses personal injury claims, property damage, and emotional distress. Subsection 202.1 elaborates on defamation and the standards of proof for libel versus slander. Subsection 202.2 specifies trespass laws and liabilities.",
    "Section 301, Criminal Law: Classifies crimes into felonies, misdemeanors, and infractions. Subsection 301.1 defines the distinction based on severity and potential punishment. Subsection 301.2 establishes sentencing guidelines, including parole and probation.",
    "Section 302, Criminal Law: Governs rights of the accused, including the right to counsel and fair trial procedures. Subsection 302.1 emphasizes protection from double jeopardy. Subsection 302.2 outlines the Miranda rights and admissibility of evidence.",
    "Section 401, Constitutional Law: Establishes the government's structure, fundamental rights, and freedoms. Subsection 401.1 includes protections for freedom of speech and press. Subsection 401.2 emphasizes equal protection and non-discrimination.",
    "Section 402, Constitutional Law: Covers the separation of powers among executive, legislative, and judicial branches. Subsection 402.1 enforces checks and balances to prevent abuse of power. Subsection 402.2 details processes for amending the constitution.",
    "Section 501, Environmental Law: Regulates emissions and pollution control. Subsection 501.1 mandates limits for air and water pollutants. Subsection 501.2 outlines penalties for non-compliance, with a focus on environmental conservation.",
    "Section 502, Environmental Law: Addresses waste management and hazardous substances. Subsection 502.1 mandates the safe disposal of hazardous materials. Subsection 502.2 provides guidelines for renewable energy incentives and sustainability practices.",
    "Section 601, Employment Law: Protects workers' rights, covering minimum wage, overtime, and workplace safety. Subsection 601.1 sets standards for fair labor practices. Subsection 601.2 outlines legal recourse for workplace harassment and discrimination.",
    "Section 602, Employment Law: Discusses employee benefits, including health insurance, retirement, and paid leave. Subsection 602.1 mandates employers' responsibilities under the Family and Medical Leave Act. Subsection 602.2 provides whistleblower protections.",
    "Section 701, Family Law: Manages domestic relationships, including marriage, divorce, and child custody. Subsection 701.1 outlines grounds for divorce and annulment. Subsection 701.2 details custody arrangements and visitation rights.",
    "Section 702, Family Law: Addresses adoption, foster care, and guardianship. Subsection 702.1 explains adoption procedures and parental rights. Subsection 702.2 governs child support obligations and enforcement.",
    "Section 801, Tax Law: Regulates individual and corporate income taxes, including deductions and credits. Subsection 801.1 outlines reporting requirements. Subsection 801.2 addresses penalties for tax evasion and underpayment.",
    "Section 802, Tax Law: Governs property taxes, estate taxes, and gift taxes. Subsection 802.1 sets rules for valuation and assessment. Subsection 802.2 includes exemptions and deductions for primary residences and charitable donations.",
    "Section 901, Real Estate Law: Governs property ownership, land use, and zoning laws. Subsection 901.1 specifies requirements for deeds and title transfers. Subsection 901.2 covers landlord-tenant relations and lease agreements.",
    "Section 902, Real Estate Law: Discusses mortgage financing, foreclosure, and real estate transactions. Subsection 902.1 mandates disclosure of property defects. Subsection 902.2 outlines foreclosure procedures and homeowner rights.",
    "Section 1001, Intellectual Property Law: Covers patents, trademarks, copyrights, and trade secrets. Subsection 1001.1 details protection for inventions and industrial designs. Subsection 1001.2 addresses trademark infringement and fair use.",
    "Section 1002, Intellectual Property Law: Governs licensing, royalties, and ownership transfers. Subsection 1002.1 explains exclusive and non-exclusive licenses. Subsection 1002.2 outlines remedies for copyright and patent violations.",
    "Section 1101, Immigration Law: Establishes rules for visas, asylum, and naturalization. Subsection 1101.1 details eligibility for work visas and residency. Subsection 1101.2 provides grounds for deportation and appeal processes.",
    "Section 1102, Immigration Law: Covers refugee and asylum procedures. Subsection 1102.1 outlines protection for individuals fleeing persecution. Subsection 1102.2 specifies rights and responsibilities for temporary residents.",
    "Section 1201, Health Law: Regulates patient rights, medical consent, and privacy. Subsection 1201.1 mandates informed consent before medical procedures. Subsection 1201.2 establishes HIPAA standards for patient confidentiality.",
    "Section 1202, Health Law: Discusses public health policies, vaccination, and disease control. Subsection 1202.1 mandates vaccination requirements for schools. Subsection 1202.2 provides guidelines for handling public health emergencies.",
    "Section 1301, Education Law: Covers K-12 and higher education standards, funding, and access. Subsection 1301.1 mandates equal access to education. Subsection 1301.2 outlines students' rights, including protections against discrimination.",
    "Section 1302, Education Law: Discusses special education and accommodations for disabilities. Subsection 1302.1 mandates Individualized Education Plans (IEPs) for eligible students. Subsection 1302.2 sets standards for teacher qualifications.",
    "Section 1401, Consumer Protection Law: Regulates advertising, product safety, and fair practices. Subsection 1401.1 mandates truth in advertising. Subsection 1401.2 provides guidelines for product recalls and consumer rights.",
    "Section 1402, Consumer Protection Law: Discusses debt collection practices, financial disclosures, and lending standards. Subsection 1402.1 mandates clear communication of loan terms. Subsection 1402.2 provides protections against predatory lending.",
    "Section 1501, Cyber Law: Regulates digital privacy, cybersecurity, and online crimes. Subsection 1501.1 defines unauthorized access to computer systems. Subsection 1501.2 covers data protection and breach notification laws.",
    "Section 1502, Cyber Law: Addresses intellectual property issues in the digital age. Subsection 1502.1 covers digital copyright infringement. Subsection 1502.2 specifies the legal protections for digital content creators.",
    "Section 1601, International Law: Governs relations between nations and international treaties. Subsection 1601.1 discusses sovereignty and diplomatic immunity. Subsection 1601.2 covers the rights and duties of states under international law.",
    "Section 1602, International Law: Deals with international dispute resolution mechanisms. Subsection 1602.1 discusses arbitration and mediation. Subsection 1602.2 provides guidelines for state-to-state legal proceedings.",
    "Section 1701, Environmental Law: Establishes regulations for managing natural resources and waste. Subsection 1701.1 mandates pollution control standards. Subsection 1701.2 outlines regulations for carbon emissions reductions.",
    "Section 1702, Environmental Law: Governs biodiversity conservation and wildlife protection. Subsection 1702.1 provides guidelines for protected areas. Subsection 1702.2 addresses the illegal wildlife trade and conservation efforts.",
    "Section 1801, Family Law: Addresses child welfare, protection, and adoption. Subsection 1801.1 sets standards for foster care placements. Subsection 1801.2 establishes adoption procedures and the rights of biological and adoptive parents.",
    "Section 1802, Family Law: Regulates marital property, spousal support, and divorce. Subsection 1802.1 discusses the division of assets. Subsection 1802.2 outlines spousal maintenance and child custody provisions.",
    "Section 1901, Immigration Law: Regulates the entry and residency of foreign nationals. Subsection 1901.1 specifies visa categories and requirements. Subsection 1901.2 addresses deportation procedures and appeals.",
    "Section 1902, Immigration Law: Covers refugee and asylum policies. Subsection 1902.1 provides protections for asylum seekers. Subsection 1902.2 details the process for refugee status determination and resettlement.",
    "Section 2001, Tax Law: Governs corporate tax rates, filings, and deductions. Subsection 2001.1 specifies the taxation structure for corporations. Subsection 2001.2 addresses tax credits and deductions available to businesses.",
    "Section 2002, Tax Law: Regulates the taxation of estate and inheritance. Subsection 2002.1 sets tax rates on estates and gifts. Subsection 2002.2 covers inheritance law and estate distribution procedures.",
    "Section 2101, Real Estate Law: Defines land ownership, leases, and zoning regulations. Subsection 2101.1 outlines property ownership rights and responsibilities. Subsection 2101.2 governs commercial leases and landlord-tenant relations.",
    "Section 2102, Real Estate Law: Addresses mortgage regulations and foreclosure procedures. Subsection 2102.1 defines the mortgage process, including terms and agreements. Subsection 2102.2 specifies foreclosure rights and procedures for homeowners.",
    "Section 2201, Criminal Law: Defines offenses related to fraud and financial crimes. Subsection 2201.1 covers embezzlement, insider trading, and fraud. Subsection 2201.2 outlines the penalties for financial crimes and white-collar offenses.",
    "Section 2202, Criminal Law: Regulates drug-related offenses and controlled substances. Subsection 2202.1 defines the possession, distribution, and manufacturing of illegal drugs. Subsection 2202.2 establishes sentencing guidelines for drug-related crimes.",
    "Section 2301, Health Law: Defines patient rights and healthcare provider responsibilities. Subsection 2301.1 establishes the right to informed consent. Subsection 2301.2 addresses the confidentiality of patient records and the role of HIPAA.",
    "Section 2302, Health Law: Regulates healthcare financing, including insurance and public programs. Subsection 2302.1 defines the Affordable Care Act and its implementation. Subsection 2302.2 outlines eligibility for public health programs like Medicaid.",
    "Section 2401, Consumer Protection Law: Regulates consumer rights and deceptive practices. Subsection 2401.1 mandates truthful advertising and labeling. Subsection 2401.2 defines fair lending practices and prohibits predatory lending.",
    "Section 2402, Consumer Protection Law: Deals with product liability and safety. Subsection 2402.1 outlines the standards for safe consumer products. Subsection 2402.2 addresses warranties, refunds, and consumer protection in case of defects.",
    "Section 2501, Education Law: Sets standards for public education, including curriculum and teacher qualifications. Subsection 2501.1 specifies minimum standards for K-12 education. Subsection 2501.2 covers teacher certifications and professional development.",
    "Section 2502, Education Law: Defines higher education standards and accreditation. Subsection 2502.1 covers student financial aid programs. Subsection 2502.2 establishes accreditation criteria for colleges and universities.",
    "Section 2601, Contract Law: Defines contracts related to sales and services. Subsection 2601.1 outlines general contract formation and validity. Subsection 2601.2 specifies terms for service contracts and the rights of the parties involved.",
    "Section 2602, Contract Law: Regulates breach of contract, including damages and remedies. Subsection 2602.1 defines compensatory damages, punitive damages, and nominal damages. Subsection 2602.2 outlines remedies for breach of contract in commercial transactions.",
    "Section 2701, Maritime Law: Regulates the shipping industry, including vessel registration and operations. Subsection 2701.1 defines the requirements for vessel licensing and registration. Subsection 2701.2 covers maritime insurance and liability for accidents at sea.",
    "Section 2702, Maritime Law: Deals with maritime offenses, including piracy and illegal fishing. Subsection 2702.1 defines piracy and maritime terrorism. Subsection 2702.2 specifies penalties for illegal fishing and resource exploitation in international waters."
]
# Free-text description of the user's legal problem; it is matched against
# every corpus entry to find the most relevant sections.
query = "Somebody breached our contract and caused financial loss. What legal actions can we take?"

In [2]:
# Step 1: The legal corpus is the search space
legal_documents = corpus
num_documents = len(legal_documents)

# Step 2: The user's issue is the query to match against the corpus
user_issue = query

# Step 3: TF-IDF over unigrams and bigrams, English stop words removed,
# terms occurring in more than 85% of the texts ignored.
# The query is appended as the last row and fitted together with the
# documents, so both share one vocabulary (and the query's terms take part
# in the IDF statistics).
combined_corpus = [*legal_documents, user_issue]
vectorizer = TfidfVectorizer(stop_words='english', max_df=0.85, ngram_range=(1, 2))
combined_tfidf_matrix = vectorizer.fit_transform(combined_corpus)

# Rows 0..num_documents-1 are the documents; row num_documents is the query
document_tfidf_matrix = combined_tfidf_matrix[:num_documents]
query_tfidf_matrix = combined_tfidf_matrix[num_documents]

# One cosine similarity per document, flattened from a column to a 1-D array
similarity_scores = cosine_similarity(document_tfidf_matrix, query_tfidf_matrix).flatten()

# Step 4: Turn similarity into a per-document "distance" for ACO
# (despite the name this is a 1-D vector: one entry per document)
distance_matrix = 1 - similarity_scores

# ACO parameters
num_ants, num_iterations = 5, 20
alpha = 1                # Exponent applied to pheromone levels
beta = 3                 # Exponent applied to inverse distance (raised for more sensitivity to similarity)
evaporation_rate = 0.2   # Fraction of pheromone removed by each evaporation step
pheromone_deposit = 50   # Pheromone added per visit to a document
top_k = 3                # Number of relevant documents to retrieve

# Every document starts with the same pheromone share, 1 / num_documents
pheromone_matrix = np.full(num_documents, 1.0) / num_documents

# Function to choose the next document based on probabilities
def choose_next_document(pheromones, distances, alpha, beta):
    """Randomly pick one document index, favoring strong pheromone and short distance.

    The selection weight of document ``i`` is
    ``pheromones[i] ** alpha * (1 / distances[i]) ** beta``; the weights are
    normalized into probabilities and a single index is drawn with
    ``np.random.choice``.

    Args:
        pheromones: 1-D array-like of pheromone levels, one per document.
        distances: 1-D array-like of distances (1 - similarity), same length.
        alpha: Exponent controlling the influence of the pheromone levels.
        beta: Exponent controlling the influence of the inverse distance.

    Returns:
        The index of the chosen document, in ``range(len(pheromones))``.
    """
    pheromone_levels = np.asarray(pheromones, dtype=float)
    # A document identical to the query has distance 0. Clamp to a tiny
    # positive value so it gets a huge finite weight instead of inf, which
    # would turn the normalized probabilities into NaN and make the draw fail.
    safe_distances = np.maximum(np.asarray(distances, dtype=float), 1e-10)

    weights = (pheromone_levels ** alpha) * (1 / safe_distances) ** beta
    # The candidate range comes from the inputs themselves, not from a global.
    num_candidates = len(weights)

    total = weights.sum()
    if not np.isfinite(total) or total <= 0:
        # No usable preference (e.g. all pheromone is zero): pick uniformly.
        return np.random.choice(num_candidates)
    return np.random.choice(num_candidates, p=weights / total)

# Function to update pheromone levels
def update_pheromones(pheromones, documents_visited, evaporation_rate, pheromone_deposit):
    """Evaporate all pheromone, then reinforce the visited documents, in place.

    Args:
        pheromones: Array of pheromone levels; modified in place.
        documents_visited: Iterable of document indices. An index that
            appears several times receives the deposit once per occurrence.
        evaporation_rate: Fraction of every pheromone level that is removed.
        pheromone_deposit: Amount added for each visit.

    Returns:
        None. The result is written into ``pheromones``.
    """
    retained_fraction = 1 - evaporation_rate
    pheromones *= retained_fraction
    for doc_index in documents_visited:
        pheromones[doc_index] = pheromones[doc_index] + pheromone_deposit

# ACO Algorithm for document retrieval
def ant_colony_optimization(num_iterations, num_ants, distance_matrix, pheromone_matrix, alpha, beta, evaporation_rate, pheromone_deposit, top_k):
    """Retrieve the ``top_k`` most relevant documents with ant colony optimization.

    In every iteration each ant selects a path of distinct documents, drawn
    with probability proportional to
    ``pheromone ** alpha * (1 / distance) ** beta``. After all ants have
    walked, pheromone evaporates once and every visited document receives a
    deposit. The ``top_k`` highest-scoring paths seen so far are retained;
    their documents are finally ranked by similarity.

    Args:
        num_iterations: Number of colony iterations.
        num_ants: Number of ants (paths) per iteration.
        distance_matrix: 1-D array of per-document distances, where
            distance = 1 - similarity to the query.
        pheromone_matrix: 1-D float array of pheromone levels, one per
            document. Updated in place.
        alpha: Exponent controlling the influence of pheromone.
        beta: Exponent controlling the influence of inverse distance.
        evaporation_rate: Fraction of pheromone removed per iteration.
        pheromone_deposit: Pheromone added to a document per visiting ant.
        top_k: Number of documents per path and number of indices returned.

    Returns:
        A list of at most ``top_k`` document indices, most similar first.
    """
    distances = np.asarray(distance_matrix, dtype=float)
    num_docs = distances.shape[0]
    # An ant cannot visit more distinct documents than exist.
    path_length = min(top_k, num_docs)
    if path_length <= 0:
        return []

    # Similarity is recovered from the argument instead of a module global,
    # so the function only depends on what it is given.
    similarities = 1 - distances
    # Clamp so a perfect match (distance 0) yields a large finite weight
    # rather than a division by zero.
    heuristic = (1 / np.maximum(distances, 1e-10)) ** beta

    best_paths = []  # (total similarity, path) for the best paths so far
    for _ in range(num_iterations):
        weights = (pheromone_matrix ** alpha) * heuristic
        total = weights.sum()
        usable = (
            np.isfinite(total)
            and total > 0
            and np.count_nonzero(weights) >= path_length
        )
        # None makes np.random.choice fall back to a uniform draw.
        probabilities = weights / total if usable else None

        # replace=False: a document appears at most once per path, so its
        # similarity cannot be counted several times in the path score.
        ant_paths = [
            np.random.choice(num_docs, size=path_length, replace=False, p=probabilities)
            for _ in range(num_ants)
        ]

        # Evaporate once per iteration (not once per ant), then deposit.
        pheromone_matrix *= (1 - evaporation_rate)
        for path in ant_paths:
            pheromone_matrix[path] += pheromone_deposit

        # Keep only the top_k best-scoring paths; a bounded list means the
        # admission bar rises as better paths are found.
        scored_paths = [
            (float(similarities[path].sum()), path.tolist()) for path in ant_paths
        ]
        best_paths = sorted(
            best_paths + scored_paths, key=lambda entry: entry[0], reverse=True
        )[:top_k]

    candidates = {doc for _, path in best_paths for doc in path}
    ranked = sorted(candidates, key=lambda doc: similarities[doc], reverse=True)
    return ranked[:top_k]

# Run the ACO search; the result is a list of indices into legal_documents
best_documents_indices = ant_colony_optimization(
    num_iterations,
    num_ants,
    distance_matrix,
    pheromone_matrix,
    alpha,
    beta,
    evaporation_rate,
    pheromone_deposit,
    top_k,
)

# Report the retrieved documents to the user
print(f"\nTop {top_k} most relevant legal documents for your issue:")
for idx in best_documents_indices:
    print(f"Document {idx}: {legal_documents[idx]}")

# Performance Metrics Calculation
#
# The ranking evaluated below is the plain cosine-similarity ordering of ALL
# documents; it does not use best_documents_indices. Relevance is decided by
# thresholding the very scores that define the order, so every relevant
# label precedes every non-relevant one.
# NOTE(review): if the metrics are meant to evaluate the ACO retrieval, the
# labels would need to be built from best_documents_indices - confirm intent.

# Minimum similarity for a document to count as relevant
relevance_threshold = 0.15929

# Document indices from most to least similar, with their scores in the same order
ranked_indices = np.flip(np.argsort(similarity_scores))
ranked_scores = similarity_scores[ranked_indices]

# 1 = relevant (score at or above the threshold), 0 = not relevant
relevance_labels = [int(score >= relevance_threshold) for score in ranked_scores]

# Calculate Mean Reciprocal Rank (MRR)
def mean_reciprocal_rank(relevance_labels):
    """Return the reciprocal rank of the first relevant entry of one ranking.

    Args:
        relevance_labels: Iterable of 0/1 labels in ranked order, where 1
            marks a relevant document. Ranks start at 1.

    Returns:
        ``1 / rank`` of the first label equal to 1, or 0 if there is none.
    """
    reciprocal_ranks = (
        1 / position
        for position, is_relevant in enumerate(relevance_labels, start=1)
        if is_relevant == 1
    )
    # Only the first hit matters; 0 is the fallback when nothing is relevant.
    return next(reciprocal_ranks, 0)

# Score the similarity-ordered relevance labels and report the value
# rounded to two decimals.
mrr = mean_reciprocal_rank(relevance_labels)
print(f"\nMean Reciprocal Rank (MRR): {mrr:.2f}")

# Calculate Mean Average Precision (MAP)
def mean_average_precision(relevance_labels):
    """Return the average precision of one ranked list of relevance labels.

    Precision is measured at the rank of every relevant entry (relevant
    entries seen so far divided by the rank) and the measurements are
    averaged over the number of relevant entries.

    Args:
        relevance_labels: Iterable of 0/1 labels in ranked order, where 1
            marks a relevant document. Ranks start at 1.

    Returns:
        The average precision, or 0 if no label equals 1.
    """
    hits = 0
    precision_total = 0
    for position, is_relevant in enumerate(relevance_labels, start=1):
        if is_relevant == 1:
            hits += 1
            precision_total += hits / position

    if hits > 0:
        return precision_total / hits
    return 0

# Average precision over the same similarity-ordered relevance labels,
# reported rounded to two decimals.
map_score = mean_average_precision(relevance_labels)
print(f"Mean Average Precision (MAP): {map_score:.2f}")



Top 3 most relevant legal documents for your issue:
Document 51: Section 2602, Contract Law: Regulates breach of contract, including damages and remedies. Subsection 2602.1 defines compensatory damages, punitive damages, and nominal damages. Subsection 2602.2 outlines remedies for breach of contract in commercial transactions.
Document 50: Section 2601, Contract Law: Defines contracts related to sales and services. Subsection 2601.1 outlines general contract formation and validity. Subsection 2601.2 specifies terms for service contracts and the rights of the parties involved.
Document 27: Section 1402, Consumer Protection Law: Discusses debt collection practices, financial disclosures, and lending standards. Subsection 1402.1 mandates clear communication of loan terms. Subsection 1402.2 provides protections against predatory lending.

Mean Reciprocal Rank (MRR): 0.00
Mean Average Precision (MAP): 0.00
