In [2]:
import numpy as np
import random
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Load the corpus: every non-blank line of the text file becomes one document.
# The encoding is given explicitly so the read does not depend on the
# platform's default locale encoding.
# NOTE(review): UTF-8 is assumed for '../corpus.txt' -- confirm against the file.
with open('../corpus.txt', 'r', encoding='utf-8') as file:
    stripped_lines = [line.strip() for line in file]

# Drop blank lines: an empty string would otherwise be an empty "document"
# that can still be picked by the random search and printed as a result.
corpus = [line for line in stripped_lines if line]



query = "Somebody breached our contract and caused financial loss. What legal actions can we take?"

In [23]:
# Seed both random generators used by the search (`random` and `np.random`)
# so that a fresh "Restart & Run All" reproduces the same result.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

# Legal document corpus to search (the `corpus` list built when the file is read)
legal_documents = corpus

# User's issue as input query
user_issue = query

# TF-IDF Vectorizer for document-query similarity:
# unigrams + bigrams, English stop words removed, terms present in more than
# 80% of the texts ignored, vocabulary capped at 500 features.
vectorizer = TfidfVectorizer(ngram_range=(1, 2), max_df=0.8, max_features=500, stop_words='english')

# The query is appended as the last row so that it is vectorised with the
# same vocabulary and IDF weights as the documents.
combined_corpus = legal_documents + [user_issue]
combined_tfidf_matrix = vectorizer.fit_transform(combined_corpus)

document_tfidf_matrix = combined_tfidf_matrix[:-1]  # all rows except the query
query_tfidf_matrix = combined_tfidf_matrix[-1]      # the query row

# One cosine similarity per document, flattened to a 1-D array
similarity_scores = cosine_similarity(document_tfidf_matrix, query_tfidf_matrix).flatten()

# Cuckoo Search Parameters
num_nests = 10
num_iterations = 20
top_k = 3
pa = 0.25  # Probability of abandoning nests

# A nest holds `top_k` distinct document indices, so the corpus must contain
# at least that many documents. Fail early with a clear message instead of
# the opaque sampling error np.random.choice would raise below.
if len(legal_documents) < top_k:
    raise ValueError(
        f"top_k={top_k} documents requested but the corpus only has {len(legal_documents)}"
    )

# Initialize nests based on top-ranked documents.
# NOTE(review): `initial_candidates` contains exactly `top_k` indices and each
# nest samples `top_k` of them without replacement, so every nest starts as a
# reordering of the same highest-scoring documents.
initial_candidates = np.argsort(similarity_scores)[-top_k:]
nests = [list(np.random.choice(initial_candidates, size=top_k, replace=False)) for _ in range(num_nests)]

# Function to calculate fitness (similarity score sum)
def fitness(nest):
    """Return the summed similarity score of the distinct documents in ``nest``.

    Args:
        nest: sequence of integer document indices into ``similarity_scores``.

    Returns:
        Sum of ``similarity_scores`` over the unique indices in ``nest``.

    Each document is counted once. Summing repeated indices would let a nest
    that holds the best document several times outscore a nest of distinct
    documents. Working on the sorted unique indices also makes the result
    independent of the order of the indices inside the nest.
    """
    # dtype=int keeps an empty nest usable as an index array (sum is then 0.0)
    distinct_docs = np.unique(np.asarray(nest, dtype=int))
    return similarity_scores[distinct_docs].sum()

# Levy flight function to generate new solution
def levy_flight(nest):
    """Return a copy of ``nest`` with one randomly chosen slot replaced.

    The step is a uniform random substitution: one position of the nest is
    picked at random and filled with a document index drawn uniformly from
    the documents that are not already in the nest.

    Args:
        nest: list of document indices; it is not modified.

    Returns:
        A new list of the same length. If the nest is empty, or already
        contains every document, an unchanged copy is returned.
    """
    new_nest = nest.copy()
    if len(new_nest) == 0:
        return new_nest

    # Only offer documents that are not in the nest yet. Drawing from the
    # whole corpus could insert an index that is already present, producing
    # a nest that returns the same document more than once.
    already_in_nest = set(new_nest)
    candidates = [doc for doc in range(len(legal_documents)) if doc not in already_in_nest]
    if not candidates:
        return new_nest

    # Use the nest's own length rather than the global `top_k` for the slot
    slot = random.randint(0, len(new_nest) - 1)
    new_nest[slot] = random.choice(candidates)
    return new_nest

# Cuckoo Search Optimization Process

# Number of nests abandoned per iteration. It does not change between
# iterations, so it is computed once. It is capped at num_nests - 1 so the
# best nest always survives an abandonment round.
num_abandoned = min(int(pa * num_nests), max(num_nests - 1, 0))

for iteration in range(num_iterations):
    for i in range(num_nests):
        # Generate a new solution via Levy flight
        new_nest = levy_flight(nests[i])

        # If new solution is better, replace the current nest
        if fitness(new_nest) > fitness(nests[i]):
            nests[i] = new_nest

    # Abandon the `num_abandoned` worst nests and replace them with new random
    # solutions. Picking the nests to abandon at random could throw away the
    # best solution found so far (also in the very last iteration) and could
    # select the same nest twice.
    if num_abandoned > 0:
        worst_first = sorted(range(num_nests), key=lambda idx: fitness(nests[idx]))
        for nest_index in worst_first[:num_abandoned]:
            # An integer population samples from 0 .. len(legal_documents) - 1
            nests[nest_index] = list(np.random.choice(len(legal_documents), size=top_k, replace=False))

# Output the best-performing nest
best_nest = max(nests, key=fitness)
best_documents = [legal_documents[i] for i in best_nest]
for document in best_documents:
    print(document, end='\n\n')

# Performance Metrics Calculation

# Similarity cut-off: a document whose score reaches it is labelled relevant.
# NOTE(review): the labels below are derived from the same similarity scores
# that define the ranking. Because the scores are sorted in descending order,
# the labels are always a run of 1s followed by 0s, so the metrics computed
# from them are 1.0 whenever at least one document reaches the threshold.
# They evaluate the TF-IDF ranking, not the nest selected by the search.
relevance_threshold = 0.159  # Adjust based on similarity score distribution

# Document indices ordered from most to least similar to the query,
# and the similarity scores in that same order
ranked_indices = np.argsort(similarity_scores)[::-1]
ranked_scores = similarity_scores[ranked_indices]

# 1 where the score reaches the threshold, 0 otherwise (plain Python ints)
relevance_labels = (ranked_scores >= relevance_threshold).astype(int).tolist()

# Reciprocal rank of the first relevant entry in a ranked label list
def mean_reciprocal_rank(relevance_labels):
    """Return ``1 / rank`` of the first label equal to 1 (ranks start at 1).

    Args:
        relevance_labels: iterable of 0/1 labels in ranked order.

    Returns:
        The reciprocal of the 1-based position of the first relevant label,
        or 0 when no label equals 1. Only a single ranked list is scored;
        no averaging over several queries takes place.
    """
    first_hit_rank = next(
        (
            position
            for position, is_relevant in enumerate(relevance_labels, start=1)
            if is_relevant == 1
        ),
        None,
    )
    if first_hit_rank is None:
        return 0  # No relevant document found
    return 1 / first_hit_rank

# Average precision over the relevant positions of a ranked label list
def mean_average_precision(relevance_labels):
    """Return the average of precision-at-rank taken at every relevant position.

    Args:
        relevance_labels: iterable of 0/1 labels in ranked order.

    Returns:
        The mean of ``hits_so_far / rank`` over the positions whose label is 1,
        or 0 when no label equals 1. Only a single ranked list is scored;
        no averaging over several queries takes place.
    """
    precisions_at_hits = []
    for position, is_relevant in enumerate(relevance_labels, start=1):
        if is_relevant != 1:
            continue
        # This hit is number len(precisions_at_hits) + 1 within the first `position` results
        hits_so_far = len(precisions_at_hits) + 1
        precisions_at_hits.append(hits_so_far / position)

    if not precisions_at_hits:
        return 0
    return sum(precisions_at_hits) / len(precisions_at_hits)

# Score the thresholded ranking with both metrics
mrr, map_score = (
    mean_reciprocal_rank(relevance_labels),
    mean_average_precision(relevance_labels),
)

# Report both values rounded to two decimals
print(f"\nMean Reciprocal Rank (MRR): {mrr:.2f}")
print(f"Mean Average Precision (MAP): {map_score:.2f}")


Section 102, Contract Law: This section covers implied contracts and quasi-contracts, focusing on obligations created without explicit agreements. Subsection 102.1 explains situations of unjust enrichment where courts may enforce restitution.

Section 2601, Contract Law: Defines contracts related to sales and services. Subsection 2601.1 outlines general contract formation and validity. Subsection 2601.2 specifies terms for service contracts and the rights of the parties involved.

Section 2201, Criminal Law: Defines offenses related to fraud and financial crimes. Subsection 2201.1 covers embezzlement, insider trading, and fraud. Subsection 2201.2 outlines the penalties for financial crimes and white-collar offenses.


Mean Reciprocal Rank (MRR): 1.00
Mean Average Precision (MAP): 1.00
