In [1]:
import numpy as np
import random
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Open the file in read mode
with open('../corpus.txt', 'r') as file:
    # Read the lines from the file and store them in a list
    corpus = file.readlines()

# Optionally, you can remove newline characters from each line
corpus = [line.strip() for line in corpus]



query = "Somebody breached our contract and caused financial loss. What legal actions can we take?"

In [2]:
# Sample Legal document corpus for demonstration
legal_documents = corpus

# User's issue as input query
user_issue = query

# TF-IDF Vectorizer for document-query similarity
vectorizer = TfidfVectorizer(ngram_range=(1, 2), max_df=0.8, max_features=500, stop_words='english')
combined_corpus = legal_documents + [user_issue]
combined_tfidf_matrix = vectorizer.fit_transform(combined_corpus)

document_tfidf_matrix = combined_tfidf_matrix[:-1]
query_tfidf_matrix = combined_tfidf_matrix[-1]

similarity_scores = cosine_similarity(document_tfidf_matrix, query_tfidf_matrix).flatten()

# Bat Algorithm Parameters
num_bats = 5
num_iterations = 20
top_k = 3
frequency_min, frequency_max = 0, 2
alpha, gamma = 0.9, 0.9

# Initialize bats (documents) based on similarity scores
initial_candidates = np.argsort(similarity_scores)[-top_k:]
bats = [list(initial_candidates) for _ in range(num_bats)]

# Initialize velocities, frequencies, loudness, and pulse rates
velocities = [np.zeros(top_k) for _ in range(num_bats)]
frequencies = np.zeros(num_bats)
loudness = np.ones(num_bats)
pulse_rate = np.random.rand(num_bats)

# Function to calculate fitness (similarity score sum)
def fitness(bat):
    return sum(similarity_scores[bat])

# Bat Algorithm Optimization
for iteration in range(num_iterations):
    for i in range(num_bats):
        # Adjust frequency, update velocities, and position
        frequencies[i] = frequency_min + (frequency_max - frequency_min) * random.random()
        velocities[i] += (bats[i] - np.mean(bats, axis=0)) * frequencies[i]
        candidate = np.clip(np.array(bats[i]) + velocities[i], 0, len(legal_documents) - 1).astype(int)

        # Generate a new solution by local search
        if random.random() > pulse_rate[i]:
            candidate = list(np.random.choice(initial_candidates, size=top_k, replace=False))

        # Check if the new solution is better and update
        if fitness(candidate) > fitness(bats[i]) and random.random() < loudness[i]:
            bats[i] = candidate
            loudness[i] *= alpha
            pulse_rate[i] *= (1 - np.exp(-gamma * iteration))

# Output the best-performing bat
best_bat = max(bats, key=fitness)
best_documents = [legal_documents[i] for i in best_bat]
for i in best_documents:
    print(i, end='\n\n')

# Performance Metrics Calculation

# Define a relevance threshold to consider a document relevant
relevance_threshold = 0.15929  # Adjust based on similarity score distribution

# Rank documents by similarity score
ranked_indices = np.argsort(similarity_scores)[::-1]  # Indices sorted in descending order
ranked_scores = similarity_scores[ranked_indices]

# Determine relevance based on the adjusted threshold
relevance_labels = [1 if score >= relevance_threshold else 0 for score in ranked_scores]

# Calculate Top-k Precision (P@k)
def precision_at_k(relevance_labels, k=top_k):
    top_k_relevance = relevance_labels[:k]
    return sum(top_k_relevance) / k if k > 0 else 0

# Calculate Precision at k
precision_k = precision_at_k(relevance_labels, k=top_k)

# Calculate Mean Reciprocal Rank (MRR)
def mean_reciprocal_rank(relevance_labels):
    for rank, label in enumerate(relevance_labels, start=1):
        if label == 1:  # First relevant document
            return 1 / rank
    return 0

# Calculate Mean Average Precision (MAP)
def mean_average_precision(relevance_labels):
    relevant_docs = 0
    cumulative_precision = 0
    for rank, label in enumerate(relevance_labels, start=1):
        if label == 1:
            relevant_docs += 1
            cumulative_precision += relevant_docs / rank
    return cumulative_precision / relevant_docs if relevant_docs > 0 else 0

# Calculate MRR and MAP
mrr = mean_reciprocal_rank(relevance_labels)
map_score = mean_average_precision(relevance_labels)

print(f"\nMean Reciprocal Rank (MRR): {mrr:.2f}")
print(f"Mean Average Precision (MAP): {map_score:.2f}")


Section 2601, Contract Law: Defines contracts related to sales and services. Subsection 2601.1 outlines general contract formation and validity. Subsection 2601.2 specifies terms for service contracts and the rights of the parties involved.

Section 2602, Contract Law: Regulates breach of contract, including damages and remedies. Subsection 2602.1 defines compensatory damages, punitive damages, and nominal damages. Subsection 2602.2 outlines remedies for breach of contract in commercial transactions.

Section 101, Contract Law: Governs contract formation requirements, including offer, acceptance, and consideration. Subsection 101.1 details enforceable contract types. Subsection 101.2 addresses remedies for breach, specifying compensatory, punitive, and nominal damages.


Mean Reciprocal Rank (MRR): 0.00
Mean Average Precision (MAP): 0.00
