In [None]:
import numpy as np
import random
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Load the corpus: each line of the text file is treated as one document.
# The encoding is given explicitly so the text decodes the same way on every
# platform instead of depending on the locale's default encoding.
with open('../corpus.txt', 'r', encoding='utf-8') as file:
    # Iterate the file once, stripping surrounding whitespace (including the
    # trailing newline) from every line. Blank lines are kept as empty
    # strings, so a document's index still reflects its position in the file.
    corpus = [line.strip() for line in file]



# The user's question; it is matched against the corpus further down.
query = "Somebody breached our contract and caused financial loss. What legal actions can we take?"

In [2]:
# Documents to search and the question to answer.
legal_documents = corpus
user_issue = query

# Search-loop settings.
num_fireflies = 5     # number of fireflies in the swarm
num_iterations = 20   # passes of the optimisation loop
top_k = 3             # documents held by each firefly
# NOTE(review): gamma and alpha are assigned here but not referenced anywhere
# in the visible code.
gamma = 0.8
alpha = 0.1

# Fit a single TF-IDF vocabulary over the documents plus the query (the query
# is appended as the last row) so both share one feature space.
vectorizer = TfidfVectorizer(
    ngram_range=(1, 2),
    max_df=0.8,
    max_features=500,
    stop_words='english',
)
combined_corpus = [*legal_documents, user_issue]
combined_tfidf_matrix = vectorizer.fit_transform(combined_corpus)

# Split the fitted matrix apart again: the last row is the query, every row
# before it is a document.
query_tfidf_matrix = combined_tfidf_matrix[-1]
document_tfidf_matrix = combined_tfidf_matrix[:-1]

# Cosine similarity of every document to the query, flattened to 1-D.
similarity_scores = cosine_similarity(document_tfidf_matrix, query_tfidf_matrix).flatten()

# Document-to-document cosine *similarity*. Despite the name, larger values
# mean more alike; the optimisation loop turns it into a distance via 1 - x.
distance_matrix = cosine_similarity(document_tfidf_matrix)

# Seed every firefly with the same candidate set: the top_k documents with the
# highest query similarity (argsort is ascending, hence the tail slice).
initial_candidates = np.argsort(similarity_scores)[-top_k:]
fireflies = [list(initial_candidates) for _ in range(num_fireflies)]

# A firefly's brightness is the summed query similarity of its documents.
# NOTE(review): all fireflies start from identical positions, so they also
# start with identical brightness.
light_intensities = [sum(similarity_scores[members]) for members in fireflies]

# Function to update positions
def update_position(firefly, brighter_firefly, scores=None):
    """Return the new position for ``firefly`` after moving to a brighter one.

    The new position starts as a copy of ``brighter_firefly``. If that copy
    holds fewer documents than ``firefly`` did, it is padded with the
    highest-scoring documents that are not already in it, best first.

    Args:
        firefly: Document indices currently held by the moving firefly. Only
            its length is used, as the target size of the result.
        brighter_firefly: Document indices held by the brighter firefly.
        scores: Optional 1-D sequence of per-document scores used to pick
            padding documents. Defaults to the module-level
            ``similarity_scores``.

    Returns:
        A new list of document indices. It keeps the length of
        ``brighter_firefly`` when that is already at least ``len(firefly)``,
        and may be shorter than ``firefly`` when there are not enough
        distinct documents left to pad with.
    """
    new_firefly = list(brighter_firefly)
    missing = len(firefly) - len(new_firefly)
    if missing <= 0:
        return new_firefly

    if scores is None:
        scores = similarity_scores

    # Walk the documents from best to worst score. Re-evaluating np.argmax on
    # every pass always yields the same index, so a padding loop built on it
    # never terminates once that document is already present. A stable sort on
    # the negated scores keeps argmax's tie-break (lowest index first).
    taken = set(new_firefly)
    for candidate in np.argsort(-np.asarray(scores), kind="stable"):
        if missing == 0:
            break
        candidate = int(candidate)
        if candidate not in taken:
            new_firefly.append(candidate)
            taken.add(candidate)
            missing -= 1
    return new_firefly

# Firefly Algorithm Optimization
# On every pass each firefly i is compared with each firefly j; only a
# strictly brighter j can attract i.
for iteration in range(num_iterations):
    for i in range(num_fireflies):
        for j in range(num_fireflies):
            if light_intensities[j] <= light_intensities[i]:
                continue  # equal or dimmer fireflies do not attract

            # Mean similarity over *all* document pairs of the two fireflies.
            # np.ix_ selects the full sub-matrix; indexing with the two lists
            # directly pairs them element by element, which depends on list
            # order and raises when the lists differ in length.
            pair_similarity = distance_matrix[np.ix_(fireflies[i], fireflies[j])]
            distance = 1 - np.mean(pair_similarity)
            if distance <= 0:
                continue

            # Commit the move only when it improves the score, and update
            # position and brightness together so the stored intensity always
            # describes the stored position.
            candidate = update_position(fireflies[i], fireflies[j])
            new_score = sum(similarity_scores[candidate])
            if new_score > light_intensities[i]:
                fireflies[i] = candidate
                light_intensities[i] = new_score

# Output the most relevant legal documents for the user's issue
best_firefly_index = np.argmax(light_intensities)
best_documents = fireflies[best_firefly_index]

# NOTE(review): documents are printed in the order the firefly stores them,
# which is not necessarily best match first.
print(f"\nTop {top_k} most relevant legal documents for your issue:")
for idx in best_documents:
    print(f"Document {idx}: {legal_documents[idx]}")

# Performance Metrics Calculation

# Similarity cut-off: a document counts as relevant when its query similarity
# is at least this value.
relevance_threshold = 0.15929

# Order the documents from most to least similar to the query and pick up the
# matching scores in that same order.
ranked_indices = np.flip(np.argsort(similarity_scores))
ranked_scores = similarity_scores[ranked_indices]

# One binary label per ranked position: 1 = relevant, 0 = not relevant.
# NOTE(review): the labels are derived from the very scores that define the
# ranking, so any relevant documents always sit at the top of the list; there
# is no independent ground truth behind the metrics computed from them.
relevance_labels = (ranked_scores >= relevance_threshold).astype(int).tolist()

# Calculate Mean Reciprocal Rank (MRR)
def mean_reciprocal_rank(relevance_labels):
    """Return the reciprocal rank of the first relevant item in one ranking.

    Args:
        relevance_labels: Iterable of labels in ranked order (best first);
            a label equal to 1 marks a relevant item.

    Returns:
        ``1 / rank`` of the first label equal to 1 (ranks start at 1), or 0
        when no label equals 1.
    """
    first_hit = next(
        (rank for rank, label in enumerate(relevance_labels, start=1) if label == 1),
        None,
    )
    if first_hit is None:
        return 0  # nothing relevant anywhere in the ranking
    return 1 / first_hit

# Score the single ranking held in relevance_labels and print the result
# rounded to two decimals.
mrr = mean_reciprocal_rank(relevance_labels)
print(f"\nMean Reciprocal Rank (MRR): {mrr:.2f}")

# Calculate Mean Average Precision (MAP)
def mean_average_precision(relevance_labels):
    """Return the average precision of one ranked list of relevance labels.

    Precision is sampled at every position whose label equals 1 and the
    samples are averaged over the number of such positions.

    Args:
        relevance_labels: Iterable of labels in ranked order (best first);
            a label equal to 1 marks a relevant item.

    Returns:
        The average of precision-at-rank over the relevant positions, or 0
        when no label equals 1.
    """
    hits = 0
    precision_total = 0
    for position, is_relevant in enumerate(relevance_labels, start=1):
        if is_relevant != 1:
            continue
        hits += 1
        # Precision at this cut-off: relevant items so far / items seen so far.
        precision_total += hits / position

    if hits == 0:
        return 0
    return precision_total / hits

# Compute average precision over the single ranking held in relevance_labels
# and print the result rounded to two decimals.
map_score = mean_average_precision(relevance_labels)
print(f"Mean Average Precision (MAP): {map_score:.2f}")



Top 3 most relevant legal documents for your issue:
Document 50: Section 2601, Contract Law: Defines contracts related to sales and services. Subsection 2601.1 outlines general contract formation and validity. Subsection 2601.2 specifies terms for service contracts and the rights of the parties involved.
Document 51: Section 2602, Contract Law: Regulates breach of contract, including damages and remedies. Subsection 2602.1 defines compensatory damages, punitive damages, and nominal damages. Subsection 2602.2 outlines remedies for breach of contract in commercial transactions.
Document 0: Section 101, Contract Law: Governs contract formation requirements, including offer, acceptance, and consideration. Subsection 101.1 details enforceable contract types. Subsection 101.2 addresses remedies for breach, specifying compensatory, punitive, and nominal damages.

Mean Reciprocal Rank (MRR): 0.00
Mean Average Precision (MAP): 0.00
