In [1]:
import numpy as np
import random
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Open the file in read mode
with open('../corpus.txt', 'r') as file:
    # Read the lines from the file and store them in a list
    corpus = file.readlines()

# Optionally, you can remove newline characters from each line
corpus = [line.strip() for line in corpus]



query = "Somebody breached our contract and caused financial loss. What legal actions can we take?"

In [2]:
# Sample Legal document corpus
legal_documents = corpus

# User's issue as input query
user_issue = query

# Fine-tuned TF-IDF Vectorizer for document-query similarity
vectorizer = TfidfVectorizer(ngram_range=(1, 2), max_df=0.85, stop_words='english')
combined_corpus = legal_documents + [user_issue]
combined_tfidf_matrix = vectorizer.fit_transform(combined_corpus)

document_tfidf_matrix = combined_tfidf_matrix[:-1]
query_tfidf_matrix = combined_tfidf_matrix[-1]

similarity_scores = cosine_similarity(document_tfidf_matrix, query_tfidf_matrix).flatten()

# PSO Parameters
num_particles = 10
num_iterations = 20
top_k = 3

# Initialize particles and velocities
particles = [random.sample(range(len(legal_documents)), top_k) for _ in range(num_particles)]
velocities = [random.sample(range(len(legal_documents)), top_k) for _ in range(num_particles)]

# Initialize best positions and scores
particle_best_positions = particles.copy()
particle_best_scores = [sum(similarity_scores[particle]) for particle in particles]

# Initialize global best
global_best_position = particle_best_positions[np.argmax(particle_best_scores)]
global_best_score = max(particle_best_scores)

# PSO Parameters
w = 0.5  # Inertia weight
c1 = 1.5  # Cognitive component
c2 = 2.0  # Social component

def update_velocity(velocity, particle, best_position, global_best_position):
    new_velocity = velocity.copy()
    for i in range(len(velocity)):
        if random.random() < 0.5:
            new_velocity[i] = best_position[i]
        else:
            new_velocity[i] = global_best_position[i]
    return new_velocity

def update_position(particle, velocity):
    new_particle = particle.copy()
    for i in range(len(velocity)):
        if velocity[i] not in new_particle:
            new_particle[i] = velocity[i]
    return new_particle

# PSO Algorithm
for iteration in range(num_iterations):
    for i in range(num_particles):
        # Update velocity and position
        velocities[i] = update_velocity(velocities[i], particles[i], particle_best_positions[i], global_best_position)
        particles[i] = update_position(particles[i], velocities[i])

        # Calculate fitness score
        current_score = sum(similarity_scores[particles[i]])

        # Update personal and global best
        if current_score > particle_best_scores[i]:
            particle_best_positions[i] = particles[i]
            particle_best_scores[i] = current_score

        if current_score > global_best_score:
            global_best_position = particles[i]
            global_best_score = current_score

# Output the most relevant legal documents for the user's issue
print(f"\nTop {top_k} most relevant legal documents for your issue:")
for idx in global_best_position:
    print(f"Document {idx}: {legal_documents[idx]}")

# Performance Metrics Calculation

# Define a relevance threshold to consider a document relevant
relevance_threshold = 0.15929

# Rank documents by similarity score
ranked_indices = np.argsort(similarity_scores)[::-1]  # Indices sorted in descending order of similarity scores
ranked_scores = similarity_scores[ranked_indices]     # Corresponding similarity scores in descending order

# Determine relevance based on the similarity threshold
relevance_labels = [1 if score >= relevance_threshold else 0 for score in ranked_scores]

# Calculate Mean Reciprocal Rank (MRR)
def mean_reciprocal_rank(relevance_labels):
    for rank, label in enumerate(relevance_labels, start=1):
        if label == 1:  # First relevant document
            return 1 / rank
    return 0  # No relevant document found

mrr = mean_reciprocal_rank(relevance_labels)
print(f"\nMean Reciprocal Rank (MRR): {mrr:.2f}")

# Calculate Mean Average Precision (MAP)
def mean_average_precision(relevance_labels):
    relevant_docs = 0
    cumulative_precision = 0
    for rank, label in enumerate(relevance_labels, start=1):
        if label == 1:
            relevant_docs += 1
            cumulative_precision += relevant_docs / rank
    return cumulative_precision / relevant_docs if relevant_docs > 0 else 0

map_score = mean_average_precision(relevance_labels)
print(f"Mean Average Precision (MAP): {map_score:.2f}")



Top 3 most relevant legal documents for your issue:
Document 45: Section 2302, Health Law: Regulates healthcare financing, including insurance and public programs. Subsection 2302.1 defines the Affordable Care Act and its implementation. Subsection 2302.2 outlines eligibility for public health programs like Medicaid.
Document 0: Section 101, Contract Law: Governs contract formation requirements, including offer, acceptance, and consideration. Subsection 101.1 details enforceable contract types. Subsection 101.2 addresses remedies for breach, specifying compensatory, punitive, and nominal damages.
Document 31: Section 1602, International Law: Deals with international dispute resolution mechanisms. Subsection 1602.1 discusses arbitration and mediation. Subsection 1602.2 provides guidelines for state-to-state legal proceedings.

Mean Reciprocal Rank (MRR): 0.00
Mean Average Precision (MAP): 0.00
