In [3]:
import os
import spacy
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

def load_legal_data(directory):
    """
    Load legal text data from a directory.

    Only regular files directly inside ``directory`` are read; sub-directories
    and other non-file entries are skipped. Files are read in sorted file-name
    order so the position of each document in the returned list is the same
    on every run.

    Args:
    - directory (str): Path to the directory containing legal text files.

    Returns:
    - texts (list): List of text content loaded from files in the directory.

    Raises:
    - OSError: If the directory cannot be listed or a file cannot be opened.
    """
    texts = []
    # os.listdir returns entries in arbitrary order; sort for reproducibility.
    for file_name in sorted(os.listdir(directory)):
        file_path = os.path.join(directory, file_name)
        # Opening a directory would raise IsADirectoryError, so skip anything
        # that is not a regular file.
        if not os.path.isfile(file_path):
            continue
        # Decode explicitly as UTF-8 rather than the platform default, and
        # replace undecodable bytes so one malformed file does not abort the
        # whole load.
        with open(file_path, 'r', encoding='utf-8', errors='replace') as file:
            texts.append(file.read())
    return texts

def preprocess_text(texts, nlp):
    """
    Lemmatize documents with SpaCy and fit a TF-IDF model on the result.

    Each document is run through ``nlp``; tokens flagged as stop words are
    dropped and the lemmas of the remaining tokens are joined with single
    spaces. A fresh ``TfidfVectorizer`` is then fitted on those strings.

    Args:
    - texts (list): List of text documents.
    - nlp (spacy.Language): SpaCy language model.

    Returns:
    - tfidf_matrix (scipy.sparse.csr_matrix): TF-IDF matrix of vectorized text.
    - vectorizer (sklearn.feature_extraction.text.TfidfVectorizer): TF-IDF vectorizer.
    """
    def lemmatize(document):
        # Keep the lemma of every token that is not a stop word.
        kept_lemmas = []
        for tok in nlp(document):
            if tok.is_stop:
                continue
            kept_lemmas.append(tok.lemma_)
        return " ".join(kept_lemmas)

    cleaned_corpus = []
    for document in texts:
        cleaned_corpus.append(lemmatize(document))

    vectorizer = TfidfVectorizer()
    tfidf_matrix = vectorizer.fit_transform(cleaned_corpus)
    return tfidf_matrix, vectorizer

def get_most_relevant_text(query_vector, tfidf_matrix, texts):
    """
    Pick the document whose TF-IDF row is closest to the query.

    Cosine similarity is computed between the query and every row of
    ``tfidf_matrix``; the document at the highest-scoring row index is
    returned.

    Args:
    - query_vector (scipy.sparse.csr_matrix): Vectorized query.
    - tfidf_matrix (scipy.sparse.csr_matrix): TF-IDF matrix of vectorized text.
    - texts (list): List of text documents.

    Returns:
    - most_relevant_text (str): Most relevant text document.
    """
    score_grid = cosine_similarity(query_vector, tfidf_matrix)
    # Collapse to one dimension so the arg-max is a plain row index.
    scores = score_grid.flatten()
    best_position = scores.argmax()
    return texts[best_position]

def main(
    cases_directory='/kaggle/input/legalai/Object_casedocs/',
    statutes_directory='/kaggle/input/legalai/Object_statutes/',
    output_path="legal_document.txt",
):
    """
    Run the interactive legal-document assistant.

    Loads the case and statute corpora, builds one TF-IDF index per corpus,
    then repeatedly prompts for a question. For each question the closest
    case and closest statute are retrieved and written, together with fixed
    guidance text, to ``output_path``. The loop ends when the user types
    'exit'.

    Args:
    - cases_directory (str): Directory containing the case documents.
    - statutes_directory (str): Directory containing the statute documents.
    - output_path (str): File the generated document is written to. It is
      overwritten on every question.

    Returns:
    - None
    """
    # Load legal data - Cases and Statutes
    cases_texts = load_legal_data(cases_directory)
    statutes_texts = load_legal_data(statutes_directory)

    # Load SpaCy language model
    nlp = spacy.load("en_core_web_sm")

    # Preprocess and vectorize each corpus with its own vocabulary
    tfidf_matrix_cases, vectorizer_cases = preprocess_text(cases_texts, nlp)
    tfidf_matrix_statutes, vectorizer_statutes = preprocess_text(statutes_texts, nlp)

    # User interaction loop
    while True:
        # Strip so that " exit " and accidental whitespace are handled.
        user_query = input("Ask a legal-related question (type 'exit' to quit): ").strip()

        if user_query.lower() == 'exit':
            print("Exiting the program. Goodbye!")
            break

        if not user_query:
            print("Please enter a question.")
            continue

        # The corpora were lemmatized with stop words removed before the
        # vectorizers were fitted, so the query must be normalized the same
        # way or inflected words will miss the vocabulary.
        processed_query = " ".join(
            token.lemma_ for token in nlp(user_query) if not token.is_stop
        )
        query_vector_cases = vectorizer_cases.transform([processed_query])
        query_vector_statutes = vectorizer_statutes.transform([processed_query])

        # An all-zero query vector scores 0 against every document, and the
        # arg-max would then silently return the first document. Refuse
        # rather than present an arbitrary text as "most relevant".
        if query_vector_cases.nnz == 0 or query_vector_statutes.nnz == 0:
            print("\nNo relevant case and statute found for that question. Please try rephrasing it.")
            continue

        # Retrieve the most relevant case and statute
        relevant_case = get_most_relevant_text(query_vector_cases, tfidf_matrix_cases, cases_texts)
        relevant_statute = get_most_relevant_text(query_vector_statutes, tfidf_matrix_statutes, statutes_texts)

        # NOTE(review): this "summary" is the full case text re-emitted one
        # sentence per line; no sentences are selected or dropped.
        doc = nlp(relevant_case)
        case_summary = "\n".join(sent.text for sent in doc.sents)

        # Generate Legal Document
        # NOTE(review): the guidance paragraph is fixed boilerplate and does
        # not depend on the query or on the retrieved texts.
        legal_document = "".join([
            f"Legal Document - User Query: {user_query}\n\n",
            f"Case Summary:\n{case_summary}\n\n",
            "Relevant Statute:\n",
            f"{relevant_statute}\n",
            "\nGuidance for the User:\n",
            "To defend your friend in court, focus on presenting evidence that supports their actions were in self-defense.\n",
            "Emphasize any mitigating circumstances and demonstrate their lack of intent to harm.\n",
            "Consult with a qualified legal professional to build a strong defense strategy.",
        ])

        # Write as UTF-8 explicitly so non-ASCII characters in the corpus do
        # not fail under a non-UTF-8 locale default.
        with open(output_path, "w", encoding="utf-8") as output_file:
            output_file.write(legal_document)

        print(f"\nLegal document created. You can find the document in '{output_path}'.")

# Start the interactive assistant only when this file is executed as a
# script; importing it as a module does not call main().
if __name__ == "__main__":
    main()


Ask a legal-related question (type 'exit' to quit):  my friend did it accident how to help him



Legal document created. You can find the document in 'legal_document.txt'.


Ask a legal-related question (type 'exit' to quit):  exit


Exiting the program. Goodbye!
