In [None]:
import pandas as pd
import re
import nltk
from nltk.tokenize import word_tokenize
import spacy
import os
from spacy.training.example import Example
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [None]:
# word_tokenize relies on the Punkt sentence tokenizer data. Recent NLTK
# releases look the data up under the name 'punkt_tab', older ones under
# 'punkt', so both packages are fetched to keep the notebook runnable on either.
for punkt_resource in ('punkt', 'punkt_tab'):
    nltk.download(punkt_resource)


In [None]:
# Read the FIR dataset CSV (path is relative to the working directory) into a DataFrame.
data = pd.read_csv('FIR_DATASET(updated).csv')

In [None]:
def preprocess_text(text):
    """Normalise a piece of text and split it into word tokens.

    Args:
        text: The value to clean. Only ``str`` values are processed.

    Returns:
        list: Tokens produced by ``word_tokenize`` from the lower-cased text
        with every character other than ASCII letters, digits and whitespace
        removed. Any non-string input yields an empty list.
    """
    # Guard clause: anything that is not a string produces no tokens.
    if not isinstance(text, str):
        return []

    lowered = text.lower()
    cleaned = re.sub(r'[^a-zA-Z0-9\s]', '', lowered)
    return word_tokenize(cleaned)

In [None]:
# Tokenise each description exactly once. Values that are already token lists
# are kept as they are, so re-running this cell does not wipe the column
# (preprocess_text returns [] for anything that is not a string).
data['Description'] = data['Description'].apply(
    lambda value: value if isinstance(value, list) else preprocess_text(value)
)


In [None]:
# Show the first rows using the notebook's rich DataFrame rendering
# (the last expression of a cell is displayed automatically).
data.head()

In [None]:
# Features are the values of the 'Description' column; targets are the
# values of the 'section' column, both as plain Python lists.
X = data['Description'].tolist()
y = data['section'].tolist()

# Print the first five entries of each list as a quick sanity check.
print("Input (X) sample:", X[:5], sep="\n")
print("\nOutput (y) sample:", y[:5], sep="\n")

In [None]:
# Load the "en_core_web_sm" pipeline and append a new "textcat_multilabel"
# component to it; `textcat` keeps a handle to that component so labels can
# be added to it in the following cell.
nlp = spacy.load("en_core_web_sm") 
textcat = nlp.add_pipe("textcat_multilabel") 

In [None]:

# Register every distinct section as a classifier label. Labels are converted
# to strings first and then sorted, so the label order is identical on every
# run (iteration order of a plain set is not guaranteed to be stable between
# interpreter sessions).
for label in sorted({str(section) for section in y}):
    textcat.add_label(label)

In [None]:
# Build (tokens, annotations) training pairs. The classifier labels are
# registered as strings (str(label)), so the gold category keys are
# stringified as well; otherwise non-string sections (e.g. integers) would
# never match a registered label. Scores are written as 1.0 / 0.0.
label_names = sorted({str(section) for section in y})
train_data = [
    (tokens, {"cats": {name: float(str(section) == name) for name in label_names}})
    for tokens, section in zip(X, y)
]


In [None]:
# Only the newly added classifier should stay active while training. Every
# other (pretrained) component ends up in `unaffected_pipes` so that it is
# disabled and therefore neither re-initialised nor updated.
pipe_exceptions = ["textcat_multilabel"]
unaffected_pipes = [pipe for pipe in nlp.pipe_names if pipe not in pipe_exceptions]


In [None]:
# Every component except the new classifier is frozen during training so the
# pretrained weights are neither re-initialised nor updated.
frozen_pipes = [name for name in nlp.pipe_names if name != "textcat_multilabel"]


def _to_example(tokens, annotations):
    """Build a spaCy Example from a token list and its gold annotations."""
    # Labels are registered on the classifier as strings, so the gold category
    # keys are normalised to strings (and scores to floats) to match them.
    cats = {str(label): float(flag) for label, flag in annotations["cats"].items()}
    return Example.from_dict(nlp.make_doc(' '.join(tokens)), {**annotations, "cats": cats})


# Build the examples once instead of re-tokenising the corpus every epoch.
train_examples = [_to_example(tokens, annotations) for tokens, annotations in train_data]

with nlp.select_pipes(disable=frozen_pipes):
    # Initialise only the new component. Calling nlp.begin_training() /
    # nlp.initialize() on a loaded pipeline would re-initialise every enabled
    # component, discarding the pretrained weights.
    nlp.get_pipe("textcat_multilabel").initialize(lambda: train_examples, nlp=nlp)
    optimizer = nlp.resume_training()

    for epoch in range(10):
        losses = {}
        for example in train_examples:
            # One example per update step, with 50% dropout.
            nlp.update([example], drop=0.5, losses=losses, sgd=optimizer)

        print("Epoch:", epoch, "Loss:", losses)


In [None]:
# Save the pipeline held in `nlp` to the "trained_model" path.
nlp.to_disk("trained_model")

In [None]:
# Load the pipeline back from the "trained_model" path, rebinding `nlp` to the loaded copy.
nlp = spacy.load("trained_model")

In [None]:
def suggest_sections(complaint_text, nlp_model, section_labels, data):
    """Return the section of the dataset row most similar to a complaint.

    The complaint is cleaned with ``preprocess_text`` and compared, via
    ``Doc.similarity`` on documents produced by ``nlp_model``, against every
    value in ``data['Description']``. The 'section' value of the best-scoring
    row is returned; on ties the earliest row wins.

    Args:
        complaint_text: Free-text complaint to classify.
        nlp_model: Callable that turns a string into an object exposing
            ``similarity(other)``.
        section_labels: Accepted for interface compatibility; not used.
        data: DataFrame with 'Description' and 'section' columns. A
            description may be a raw string or an already tokenised
            list/tuple of tokens.

    Returns:
        The 'section' value of the most similar row.

    Raises:
        ValueError: If ``data`` has no rows.
    """
    # Parse the complaint once; it does not change between rows.
    query_doc = nlp_model(' '.join(preprocess_text(complaint_text)))

    best_index = None
    best_score = None
    for position, description in enumerate(data['Description']):
        if isinstance(description, (list, tuple)):
            # The column may already hold token lists. Passing those through
            # preprocess_text would yield [] (it only accepts strings) and
            # every row would collapse to an empty document.
            row_text = ' '.join(str(token) for token in description)
        else:
            row_text = ' '.join(preprocess_text(description))

        score = query_doc.similarity(nlp_model(row_text))
        # Strict comparison keeps the first row on ties.
        if best_score is None or score > best_score:
            best_index, best_score = position, score

    if best_index is None:
        raise ValueError("data contains no rows to compare the complaint against")

    return data.iloc[best_index]['section']

In [None]:
# Example: suggest a section for a single free-text complaint.

complaint_description = "The suspect stole my wallet and assaulted me."
# Read the labels from the pipeline currently bound to `nlp`; the `textcat`
# handle was created on the pipeline object that existed before `nlp` was
# rebound by spacy.load.
section_labels = nlp.get_pipe("textcat_multilabel").labels
suggested_section = suggest_sections(complaint_description, nlp, section_labels, data)
print("Suggested Section:", suggested_section)

In [None]:
def generate_fir():
    """Interactively collect complaint details and return a filled-in FIR text.

    Prompts on standard input for the complainant's details, the occurrence
    and a complaint description. The description is passed to
    ``suggest_sections`` to pick a section, and the first row of ``data`` with
    that section supplies the bailable / cognizable / court / punishment /
    offense fields. The remaining free-text fields are then prompted for and
    everything is rendered into the FIR template.

    Returns:
        str: The completed FIR text.

    Raises:
        IndexError: If no row of ``data`` has the suggested section.
    """
    # Input from the user
    complainant_name = input("Enter complainant's name: ")
    father_name = input("Enter father's/husband's name: ")
    address = input("Enter address: ")
    phone_number = input("Enter phone number and fax: ")
    email = input("Enter email: ")
    place_of_occurrence = input("Enter place of occurrence: ")
    date_of_occurrence = input("Enter date and hour of occurrence: ")

    # Fetch suggestions from the model based on the complaint description.
    complaint_description = input("Enter the complaint description: ")
    # Labels come from the pipeline currently bound to `nlp` rather than from a
    # component handle created before the model was saved and reloaded.
    section_labels = nlp.get_pipe("textcat_multilabel").labels
    suggested_section = suggest_sections(complaint_description, nlp, section_labels, data)

    # Retrieve additional information from the dataset based on the identified section.
    matching_rows = data[data['section'] == suggested_section]
    if matching_rows.empty:
        # Same exception type as the bare .iloc[0] would raise, but with a
        # message that names the section that could not be found.
        raise IndexError(f"no dataset row found for section {suggested_section!r}")
    section_info = matching_rows.iloc[0]
    bailable = section_info['Bailable']
    cognizable = section_info['Cognizable']
    court = section_info['Court']
    punishment = section_info['Punishment']
    offense_nature = section_info['Offense']  # Automatically fill the nature of the offense

    property_description = input("Enter particulars of the property: ")
    accused_description = input("Enter description of the accused: ")
    witness_details = input("Enter details of witnesses (if any): ")
    complaint = input("Enter complaint: ")

    # Create the FIR template. The police station and district are left as
    # literal placeholders in the text.
    fir_template = f"""Police Station: [Police Station]
District: [District]

1. Personal details of the Complainant / Informant:
(a) Name: {complainant_name}
(b) Father's / Husband's Name: {father_name}
(c) Address: {address}
(d) Phone number & Fax: {phone_number}
(e) Email: {email}

2. Place of Occurrence: {place_of_occurrence}

3. Date and Hour of Occurrence: {date_of_occurrence}

4. Offence:
(a) Nature of the offence: {offense_nature}  
(b) Section: {suggested_section}  
(c) Particulars of the property: {property_description}

5. Description of the accused: {accused_description} 

6. Additional Section Information:
   - Bailable: {bailable}
   - Cognizable: {cognizable}
   - Court: {court}
   - Punishment: {punishment}

7. Details of witnesses (if any): {witness_details}

8. Complaint: {complaint}
"""

    # Return the generated FIR
    return fir_template




In [None]:
# Example usage: run the interactive prompts and print the FIR text that is returned.
fir_text = generate_fir()
print(fir_text)
