In [48]:
import pandas as pd
import re
import nltk
from nltk.tokenize import word_tokenize
import spacy
import os
from spacy.training.example import Example
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [29]:
nltk.download('punkt')


[nltk_data] Downloading package punkt to /home/rebel/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [30]:
data = pd.read_csv('FIR_DATASET(updated).csv')

In [31]:
def preprocess_text(text):
    if isinstance(text, str):  # Check if the value is not null
        # Remove special characters and convert to lowercase
        text = re.sub(r'[^a-zA-Z0-9\s]', '', text.lower())
        # Tokenize the text
        tokens = word_tokenize(text)
        return tokens
    else:
        return []

In [32]:
data['Description'] = data['Description'].apply(preprocess_text)


In [33]:
print(data.head())

                                                 URL  \
0  https://lawrato.com/indian-kanoon/ipc/section-140   
1  https://lawrato.com/indian-kanoon/ipc/section-127   
2  https://lawrato.com/indian-kanoon/ipc/section-128   
3  https://lawrato.com/indian-kanoon/ipc/section-129   
4  https://lawrato.com/indian-kanoon/ipc/section-130   

                                         Description  \
0  [description, of, ipc, section, 140, according...   
1  [description, of, ipc, section, 127, according...   
2  [description, of, ipc, section, 128, according...   
3  [description, of, ipc, section, 129, according...   
4  [description, of, ipc, section, 130, according...   

                                             Offense  \
0  Wearing the dress or carrying any token used b...   
1  Receiving property taken by war or depredation...   
2  Public servant voluntarily allowing prisoner o...   
3  Public servant negligently suffering prisoner ...   
4  Aiding escape of, rescuing or harbouring, s

In [34]:
X = data['Description'].tolist()  # Input - Complainant-provided information
y = data['section'].tolist()  # Output - Relevant FIR section

print("Input (X) sample:")
print(X[:5])
print("\nOutput (y) sample:")
print(y[:5])

Input (X) sample:
[['description', 'of', 'ipc', 'section', '140', 'according', 'to', 'section', '140', 'of', 'indian', 'penal', 'code', 'whoever', 'not', 'being', 'a', 'soldier', 'sailor', 'or', 'airman', 'in', 'the', 'military', 'naval', 'or', 'air', 'service', 'of', 'the', 'government', 'of', 'india', 'wears', 'any', 'garb', 'or', 'carries', 'any', 'token', 'resembling', 'any', 'garb', 'or', 'token', 'used', 'by', 'such', 'a', 'soldier', 'sailor', 'or', 'airman', 'with', 'the', 'intention', 'that', 'it', 'may', 'be', 'believed', 'that', 'he', 'is', 'such', 'a', 'soldier', 'sailor', 'or', 'airman', 'shall', 'be', 'punished', 'with', 'imprisonment', 'of', 'either', 'description', 'for', 'a', 'term', 'which', 'may', 'extend', 'to', 'three', 'months', 'or', 'with', 'fine', 'which', 'may', 'extend', 'to', 'five', 'hundred', 'rupees', 'or', 'with', 'both', 'ipc', '140', 'in', 'simple', 'words', 'if', 'someone', 'who', 'is', 'not', 'a', 'military', 'member', 'wears', 'a', 'uniform', 'or', '

In [35]:
nlp = spacy.load("en_core_web_sm")  # Load the pre-trained spaCy model
textcat = nlp.add_pipe("textcat_multilabel") 

In [36]:

for label in set(y):
    textcat.add_label(str(label)) 

In [37]:
train_data = list(zip(X, [{"cats": {label: (section == label) for label in set(y)}} for section in y]))


In [38]:
pipe_exceptions = ["textcat_multilabel", "tagger", "parser", "ner", "lemmatizer"]
unaffected_pipes = [pipe for pipe in nlp.pipe_names if pipe not in pipe_exceptions]


In [42]:
with nlp.disable_pipes(*unaffected_pipes):
    optimizer = nlp.begin_training()
    for epoch in range(10):  #
        losses = {}
        for texts, annotations in train_data:
            
            text = ' '.join(texts)
            example = Example.from_dict(nlp.make_doc(text), annotations)
            
            
            nlp.update([example], drop=0.5, losses=losses, sgd=optimizer, exclude=["tagger", "parser", "ner", "textcat"])
            
        print("Epoch:", epoch, "Loss:", losses)


Epoch: 0 Loss: {'textcat_multilabel': 2.0126108452677727}
Epoch: 1 Loss: {'textcat_multilabel': 1.0018158110324293}
Epoch: 2 Loss: {'textcat_multilabel': 1.0013745443429798}
Epoch: 3 Loss: {'textcat_multilabel': 0.9954855925752781}
Epoch: 4 Loss: {'textcat_multilabel': 0.9543059317256848}
Epoch: 5 Loss: {'textcat_multilabel': 0.9158862404333377}
Epoch: 6 Loss: {'textcat_multilabel': 0.884065496340213}
Epoch: 7 Loss: {'textcat_multilabel': 0.8413868785200407}
Epoch: 8 Loss: {'textcat_multilabel': 0.784616856372395}
Epoch: 9 Loss: {'textcat_multilabel': 0.7129308046686447}


In [43]:
nlp.to_disk("trained_model")

In [44]:
nlp = spacy.load("trained_model")

In [61]:
def suggest_sections(complaint_text, nlp_model, section_labels, data):
    processed_text = preprocess_text(complaint_text)
    processed_text = ' '.join(processed_text)

    
    similarities = []
    for _, row in data.iterrows():
        row_text = ' '.join(preprocess_text(row['Description']))
        similarity = nlp_model(processed_text).similarity(nlp_model(row_text))
        similarities.append(similarity)

   
    max_similarity_index = similarities.index(max(similarities))
    suggested_section = data.iloc[max_similarity_index]['section']

    return suggested_section

In [62]:
# example

complaint_description = "The suspect stole my wallet and assaulted me."
suggested_section = suggest_sections(complaint_description, nlp, textcat.labels, data)
print("Suggested Section:", suggested_section)

  similarity = nlp_model(processed_text).similarity(nlp_model(row_text))
  similarity = nlp_model(processed_text).similarity(nlp_model(row_text))


Suggested Section: 140


In [64]:
def generate_fir():
   
    complainant_name = input("Enter complainant's name: ")
    father_name = input("Enter father's/husband's name: ")
    address = input("Enter address: ")
    phone_number = input("Enter phone number and fax: ")
    email = input("Enter email: ")
    place_of_occurrence = input("Enter place of occurrence: ")
    date_of_occurrence = input("Enter date and hour of occurrence: ")
    offence_nature = input("Enter nature of the offence: ")

    
    complaint_description = input("Enter the complaint description: ")
    suggested_section = suggest_sections(complaint_description, nlp, textcat.labels, data)

    property_description = input("Enter particulars of the property: ")
    accused_description = input("Enter description of the accused: ")
    witness_details = input("Enter details of witnesses (if any): ")
    complaint = input("Enter complaint: ")

    
    fir_template = f"""Police Station: [Police Station]
District: [District]

1. Personal details of the Complainant / Informant:
(a) Name: {complainant_name}
(b) Father's / Husband's Name: {father_name}
(c) Address: {address}
(d) Phone number & Fax: {phone_number}
(e) Email: {email}

2. Place of Occurrence: {place_of_occurrence}

3. Date and Hour of Occurrence: {date_of_occurrence}

4. Offence:
(a) Nature of the offence: {offence_nature}
(b) Section: {suggested_section}  # Use the suggested section here
(c) Particulars of the property: {property_description}

5. Description of the accused: {accused_description}  # Use the accused description here

6. Details of witnesses (if any): {witness_details}

7. Complaint: {complaint}
"""

    # Return the generated FIR
    return fir_template




In [65]:
# Example usage:
fir_text = generate_fir()
print(fir_text)

  similarity = nlp_model(processed_text).similarity(nlp_model(row_text))
  similarity = nlp_model(processed_text).similarity(nlp_model(row_text))


Police Station: [Police Station]
District: [District]

1. Personal details of the Complainant / Informant:
(a) Name: Dushyant
(b) Father's / Husband's Name: Sanjay
(c) Address: nhibtaunga
(d) Phone number & Fax: ye bhi nhi btaunga
(e) Email: na

2. Place of Occurrence: NIT

3. Date and Hour of Occurrence: 11:11

4. Offence:
(a) Nature of the offence: Murder
(b) Section: 140  # Use the suggested section here
(c) Particulars of the property: NIT

5. Description of the accused: reamesh killed suresh  # Use the accused description here

6. Details of witnesses (if any): ramesh killed suresh

7. Complaint: ramesh killed suresh

