In [1]:
import re
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.stem import PorterStemmer

In [2]:
def preprocess_text(text):
    text=text.lower()
    words=word_tokenize(text)
    stop_word=set(stopwords.words('english'))
    words=[word for word in words if word not in stop_word]
    stemmer=PorterStemmer()
    words=[stemmer.stem(word) for word in words]

    proprocessed_text=' '.join(words)
    return proprocessed_text

In [3]:
import pandas as pd
try:
    ds=pd.read_csv('D:/My WorkSpace/Summer-Internship/29-July-2024/FIR-DATA.csv')
    print(ds.head(2))
except UnicodeDecodeError as err:
    print(f"Error: {err}")

                                                 URL  \
0  https://lawrato.com/indian-kanoon/ipc/section-140   
1  https://lawrato.com/indian-kanoon/ipc/section-127   

                                         Description  \
0  Description of IPC Section 140\nAccording to s...   
1  Description of IPC Section 127\nAccording to s...   

                                             Offense  \
0  Wearing the dress or carrying any token used b...   
1  Receiving property taken by war or depredation...   

                                Punishment  Cognizable      Bailable  \
0                 3 Months or Fine or Both  Cognizable      Bailable   
1  7 Years + Fine + forfeiture of property  Cognizable  Non-Bailable   

              Court  
0    Any Magistrate  
1  Court of Session  


In [4]:
ds.isnull().sum()

URL             0
Description     1
Offense        63
Punishment     63
Cognizable     63
Bailable       63
Court          63
dtype: int64

In [5]:
ds.fillna("Not Mentioned",inplace=True)


In [6]:
ds.isnull().sum()

URL            0
Description    0
Offense        0
Punishment     0
Cognizable     0
Bailable       0
Court          0
dtype: int64

In [7]:
ds['Combo']=ds['Description']+ds['Offense']
ds.head(1)

Unnamed: 0,URL,Description,Offense,Punishment,Cognizable,Bailable,Court,Combo
0,https://lawrato.com/indian-kanoon/ipc/section-140,Description of IPC Section 140\nAccording to s...,Wearing the dress or carrying any token used b...,3 Months or Fine or Both,Cognizable,Bailable,Any Magistrate,Description of IPC Section 140\nAccording to s...


In [8]:
ds['Combo']=ds['Combo'].apply(preprocess_text)
ds.head(1)

Unnamed: 0,URL,Description,Offense,Punishment,Cognizable,Bailable,Court,Combo
0,https://lawrato.com/indian-kanoon/ipc/section-140,Description of IPC Section 140\nAccording to s...,Wearing the dress or carrying any token used b...,3 Months or Fine or Both,Cognizable,Bailable,Any Magistrate,descript ipc section 140 accord section 140 in...


In [9]:
new_ds=ds[['Description','Offense','Punishment','Cognizable','Bailable','Court','Combo']]
new_ds.head(1)

Unnamed: 0,Description,Offense,Punishment,Cognizable,Bailable,Court,Combo
0,Description of IPC Section 140\nAccording to s...,Wearing the dress or carrying any token used b...,3 Months or Fine or Both,Cognizable,Bailable,Any Magistrate,descript ipc section 140 accord section 140 in...


In [10]:
new_ds['Combo']=new_ds['Combo'].apply(lambda x:"".join(x))
new_ds.head(1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_ds['Combo']=new_ds['Combo'].apply(lambda x:"".join(x))


Unnamed: 0,Description,Offense,Punishment,Cognizable,Bailable,Court,Combo
0,Description of IPC Section 140\nAccording to s...,Wearing the dress or carrying any token used b...,3 Months or Fine or Both,Cognizable,Bailable,Any Magistrate,descript ipc section 140 accord section 140 in...


In [11]:
import pickle
with open('preprocess_data.pkl','wb')as file:
    pickle.dump(new_ds,file)
new_ds=pickle.load(open('preprocess_data.pkl','rb'))



pip install tf_keras
pip uninstall torch torchvision torchaudio
pip cache purge
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu117


In [12]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer, util
model=SentenceTransformer('paraphrase-MiniLM-L6-v2')
    

  from tqdm.autonotebook import tqdm, trange





To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


In [13]:
def suggest_sections(complaint,dataset,min_suggestions=5):
    preprocessed_complaint=preprocess_text(complaint)
    complaint_embedding=model.encode(preprocessed_complaint)
    section_embedding=model.encode(dataset['Combo'].tolist())
    similarities=util.pytorch_cos_sim(complaint_embedding,section_embedding)[0]
    similarity_threhold=0.2
    relevant_indices=[]
    while len(relevant_indices)<min_suggestions and similarity_threhold>0:
        relevant_indices=[i for i, sim in enumerate(similarities)if sim>similarity_threhold]
        similarity_threhold-=0.5 #st=st-0.5
        sorted_indices=sorted(relevant_indices,key=lambda i: similarities[i],reverse=True)
        suggestions=dataset.iloc[sorted_indices][['Description','Offense','Punishment','Cognizable','Bailable','Court','Combo']].to_dict(orient='records')
        return suggestions


In [14]:
complaint=input("Enter crime description")
suggest_sections=suggest_sections(complaint,new_ds)
if suggest_sections:
    print("Suggested Section are:")
    for suggestion in suggest_sections:
        print(f"Description: {suggestion['Description']}")
        print(f"Offense: {suggestion['Offense']}")
        print(f"Punishment: {suggestion['Punishment']}")
        print("_________________________________________________________________________________________\n")
else:
    print("No record is found")


Suggested Section are:
Description: Description of IPC Section 356
According to section 356 of Indian penal code, Whoever assaults or uses criminal force to any person, in attempting to commit theft on any property which that person is then wearing or carrying shall be punished with imprisonment of either description for a term which may extend to two years, or with fine, or with both.


IPC 356 in Simple Words
Section 356 of the Indian Penal Code states that if someone assaults or uses criminal force against another person while attempting to steal something that person is wearing or carrying, they can be punished with imprisonment for up to two years, or a fine, or both.
Offense: Assault or criminal force in attempt to commit theft of property worn or carried by a person
Punishment: 2 Years or Fine or Both
_________________________________________________________________________________________

Description: Description of IPC Section 236
According to section 236 of Indian penal code