In [5]:
import pandas as pd
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from sentence_transformers import SentenceTransformer, util

def _language_resources():
    """Return the shared ``(stop_words, lemmatizer)`` pair, built on first use.

    The pair is stored on the function object so that the stop-word list is
    read and the lemmatizer constructed only once, instead of on every call
    to ``preprocess_text``.
    """
    resources = getattr(_language_resources, '_resources', None)
    if resources is None:
        resources = (frozenset(stopwords.words('english')), WordNetLemmatizer())
        _language_resources._resources = resources
    return resources


def preprocess_text(text):
    """Normalize *text* for embedding.

    The text is lower-cased, split with ``word_tokenize``, stripped of
    English stop words, lemmatized with ``WordNetLemmatizer`` and re-joined
    with single spaces.

    Args:
        text: The string to normalize.

    Returns:
        The normalized string (empty if every token was a stop word).
    """
    stop_words, lemmatizer = _language_resources()
    tokens = word_tokenize(text.lower())
    # Filter and lemmatize in one pass; the result equals filtering first
    # and lemmatizing the survivors afterwards.
    return ' '.join(
        lemmatizer.lemmatize(token) for token in tokens if token not in stop_words
    )

def load_and_preprocess_data(file_path):
    """Read the CSV at *file_path* and prepare it for embedding.

    Missing values in every column are replaced with the text
    "Not Mentioned". A ``Combo`` column is then built from ``Description``
    and ``Offense`` (joined by a space) and passed through
    ``preprocess_text``.

    Args:
        file_path: Location of the CSV file, as accepted by ``pd.read_csv``.

    Returns:
        A DataFrame holding the Description, Offense, Punishment, Cognizable,
        Bailable, Court and Combo columns, or ``None`` when the file cannot
        be decoded (the decode error is printed).
    """
    output_columns = [
        'Description', 'Offense', 'Punishment',
        'Cognizable', 'Bailable', 'Court', 'Combo',
    ]

    try:
        frame = pd.read_csv(file_path)
    except UnicodeDecodeError as err:
        print(f"Error: {err}")
        return None

    # Blanket placeholder for missing cells; replace with more robust handling if needed.
    frame = frame.fillna("Not Mentioned")
    combined_text = frame['Description'] + ' ' + frame['Offense']
    frame['Combo'] = combined_text.apply(preprocess_text)
    return frame[output_columns]

def create_embeddings(df):
    """Encode the ``Combo`` column of *df* with the MiniLM sentence model.

    Args:
        df: DataFrame that has a ``Combo`` column of text.

    Returns:
        Whatever ``SentenceTransformer.encode`` returns for the list of
        ``Combo`` values, in row order.
    """
    encoder = SentenceTransformer('paraphrase-MiniLM-L6-v2')
    combo_texts = df['Combo'].tolist()
    return encoder.encode(combo_texts)

def _default_sentence_model(name='paraphrase-MiniLM-L6-v2'):
    """Return a shared ``SentenceTransformer`` for *name*, loaded on first use."""
    loaded = _default_sentence_model.__dict__.setdefault('_loaded', {})
    if name not in loaded:
        loaded[name] = SentenceTransformer(name)
    return loaded[name]


def suggest_sections(complaint, dataset, embeddings, min_suggestions=5, model=None):
    """Return the dataset rows most similar to *complaint*.

    Args:
        complaint: Free-text complaint to match.
        dataset: DataFrame whose rows line up, in order, with *embeddings*.
        embeddings: Embeddings of the dataset rows, as accepted by
            ``util.pytorch_cos_sim``.
        min_suggestions: Number of rows to return; capped at the number of
            embeddings, and values below 1 yield an empty list.
        model: Object with an ``encode`` method used to embed the complaint.
            It should be the same model that produced *embeddings*. When
            omitted, 'paraphrase-MiniLM-L6-v2' (the model named in
            ``create_embeddings``) is loaded once and reused.

    Returns:
        A list of row dicts (``to_dict(orient='records')``), ordered from
        most to least similar.
    """
    if model is None:
        # The original body read an undefined global ``model``; fall back to
        # a cached default instead of raising NameError.
        model = _default_sentence_model()

    preprocessed_complaint = preprocess_text(complaint)
    complaint_embedding = model.encode(preprocessed_complaint)
    similarities = util.pytorch_cos_sim(complaint_embedding, embeddings)[0]

    top_k = max(0, min(int(min_suggestions), similarities.shape[0]))
    if top_k == 0:
        return []

    # Torch tensors reject negative-step slicing (``[::-1]``), so rank with
    # topk, which yields the largest scores first, and hand plain ints to
    # ``.iloc``.
    top_indices = similarities.topk(top_k).indices.tolist()
    return dataset.iloc[top_indices].to_dict(orient='records')

# Load and preprocess data
file_path = 'D:/My WorkSpace/Summer-Internship/29-July-2024/FIR-DATA.csv'
df = load_and_preprocess_data(file_path)

if df is None:
    # load_and_preprocess_data prints the decode error and returns None;
    # stop here instead of failing later with an unrelated TypeError.
    print(f"Could not load data from {file_path!r}; skipping embeddings and suggestions.")
else:
    # Create embeddings
    embeddings = create_embeddings(df)

    # Example usage
    complaint = "theft of mobile phone"
    suggestions = suggest_sections(complaint, df, embeddings)
    print(suggestions)


OSError: [WinError 126] The specified module could not be found. Error loading "c:\Users\sachi\AppData\Local\Programs\Python\Python311\Lib\site-packages\torch\lib\fbgemm.dll" or one of its dependencies.