# In [3]:
import re
import pandas as pd
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer, util

# Text preprocessing function
# Holds the stop-word set and stemmer so they are built once, not per call.
_PREPROCESS_CACHE = {}


def _preprocess_resources():
    """Return the shared ``(stop_words, stemmer)`` pair, building it on first use."""
    if not _PREPROCESS_CACHE:
        stop_words = frozenset(stopwords.words('english'))
        _PREPROCESS_CACHE.update(stop_words=stop_words, stemmer=PorterStemmer())
    return _PREPROCESS_CACHE['stop_words'], _PREPROCESS_CACHE['stemmer']


def preprocess_text(text):
    """Normalize text: lowercase, tokenize, drop English stop words, stem.

    Args:
        text: The text to normalize. ``None`` is treated as empty text and any
            other non-string value is converted with ``str()`` first, so cells
            that were parsed as numbers do not raise ``AttributeError``.

    Returns:
        The remaining stemmed tokens joined by single spaces.
    """
    if text is None:
        return ''
    if not isinstance(text, str):
        text = str(text)

    stop_words, stemmer = _preprocess_resources()
    tokens = word_tokenize(text.lower())
    return ' '.join(stemmer.stem(token) for token in tokens if token not in stop_words)

# Load dataset
DATASET_PATH = 'C:/Users/abhis/project/student evaluation/questions.csv'

try:
    # ISO-8859-1 assigns a character to every byte value, so decoding cannot
    # fail here; the realistic failures are a missing/unreadable file or a
    # malformed/empty CSV.
    ds = pd.read_csv(DATASET_PATH, encoding='ISO-8859-1')
except (OSError, UnicodeDecodeError, pd.errors.EmptyDataError,
        pd.errors.ParserError) as err:
    print(f"Error: {err}")
    # Re-raise: everything below needs `ds`, so continuing would only turn
    # this failure into a confusing NameError.
    raise
else:
    print(ds.head(2))

# Handle missing values
# Every missing cell becomes the literal "Not Mentioned"; the 'ans1' column is
# then normalized row by row with preprocess_text.
ds = ds.fillna("Not Mentioned")
ds['ans1'] = ds['ans1'].map(preprocess_text)

# Initialize the model
# The sentence-embedding model is created once at module level and reused by
# suggest_sections.
EMBEDDING_MODEL_NAME = 'paraphrase-MiniLM-L6-v2'
model = SentenceTransformer(EMBEDDING_MODEL_NAME)

# Function to suggest sections
def suggest_sections(ans, ds, min_suggestions=5, *, similarity_threshold=0.2,
                     threshold_step=0.05):
    """Return the rows of ``ds`` whose ``ans1`` text is most similar to ``ans``.

    ``ans`` is preprocessed and embedded with the module-level ``model``, then
    compared by cosine similarity against the embeddings of ``ds['ans1']``.
    Rows scoring above ``similarity_threshold`` are selected; if fewer than
    ``min_suggestions`` qualify, the threshold is lowered by ``threshold_step``
    at a time, down to and including 0.0. Rows with a similarity of zero or
    less are never returned, so the result may hold fewer than
    ``min_suggestions`` entries.

    Args:
        ans: Free-text input to match against the dataset.
        ds: DataFrame with the columns ``ans1`` (text that is embedded) and
            ``question`` / ``ans`` (copied into the result).
        min_suggestions: Number of matches to aim for before the threshold
            stops being lowered.
        similarity_threshold: Initial cut-off for the similarity score.
        threshold_step: Amount the cut-off is lowered per retry; must be > 0.

    Returns:
        A list of dicts with the keys ``index`` (positional row index),
        ``question``, ``ans`` and ``similarity_score`` (float), ordered from
        most to least similar.

    Raises:
        ValueError: If ``threshold_step`` is not positive.
    """
    if threshold_step <= 0:
        raise ValueError("threshold_step must be positive")
    if ds.empty:
        # Nothing to compare against; skip the embedding work entirely.
        return []

    query_embedding = model.encode(preprocess_text(ans))
    # NOTE(review): the whole column is re-encoded on every call; if this is
    # called repeatedly on the same data, consider encoding once outside.
    section_embeddings = model.encode(ds['ans1'].tolist())
    # Row 0 holds the query's score against every dataset row. Converting to
    # plain floats once avoids per-element tensor comparisons below.
    scores = util.pytorch_cos_sim(query_embedding, section_embeddings)[0].tolist()

    threshold = similarity_threshold
    steps_taken = 0
    while True:
        relevant_indices = [i for i, score in enumerate(scores) if score > threshold]
        if len(relevant_indices) >= min_suggestions or threshold <= 0:
            break
        steps_taken += 1
        # Derive each threshold from the start value instead of repeatedly
        # subtracting, so float drift cannot decide whether the final pass at
        # 0.0 happens.
        threshold = max(similarity_threshold - steps_taken * threshold_step, 0.0)

    # Sort the indices by similarity score
    ranked_indices = sorted(relevant_indices, key=scores.__getitem__, reverse=True)

    # Collect the suggestions, best match first
    suggestions = []
    for i in ranked_indices:
        row = ds.iloc[i]
        suggestions.append({
            'index': i,
            'question': row['question'],
            'ans': row['ans'],
            'similarity_score': scores[i],
        })

    return suggestions

# Example usage
ans = "Your input text here"
suggestions = suggest_sections(ans, ds)

# Display suggestions: one labelled line per field, then a separator rule.
if not suggestions:
    print("No relevant records found.")
else:
    print("Suggested Sections:")
    for entry in suggestions:
        report_lines = (
            f"Index: {entry['index']}",
            f"Question: {entry['question']}",
            f"Answer: {entry['ans']}",
            f"Similarity Score: {entry['similarity_score']:.4f}",
            "_________________________________________________________________________________________\n",
        )
        for line in report_lines:
            print(line)


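# Error output recorded when this cell was run:
# OSError: [WinError 126] The specified module could not be found. Error loading "c:\Users\abhis\AppData\Local\Programs\Python\Python311\Lib\site-packages\torch\lib\fbgemm.dll" or one of its dependencies.
# NOTE(review): the failing DLL belongs to the installed torch package, so this
# is likely an environment problem (a broken PyTorch install or a missing
# native runtime dependency) rather than a bug in the code above - verify.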