In [None]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neighbors import NearestNeighbors
import nltk
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer

# Fetch the NLTK data needed below: the English stop-word list and the
# Punkt sentence/word tokenizer models used by nltk.word_tokenize.
nltk.download('stopwords')
nltk.download('punkt')
# Recent NLTK releases load the Punkt tokenizer from the 'punkt_tab' resource
# instead of the pickled 'punkt' models; without it word_tokenize raises
# LookupError. Both are downloaded so old and new NLTK versions work.
nltk.download('punkt_tab')

# A set gives O(1) membership tests when filtering tokens.
stop_words = set(stopwords.words('english'))
stemmer = PorterStemmer()

def preprocess(txt):
    """Normalize text for TF-IDF matching.

    Lowercases ``txt``, tokenizes it with ``nltk.word_tokenize``, drops every
    token found in ``stop_words`` and stems the remaining tokens with
    ``stemmer``.

    Args:
        txt: Text to normalize. Non-string values (for example ``None`` or the
            float NaN that pandas yields for an empty CSV cell) are treated
            as empty text.

    Returns:
        The stemmed tokens joined by single spaces, or ``''`` when ``txt`` is
        not a string.
    """
    if not isinstance(txt, str):
        # ``.lower()`` would raise AttributeError on NaN/None; an empty
        # document simply contributes no terms to the vectorizer.
        return ''
    tokens = nltk.word_tokenize(txt.lower())
    return ' '.join(stemmer.stem(w) for w in tokens if w not in stop_words)

# Load the problem/solution table. The code below reads the 'Problem' and
# 'Python Code' columns.
df = pd.read_csv('code.csv')
# Keep the untouched question text for display: df['Problem'] is replaced by
# its stemmed, stop-word-free form, which is meant for matching, not reading.
original_problems = df['Problem'].copy()
df['Problem'] = df['Problem'].apply(preprocess)

# Index the normalized problems as TF-IDF vectors and search them by cosine
# distance.
vec = TfidfVectorizer()
X = vec.fit_transform(df['Problem'])
# kneighbors() raises if n_neighbors exceeds the number of fitted rows, so cap
# it for CSV files with fewer than five problems.
knn = NearestNeighbors(n_neighbors=min(5, len(df)), metric='cosine')
knn.fit(X)

# Largest cosine distance at which the closest problem still counts as a match.
THRESHOLD = 0.5

# The query goes through the same normalization as the indexed problems.
query = preprocess(input("Enter your question: "))
query_vec = vec.transform([query])
distances, indices = knn.kneighbors(query_vec)

# Only the first neighbour of the single query row is used.
best_distance = distances[0][0]
best_index = indices[0][0]

if best_distance > THRESHOLD:
    print("No relevant problem found.")
else:
    # Show the question as originally written, not its stemmed form.
    prob = original_problems.iloc[best_index]
    code = df.iloc[best_index]['Python Code']
    print(f"Question: \n{prob}")
    print(f"Python code for the problem:\n{code}")
