In [None]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from nltk.tokenize import word_tokenize
def load_data(file_path):
    """Read a CSV file into a pandas DataFrame.

    Args:
        file_path: Path (or any other source accepted by
            ``pandas.read_csv``) of the CSV data to load.

    Returns:
        The DataFrame produced by ``pandas.read_csv`` with default options.
    """
    frame = pd.read_csv(file_path)
    return frame
def lowercase_all(df):
    """Return a new DataFrame with every string cell of ``df`` lower-cased.

    Cells that are not ``str`` instances (numbers, missing values, ...) are
    passed through unchanged. Column labels are not modified.

    Args:
        df: The pandas DataFrame to normalise.

    Returns:
        A DataFrame of the same shape as ``df`` holding the lower-cased
        values.
    """
    def _lower_if_str(value):
        return value.lower() if isinstance(value, str) else value

    # ``DataFrame.map`` only exists from pandas 2.1 onwards; older releases
    # expose the same element-wise operation as ``applymap``. The check is
    # done on the class rather than the instance so that a column that
    # happens to be called "map" cannot be mistaken for the method.
    if hasattr(type(df), "map"):
        return df.map(_lower_if_str)
    return df.applymap(_lower_if_str)
def tokenize_text(text):
    """Split ``text`` into tokens using NLTK's ``word_tokenize``.

    Args:
        text: The value to tokenize.

    Returns:
        The token list from ``word_tokenize`` when ``text`` is a ``str``;
        an empty list for any other type.
    """
    if not isinstance(text, str):
        return []
    return word_tokenize(text)
def compute_cosine_similarity(query_vector, entry_vector):
    """Return a single cosine-similarity score for a query and an entry.

    Both arguments are handed to scikit-learn's ``cosine_similarity``; the
    resulting matrix is flattened and its first element is returned.

    Args:
        query_vector: Vectorised query.
        entry_vector: Vectorised entry to compare against.
            # presumably each argument holds exactly one row, so the result
            # has a single element -- confirm against callers.

    Returns:
        The first element of the flattened similarity matrix.
    """
    similarity_matrix = cosine_similarity(query_vector, entry_vector)
    first_score = similarity_matrix.flatten()[0]
    return first_score
def calculate_cosine_distances(df, query):
    """Score every row of ``df`` against ``query`` with TF-IDF cosine similarity.

    Each row becomes one document by joining its non-null cells (converted
    to ``str``) with single spaces. The documents are vectorised with a
    ``TfidfVectorizer`` that uses ``tokenize_text`` as its tokenizer, and the
    query is transformed with the same fitted vocabulary.

    Note: despite the function name, the returned scores are cosine
    *similarities* (larger means more similar), not distances.

    Args:
        df: pandas DataFrame whose rows form the corpus.
        query: Text to compare against every row.

    Returns:
        A list of ``(position, similarity)`` tuples, one per row, in row
        order. ``position`` is the 0-based row position (usable with
        ``df.iloc``), not the index label. An empty list is returned when
        ``df`` has no rows or no columns.
    """
    # Nothing to vectorise: fitting TF-IDF on an empty corpus fails instead
    # of producing scores, so report "no candidates" explicitly. Callers can
    # then treat the empty list as "no match".
    if df.empty:
        return []

    vectorizer = TfidfVectorizer(tokenizer=tokenize_text, token_pattern=None)
    corpus = df.apply(lambda row: ' '.join(row.dropna().astype(str)), axis=1)
    corpus_vectorized = vectorizer.fit_transform(corpus)
    query_vector = vectorizer.transform([query])

    # A single call scores the query against all rows at once (one result
    # row, one column per document) instead of slicing the sparse matrix
    # row by row in Python.
    similarities = cosine_similarity(query_vector, corpus_vectorized).flatten()
    return list(enumerate(similarities))

def find_best_match(distances):
    """Pick the entry with the highest score.

    Args:
        distances: Iterable of indexable entries whose element ``1`` is the
            score, e.g. ``(index, score)`` tuples.

    Returns:
        The entry with the largest score (the first one encountered on
        ties), or ``None`` when ``distances`` is empty.
    """
    def _score(entry):
        return entry[1]

    return max(distances, default=None, key=_score)
def process_data(file_path, query):
    """Load a CSV file and find the row that best matches ``query``.

    The file is read with ``load_data``, its string cells are lower-cased
    with ``lowercase_all``, every row is scored with
    ``calculate_cosine_distances`` and the top entry is selected with
    ``find_best_match``.

    Args:
        file_path: Path of the CSV file to search.
        query: Text to match against the rows.

    Returns:
        A ``(best_match, df)`` tuple: ``best_match`` is whatever
        ``find_best_match`` returned, and ``df`` is the lower-cased
        DataFrame that was searched.
    """
    frame = lowercase_all(load_data(file_path))
    scored_rows = calculate_cosine_distances(frame, query)
    return find_best_match(scored_rows), frame
def main():
    """Prompt for a CSV path and a query, then print the best-matching row.

    Reads both values from standard input, runs ``process_data`` and prints
    the query, the position of the best match, its cosine similarity and the
    matching row. Prints "No match found" when ``process_data`` yields no
    match.
    """
    file_path = input("Enter the path to the CSV file: ")
    user_query = input("Enter the query to match: ")
    best_match, df = process_data(file_path, user_query)

    # Bail out early so the reporting below needs no extra nesting.
    if best_match is None:
        print("No match found")
        return

    row_position, score = best_match
    print("Query given:", user_query)
    print("Best Match Found at index:", row_position)
    print("Cosine Similarity:", score)
    print(df.iloc[row_position])

# Run the interactive prompt only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
