In [9]:
import pandas as pd

# Load the scheme catalogue from CSV.
# NOTE(review): this is an absolute, machine-specific path; the cell only runs
# where that file exists. Consider a configurable, relative data directory.
df = pd.read_csv(r"C:\Users\Aadhya\Downloads\Infosys_AI_Policies\updated_data.csv")

# Columns whose text is merged into the single field fed to the NLP step.
TEXT_COLUMNS = [
    "scheme_name",
    "details",
    "benefits",
    "eligibility",
    "application",
    "documents",
]

# Missing cells are replaced with "" *before* casting to str: astype(str) on a
# NaN produces the literal word "nan", which would otherwise be concatenated
# into the text and indexed as if it were real content.
text_parts = [df[col].fillna("").astype(str) for col in TEXT_COLUMNS]

# Join the columns with single spaces and lower-case the result.
df['text_for_nlp'] = (
    text_parts[0].str.cat(text_parts[1:], sep=" ").str.lower()
)


In [10]:
# ----------------- TF-IDF Vectorization -----------------
from sklearn.feature_extraction.text import TfidfVectorizer
import joblib

# Learn the vocabulary / IDF weights from the combined text (English stop
# words removed) and encode every row of df as a TF-IDF vector in one step.
vectorizer = TfidfVectorizer(stop_words="english")
tfidf_matrix = vectorizer.fit_transform(df["text_for_nlp"])

# Persist the fitted vectorizer on its own, then the document matrix together
# with the DataFrame it was built from so rows can be looked up after reload.
joblib.dump(vectorizer, "scheme_vectorizer.pkl")
tfidf_bundle = {"matrix": tfidf_matrix, "df": df}
joblib.dump(tfidf_bundle, "scheme_tfidf_matrix.pkl")

['scheme_tfidf_matrix.pkl']

In [11]:
# ----------------- Cosine Similarity Search -----------------
from sklearn.metrics.pairwise import cosine_similarity
import joblib

# Restore the fitted vectorizer and the {"matrix", "df"} bundle from disk.
# NOTE: joblib files are pickle-based; only load artifacts you produced yourself.
vectorizer = joblib.load(r"scheme_vectorizer.pkl")
data = joblib.load(r"scheme_tfidf_matrix.pkl")

# Unpack the bundle into the names used by the search function.
tfidf_matrix, df = data["matrix"], data["df"]

def query_scheme(question, top_k=3, min_similarity=0.0):
    """Return the schemes whose text is most similar to a free-text question.

    The question is lower-cased, encoded with the module-level ``vectorizer``
    and compared against every row of ``tfidf_matrix`` by cosine similarity.

    Args:
        question: Free-text query string.
        top_k: Maximum number of matches to return. Values <= 0 yield an
            empty list.
        min_similarity: Matches whose similarity is not strictly greater than
            this value are dropped. The default of 0.0 removes rows that share
            no terms with the query.

    Returns:
        A list of at most ``top_k`` dicts, best match first, each with the keys
        ``scheme_name``, ``benefits``, ``eligibility`` (taken from ``df``) and
        ``similarity`` (a float). The list is empty when nothing matches.
    """
    # Nothing to search for, or nothing requested. A negative top_k would
    # otherwise slice as [:-k] and return almost the whole table.
    if not question or not question.strip() or top_k <= 0:
        return []

    query_vec = vectorizer.transform([question.lower()])
    sims = cosine_similarity(query_vec, tfidf_matrix).flatten()

    # Row positions ordered from highest to lowest similarity.
    top_idx = sims.argsort()[::-1][:top_k]

    results = []
    for idx in top_idx:
        score = float(sims[idx])
        if score <= min_similarity:
            # top_idx is sorted descending, so every remaining row is also
            # at or below the threshold.
            break
        row = df.iloc[idx]
        results.append({
            "scheme_name": row["scheme_name"],
            "benefits": row["benefits"],
            "eligibility": row["eligibility"],
            "similarity": score
        })
    return results


# ----------------- Example Usage -----------------
# Run one sample query and print the ranked matches, numbered from 1.
question = "scholarship for students"
results = query_scheme(question, top_k=3)

if not results:
    print("No relevant scheme found.")
else:
    for rank, match in enumerate(results, start=1):
        name = match['scheme_name']
        score = match['similarity']
        print(f"{rank}. {name} | {score:.2f}")
        print(f" Benefits: {match['benefits']}")
        print(f" Eligibility: {match['eligibility']}\n")


1. State Merit Scholarship Manipur | 0.60
 Benefits: For Class X passed students: The scholarship provides Rs. 6,000 per annum to the first 300 students who passed the High School Leaving Certificate Examination (HSLC) conducted by BSEM for a period of two years. For Class XII passed students: The scholarship provides Rs. 12,000 per annum to the first 125 students of Science, 125 students of Social Sciences, and 50 students of Commerce who passed the class XII examination conducted by COHSEM for a period of three years. The scholarship is available to students who continue their studies within or outside the state but within the country, starting from the academic session of 2015-16. The scholarship aims to provide equal opportunities for higher education to deserving students, regardless of their economic background. In addition to the initial list of recipients, 50 students will be kept on the waiting list for possible refusal, ineligibility, or other students getting other scholarsh