In [1]:
import pandas as pd

# NOTE(review): absolute, machine-specific path -- point this at your own copy
# of the dataset (ideally a path relative to the notebook) before sharing.
SCHEMES_CSV = r"C:\Policies\indian_govt_schemes.csv"

# Columns whose text is concatenated into the single searchable field.
TEXT_COLUMNS = [
    "scheme_name",
    "details",
    "benefits",
    "eligibility",
    "application",
    "documents",
]

df = pd.read_csv(SCHEMES_CSV)

# Missing cells are read as NaN; astype(str) alone would turn them into the
# literal token "nan", which would then be indexed as a real word. Fill them
# with an empty string first, then join the columns with ". " and lower-case.
df["text_for_nlp"] = (
    df[TEXT_COLUMNS]
    .fillna("")
    .astype(str)
    .agg(". ".join, axis=1)
    .str.lower()
)

In [2]:
from sklearn.feature_extraction.text import TfidfVectorizer
import joblib

# Learn the TF-IDF vocabulary from the combined scheme text (English stop
# words removed) and vectorise every scheme in the same pass.
vectorizer = TfidfVectorizer(stop_words="english")
tfidf_matrix = vectorizer.fit_transform(df["text_for_nlp"])

# Save the fitted vectorizer on its own, and the matrix bundled with the
# DataFrame, so the query cell can reload both without refitting.
joblib.dump(vectorizer, "scheme_vectorizer.pkl")
joblib.dump(dict(matrix=tfidf_matrix, df=df), "scheme_tfidf_matrix.pkl")

['scheme_tfidf_matrix.pkl']

In [4]:
from sklearn.metrics.pairwise import cosine_similarity
import joblib

# Reload the artifacts written by the fitting cell. joblib files are pickles:
# only load files you produced yourself.
vectorizer = joblib.load("scheme_vectorizer.pkl")
data = joblib.load("scheme_tfidf_matrix.pkl")
tfidf_matrix, df = data["matrix"], data["df"]

def query_scheme(question, top_k=3, min_similarity=0.0):
    """Return the schemes whose indexed text is most similar to ``question``.

    The question is lower-cased, transformed with the module-level
    ``vectorizer`` and compared against every row of ``tfidf_matrix`` using
    cosine similarity. The best-scoring rows of ``df`` are returned.

    Parameters
    ----------
    question : str
        Free-text query.
    top_k : int, default 3
        Maximum number of results to return. ``0`` yields an empty list.
    min_similarity : float, default 0.0
        Candidates scoring below this value are dropped. TF-IDF cosine
        similarities are not expected to be negative, so the default keeps
        every candidate; pass a small positive value to suppress schemes
        that share no terms with the question.

    Returns
    -------
    list of dict
        One dict per match, best first, with keys ``"scheme_name"``,
        ``"benefits"``, ``"eligibility"`` and ``"similarity"`` (a float).

    Raises
    ------
    ValueError
        If ``top_k`` is negative.
    """
    # A negative top_k would make the slice below count from the end and
    # silently return all-but-|top_k| rows, so reject it explicitly.
    if top_k < 0:
        raise ValueError(f"top_k must be non-negative, got {top_k}")

    query_vec = vectorizer.transform([question.lower()])
    sims = cosine_similarity(query_vec, tfidf_matrix).flatten()

    # argsort is ascending; reverse it to get the highest scores first.
    top_idx = sims.argsort()[::-1][:top_k]

    results = []
    for idx in top_idx:
        score = float(sims[idx])
        if score < min_similarity:
            continue
        row = df.iloc[idx]
        results.append({
            "scheme_name": row["scheme_name"],
            "benefits": row["benefits"],
            "eligibility": row["eligibility"],
            "similarity": score,
        })
    return results

# Demo: print the three best-matching schemes for a sample question.
# NOTE(review): "scholartship" looks like a misspelling of "scholarship";
# presumably it matches nothing in the vocabulary -- confirm whether the
# typo is intentional before changing it, since it affects the results.
question = "scholartship for students"
matches = query_scheme(question, top_k=3)
for rank, match in enumerate(matches, start=1):
    print(f"{rank}. {match['scheme_name']} | {match['similarity']:.2f}")
    print(f"   Benefits: {match['benefits']}")
    print(f"   Eligibility: {match['eligibility']}\n")


1. Pre-matric Scholarship For SC Students - Uttarakhand | 0.54
   Benefits: Students of Classes 1 - 5 will receive Rs. 600/- per annum. Students of Classes 6 - 8 will receive Rs. 960/- per annum. Day scholar students of Classes 9 - 10 will receive Rs. 3,000/- per annum. Hostel living students of Classes 9 - 10 will receive Rs. 6,250/- per annum.
   Eligibility: The applicant students should be a resident of Uttarakhand state. The applicant students should belong to the Scheduled Caste Category. The applicant students should be studying in a recognized school/institution in the state. The applicant students must have passed the previous year's examination. The Applicant students should not be studying in more than one school in the same academic year. Scholarship applicants should make sure that the name in the scholarship application, the name mentioned in Aadhaar and the name mentioned in the bank account are the same. It is necessary for the applicant student to give his or her guard