In [2]:
# recommender.py

import pandas as pd

# Load the cleaned dataset
def load_data(csv_path="all_states_schemes_cleaned.csv"):
    try:
        df = pd.read_csv(csv_path)
    except FileNotFoundError:
        raise FileNotFoundError(f"‚ùå File not found at: {csv_path}")
    
    # Fill missing data to avoid errors during filtering
    df["Eligibility"] = df["Eligibility"].fillna("No eligibility info provided.")
    df["Benefit"] = df["Benefit"].fillna("No benefit info provided.")
    df["Scheme Name"] = df["Scheme Name"].fillna("Unnamed Scheme")
    
    return df


# Recommender logic
def recommend_schemes(df, state=None, keyword=None, top_n=5):
    df_filtered = df.copy()

    # Apply filters
    if state:
        df_filtered = df_filtered[
            df_filtered['State'].str.contains(state, case=False, na=False)
        ]

    if keyword:
        keyword = keyword.lower()
        df_filtered = df_filtered[
            df_filtered['Scheme Name'].str.lower().str.contains(keyword)
            | df_filtered['Eligibility'].str.lower().str.contains(keyword)
            | df_filtered['Benefit'].str.lower().str.contains(keyword)
        ]

    # If no results, return message
    if df_filtered.empty:
        print("üîç No schemes found matching your criteria.")
        return pd.DataFrame()

    # Return top N
    return df_filtered[['State', 'Scheme Name', 'Eligibility', 'Benefit']].head(top_n)


# For testing in local script
if __name__ == "__main__":
    df = load_data("all_states_schemes_cleaned.csv")
    print("‚úÖ Dataset loaded with", len(df), "schemes.\n")

    # Sample query
    results = recommend_schemes(df, state="Karnataka", keyword="education", top_n=5)
    print(results.to_string(index=False))


‚úÖ Dataset loaded with 1524 schemes.

    State                                                                                                                                                                                                                                                                                                                                                                                                                  Scheme Name                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                           

In [6]:
def recommend_schemes(df, state=None, keyword=None, top_n=5):
    df_filtered = df.copy()

    # Apply filters
    if state:
        df_filtered = df_filtered[
            df_filtered['State'].str.contains(state, case=False, na=False)
        ]

    if keyword:
        keyword = keyword.lower()
        df_filtered = df_filtered[
            df_filtered['Scheme Name'].str.lower().str.contains(keyword) |
            df_filtered['Eligibility'].str.lower().str.contains(keyword) |
            df_filtered['Benefit'].str.lower().str.contains(keyword)
        ]

    if df_filtered.empty:
        print("üîç No schemes found.")
        return pd.DataFrame()

    # Clean: limit long text fields
    def smart_truncate(text, max_chars=250):
        # Truncates at the nearest sentence end (".") or space
        if not isinstance(text, str):
            return ""
        if len(text) <= max_chars:
            return text
        cutoff = text[:max_chars].rfind('.')
        if cutoff == -1:
            cutoff = text[:max_chars].rfind(' ')
        return text[:cutoff] + "..."

    df_filtered["Eligibility"] = df_filtered["Eligibility"].apply(lambda x: smart_truncate(x))
    df_filtered["Benefit"] = df_filtered["Benefit"].apply(lambda x: smart_truncate(x))

    return df_filtered[['State', 'Scheme Name', 'Eligibility', 'Benefit']].head(top_n)


# For testing in local script
if __name__ == "__main__":
    
    df = load_data("all_states_schemes_cleaned.csv")
    print("‚úÖ Dataset loaded with", len(df), "schemes.\n")

    # Sample query
    results = recommend_schemes(df, state="Karnataka", keyword="education", top_n=5)

    if not results.empty:
        print("\nüìã Top Schemes:\n")
        for i, row in results.iterrows():
            print(f"{i+1}. üè∑Ô∏è  Scheme: {row['Scheme Name'][:60]}...")
            print(f"   üìç State: {row['State']}")
            print(f"   üßæ Eligibility: {row['Eligibility']}")
            print(f"   üéÅ Benefit: {row['Benefit']}")
            print("-" * 80)


‚úÖ Dataset loaded with 1524 schemes.


üìã Top Schemes:

804. üè∑Ô∏è  Scheme: Karnataka govt. launches Prabuddha Scheme to help students f...
   üìç State: Karnataka
   üßæ Eligibility: 100% of education expenses for students whose annual family income is less than Rs. 8 lakhs. 50% of expenses of students whose annual family income is between Rs. 8 lakhs to Rs. 15 lakhs.
   üéÅ Benefit: Karnataka govt. launches Prabuddha Scheme to help students from scheduled caste (SC) & schedule tribe (ST) communities to study abroad...
--------------------------------------------------------------------------------
807. üè∑Ô∏è  Scheme: Karnataka govt. has launched an ambitious Unnati scheme to p...
   üìç State: Karnataka
   üßæ Eligibility: First Category ‚Äì Those startups where SC / ST entrepreneurs have substantial stake with hands on-role in management. For this category, there is only 1 criteria which is that the idea has to be good enough regardless of the sector...
   üéÅ Benefit: 