In [5]:
from firebase_admin import credentials, initialize_app, firestore

# Initialize Firebase from the service-account key file.
# A failure here is only reported on stdout; execution continues.
try:
    cred = credentials.Certificate("/content/sample_data/resources/ey-hackathon-ffe58-firebase-adminsdk-gk8uv-dbb395bd43.json")
    initialize_app(cred)
except Exception as init_error:
    print(f"Error initializing Firebase: {init_error}")
else:
    print("Firebase initialized successfully.")

# Connect to Firestore.
# NOTE: `db` is only bound when firestore.client() succeeds; on failure the
# name stays undefined and every later use of `db` will raise NameError.
try:
    db = firestore.client()
except Exception as connect_error:
    print(f"Error connecting to Firestore: {connect_error}")
else:
    print("Connected to Firestore.")

# Fetch Users Data
def fetch_users():
    """Read every document of the ``users`` collection into a list of dicts.

    Uses the module-level ``db`` Firestore client.

    Returns:
        list[dict]: One dict per document: the document's ``to_dict()``
        payload with the Firestore document ID stored under ``"id"``
        (replacing any ``"id"`` field the document itself carried).
        An empty list is returned when anything goes wrong; the error is
        printed, so a failure and an empty collection look the same to the
        caller.
    """
    try:
        # Merge the document ID into each payload for later reference
        users = [
            {**snapshot.to_dict(), "id": snapshot.id}
            for snapshot in db.collection("users").stream()
        ]
        print(f"Fetched {len(users)} user profiles.")
        return users
    except Exception as fetch_error:
        print(f"Error fetching users: {fetch_error}")
        return []

# Fetch Schemes Data
def fetch_schemes():
    """Read every document of the ``gov_schemes`` collection into a list of dicts.

    Uses the module-level ``db`` Firestore client.

    Returns:
        list[dict]: One dict per document: the document's ``to_dict()``
        payload with the Firestore document ID stored under ``"id"``
        (replacing any ``"id"`` field the document itself carried).
        An empty list is returned when anything goes wrong; the error is
        printed, so a failure and an empty collection look the same to the
        caller.
    """
    try:
        # Merge the document ID into each payload for later reference
        schemes = [
            {**snapshot.to_dict(), "id": snapshot.id}
            for snapshot in db.collection("gov_schemes").stream()
        ]
        print(f"Fetched {len(schemes)} schemes.")
        return schemes
    except Exception as fetch_error:
        print(f"Error fetching schemes: {fetch_error}")
        return []

# Pull both collections from Firestore
users_data = fetch_users()
schemes_data = fetch_schemes()

# Report how many records each fetch returned
print(
    f"Number of Users Fetched: {len(users_data)}",
    f"Number of Schemes Fetched: {len(schemes_data)}",
    sep="\n",
)


Firebase initialized successfully.
Connected to Firestore.
Fetched 10000 user profiles.
Fetched 9 schemes.
Number of Users Fetched: 10000
Number of Schemes Fetched: 9


In [6]:
def preprocess_users(users_data):
    """Keep only the user records that carry the expected fields and a sane age.

    A record is kept when it contains the keys ``"Age"``, ``"Income"`` and
    ``"Occupation"`` (exact, case-sensitive) and its ``"Age"`` is an int or
    float that is not negative.  Every rejected record is printed together
    with the reason.

    Args:
        users_data: Iterable of user records (dict-like).

    Returns:
        list: The accepted records, in input order (same objects, not copies).
    """
    required_fields = ("Age", "Income", "Occupation")
    valid_users = []

    for user in users_data:
        # Guard 1: all required keys must be present
        if any(field not in user for field in required_fields):
            print(f"Missing fields in user: {user}")
            continue

        # Guard 2: age must be numeric and non-negative
        age = user["Age"]
        if not (isinstance(age, (int, float)) and age >= 0):
            print(f"Invalid age in user: {user}")
            continue

        valid_users.append(user)

    print(f"Preprocessed {len(valid_users)} valid user profiles.")
    return valid_users


def preprocess_schemes(schemes_data):
    """Keep schemes that carry the required fields, parsing string eligibility.

    A scheme is kept when it contains the keys ``"Age Range"``,
    ``"Eligibility"`` and ``"Scheme Name"`` (exact, case-sensitive).  When
    ``"Eligibility"`` is a string it is parsed and the scheme dict is updated
    in place with the parsed value; non-string values are left untouched.

    SECURITY: the eligibility text comes from an external datastore, so it is
    never passed to ``eval()``.  It is parsed as strict JSON first and, if that
    fails, as a Python literal via ``ast.literal_eval`` (which accepts
    single-quoted dict reprs but cannot execute code).  Strings that are
    neither are reported and the scheme is dropped.

    Args:
        schemes_data: Iterable of scheme records (dicts).

    Returns:
        list: The accepted schemes, in input order (same objects, not copies).
    """
    # Function-scope imports keep this cell self-contained.
    import ast
    import json

    required_fields = ("Age Range", "Eligibility", "Scheme Name")
    valid_schemes = []

    for scheme in schemes_data:
        if not all(key in scheme for key in required_fields):
            print(f"Missing fields in scheme: {scheme}")
            continue

        eligibility = scheme["Eligibility"]
        if isinstance(eligibility, str):
            try:
                try:
                    eligibility = json.loads(eligibility)
                except ValueError:
                    # Not strict JSON (e.g. a single-quoted Python dict repr)
                    eligibility = ast.literal_eval(eligibility)
            except Exception as e:
                print(f"Error parsing eligibility in scheme: {scheme}, Error: {e}")
                continue
            scheme["Eligibility"] = eligibility

        valid_schemes.append(scheme)

    print(f"Preprocessed {len(valid_schemes)} valid schemes.")
    return valid_schemes


# Run both preprocessing passes over the fetched records
preprocessed_users = preprocess_users(users_data)
preprocessed_schemes = preprocess_schemes(schemes_data)

# Echo the surviving record counts as a sanity check
print(
    f"Valid Users: {len(preprocessed_users)}",
    f"Valid Schemes: {len(preprocessed_schemes)}",
    sep="\n",
)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Missing fields in user: {'age': 29, 'Occupation': 'Worker', 'Name': 'User_7177', 'income': 9357, 'Location': 'Bangalore', 'id': 'VCXaEnLvDKSL2I3YmiOF'}
Missing fields in user: {'age': 88, 'Occupation': 'Unemployed', 'Name': 'User_9856', 'income': 9698, 'Location': 'Bangalore', 'id': 'VD9oZdGJEEIPtBYTeo0f'}
Missing fields in user: {'age': 70, 'Occupation': 'Retired', 'Name': 'User_1580', 'income': 3173, 'Location': 'Bangalore', 'id': 'VDGZFwaUOokcfKV8oVXI'}
Missing fields in user: {'age': 100, 'Occupation': 'Unemployed', 'Name': 'User_8558', 'income': 3858, 'Location': 'Delhi', 'id': 'VDNyKhbD7j9O5ZQGn85v'}
Missing fields in user: {'age': 18, 'Occupation': 'Unemployed', 'Name': 'User_1841', 'income': 7718, 'Location': 'Delhi', 'id': 'VDSOnzRzvrZqXKaWC4v4'}
Missing fields in user: {'age': 77, 'Occupation': 'Worker', 'Name': 'User_76', 'income': 9898, 'Location': 'Delhi', 'id': 'VDUpqcAQZCXxYGcWiYFj'}
Missing fields in user:

In [7]:
# Peek at the first 5 raw user records to check the actual field names
print("Raw Users Data:", users_data[:5], sep="\n")

# Same for the first 5 raw scheme records
print("Raw Schemes Data:", schemes_data[:5], sep="\n")


Raw Users Data:
[{'age': 15, 'Occupation': 'Unemployed', 'Name': 'User_3556', 'income': 5287, 'Location': 'Chennai', 'id': '00EPfRXylUvn6nxTXkaZ'}, {'age': 5, 'Occupation': 'Retired', 'Name': 'User_2525', 'income': 7161, 'Location': 'Bangalore', 'id': '00w3gORwHXNQqQldz5CR'}, {'age': 81, 'Occupation': 'Retired', 'Name': 'User_3584', 'income': 1181, 'Location': 'Kolkata', 'id': '02LwRORnXdE2OhHCrm25'}, {'age': 99, 'Occupation': 'Worker', 'Name': 'User_5018', 'income': 3903, 'Location': 'Bangalore', 'id': '02sC5s1KcYQKATH97hOR'}, {'age': 76, 'Occupation': 'Student', 'Name': 'User_3955', 'income': 762, 'Location': 'Delhi', 'id': '03SrcDBSH7mh02X6gcDE'}]
Raw Schemes Data:
[{'Scheme Name': 'National Scheme for Incentive to Girls for Secondary Education', 'Benefits': 'Financial incentive for girls in secondary schools.', 'eligibility': {'income': 10000, 'occupation': 'Student'}, 'Requirements': 'School certificate, Income proof', 'Age Range': '0-18', 'Purpose': 'Secondary Education', 'id': '

In [8]:
def preprocess_users(users_data, max_income=10000):
    """Normalize user field names in place and keep the records that validate.

    Each record gets ``"Age"``, ``"Income"`` and ``"Occupation"`` keys:

    * ``"Age"`` / ``"Income"`` are taken from the lowercase ``"age"`` /
      ``"income"`` key when present, otherwise from an already-normalized
      ``"Age"`` / ``"Income"`` key, otherwise they default to ``0``.  Falling
      back to the existing capitalized key makes the function safe to run
      again on records that were already normalized.
    * ``"Occupation"`` defaults to ``"Unknown"`` when absent.

    A record is valid when its age is an int or float that is not negative
    and its income does not exceed ``max_income``.  An income that cannot be
    compared with a number (e.g. a string or ``None``) marks the record
    invalid instead of raising ``TypeError``.  Invalid records are printed.

    Args:
        users_data: Iterable of user dicts.  The dicts are mutated in place.
        max_income: Inclusive income ceiling for a record to be kept.

    Returns:
        list: The valid records, in input order (same objects, not copies).
    """
    valid_users = []
    for user in users_data:
        # Normalize field names: raw lowercase key wins, then any value that
        # is already stored under the normalized key, then the default.
        user["Age"] = user.get("age", user.get("Age", 0))
        user["Income"] = user.get("income", user.get("Income", 0))
        user["Occupation"] = user.get("Occupation", "Unknown")

        age = user["Age"]
        age_ok = isinstance(age, (int, float)) and age >= 0

        try:
            income_ok = user["Income"] <= max_income
        except TypeError:
            # Non-comparable income (str, None, ...) is a data error, not a crash
            income_ok = False

        if age_ok and income_ok:
            valid_users.append(user)
        else:
            print(f"Invalid user profile: {user}")
    print(f"Preprocessed {len(valid_users)} valid user profiles.")
    return valid_users


def preprocess_schemes(schemes_data):
    """Normalize the eligibility field in place and keep the schemes that validate.

    ``"Eligibility"`` is taken from the lowercase ``"eligibility"`` key when
    present, otherwise from an already-normalized ``"Eligibility"`` key,
    otherwise it defaults to an empty dict.  Falling back to the existing
    capitalized key keeps previously normalized rules from being wiped.

    A scheme is valid when it has ``"Age Range"`` and ``"Scheme Name"`` keys
    and its eligibility is a dict (it is later queried with ``.get``).
    Invalid schemes are printed.

    Args:
        schemes_data: Iterable of scheme dicts.  The dicts are mutated in place.

    Returns:
        list: The valid schemes, in input order (same objects, not copies).
    """
    required_fields = ("Age Range", "Eligibility", "Scheme Name")
    valid_schemes = []
    for scheme in schemes_data:
        # Normalize field names: raw lowercase key wins, then any value that
        # is already stored under the normalized key, then the default.
        scheme["Eligibility"] = scheme.get("eligibility", scheme.get("Eligibility", {}))

        has_required = all(key in scheme for key in required_fields)
        if has_required and isinstance(scheme["Eligibility"], dict):
            valid_schemes.append(scheme)
        else:
            print(f"Invalid scheme data: {scheme}")
    print(f"Preprocessed {len(valid_schemes)} valid schemes.")
    return valid_schemes


# Run both preprocessing passes over the fetched records
preprocessed_users = preprocess_users(users_data)
preprocessed_schemes = preprocess_schemes(schemes_data)

# Echo the surviving record counts as a sanity check
print(
    f"Valid Users: {len(preprocessed_users)}",
    f"Valid Schemes: {len(preprocessed_schemes)}",
    sep="\n",
)


Preprocessed 10000 valid user profiles.
Preprocessed 9 valid schemes.
Valid Users: 10000
Valid Schemes: 9


In [9]:
def filter_schemes_by_age(users, schemes):
    """Pair every user with the schemes whose age bracket contains the user's age.

    Only three ``"Age Range"`` labels are recognised:

    * ``"0-18"``  -> 0 <= age <= 18
    * ``"18-55"`` -> 18 < age <= 55
    * ``"55+"``   -> age > 55

    Schemes carrying any other label never match.

    Args:
        users: Iterable of user dicts providing ``"Age"`` and ``"id"``.
        schemes: Scheme dicts providing ``"Age Range"``; scanned once per user.

    Returns:
        list[dict]: One entry per user, in input order, with the keys
        ``"user_id"``, ``"user_age"`` and ``"relevant_schemes"`` (the matching
        scheme dicts, in their original order).
    """
    def _is_minor(age):
        return 0 <= age <= 18

    def _is_working_age(age):
        return 18 < age <= 55

    def _is_senior(age):
        return age > 55

    bracket_checks = {
        "0-18": _is_minor,
        "18-55": _is_working_age,
        "55+": _is_senior,
    }

    def _applies(scheme, age):
        # Unknown bracket labels have no checker and therefore never apply
        check = bracket_checks.get(scheme["Age Range"])
        return check is not None and check(age)

    def _entry_for(person):
        age = person["Age"]
        person_id = person["id"]
        return {
            "user_id": person_id,
            "user_age": age,
            "relevant_schemes": [plan for plan in schemes if _applies(plan, age)],
        }

    user_filtered_schemes = [_entry_for(person) for person in users]

    print(f"Filtered schemes for {len(user_filtered_schemes)} users.")
    return user_filtered_schemes


# Filter schemes by age group
filtered_data = filter_schemes_by_age(preprocessed_users, preprocessed_schemes)

# Example output for a single user.
# Guarded because indexing [0] raises IndexError when no users were processed.
if filtered_data:
    print(f"Example filtered data for User 1: {filtered_data[0]}")
else:
    print("No filtered data available to display.")

Filtered schemes for 10000 users.
Example filtered data for User 1: {'user_id': '00EPfRXylUvn6nxTXkaZ', 'user_age': 15, 'relevant_schemes': [{'Scheme Name': 'National Scheme for Incentive to Girls for Secondary Education', 'Benefits': 'Financial incentive for girls in secondary schools.', 'eligibility': {'income': 10000, 'occupation': 'Student'}, 'Requirements': 'School certificate, Income proof', 'Age Range': '0-18', 'Purpose': 'Secondary Education', 'id': '2OKbKgjgFF5HVnX8cxcO', 'Eligibility': {'income': 10000, 'occupation': 'Student'}}, {'Scheme Name': 'Beti Bachao Beti Padhao', 'Benefits': 'Promotes education for girls, provides financial aid.', 'eligibility': {'income': 10000, 'occupation': 'Student'}, 'Requirements': 'Birth certificate, Income proof', 'Age Range': '0-18', 'Purpose': 'Education and Empowerment', 'id': 'DZWalCW0RO1IfNGhjKfw', 'Eligibility': {'income': 10000, 'occupation': 'Student'}}, {'Scheme Name': 'Mid-Day Meal Scheme', 'Benefits': 'Provides free meals in school

In [10]:
def calculate_match_rate(user, schemes):
    """Score how well one user fits each of the given schemes.

    Every scheme starts at 50 points (the caller is expected to pass only
    schemes that already matched on age).  30 points are added when the
    scheme's ``"occupation"`` rule equals the user's ``"Occupation"``, and 20
    more when the user's ``"Income"`` does not exceed the scheme's
    ``"income"`` rule.  A scheme without an ``"income"`` rule is treated as
    having a ceiling of 0.

    Args:
        user: Dict providing ``"Occupation"`` and ``"Income"``.
        schemes: Iterable of scheme dicts providing ``"Eligibility"`` (a dict),
            ``"Scheme Name"``, ``"Benefits"`` and ``"Requirements"``.

    Returns:
        list[dict]: One result per scheme, in input order, with the keys
        ``"Scheme Name"``, ``"Match Rate (%)"``, ``"Benefits"`` and
        ``"Requirements"``.
    """
    def _evaluate(scheme):
        rules = scheme["Eligibility"]

        points = 50  # awarded for the age-group match
        # Occupation match is worth 30
        points += 30 if rules.get("occupation") == user["Occupation"] else 0
        # Staying within the income ceiling is worth 20
        points += 20 if user["Income"] <= rules.get("income", 0) else 0

        return {
            "Scheme Name": scheme["Scheme Name"],
            "Match Rate (%)": points,
            "Benefits": scheme["Benefits"],
            "Requirements": scheme["Requirements"],
        }

    return [_evaluate(scheme) for scheme in schemes]


def apply_eligibility_matching(filtered_data, users=None):
    """Score every user against their age-filtered schemes.

    Each entry of ``filtered_data`` only carries the user's ID and age, so the
    income and occupation needed for scoring are looked up by ID in ``users``.
    Every user is therefore scored with their own profile rather than with
    the profile of the first user in the list.

    Args:
        filtered_data: Iterable of dicts with ``"user_id"``, ``"user_age"``
            and ``"relevant_schemes"``.
        users: Iterable of user dicts providing ``"id"``, ``"Income"`` and
            ``"Occupation"``.  Defaults to the module-level
            ``preprocessed_users``.

    Returns:
        list[dict]: One ``{"user_id": ..., "matches": [...]}`` entry per
        scored user, in input order.  Entries whose ID has no profile in
        ``users`` are reported on stdout and skipped.
    """
    if users is None:
        users = preprocessed_users

    # Index profiles once so each lookup is O(1) instead of a list scan
    profiles_by_id = {profile["id"]: profile for profile in users}

    matched_results = []

    for entry in filtered_data:
        user_id = entry["user_id"]
        user_age = entry["user_age"]
        user_relevant_schemes = entry["relevant_schemes"]

        profile = profiles_by_id.get(user_id)
        if profile is None:
            # Without income/occupation a score would be meaningless
            print(f"No profile found for user {user_id}; skipping eligibility matching.")
            continue

        # Calculate match rates using this user's own income and occupation
        user_matches = calculate_match_rate(
            {"Age": user_age, "Income": profile["Income"], "Occupation": profile["Occupation"]},
            user_relevant_schemes
        )

        matched_results.append({
            "user_id": user_id,
            "matches": user_matches
        })

    print(f"Applied eligibility matching for {len(matched_results)} users.")
    return matched_results


# Apply eligibility matching
matched_data = apply_eligibility_matching(filtered_data)

# Example output for a single user.
# Guarded because indexing [0] raises IndexError when no users were matched.
if matched_data:
    print(f"Example matched data for User 1: {matched_data[0]}")
else:
    print("No matched data available to display.")

Applied eligibility matching for 10000 users.
Example matched data for User 1: {'user_id': '00EPfRXylUvn6nxTXkaZ', 'matches': [{'Scheme Name': 'National Scheme for Incentive to Girls for Secondary Education', 'Match Rate (%)': 70, 'Benefits': 'Financial incentive for girls in secondary schools.', 'Requirements': 'School certificate, Income proof'}, {'Scheme Name': 'Beti Bachao Beti Padhao', 'Match Rate (%)': 70, 'Benefits': 'Promotes education for girls, provides financial aid.', 'Requirements': 'Birth certificate, Income proof'}, {'Scheme Name': 'Mid-Day Meal Scheme', 'Match Rate (%)': 70, 'Benefits': 'Provides free meals in schools.', 'Requirements': 'School enrollment proof'}]}


In [11]:
def save_to_firestore(matched_data):
    """Write each user's match results to the ``user_matches`` collection.

    One document is written per entry, keyed by the user's ID, using the
    module-level ``db`` client.  ``set`` replaces any document already stored
    under that ID.  A failed write is reported and does not stop the
    remaining writes.

    The closing message reflects what actually happened: the success line is
    printed only when every write succeeded; otherwise a saved/failed summary
    is printed instead.

    Args:
        matched_data: Iterable of dicts with ``"user_id"`` and ``"matches"``.

    Returns:
        None
    """
    user_matches_ref = db.collection("user_matches")  # Firestore collection name: user_matches

    attempted = 0
    failed = 0

    for user_data in matched_data:
        user_id = user_data["user_id"]
        matches = user_data["matches"]
        attempted += 1

        try:
            # Add or update user matches in Firestore
            user_matches_ref.document(user_id).set({
                "user_id": user_id,
                "matches": matches
            })
        except Exception as e:
            failed += 1
            print(f"Error saving data for user {user_id}: {e}")

    if failed:
        print(f"Saved {attempted - failed} of {attempted} user match records to Firestore; {failed} failed.")
    else:
        print("Matched data successfully saved to Firestore.")


# Save matched data to Firestore: one document per user in the
# "user_matches" collection, keyed by user ID (existing documents are replaced).
save_to_firestore(matched_data)

Matched data successfully saved to Firestore.
