<a href="https://colab.research.google.com/github/Nandini-Pandey/VolunteerLink/blob/main/KNN_Recommendation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import pandas as pd
from google.colab import files

# Ask the user for the dataset through Colab's upload widget.
# `uploaded` holds whatever files.upload() returns for the chosen file(s).
uploaded = files.upload()

# Sanity check: parse the uploaded CSV from the working directory and
# preview its first rows.
df = pd.read_csv("volunteer_jobs.csv")
print(df.head())




Saving volunteer_jobs.csv to volunteer_jobs.csv
                title            NGO            skillsRequired areaOfImpact  \
0  Teaching Assistant  Teach for All   Teaching, Communication    Education   
1   Food Distribution  Food for Life    Teamwork, Organization       Hunger   
2     Tree Plantation    Green Earth  Gardening, Physical Work  Environment   

        location    deadline  
0       New York  2025-03-01  
1    Los Angeles  2025-04-15  
2  San Francisco  2025-05-10  


In [5]:

import pandas as pd
import pickle
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import MultiLabelBinarizer

# Load dataset
data = pd.read_csv("volunteer_jobs.csv")


def _parse_skills(raw_skills):
    """Turn one comma-separated skills cell into a list of clean skill names.

    Splits on "," and strips every piece, so "Teaching,Communication" and
    "Teaching , Communication" parse exactly like "Teaching, Communication".
    Non-string cells (e.g. NaN for a missing value) become an empty list
    instead of raising AttributeError.
    """
    if not isinstance(raw_skills, str):
        return []
    return [part.strip() for part in raw_skills.split(",") if part.strip()]


# Convert skillsRequired into list format for one-hot encoding
data["skillsRequired"] = data["skillsRequired"].apply(_parse_skills)

# One-hot encode skills: one 0/1 column per distinct skill name.
# Reusing data's index guarantees the column-wise concat below lines the
# skill columns up with the right job rows.
mlb = MultiLabelBinarizer()
skills_encoded = pd.DataFrame(
    mlb.fit_transform(data["skillsRequired"]),
    columns=mlb.classes_,
    index=data.index,
)

# One-hot encode 'areaOfImpact' and 'location'
# (produces columns named "areaOfImpact_<value>" / "location_<value>")
data_encoded = pd.get_dummies(data, columns=["areaOfImpact", "location"])

# Combine encoded skills with the dataset
data_final = pd.concat([data_encoded, skills_encoded], axis=1)

# Remove non-numeric columns before training; what remains is the feature
# matrix whose column order every later query must reproduce.
data_final = data_final.drop(columns=["title", "NGO", "skillsRequired", "deadline"])

# Train KNN model (cosine distance over the one-hot feature vectors)
model = NearestNeighbors(n_neighbors=3, metric="cosine")
model.fit(data_final)

# Save the model together with everything needed to encode a query and to
# map neighbour indices back to job rows. The tuple order is part of the
# file format: (model, mlb, data_final, data).
with open("model.pkl", "wb") as f:
    pickle.dump((model, mlb, data_final, data), f)

print("✅ Model trained and saved successfully!")


✅ Model trained and saved successfully!


In [9]:
import pandas as pd
import pickle
from sklearn.preprocessing import MultiLabelBinarizer

# Load trained model and preprocessing objects
# Restore the artifacts saved by the training cell. The unpacking order must
# mirror the tuple that was pickled: (model, mlb, data_final, data).
# NOTE: pickle.load can execute arbitrary code, so only load a model.pkl
# that you produced yourself.
with open("model.pkl", "rb") as model_file:
    saved_artifacts = pickle.load(model_file)

model, mlb, data_final, data = saved_artifacts

def get_recommendations(volunteer_skills, volunteer_location, n_recommendations=3):
    """Recommend the volunteer jobs closest to a volunteer's skills and location.

    The volunteer is encoded into the same one-hot feature space the KNN
    model was fitted on (the columns of ``data_final``); the nearest rows are
    then looked up in ``data``.

    Skills and location are matched case-insensitively and ignoring
    surrounding whitespace, so ``"teaching"`` matches a ``"Teaching"`` skill
    and ``"new york"`` matches a ``location_New York`` column.

    Args:
        volunteer_skills: Iterable of skill names, or a single
            comma-separated string. Skills absent from the training data
            are ignored.
        volunteer_location: Preferred location. An unknown, empty or
            ``None`` location is treated as "no location preference".
        n_recommendations: Maximum number of jobs to return (default 3).
            Clamped to the number of rows in ``data_final`` so small
            datasets do not make the neighbour query fail.

    Returns:
        A list of job records (dicts keyed by the columns of ``data``) in
        the order returned by ``model.kneighbors``. The list is empty when
        neither the skills nor the location match anything in the training
        data: an all-zero profile has no direction, so cosine neighbours of
        it would be arbitrary.
    """
    def _key(text):
        # Normalised form used on both sides of every comparison.
        return str(text).strip().casefold()

    feature_columns = list(data_final.columns)
    known_columns = set(feature_columns)

    # Normalised skill name -> the actual skill column used in training.
    skill_lookup = {
        _key(skill): skill for skill in mlb.classes_ if skill in known_columns
    }

    # Normalised location -> the actual "location_<value>" dummy column.
    location_prefix = "location_"
    location_lookup = {
        _key(column[len(location_prefix):]): column
        for column in feature_columns
        if isinstance(column, str) and column.startswith(location_prefix)
    }

    if volunteer_skills is None:
        volunteer_skills = []
    elif isinstance(volunteer_skills, str):
        # A bare string would otherwise be iterated character by character.
        volunteer_skills = volunteer_skills.split(",")

    # Start from an all-zero row that is already aligned (names and order)
    # with the training features; areaOfImpact columns simply stay 0.
    volunteer_profile = pd.DataFrame(0, index=[0], columns=feature_columns)

    for skill in volunteer_skills:
        skill_column = skill_lookup.get(_key(skill))
        if skill_column is not None:
            volunteer_profile.loc[0, skill_column] = 1

    if volunteer_location:
        location_column = location_lookup.get(_key(volunteer_location))
        if location_column is not None:
            volunteer_profile.loc[0, location_column] = 1
        # else: unknown location -> the volunteer is treated as flexible.

    # Nothing matched at all: report "no recommendations" instead of
    # returning arbitrary jobs for an all-zero query vector.
    if not volunteer_profile.to_numpy().any():
        return []

    # Never ask for more neighbours than there are fitted jobs.
    neighbour_count = min(int(n_recommendations), len(data_final))
    if neighbour_count < 1:
        return []

    _, indices = model.kneighbors(volunteer_profile, n_neighbors=neighbour_count)
    return data.iloc[indices[0]].to_dict(orient="records")

if __name__ == "__main__":
    # Collect the volunteer's profile interactively. Both answers are
    # trimmed and lower-cased; the skills answer is then split on commas.
    skills_input = input("Enter your skills (comma-separated): ").strip().lower()
    skills = skills_input.split(",")
    location = input("Enter your preferred location: ").strip().lower()

    # Look up the closest jobs for that profile.
    recommendations = get_recommendations(skills, location)

    # Report: either the fallback message or a numbered list of jobs,
    # one four-line entry per job followed by a blank line.
    if not recommendations:
        print("\n❌ No matching volunteer jobs found for your skills and location.")
    else:
        print("\n🔹 Recommended Volunteer Jobs for You:\n")
        for idx, job in enumerate(recommendations, start=1):
            print(
                f"{idx}. {job['title']} at {job['NGO']} ({job['location']})",
                f"   Skills Required: {job['skillsRequired']}",
                f"   Area of Impact: {job['areaOfImpact']}",
                f"   Deadline: {job['deadline']}\n",
                sep="\n",
            )

Enter your skills (comma-separated): Teaching, Communication, Public Speaking
Enter your preferred location: New York

🔹 Recommended Volunteer Jobs for You:

1. Teaching Assistant at Teach for All (New York)
   Skills Required: ['Teaching', 'Communication']
   Area of Impact: Education
   Deadline: 2025-03-01

2. Food Distribution at Food for Life (Los Angeles)
   Skills Required: ['Teamwork', 'Organization']
   Area of Impact: Hunger
   Deadline: 2025-04-15

3. Tree Plantation at Green Earth (San Francisco)
   Skills Required: ['Gardening', 'Physical Work']
   Area of Impact: Environment
   Deadline: 2025-05-10

