# 🎯 Phase 3 – Matching Engine

**Objective:**  
Build an engine to compute similarity between employee profiles and project requirements, applying business rules to recommend top candidates for each project.

---

## ✅ Steps:
1. Import Libraries & Load Processed Data  
2. Compute Similarity Scores  
3. Apply Business Rules (Filters)  
4. Generate Top-N Recommendations  
5. Export Results  
6. (Optional) Create Recommendation Report


In [1]:
import pickle
from pathlib import Path

import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

# Load skill matrices
# NOTE(security): pickle.load can execute arbitrary code while deserialising;
# only load these files if they were produced by this project's own pipeline.
with open("../model/employee_skill_matrix.pkl", "rb") as f:
    employee_matrix = pickle.load(f)

with open("../model/project_skill_matrix.pkl", "rb") as f:
    project_matrix = pickle.load(f)

# Load employee metadata for filtering (experience, dept, location)
df_emp = pd.read_csv("../data/processed/employee_master_cleaned.csv")

# Ensure matrices align: keep exactly the employees listed in the metadata, in
# metadata order. Missing IDs are checked explicitly so the failure names them
# (older pandas versions insert all-NaN rows for unknown labels instead of raising).
missing_ids = df_emp.loc[
    ~df_emp["Employee_ID"].isin(employee_matrix.index), "Employee_ID"
]
if not missing_ids.empty:
    raise KeyError(
        f"{len(missing_ids)} Employee_ID value(s) in the metadata have no row in "
        f"employee_matrix, e.g. {missing_ids.head(5).tolist()}"
    )
employee_matrix = employee_matrix.loc[df_emp["Employee_ID"]]


In [2]:
# Put both matrices on the same feature columns before dropping to raw arrays:
# cosine_similarity compares columns by position, so a differing column order
# would silently score unrelated features against each other.
# Assumes the columns of both matrices are skill features — TODO confirm.
feature_columns = project_matrix.columns.union(employee_matrix.columns)
project_features = project_matrix.reindex(columns=feature_columns, fill_value=0)
employee_features = employee_matrix.reindex(columns=feature_columns, fill_value=0)

# Calculate cosine similarity between projects (rows) and employees (columns)
similarity_matrix = cosine_similarity(project_features.values, employee_features.values)

# Store as DataFrame for easy access
similarity_df = pd.DataFrame(similarity_matrix, index=project_matrix.index, columns=employee_matrix.index)

print("âœ… Similarity matrix computed:", similarity_df.shape)
similarity_df.head()


âœ… Similarity matrix computed: (100, 1000)


Employee_ID,E101,E102,E103,E104,E105,E106,E107,E108,E109,E110,...,E1091,E1092,E1093,E1094,E1095,E1096,E1097,E1098,E1099,E1100
Project_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
P301,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.264906,0.0,0.0,0.160128,0.0,0.0,0.0,0.0,0.0,0.0
P302,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.149071,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
P303,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.160128,...,0.0,0.0,0.0,0.0,0.0,0.144338,0.0,0.0,0.0,0.0
P304,0.0,0.0,0.0,0.0,0.0,0.0,0.144338,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.149071,0.0,0.0,0.0,0.0,0.0
P305,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.132453,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [3]:
# Example business rule: only employees with at least this many years of
# experience are eligible for recommendation.
MIN_YEARS_EXPERIENCE = 3
eligible_employees = df_emp.loc[
    df_emp["Years_Experience"] >= MIN_YEARS_EXPERIENCE, "Employee_ID"
].tolist()

def apply_filters(similarity_scores, eligible_ids=None):
    """Keep only the scores that belong to eligible employees.

    Args:
        similarity_scores: Series of similarity scores whose index holds
            employee IDs.
        eligible_ids: Optional collection of employee IDs to keep. When
            omitted, the module-level ``eligible_employees`` list is used.

    Returns:
        The entries of ``similarity_scores`` whose index label is eligible,
        in their original order.
    """
    if eligible_ids is None:
        eligible_ids = eligible_employees
    # Keep only eligible employees
    return similarity_scores[similarity_scores.index.isin(eligible_ids)]


In [4]:
def get_top_n_employees(project_id, top_n=5):
    """Return the top N employees for a given project ID after applying filters.

    Args:
        project_id: Row label of the project in ``similarity_df``.
        top_n: Maximum number of candidates to return.

    Returns:
        A Series of similarity scores indexed by employee ID, sorted from
        highest to lowest and truncated to ``top_n`` entries; an empty Series
        when ``top_n`` is not positive; ``None`` when ``project_id`` is not a
        row of ``similarity_df``.
    """
    if project_id not in similarity_df.index:
        return None
    scores = apply_filters(similarity_df.loc[project_id])  # Apply business rules
    if top_n <= 0:
        # head() with a negative count means "all but the last |n|", not "none".
        return scores.iloc[:0]
    # A stable sort keeps tied scores in their original column order, so the
    # same inputs always produce the same ranking.
    return scores.sort_values(ascending=False, kind="mergesort").head(top_n)

# Demo: show the five best matches for the first project in the matrix
project_id_sample = similarity_df.index[0]
sample_top_candidates = get_top_n_employees(project_id_sample, 5)
print(f"ðŸ”¹ Top candidates for {project_id_sample}:\n", sample_top_candidates)


ðŸ”¹ Top candidates for P301:
 Employee_ID
E182     0.433013
E743     0.408248
E1088    0.365148
E225     0.288675
E765     0.288675
Name: P301, dtype: float64


In [6]:
# Collect the top-5 matches of every project as [project, employee, score] rows.
recommendations = []

for proj_id in similarity_df.index:
    top_candidates = get_top_n_employees(proj_id, top_n=5)
    if top_candidates is None:
        # get_top_n_employees returns None for an unknown project ID; skip it
        # instead of failing on None.items().
        continue
    recommendations.extend(
        [proj_id, emp_id, score] for emp_id, score in top_candidates.items()
    )

df_recommendations = pd.DataFrame(recommendations, columns=["Project_ID", "Employee_ID", "Similarity_Score"])

# Create the output folder on first run so the export does not fail on a fresh checkout.
recommendations_path = Path("../data/outputs/matching_recommendations.csv")
recommendations_path.parent.mkdir(parents=True, exist_ok=True)
df_recommendations.to_csv(recommendations_path, index=False)

print("âœ… Recommendations exported to matching_recommendations.csv")


âœ… Recommendations exported to matching_recommendations.csv


In [7]:
# Merge recommendations with employee metadata. validate="many_to_one" makes
# the merge fail loudly if df_emp holds more than one row for an Employee_ID,
# which would otherwise silently duplicate recommendation rows.
df_final_recommendations = df_recommendations.merge(
    df_emp, on="Employee_ID", how="left", validate="many_to_one"
)

# Save enhanced report
df_final_recommendations.to_csv("../data/outputs/matching_recommendations_report.csv", index=False)

df_final_recommendations.head()


Unnamed: 0,Project_ID,Employee_ID,Similarity_Score,Department,Years_Experience,Location
0,P301,E182,0.433013,Full Stack Dev,14.8,Mumbai
1,P301,E743,0.408248,Data Science,14.9,Pune
2,P301,E1088,0.365148,Data Science,14.1,Mumbai
3,P301,E225,0.288675,AI Research,13.9,Pune
4,P301,E765,0.288675,Full Stack Dev,13.9,Mumbai
