# In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# -------------------------
# Step 1: Load Data
# -------------------------
# Read the three input tables from the local data/ directory.
client_projects = pd.read_csv("data/client_projects.csv")
employee_experience = pd.read_csv("data/employee_experience.csv")
employee_master = pd.read_csv("data/employee_master.csv")

# Attach experience columns to every master row; master rows without a
# matching Employee_ID in the experience table are kept (left join).
employees = employee_master.merge(
    employee_experience,
    how="left",
    on="Employee_ID",
)

# -------------------------
# Step 2: Build Profile Text
# -------------------------
def preprocess_text(text):
    """Return *text* as lowercase with every comma replaced by a space.

    Values that ``pd.isna`` reports as missing yield an empty string; any
    other value is converted with ``str()`` before normalisation.
    """
    if not pd.isna(text):
        lowered = str(text).lower()
        return lowered.replace(",", " ")
    return ""

# Employee profile: department, skills, free-text experience and years of
# experience joined into one whitespace-separated string per row.
# Years are rendered per value so that a missing entry contributes an empty
# string; a plain ``astype(str)`` would inject the literal token "nan" into
# the text that is later vectorised.
employees["profile_text"] = (
    employees["Department"].fillna("") + " " +
    employees["Skills"].fillna("") + " " +
    employees["Experience_Text"].fillna("") + " " +
    employees["Years_Experience"].map(lambda y: "" if pd.isna(y) else str(y))
).apply(preprocess_text)

# Project text: description followed by the required skills.
client_projects["project_text"] = (
    client_projects["Project_Description"].fillna("") + " " +
    client_projects["Required_Skills"].fillna("")
).apply(preprocess_text)

# -------------------------
# Step 3: TF-IDF + Cosine Similarity
# -------------------------
# One shared vocabulary is fitted over employee profiles followed by project
# texts, so both groups live in the same TF-IDF feature space.
n_employees = len(employees)
corpus = list(employees["profile_text"]) + list(client_projects["project_text"])

vectorizer = TfidfVectorizer(ngram_range=(1, 2), stop_words="english")
tfidf_matrix = vectorizer.fit_transform(corpus)

# The first n_employees rows are employees; the remaining rows are projects.
employee_tfidf, project_tfidf = tfidf_matrix[:n_employees], tfidf_matrix[n_employees:]

# Rows index projects, columns index employees.
similarity_matrix = cosine_similarity(project_tfidf, employee_tfidf)

# -------------------------
# Step 4: Weighted Scoring Function
# -------------------------
def _normalized_location(value):
    """Return a trimmed, case-folded location string, or "" when missing."""
    if pd.isna(value):
        return ""
    return str(value).strip().casefold()


def compute_weighted_score(skill_score, emp_row, project_row,
                           w_skill=0.7, w_exp=0.2, w_location=0.1):
    """Blend text similarity, experience and location into a single score.

    Args:
        skill_score: Similarity between the project and the employee.
        emp_row: Mapping-like row providing ``"Years_Experience"`` and
            ``"Location"``.
        project_row: Mapping-like row providing ``"Location"``.
        w_skill: Weight applied to ``skill_score``.
        w_exp: Weight applied to the experience component.
        w_location: Weight applied to the location component.

    Returns:
        The weighted sum rounded to three decimals.

    Raises:
        ValueError: If ``Years_Experience`` is present but not numeric.
    """
    # A missing value would otherwise turn the whole score into NaN
    # (min(nan, 1.0) is nan), so treat it as zero years.
    raw_years = emp_row["Years_Experience"]
    years = 0.0 if pd.isna(raw_years) else float(raw_years)
    # Experience saturates at 15 years and never contributes negatively.
    exp_score = min(max(years, 0.0) / 15.0, 1.0)

    # A missing location on either side counts as "no match" instead of
    # raising AttributeError on a float NaN.
    emp_location = _normalized_location(emp_row["Location"])
    project_location = _normalized_location(project_row["Location"])
    loc_score = 1.0 if emp_location and emp_location == project_location else 0.0

    score = skill_score * w_skill + exp_score * w_exp + loc_score * w_location
    return round(score, 3)

# -------------------------
# Step 5: Recommendation Generation
# -------------------------
top_k = 3
recommendations = {}

# similarity_matrix is indexed by row *position*, while iterrows() yields
# index *labels*; enumerate() keeps the lookup correct even when
# client_projects does not carry a default 0..n-1 index.
for proj_pos, (_, project) in enumerate(client_projects.iterrows()):
    # Best weighted score seen per Employee_ID. The left merge can produce
    # several rows for one employee, and the same person must not occupy
    # more than one of the top_k slots.
    best_score_by_employee = {}
    for emp_pos, skill_score in enumerate(similarity_matrix[proj_pos]):
        emp_row = employees.iloc[emp_pos]
        weighted_score = compute_weighted_score(skill_score, emp_row, project)
        emp_id = emp_row["Employee_ID"]
        if (emp_id not in best_score_by_employee
                or weighted_score > best_score_by_employee[emp_id]):
            best_score_by_employee[emp_id] = weighted_score

    # sorted() is stable, so ties keep the employees' first-seen row order.
    ranked_sorted = sorted(
        best_score_by_employee.items(), key=lambda item: item[1], reverse=True
    )[:top_k]

    recommendations[project["Project_ID"]] = [emp_id for emp_id, _ in ranked_sorted]

# -------------------------
# Step 6: Evaluation (Precision@K, Recall@K)
# -------------------------
def evaluate_precision_recall_at_k(employees, client_projects, recommendations, k=3):
    """Average Precision@k and Recall@k over projects that have ground truth.

    Ground truth for a project is the set of distinct ``Employee_ID`` values
    whose ``Current_Project_ID`` equals the project id. Projects without any
    such employee are skipped.

    Args:
        employees: DataFrame with ``Employee_ID`` and ``Current_Project_ID``.
        client_projects: Unused; kept so existing callers keep working.
        recommendations: Mapping of project id to a ranked list of employee ids.
        k: Number of top recommendations to score; must be positive.

    Returns:
        ``(avg_precision, avg_recall)``, each rounded to three decimals, or
        ``(0, 0)`` when no project could be evaluated.

    Raises:
        ValueError: If ``k`` is not positive.
    """
    import warnings

    if k <= 0:
        raise ValueError("k must be a positive integer")

    precision_scores = []
    recall_scores = []

    for proj_id, rec_employees in recommendations.items():
        # Use a set: employees may hold several rows per person, and counting
        # them repeatedly would inflate the recall denominator.
        on_project = employees["Current_Project_ID"] == proj_id
        relevant_employees = set(employees.loc[on_project, "Employee_ID"])

        if not relevant_employees:
            continue  # No ground truth available for this project.

        hits = len(set(rec_employees[:k]) & relevant_employees)

        precision_scores.append(hits / k)
        recall_scores.append(hits / len(relevant_employees))

    if not precision_scores:
        # Zeros here mean "nothing was evaluated", not "every recommendation
        # missed"; say so instead of returning them silently.
        warnings.warn(
            "No project in `recommendations` matched any Current_Project_ID; "
            "precision/recall were not computed and are reported as 0.",
            stacklevel=2,
        )
        return 0, 0

    avg_precision = round(sum(precision_scores) / len(precision_scores), 3)
    avg_recall = round(sum(recall_scores) / len(recall_scores), 3)

    return avg_precision, avg_recall

# Score the generated recommendations at the same cut-off used to build them.
precision_at_k, recall_at_k = evaluate_precision_recall_at_k(
    employees,
    client_projects,
    recommendations,
    k=top_k,
)

# Emit the banner and both metrics, one per line.
print(
    "\n=== Evaluation Metrics ===",
    f"Precision@{top_k}: {precision_at_k}",
    f"Recall@{top_k}: {recall_at_k}",
    sep="\n",
)



# Output:
# === Evaluation Metrics ===
# Precision@3: 0
# Recall@3: 0
