
AI-Powered Team Formation System
Interactive tool for creating balanced IT teams

In [151]:
#Setup and Imports
from collections import defaultdict
import re

import ipywidgets as widgets
from IPython.display import display, clear_output
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler


print("✅ Libraries imported")


✅ Libraries imported


In [None]:
# CELL 2: Load and Prepare Data
# Location of the employee dataset (replace path)
DATA_PATH = r"C:\Users\hp\Downloads\labflow-main\labflow-main\filtered_employee_data.csv"

# List of all possible technical skills in dataset.
# Defined before the try-block so that later cells can still reference it
# when loading the CSV fails.
all_tech_skills = ['Blockchain', 'C#', 'C++', 'DevOps', 'Figma', 'Golang',
                   'Java', 'JavaScript', 'Laravel', 'ML', 'NLP', 'Node.js',
                   'PHP', 'PostgreSQL', 'Python', 'R', 'React', 'Robotics',
                   'Rust', 'SQL', 'Spring', 'Swift', 'TensorFlow',
                   'TypeScript', 'UI/UX', 'Unity', 'iOS Dev']


def add_skill_columns(frame, skills, source_col='technical_skills'):
    """Return a copy of ``frame`` with one 0/1 indicator column per skill.

    A skill counts as present when its name occurs in ``source_col`` as a
    whole token (case insensitive). Plain ``str.contains(skill)`` is not
    sufficient here because it treats the skill name as a regular expression
    ('C++' and 'Node.js' contain metacharacters) and because it matches
    substrings ('Java' inside 'JavaScript', 'SQL' inside 'PostgreSQL', 'R'
    inside any text containing the letter r).

    Parameters
    ----------
    frame : pandas.DataFrame
        Input data; it is not modified.
    skills : iterable of str
        Skill names; each becomes a column of the same name.
    source_col : str
        Name of the text column listing a row's skills.

    Returns
    -------
    pandas.DataFrame
        Copy of ``frame`` with the indicator columns added. Rows whose
        ``source_col`` is missing get 0 for every skill.
    """
    result = frame.copy()
    skill_text = result[source_col]
    for skill in skills:
        # Escape the name so it is matched literally, and require that it is
        # not glued to a neighbouring word character (or to a trailing '+'/'#',
        # which would make it a different language name).
        pattern = r'(?<!\w)' + re.escape(skill) + r'(?![\w+#])'
        result[skill] = (
            skill_text.str.contains(pattern, case=False, regex=True, na=False)
            .astype(int)
        )
    return result


try:
    # Create binary skill columns (case insensitive, whole-token match)
    df = add_skill_columns(pd.read_csv(DATA_PATH), all_tech_skills)

    print(f"✅ Data loaded with {len(df)} employees")
    print("Available skills:", all_tech_skills)

except Exception as e:
    # Best effort: report the problem instead of stopping the notebook.
    # `df` stays undefined in that case, so team formation cannot run.
    print(f"❌ Error loading data: {str(e)}")
    

✅ Data loaded with 1000 employees
Available skills: ['Blockchain', 'C#', 'C++', 'DevOps', 'Figma', 'Golang', 'Java', 'JavaScript', 'Laravel', 'ML', 'NLP', 'Node.js', 'PHP', 'PostgreSQL', 'Python', 'R', 'React', 'Robotics', 'Rust', 'SQL', 'Spring', 'Swift', 'TensorFlow', 'TypeScript', 'UI/UX', 'Unity', 'iOS Dev']


In [153]:
# CELL 3: Create Interactive UI
# Input widgets of the team-formation form. They are only constructed here;
# nothing is rendered until they are handed to display().

# Free-text name of the project
project_name = widgets.Text(
    description="Project Name:",
    placeholder='e.g., AI Chatbot Development',
)

# Project category, initially 'Web Dev'
project_type = widgets.Dropdown(
    description="Project Type:",
    options=['Web Dev', 'Data Science', 'DevOps', 'AI/ML', 'Cybersecurity', 'Mobile'],
    value='Web Dev',
)

# Desired head count: integer slider from 2 to 10, initially 4
team_size = widgets.IntSlider(description="Team Size:", min=2, max=10, value=4)

# Multiple-selection list offering every entry of all_tech_skills;
# 'Python' and 'JavaScript' are pre-selected
required_skills = widgets.SelectMultiple(
    description="Required Skills:",
    options=all_tech_skills,
    value=['Python', 'JavaScript'],
    rows=10,
    layout={'width': '500px'},
)

# Preferred team style, initially 'Balanced'
personality_pref = widgets.Dropdown(
    description="Team Style:",
    options=['Balanced', 'Innovative', 'Structured', 'High Collaboration'],
    value='Balanced',
)

# Button that starts team formation
form_button = widgets.Button(
    description="Form Team",
    icon='users',
    button_style='success',
)

# Output widget that receives the printed results and figures
output_area = widgets.Output()


In [154]:
def _personality_scores(candidates, pref):
    """Score every candidate against the requested team style.

    Returns a float Series aligned with ``candidates.index``:

    * 'Innovative'         -> 1.0 where ``personality_type`` contains 'NT', else 0.0
    * 'Structured'         -> 1.0 where ``personality_type`` contains 'STJ', else 0.0
    * 'High Collaboration' -> ``collaboration_score`` divided by 10
    * anything else        -> constant 0.5 (the 'Balanced' style)

    A missing ``personality_type`` counts as "no match" (0.0) so that it
    cannot inject NaN into the clustering features.
    """
    if pref == 'Innovative':
        return candidates['personality_type'].str.contains('NT', regex=False, na=False).astype(float)
    if pref == 'Structured':
        return candidates['personality_type'].str.contains('STJ', regex=False, na=False).astype(float)
    if pref == 'High Collaboration':
        return candidates['collaboration_score'] / 10.0
    return pd.Series(0.5, index=candidates.index)


def _cluster_candidates(candidates, features, size):
    """Standardize the feature columns and cluster the candidates with KMeans.

    The number of clusters is ``max(3, size)``, capped at 10 and at the number
    of candidates. ``n_init`` is passed explicitly because its default differs
    between scikit-learn releases; together with the fixed ``random_state``
    this keeps the clustering reproducible.

    Returns ``(X, kmeans, labels)``: the standardized feature matrix, the
    fitted estimator, and one cluster label per candidate row.
    """
    X = StandardScaler().fit_transform(candidates[features])
    n_clusters = min(max(3, size), min(10, len(candidates)))
    kmeans = KMeans(n_clusters=n_clusters, random_state=42, n_init=10)
    labels = kmeans.fit_predict(X)
    return X, kmeans, labels


def _rank_clusters(candidates, req_skills):
    """Return the cluster ids ordered from best to worst composite score.

    Per cluster the composite is
    ``0.5 * (number of required skills held by at least one member)
    + 0.3 * mean skill_proficiency + 0.2 * mean personality_score``.
    """
    grouped = candidates.groupby('cluster')
    # The skill columns are 0/1 flags, so the per-cluster maximum says whether
    # anybody in the cluster has the skill.
    skill_coverage = grouped[req_skills].max().sum(axis=1)
    composite = (
        0.5 * skill_coverage
        + 0.3 * grouped['skill_proficiency'].mean()
        + 0.2 * grouped['personality_score'].mean()
    )
    return composite.sort_values(ascending=False).index


def _select_team(candidates, cluster_order, req_skills, size):
    """Greedily pick up to ``size`` members, visiting clusters in the given order.

    From each cluster at most two people are taken: first the most proficient
    holder of a required skill that the team does not cover yet (if any), then
    the most proficient remaining member. Every required skill a selected
    person has is recorded as covered, not only the one being searched for.

    Returns the selected rows of ``candidates`` in pick order (dtypes
    preserved). Assumes ``candidates`` has a unique index.
    """
    chosen = []       # index labels of selected members, in pick order
    covered = set()   # required skills held by at least one selected member

    def take_best(pool):
        """Select the most proficient row of ``pool`` and return its label."""
        label = pool.nlargest(1, 'skill_proficiency').index[0]
        chosen.append(label)
        covered.update(s for s in req_skills if pool.at[label, s] == 1)
        return label

    for cluster_id in cluster_order:
        if len(chosen) >= size:
            break

        members = candidates[candidates['cluster'] == cluster_id]

        # First pass: cover one still-missing skill from this cluster
        for skill in req_skills:
            if skill in covered:
                continue
            experts = members[members[skill] == 1]
            if not experts.empty:
                members = members.drop(index=take_best(experts))
                break

        # Second pass: add the best remaining member of this cluster
        if len(chosen) < size and not members.empty:
            take_best(members)

    return candidates.loc[chosen].head(size)


def _plot_team_overview(X, kmeans, labels, team, req_skills, features):
    """Draw the three summary panels: clusters, skill coverage, personality mix."""
    fig, (ax_clusters, ax_skills, ax_mix) = plt.subplots(1, 3, figsize=(15, 5))

    # Cluster Visualization (first two standardized feature columns)
    ax_clusters.scatter(X[:, 0], X[:, 1], c=labels, cmap='viridis', alpha=0.5)
    ax_clusters.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1],
                        c='red', marker='X', s=100)
    ax_clusters.set_xlabel(f"{features[0]} (standardized)")
    ax_clusters.set_ylabel(f"{features[1]} (standardized)")
    ax_clusters.set_title("Employee Clusters")

    # Skill Coverage: number of team members holding each required skill
    skill_counts = team[req_skills].sum().sort_values()
    sns.barplot(x=skill_counts.values, y=skill_counts.index, ax=ax_skills)
    ax_skills.set_title("Skill Coverage")

    # Personality Distribution
    team['personality_type'].value_counts().plot.pie(autopct='%1.1f%%', ax=ax_mix)
    ax_mix.set_title("Personality Mix")

    fig.tight_layout()
    plt.show()


def form_team(b):
    """Click handler: build a team from the current widget values and report it.

    Reads the form widgets (``required_skills``, ``personality_pref``,
    ``team_size``, ``project_type``, ``project_name``) and the module-level
    ``df``; everything printed or plotted goes into ``output_area``, which is
    cleared first.

    Steps: filter ``df`` to rows with availability above 0.7 that hold at
    least one required skill, score personality fit, cluster the candidates,
    pick members cluster by cluster, then show plots, the team table and
    summary statistics.

    Parameters
    ----------
    b : object
        Argument supplied by the button's click callback; unused.

    Any exception raised along the way is caught and printed as an error
    message instead of propagating.
    """
    with output_area:
        clear_output()

        try:
            # Get inputs
            req_skills = list(required_skills.value)
            pref = personality_pref.value
            size = team_size.value

            if not req_skills:
                print("❌ Please select at least one skill")
                return

            print(f"\n🔧 Forming {project_type.value} team for '{project_name.value}'")
            print(f"⚙️ Requirements: {', '.join(req_skills)} | {pref} style | {size} members")

            # === FILTERING PHASE ===
            # Rows without a proficiency or collaboration score cannot be
            # clustered or ranked, so they are left out of the candidate pool.
            candidates = df[
                (df['availability'] > 0.7) &
                (df[req_skills].sum(axis=1) >= 1)
            ].dropna(subset=['skill_proficiency', 'collaboration_score']).copy()

            if candidates.empty:
                print("❌ No candidates match the selected skills!")
                return

            # Apply personality preference scoring
            candidates['personality_score'] = _personality_scores(candidates, pref)

            # === CLUSTERING PHASE ===
            features = req_skills + ['skill_proficiency', 'collaboration_score', 'personality_score']
            X, kmeans, labels = _cluster_candidates(candidates, features, size)
            candidates['cluster'] = labels

            # === TEAM SELECTION PHASE ===
            cluster_order = _rank_clusters(candidates, req_skills)
            team = _select_team(candidates, cluster_order, req_skills, size)

            # === VISUALIZATION ===
            _plot_team_overview(X, kmeans, labels, team, req_skills, features)

            # === DISPLAY RESULTS ===
            print(f"\n✅ Optimal Team ({len(team)} members):")
            display(team[['Employee_ID', 'Job_Title', 'personality_type',
                         'skill_proficiency', 'collaboration_score'] + req_skills]
                   .rename(columns={
                       'skill_proficiency': 'Proficiency (1-10)',
                       'collaboration_score': 'Collaboration (1-10)'
                   }).style.format({
                       'Proficiency (1-10)': '{:.1f}',
                       'Collaboration (1-10)': '{:.1f}'
                   }).background_gradient(subset=['Proficiency (1-10)', 'Collaboration (1-10)'], cmap='Blues'))

            # Team Statistics
            # Coverage is derived from the final team so that this line and
            # the missing-skill warning below can never disagree.
            covered = [s for s in req_skills if team[s].sum() > 0]
            print("\n📊 Team Stats:")
            print(f"- Avg Proficiency: {team['skill_proficiency'].mean():.1f}/10")
            print(f"- Avg Collaboration: {team['collaboration_score'].mean():.1f}/10")
            print(f"- Personality Diversity: {team['personality_type'].nunique()} types")
            print(f"- Skills Covered: {len(covered)}/{len(req_skills)}")

            # Missing skills warning
            missing_skills = [s for s in req_skills if s not in covered]
            if missing_skills:
                print(f"\n⚠️ Couldn't find experts in: {', '.join(missing_skills)}")

        except Exception as e:
            print(f"❌ Error forming team: {str(e)}")

# Run form_team whenever the "Form Team" button is clicked.
# NOTE(review): re-running this cell presumably registers an additional
# handler on the same button — confirm against ipywidgets' on_click semantics.
form_button.on_click(form_team)

In [155]:
# CELL 5: Display the Interface
# Stack the heading, the input widgets, the button and the output area
# vertically and render the resulting container.
display(
    widgets.VBox(children=[
        widgets.HTML("<h2>🚀 AI Team Formation System</h2>"),
        project_name, project_type, team_size,
        required_skills, personality_pref,
        form_button,
        output_area,
    ])
)


VBox(children=(HTML(value='<h2>🚀 AI Team Formation System</h2>'), Text(value='', description='Project Name:', …