# 실험 데이터 생성

In [22]:
import pandas as pd
import numpy as np

# Fix the seed so the synthetic data set is reproducible.
np.random.seed(42)

# Fixed head-count per role. This dict is the single source of truth for both
# the role labels and the total sample size below.
tech_stack_counts = {
    '백엔드': 15,
    '프론트엔드': 10,
    '디자인': 5,
    'PM': 5,
    '데이터 분석': 5,
}

# Total number of synthetic members. Derived from the counts (40 with the
# values above) so that editing a count can never leave the column lengths
# out of sync.
num_samples = sum(tech_stack_counts.values())

# One role label per member, grouped by role in dict insertion order.
tech_stacks = [
    stack
    for stack, count in tech_stack_counts.items()
    for _ in range(count)
]

# Example detailed skills for the two roles that have a sub-stack column.
frontend_skills = ['React', 'Vue', 'jQuery']
backend_skills = ['Spring Boot', 'Django', 'Ruby', 'Node.js']

# NOTE: every entry below draws from NumPy's global RNG in the order it is
# written, so reordering the entries changes the generated values.
data = {
    # Sequential member IDs starting at 1.
    '회원 ID': range(1, num_samples + 1),
    '기술 스택': tech_stacks,
    # Detailed stack is only filled in for the matching role; NaN otherwise.
    '프론트엔드 세부 스택': [np.random.choice(frontend_skills) if tech == '프론트엔드' else np.nan for tech in tech_stacks],
    '백엔드 세부 스택': [np.random.choice(backend_skills) if tech == '백엔드' else np.nan for tech in tech_stacks],
    # Integer scores in the inclusive range 60..100.
    '기술 스택 점수': np.random.randint(60, 101, num_samples),
    # Random 10-dimensional placeholder vectors.
    '기존 프로젝트': [np.random.rand(10) for _ in range(num_samples)],
    '개인 성향': [np.random.rand(10) for _ in range(num_samples)],
    'KPI 평가 점수': np.random.randint(60, 101, num_samples),
    '동료 평가 점수': np.random.randint(60, 101, num_samples),
    # Random value in [0, 1], rounded to one decimal place.
    'Man/Month': np.round(np.random.rand(num_samples), 1)
}


user = pd.DataFrame(data)


In [23]:
user

Unnamed: 0,회원 ID,기술 스택,프론트엔드 세부 스택,백엔드 세부 스택,기술 스택 점수,기존 프로젝트,개인 성향,KPI 평가 점수,동료 평가 점수,Man/Month
0,1,백엔드,,Ruby,61,"[0.3951502360018144, 0.9266588657937942, 0.727...","[0.24872415443028795, 0.3302525290190985, 0.43...",78,62,0.4
1,2,백엔드,,Ruby,80,"[0.5867511656638482, 0.965255307264138, 0.6070...","[0.681039427096909, 0.23750647129246993, 0.400...",91,92,0.6
2,3,백엔드,,Node.js,92,"[0.014079822715084456, 0.19884240408880516, 0....","[0.32268638827758367, 0.04340078329817276, 0.9...",89,87,0.4
3,4,백엔드,,Spring Boot,71,"[0.09541011649041131, 0.37081825219826636, 0.6...","[0.9958313750293707, 0.6969250558638148, 0.384...",88,67,0.3
4,5,백엔드,,Node.js,81,"[0.7217295211648732, 0.23598491974895575, 0.25...","[0.16967990389556553, 0.6468483616687238, 0.38...",88,93,0.9
5,6,백엔드,,Node.js,84,"[0.5632755719763837, 0.6955160864261275, 0.139...","[0.41120672087218624, 0.6027818820232985, 0.27...",89,94,0.8
6,7,백엔드,,Node.js,86,"[0.6243540481337932, 0.295633685837714, 0.1054...","[0.8766614859257927, 0.55158787254407, 0.16483...",75,91,0.1
7,8,백엔드,,Ruby,87,"[0.906828441545754, 0.2721322493846353, 0.6476...","[0.8291642207065144, 0.5690814691657476, 0.063...",99,83,0.9
8,9,백엔드,,Django,75,"[0.2694123337985215, 0.24412552224777417, 0.16...","[0.9131657522576428, 0.585149532325472, 0.7261...",78,73,0.5
9,10,백엔드,,Spring Boot,74,"[0.712270589924442, 0.1480869299533999, 0.9977...","[0.8782206674791923, 0.7569991321722287, 0.046...",77,91,0.9


In [24]:
# Example vector for the project to be staffed: 10 random values in [0, 1).
# It is drawn from NumPy's global RNG, so the exact values depend on how many
# random draws were made before this cell ran.
reference_vector = np.random.rand(10)
reference_vector

array([0.91441195, 0.41877052, 0.38313853, 0.51891771, 0.04696597,
       0.16628337, 0.73803362, 0.08279867, 0.60315211, 0.24534911])

In [25]:
# Add the project-fit column: cosine similarity (1 - cosine distance) between
# each member's past-project vector and the reference project vector.
from scipy.spatial.distance import cosine
user['프로젝트 적합도'] = [
    1 - cosine(past_project, reference_vector)
    for past_project in user['기존 프로젝트']
]

In [26]:
from sklearn.preprocessing import MinMaxScaler

# Min-Max scaling
def scale_group(group):
    """Min-max scale the three score columns of one group.

    Intended for use with ``DataFrame.groupby(...).apply``, so that each
    score is rescaled relative to the other rows of the same group.

    Args:
        group: DataFrame containing the columns '기술 스택 점수',
            'KPI 평가 점수' and '동료 평가 점수'.

    Returns:
        A copy of ``group`` with those three columns replaced by the output
        of ``MinMaxScaler().fit_transform``; all other columns are unchanged.
    """
    score_columns = ['기술 스택 점수', 'KPI 평가 점수', '동료 평가 점수']
    # Work on a copy: pandas does not support mutating the object that
    # groupby.apply passes in, and doing so can give unexpected results.
    scaled = group.copy()
    scaled[score_columns] = MinMaxScaler().fit_transform(scaled[score_columns])
    return scaled

# Scale the score columns within each tech-stack group, then discard the index
# produced by apply() and renumber the rows 0..n-1.
# NOTE(review): the rows come back grouped by tech stack rather than in member-ID
# order (see the frame displayed further down), so the row labels used by later
# cells are these new positions, not the original ones.
user = user.groupby('기술 스택').apply(scale_group).reset_index(drop=True)

# Persist the scaled table as user.csv (path is relative to the working directory).
# NOTE(review): to_csv writes the index as an unnamed first column by default, and
# the array-valued columns are written as their string form — confirm this is intended.
user.to_csv("user.csv")

  user = user.groupby('기술 스택').apply(scale_group).reset_index(drop=True)


# 최적의 팀 추천

In [27]:
# Split the member table into one frame per role. Each frame keeps the row
# labels of `user`, so its index can be used to look members up in `user`.
back_end, front_end, design, pm, data = (
    user[user['기술 스택'] == stack]
    for stack in ('백엔드', '프론트엔드', '디자인', 'PM', '데이터 분석')
)

In [28]:
user

Unnamed: 0,회원 ID,기술 스택,프론트엔드 세부 스택,백엔드 세부 스택,기술 스택 점수,기존 프로젝트,개인 성향,KPI 평가 점수,동료 평가 점수,Man/Month,프로젝트 적합도
0,31,PM,,,0.894737,"[0.6839637693981411, 0.6158511643899138, 0.943...","[0.7305491368093014, 0.6937176074033384, 0.166...",1.0,0.166667,0.8,0.814518
1,32,PM,,,1.0,"[0.811204176736003, 0.8206394757439553, 0.6259...","[0.7785719390507511, 0.14312798554846962, 0.20...",0.193548,1.0,0.2,0.823884
2,33,PM,,,0.052632,"[0.6182538843378929, 0.3365542148887265, 0.655...","[0.6039578720725125, 0.5022883474625301, 0.539...",0.903226,0.5,0.0,0.793015
3,34,PM,,,0.0,"[0.9366481518557073, 0.039186326675646255, 0.4...","[0.5990293647773265, 0.8267988278090783, 0.959...",0.225806,0.0,0.1,0.859769
4,35,PM,,,0.105263,"[0.25001644921610466, 0.5805440123439098, 0.86...","[0.1007946030158865, 0.2560155318536622, 0.726...",0.0,0.055556,0.9,0.791317
5,36,데이터 분석,,,0.0,"[0.4965611906830777, 0.38961808621920346, 0.29...","[0.374435369720422, 0.06892153936721879, 0.077...",0.0,0.5625,0.9,0.787715
6,37,데이터 분석,,,0.833333,"[0.48252094964264247, 0.49302565777180496, 0.0...","[0.8374907890024921, 0.33214641265493894, 0.31...",0.617647,0.21875,0.6,0.614438
7,38,데이터 분석,,,0.466667,"[0.9309291056872926, 0.9742482085344102, 0.995...","[0.11426730005568775, 0.4145045842036277, 0.86...",0.588235,0.46875,0.6,0.772836
8,39,데이터 분석,,,0.733333,"[0.8668687053108414, 0.8384807637639851, 0.426...","[0.2482264427571519, 0.5776319540915472, 0.165...",0.147059,0.0,0.7,0.71243
9,40,데이터 분석,,,1.0,"[0.8631385219890039, 0.8803599686384168, 0.236...","[0.9755413156827563, 0.45898898801701304, 0.55...",1.0,1.0,0.2,0.861524


In [29]:
back_end.index

Index([15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29], dtype='int64')

# 조합 계산

In [15]:
from itertools import combinations, product

# Number of members to pick from each role.
back_num = 2
front_num = 1
design_num = 1
pm_num = 1
data_num = 1

# Placeholder weights for the five score components (equal weighting).
weight_stack = 0.2
weight_cosine = 0.2
weight_personality = 0.2
weight_kpi = 0.2
weight_peer = 0.2  # weight of the peer-review score

# The similarity of two members depends only on that pair, yet the same pair
# appears in a large number of candidate teams. Each pair is therefore
# computed once and looked up afterwards.
_pair_similarity_cache = {}


def _personality_similarity(first, second):
    """Return the cosine similarity of two members' personality vectors.

    ``first`` and ``second`` are row labels of ``user``.
    """
    pair = (first, second)
    if pair not in _pair_similarity_cache:
        _pair_similarity_cache[pair] = 1 - cosine(
            user.at[first, '개인 성향'], user.at[second, '개인 성향']
        )
    return _pair_similarity_cache[pair]


# Every candidate team: one combination per role, crossed over all roles.
# product() advances the right-most iterable fastest, which is the same order
# as nesting one loop per role by hand (back-end outermost, data innermost).
candidate_teams = product(
    combinations(back_end.index, back_num),
    combinations(front_end.index, front_num),
    combinations(design.index, design_num),
    combinations(pm.index, pm_num),
    combinations(data.index, data_num),
)

# One tuple per candidate team: member labels, component scores, final score.
final_scores = []

for role_picks in candidate_teams:
    # Flatten the per-role picks into a single list of `user` row labels.
    team_indices = [member for picks in role_picks for member in picks]
    team_data = user.loc[team_indices]

    # Team-level averages of the per-member scores.
    avg_stack_score = team_data['기술 스택 점수'].mean()
    avg_cosine_score = team_data['프로젝트 적합도'].mean()
    avg_kpi_score = team_data['KPI 평가 점수'].mean()
    avg_peer_score = team_data['동료 평가 점수'].mean()

    # Mean personality similarity over every unordered pair of team members.
    personality_similarity = [
        _personality_similarity(first, second)
        for first, second in combinations(team_indices, 2)
    ]
    avg_personality_similarity = np.mean(personality_similarity)

    # Weighted sum of the five components.
    final_score = (avg_stack_score * weight_stack +
                   avg_cosine_score * weight_cosine +
                   avg_personality_similarity * weight_personality +
                   avg_kpi_score * weight_kpi +
                   avg_peer_score * weight_peer)

    final_scores.append((team_indices, avg_stack_score, avg_cosine_score,
                         avg_personality_similarity, avg_kpi_score, avg_peer_score, final_score))


# Convert to a DataFrame; the row label is the team's position in iteration order.
final_scores_df = pd.DataFrame(final_scores, columns=['팀원 인덱스', '기술 스택 점수',
                                                       '프로젝트 적합도', '평균 성향 유사도',
                                                       'KPI 평가 점수', '동료 평가 점수', '최종 점수'])

# Order the teams from best to worst final score.
final_scores_df = final_scores_df.sort_values(by='최종 점수', ascending=False)


In [16]:
final_scores_df

Unnamed: 0,팀원 인덱스,기술 스택 점수,프로젝트 적합도,평균 성향 유사도,KPI 평가 점수,최종 점수
81680,"[20, 26, 33, 12, 1, 5]",0.940440,0.844464,0.858953,0.770635,0.853623
81655,"[20, 26, 33, 11, 1, 5]",0.927620,0.855977,0.827879,0.763690,0.843791
81630,"[20, 26, 33, 10, 1, 5]",0.901979,0.845381,0.849108,0.763690,0.840039
82430,"[20, 26, 39, 12, 1, 5]",0.903403,0.843477,0.851057,0.762088,0.840006
82180,"[20, 26, 37, 12, 1, 5]",0.954329,0.839553,0.848414,0.715079,0.839344
...,...,...,...,...,...,...
53974,"[18, 23, 31, 13, 4, 9]",0.148410,0.824005,0.707039,0.149186,0.457160
111464,"[23, 29, 31, 13, 2, 9]",0.116083,0.825275,0.745276,0.135165,0.455450
53964,"[18, 23, 31, 13, 2, 9]",0.138606,0.819640,0.717780,0.139927,0.453988
105224,"[23, 24, 31, 13, 4, 9]",0.161923,0.814671,0.716242,0.092043,0.446220
