# 실험 데이터 생성

In [74]:
import pandas as pd
import numpy as np

# Fix the seed of NumPy's global RNG so the synthetic data is reproducible.
np.random.seed(42)

# Fixed number of members per tech stack (role).
tech_stack_counts = {
    '백엔드': 30,
    '프론트엔드': 15,
    '디자인': 10,
    'PM': 10
}

# Total number of synthetic members. Derived from the per-stack counts so the
# two can never disagree: a mismatch would make the DataFrame construction
# below fail because its columns would have different lengths.
num_samples = sum(tech_stack_counts.values())

# One role label per member, grouped by role in the dict's insertion order.
tech_stacks = [
    stack
    for stack, count in tech_stack_counts.items()
    for _ in range(count)
]

# NOTE: the dict entries are evaluated top to bottom and all draw from the
# same global RNG, so reordering them changes the generated values.
data = {
    '회원 ID': range(1, num_samples + 1),
    '기술 스택': tech_stacks,
    # Integer scores in [60, 100] (the upper bound 101 is exclusive).
    '기술 스택 점수': np.random.randint(60, 101, num_samples),
    # Arbitrary 10-dimensional vectors standing in for past-project features.
    '기존 프로젝트': [np.random.rand(10) for _ in range(num_samples)],
    # Arbitrary 10-dimensional vectors standing in for personality features.
    '개인 성향': [np.random.rand(10) for _ in range(num_samples)],
    'KPI 평가 점수': np.random.randint(60, 101, num_samples)
}

user = pd.DataFrame(data)

In [75]:
# Bare expression: shows the generated member table as the cell output.
user

Unnamed: 0,회원 ID,기술 스택,기술 스택 점수,기존 프로젝트,개인 성향,KPI 평가 점수
0,1,백엔드,98,"[0.0884925020519195, 0.1959828624191452, 0.045...","[0.32235384497472297, 0.8098744458546349, 0.25...",93
1,2,백엔드,88,"[0.14092422497476265, 0.8021969807540397, 0.07...","[0.8306194077877291, 0.9650269106665126, 0.124...",95
2,3,백엔드,74,"[0.7712703466859457, 0.07404465173409036, 0.35...","[0.1397723766262895, 0.7952673118598902, 0.201...",65
3,4,백엔드,67,"[0.7296061783380641, 0.6375574713552131, 0.887...","[0.3924451074226354, 0.8165994394715771, 0.439...",98
4,5,백엔드,80,"[0.5227328293819941, 0.42754101835854963, 0.02...","[0.3838912213732114, 0.5435528611139886, 0.906...",71
...,...,...,...,...,...,...
60,61,PM,69,"[0.28377590579872447, 0.3630822963986351, 0.64...","[0.7140869321324278, 0.8017280830697918, 0.339...",60
61,62,PM,95,"[0.24595772838450813, 0.16068137325955567, 0.1...","[0.5264026609361132, 0.7315895217553319, 0.081...",79
62,63,PM,73,"[0.11203890216805235, 0.39785559904574164, 0.9...","[0.18211791568869928, 0.7896985071424791, 0.65...",87
63,64,PM,90,"[0.5716126894698998, 0.2799790936602842, 0.769...","[0.19954245092914513, 0.6802282424312913, 0.07...",96


In [76]:
# Vector of the project to be carried out: 10 values drawn from NumPy's
# global RNG, so the result depends on how many draws preceded this line.
reference_vector = np.random.rand(10)
# Bare expression: echoes the vector as the cell output.
reference_vector

array([0.53351193, 0.78179033, 0.70538559, 0.44636865, 0.86421618,
       0.55814025, 0.53990584, 0.03418777, 0.98669298, 0.12297081])

In [77]:
# Add the similarity column: how closely each member's past-project vector
# matches the reference vector of the upcoming project.
from scipy.spatial.distance import cosine


def _project_fit(past_project_vector):
    """Return the cosine similarity between a member's vector and the reference.

    ``cosine`` yields a cosine *distance*, so it is subtracted from 1 to get
    a similarity.
    """
    distance = cosine(past_project_vector, reference_vector)
    return 1 - distance


user['프로젝트 적합도'] = user['기존 프로젝트'].apply(_project_fit)

In [78]:
from sklearn.preprocessing import MinMaxScaler


def scale_group(group):
    """Return a copy of ``group`` with its score columns Min-Max scaled.

    Args:
        group: DataFrame holding the members of a single tech stack. It must
            contain the '기술 스택 점수' and 'KPI 평가 점수' columns.

    Returns:
        A new DataFrame with the same rows and columns in which the two score
        columns have been rescaled by a ``MinMaxScaler`` fitted on this group
        only. The input frame is left unmodified.
    """
    score_columns = ['기술 스택 점수', 'KPI 평가 점수']
    scaled = group.copy()
    scaled[score_columns] = MinMaxScaler().fit_transform(group[score_columns])
    return scaled


# Scale the scores separately within each tech stack. The groups are iterated
# explicitly instead of going through ``groupby(...).apply``: applying a
# function to frames that include the grouping column is deprecated in pandas,
# and once that column is excluded it would be lost from the result.
# Rows come out ordered by tech stack label (groupby sorts its keys), keep
# their original relative order inside each stack, and are renumbered from 0.
user = pd.concat(
    [scale_group(members) for _, members in user.groupby('기술 스택')],
    ignore_index=True,
)

# Persist the scaled table; the default ``to_csv`` settings also write the index.
user.to_csv("user.csv")

  user = user.groupby('기술 스택').apply(scale_group).reset_index(drop=True)


# 최적의 팀 추천

In [79]:
# Split the member table into one candidate pool per role; each pool keeps
# the row labels of `user`.
stack_labels = user['기술 스택']
back_end, front_end, design, pm = (
    user[stack_labels == role]
    for role in ('백엔드', '프론트엔드', '디자인', 'PM')
)

# 조합 계산

In [80]:
# NOTE(review): superseded first draft of the team search, kept only as a bare
# string literal, so none of it executes. It reads `df`, which is not defined
# in any visible cell (matching the NameError recorded below), and it calls
# `combinations`, which is only imported in the following cell.
'''
final_scores = []

# 팀 구성 인원
back_num=2
front_num=1
design_num=1
pm_num=1

# 임의 가중치
weight_stack = 0.25
weight_cosine = 0.25
weight_personality = 0.25
weight_kpi = 0.25

# 팀 조합 생성
for back_team in combinations(back_end.index, back_num):
    for front_team in combinations(front_end.index, front_num):
        for design_team in combinations(design.index, design_num):
            for pm_team in combinations(pm.index, pm_num):
                # 선택된 팀원 데이터
                team_indices = list(back_team) + list(front_team) + list(design_team) + list(pm_team)
                team_data = df.loc[team_indices]

                # 기술 스택 점수, 코사인 유사도, KPI 평가 점수 평균 계산
                avg_stack_score = team_data['기술 스택 점수'].mean()
                avg_cosine_score = team_data['프로젝트 적합도'].mean()
                avg_kpi_score = team_data['KPI 평가 점수'].mean()

                # 개인 성향 코사인 유사도 계산
                personality_vectors = np.array(team_data['개인 성향'].tolist())
                personality_similarity = []
                for i, j in combinations(range(len(personality_vectors)), 2):
                    similarity = 1 - cosine(personality_vectors[i], personality_vectors[j])  # 유사도를 계산
                    personality_similarity.append(similarity)
                avg_personality_similarity = np.mean(personality_similarity)  # 팀원 간 평균 성향 유사도 계산

                # 최종 점수 계산 (스케일된 값 사용)
                final_score = (avg_stack_score * weight_stack +
                               avg_cosine_score * weight_cosine +
                               avg_personality_similarity * weight_personality +
                               avg_kpi_score * weight_kpi)

                # 결과 저장 (팀원 ID, 최종 점수)
                final_scores.append((team_indices, final_score))

# 최적의 팀을 찾기 위해 최종 점수 정렬
final_scores.sort(key=lambda x: x[1], reverse=True)  # 점수 내림차순 정렬

# 가장 높은 점수를 가진 팀을 출력
best_team, best_score = final_scores[0]
print("최적의 팀:", best_team)
print("최종 점수:", best_score)
'''

NameError: name 'df' is not defined

In [81]:
from itertools import combinations, product

final_scores = []

# Number of members drawn from each role to form one team.
back_num = 2
front_num = 1
design_num = 1
pm_num = 1

# Arbitrary placeholder weights of the four score components.
weight_stack = 0.25
weight_cosine = 0.25
weight_personality = 0.25
weight_kpi = 0.25

# Loop-invariant lookups, keyed by `user` row label. They are built once here
# instead of slicing `user` with `.loc` for every candidate team.
# NOTE: the averages below use np.mean, which (unlike Series.mean) does not
# skip NaN; the scores are assumed to contain no missing values.
stack_score_of = user['기술 스택 점수'].to_dict()
project_fit_of = user['프로젝트 적합도'].to_dict()
kpi_score_of = user['KPI 평가 점수'].to_dict()
personality_of = user['개인 성향'].to_dict()

# Personality cosine similarity of every ordered pair of members, computed
# once up front; each team only needs lookups afterwards.
personality_similarity_of = {
    (first, second): 1 - cosine(personality_of[first], personality_of[second])
    for first, second in product(personality_of, repeat=2)
}

# Every possible team: one combination per role. `product` advances its last
# argument fastest, i.e. the same order as four nested loops with the back-end
# combinations outermost and the PM combinations innermost.
candidate_teams = product(
    combinations(back_end.index, back_num),
    combinations(front_end.index, front_num),
    combinations(design.index, design_num),
    combinations(pm.index, pm_num),
)

for back_team, front_team, design_team, pm_team in candidate_teams:
    team_indices = [*back_team, *front_team, *design_team, *pm_team]

    # Team averages of the per-member scores.
    avg_stack_score = np.mean([stack_score_of[member] for member in team_indices])
    avg_cosine_score = np.mean([project_fit_of[member] for member in team_indices])
    avg_kpi_score = np.mean([kpi_score_of[member] for member in team_indices])

    # Mean personality similarity over all unordered pairs of team members.
    avg_personality_similarity = np.mean([
        personality_similarity_of[pair]
        for pair in combinations(team_indices, 2)
    ])

    # Weighted sum of the four components.
    final_score = (avg_stack_score * weight_stack +
                   avg_cosine_score * weight_cosine +
                   avg_personality_similarity * weight_personality +
                   avg_kpi_score * weight_kpi)

    final_scores.append((team_indices, avg_stack_score, avg_cosine_score, avg_personality_similarity, avg_kpi_score, final_score))


# One row per candidate team; the row label is the team's enumeration order.
final_scores_df = pd.DataFrame(final_scores, columns=['팀원 인덱스', '기술 스택 점수', '프로젝트 적합도', '평균 성향 유사도', 'KPI 평가 점수', '최종 점수'])

# Best team first.
final_scores_df.sort_values(by='최종 점수', ascending=False, inplace=True)


                     팀원 인덱스  기술 스택 점수  프로젝트 적합도  평균 성향 유사도  KPI 평가 점수  \
32058   [20, 42, 55, 15, 8]  0.905592  0.775667   0.789844   0.859919   
15558   [20, 31, 55, 15, 8]  0.921382  0.753456   0.797163   0.844534   
411558  [31, 42, 55, 15, 8]  0.889803  0.806862   0.745980   0.865047   
294558  [27, 42, 55, 15, 8]  0.821382  0.808067   0.768060   0.890688   
32068   [20, 42, 55, 16, 8]  0.926645  0.790189   0.806261   0.759919   
...                     ...       ...       ...        ...        ...   
105410  [22, 36, 54, 11, 0]  0.137838  0.795905   0.684650   0.133333   
532900  [36, 48, 54, 10, 0]  0.137838  0.778691   0.685051   0.149663   
105400  [22, 36, 54, 10, 0]  0.132575  0.778661   0.666973   0.170175   
123470  [22, 48, 54, 17, 0]  0.190469  0.795283   0.682181   0.073009   
123400  [22, 48, 54, 10, 0]  0.206259  0.772091   0.694344   0.062483   

           최종 점수  
32058   0.832756  
15558   0.829134  
411558  0.826923  
294558  0.822049  
32068   0.820753  
...      

In [82]:
# Bare expression: shows the ranked team table (best final score first) as the cell output.
final_scores_df

Unnamed: 0,팀원 인덱스,기술 스택 점수,프로젝트 적합도,평균 성향 유사도,KPI 평가 점수,최종 점수
32058,"[20, 42, 55, 15, 8]",0.905592,0.775667,0.789844,0.859919,0.832756
15558,"[20, 31, 55, 15, 8]",0.921382,0.753456,0.797163,0.844534,0.829134
411558,"[31, 42, 55, 15, 8]",0.889803,0.806862,0.745980,0.865047,0.826923
294558,"[27, 42, 55, 15, 8]",0.821382,0.808067,0.768060,0.890688,0.822049
32068,"[20, 42, 55, 16, 8]",0.926645,0.790189,0.806261,0.759919,0.820753
...,...,...,...,...,...,...
105410,"[22, 36, 54, 11, 0]",0.137838,0.795905,0.684650,0.133333,0.437931
532900,"[36, 48, 54, 10, 0]",0.137838,0.778691,0.685051,0.149663,0.437811
105400,"[22, 36, 54, 10, 0]",0.132575,0.778661,0.666973,0.170175,0.437096
123470,"[22, 48, 54, 17, 0]",0.190469,0.795283,0.682181,0.073009,0.435236
