In [37]:
import itertools
import os
from getpass import getpass

import mysql.connector
import numpy as np
import pandas as pd
import torch
from sentence_transformers import SentenceTransformer

def _fetch_dataframe(cursor, sql_query):
    """Execute `sql_query` on `cursor` and return every row as a DataFrame.

    Column labels are taken from the first field of each entry in
    `cursor.description`, i.e. the column names reported by the server.
    """
    cursor.execute(sql_query)
    rows = cursor.fetchall()
    names = [column[0] for column in cursor.description]
    return pd.DataFrame(rows, columns=names)


# Connect to the MySQL server.
# The password is no longer stored in the notebook: it is read from the
# MYSQL_PASSWORD environment variable, with an interactive prompt as fallback.
conn = mysql.connector.connect(
    host='127.0.0.1',  # host name
    user='root',       # MySQL user name
    password=os.environ.get('MYSQL_PASSWORD') or getpass('MySQL password: '),
    database='employee'  # database to connect to
)

# Load the three tables, then release the cursor and the connection even if
# one of the queries fails.
try:
    cursor = conn.cursor()
    try:
        sql_query = "SELECT * FROM member_assign_50to100"
        member_based_okr_assignments = _fetch_dataframe(cursor, sql_query)

        sql_query = "SELECT * FROM okr_30to60"
        okr_df = _fetch_dataframe(cursor, sql_query)

        # One output row per (member, assigned project): a member row is paired
        # with every OKR whose OKR_NUM matches project1, project2 or project3.
        sql_query = '''
SELECT *
FROM member_assign_50to100
JOIN okr_30to60 
ON okr_30to60.OKR_NUM IN (member_assign_50to100.project1, member_assign_50to100.project2, member_assign_50to100.project3);
'''
        member_okr = _fetch_dataframe(cursor, sql_query)
    finally:
        cursor.close()
finally:
    conn.close()

# Sort the joined member/OKR rows by member.
df = member_okr
df_sorted = df.sort_values(by='Member', ascending=True)

# Flatten the rows of each member into a single feature row.
# NOTE(review): the slicing below assumes every member owns exactly three
# consecutive rows in df_sorted (one per project) - confirm against the data.
num_columns = df_sorted.shape[1]
flattened_data = []

for i in range(50):
    # Members are handled in groups of ten; each group reads its own block of
    # six positional columns (14-19, 20-25, 26-31, 32-37, 38-43). Column 12 is
    # always read as well and fills the pr*_score slots.
    block_start = 14 + 6 * (i // 10)
    selected_columns = [12] + [
        col for col in range(block_start, block_start + 6) if col < num_columns
    ]

    member_rows = df_sorted.iloc[3 * i:3 * (i + 1)]
    first_row_data = member_rows.iloc[:, selected_columns[0]].T.tolist()
    other_data = member_rows.iloc[:, selected_columns[1:]].values.flatten().tolist()

    # Layout: member number, N_OKR placeholder (NaN), three scores, then the
    # 3 x 6 block values in row-major order.
    combined_data = [i + 1, np.nan]
    combined_data = combined_data + first_row_data + other_data
    flattened_data.append(combined_data)

column_names = ['member', 'N_OKR', 'pr1_score', 'pr2_score', 'pr3_score']
column_names += ['pr1_1', 'pr1_2', 'pr1_3', 'pr1_4', 'pr1_5', 'pr1_6']
column_names += ['pr2_1', 'pr2_2', 'pr2_3', 'pr2_4', 'pr2_5', 'pr2_6']
column_names += ['pr3_1', 'pr3_2', 'pr3_3', 'pr3_4', 'pr3_5', 'pr3_6']

# Trim the labels to the longest row, build the frame, then drop the leading
# 'member' column.
max_length = max(len(row) for row in flattened_data)
adjusted_column_names = column_names[:max_length]
full_table = pd.DataFrame(flattened_data, columns=adjusted_column_names)
data = full_table.iloc[:, 1:]

# Pull the OKR columns used for scoring out of the OKR table.
objectives = okr_df['Objective']
okr_number_text = okr_df['OKR_NUM'].str.replace("OKR_", "")
posted_OKR = okr_number_text.astype(float)  # "OKR_<n>" -> float(n)
score = okr_df['Objective Score']

# Load the Sentence Transformer model; get_similarities() reads it through the
# module-level name `model`.
model = SentenceTransformer('paraphrase-MiniLM-L6-v2')

def get_similarities(n_okr, objectives):
    """Return the cosine similarity between `n_okr` and each objective.

    Args:
        n_okr: Text of the new OKR; encoded once with the module-level `model`.
        objectives: Iterable of objective texts; each is encoded separately.

    Returns:
        A list of Python floats, one per objective, in input order.
    """
    query_embedding = model.encode(n_okr, convert_to_tensor=True)
    cosine = torch.nn.functional.cosine_similarity
    return [
        cosine(
            query_embedding,
            model.encode(text, convert_to_tensor=True),
            dim=-1,
        ).item()
        for text in objectives
    ]

def calculate_weighted_scores(n_okr):
    """Compute a similarity-weighted mean objective score for every member.

    The rows of the module-level `member_okr` frame are grouped by 'Member'.
    For each group, every objective is compared with `n_okr` through
    get_similarities(), and similarity * 'Objective Score' is averaged over
    the objectives whose text differs from `n_okr`.

    Args:
        n_okr: Text of the new OKR.

    Returns:
        A list of (member, average) tuples in groupby order. The average is 0
        when a member has no objective other than `n_okr` itself.
    """
    per_member = []
    for member_id, member_rows in member_okr.groupby('Member'):
        texts = member_rows['Objective'].tolist()
        scores = member_rows['Objective Score'].tolist()
        sims = get_similarities(n_okr, texts)

        # Skip objectives that are literally the new OKR text.
        contributions = [
            sim * objective_score
            for sim, objective_score, text in zip(sims, scores, texts)
            if text != n_okr
        ]

        if len(contributions) > 0:
            average = sum(contributions) / len(contributions)
        else:
            average = 0
        per_member.append((member_id, average))
    return per_member

def generate_combinations_3d(data, num_parts=5):
    """Build every combination that takes one row from each part of `data`.

    The rows of `data` are cut into `num_parts` consecutive parts of
    len(data) // num_parts rows each; rows beyond num_parts * part_size are
    not used. The Cartesian product of the parts is returned as an array.

    Args:
        data: DataFrame whose `.values` rows are combined.
        num_parts: Number of consecutive parts to split the rows into.

    Returns:
        np.ndarray built from the list of combinations; for non-empty parts
        its shape is (part_size ** num_parts, num_parts, n_columns).
    """
    rows = data.values
    rows_per_part = len(rows) // num_parts

    parts = []
    for part_index in range(num_parts):
        start = part_index * rows_per_part
        parts.append(rows[start:start + rows_per_part])

    return np.array(list(itertools.product(*parts)))

# User-supplied inputs
n_okr_input = "internal team communications Tool Improvement Projects have been aimed at improving the efficiency and accuracy of communication."  # user-defined OKR text
posted_input = 61.0  # OKR number
label_input = np.nan  # label (unknown at prediction time)

# Similarity-weighted score of the new OKR for each of the first 50 members.
weighted_sums = calculate_weighted_scores(n_okr_input)[:50]
weighted_values = [value[1] for value in weighted_sums]
weighted_array = np.array(weighted_values)

# Stop when the row counts disagree. Continuing would build final_data_f
# without the member/posted/label columns, so every downstream column slice
# would silently pick the wrong features.
if data.shape[0] != len(weighted_values):
    raise ValueError(
        f"샘플 수가 일치하지 않습니다. data 행 수: {data.shape[0]}, weighted_values 길이: {len(weighted_values)}"
    )

data.iloc[:, 0] = weighted_array          # fill the first column (N_OKR)
data["member"] = data.index.astype(int)   # member id taken from the row index
data["posted"] = posted_input
data["label"] = label_input

# All combinations that take one member row from each of the five parts.
data_3d = generate_combinations_3d(data.iloc[:, :], num_parts=5)

# Keep column 0 and columns 4 onward; columns 1-3 (the three pr*_score
# columns) are dropped.
final_data_f = np.concatenate((data_3d[:, :, 0:1], data_3d[:, :, 4:]), axis=2)

final_data_f.shape


(100000, 5, 22)

In [38]:
import numpy as np
import torch
from algorithm import TeamTransformer  # TeamTransformer 클래스 불러오기
from dataloader import create_test_loader

# Model hyperparameters (must match the values used at training time)
embedding_dim = 19
seq_len = 5
output_dim = 1
n_heads = 1
n_layers = 3
hidden_dim = 64
dropout_rate = 0.2

# Build the model.
# NOTE(review): this rebinds the module-level name `model`, which
# get_similarities() reads to encode text. After this cell has run,
# get_similarities() cannot be called again until the SentenceTransformer is
# reloaded; consider using a distinct name for one of the two models.
model = TeamTransformer(
    embedding_dim=embedding_dim,
    n_heads=n_heads,
    hidden_dim=hidden_dim,
    n_layers=n_layers,
    output_dim=output_dim,
    dropout_rate=dropout_rate,
)

# Load the saved weights. weights_only=True limits unpickling to tensors and
# plain containers, and avoids the warning torch.load emits when the argument
# is omitted.
state_dict = torch.load(
    'best_model_weights.pth',
    map_location=torch.device('cpu'),
    weights_only=True,
)
model.load_state_dict(state_dict)

# Switch to evaluation mode
model.eval()

transformer_out_list = []
predictions_list = []

# DataLoader over the candidate team combinations
batch_size = 512
test_loader = create_test_loader(final_data_f, batch_size)

# Run inference batch by batch; the labels yielded by the loader are unused.
with torch.no_grad():
    for batch_inputs_total, _ in test_loader:
        val_inputs = batch_inputs_total[:, :, :-2]           # all but the last two columns
        val_inputs_num = batch_inputs_total[:, :, -2:].int()  # the last two columns only
        predictions, transformer_out = model(val_inputs)
        # Both outputs are stored as NumPy arrays so the two lists are
        # consistent with each other.
        transformer_out_list.append(transformer_out.detach().cpu().numpy())
        predictions_list.append(predictions.detach().cpu().numpy())

if not predictions_list:
    raise ValueError("test_loader produced no batches; nothing to predict")

# NOTE(review): everything below uses only the LAST batch (its transformer
# output, its trailing two input columns and its predictions), so `result`
# does not cover the earlier batches - confirm this is intended.
transformer_out_last = torch.from_numpy(transformer_out_list[-1])

# Repeat each prediction once per sequence position -> (batch, seq_len, 1)
expand_predict = torch.from_numpy(
    np.repeat(predictions_list[-1], repeats=seq_len, axis=1).reshape(-1, seq_len, 1)
)

# Append the two integer input columns, then the prediction, to the
# transformer output along the feature axis.
output = torch.cat((transformer_out_last, val_inputs_num), dim=-1)

result = torch.cat((output, expand_predict), dim=-1)
result

  state_dict = torch.load('best_model_weights.pth', map_location=torch.device('cpu'))


tensor([[[ 1.7033e+00,  5.3214e+00, -3.5207e+00,  ...,  9.0000e+00,
           6.1000e+01,  6.9362e+01],
         [-2.0743e-01,  2.6483e+00, -3.8416e+00,  ...,  1.9000e+01,
           6.1000e+01,  6.9362e+01],
         [-2.6422e+00,  1.3725e+00, -3.5559e+00,  ...,  2.8000e+01,
           6.1000e+01,  6.9362e+01],
         [-8.0363e-01,  3.6814e+00, -8.2284e-01,  ...,  3.4000e+01,
           6.1000e+01,  6.9362e+01],
         [ 2.1821e-02,  2.1814e+00, -4.3360e+00,  ...,  4.0000e+01,
           6.1000e+01,  6.9362e+01]],

        [[ 1.7033e+00,  5.3214e+00, -3.5207e+00,  ...,  9.0000e+00,
           6.1000e+01,  6.9500e+01],
         [-2.0743e-01,  2.6483e+00, -3.8416e+00,  ...,  1.9000e+01,
           6.1000e+01,  6.9500e+01],
         [-2.6422e+00,  1.3725e+00, -3.5559e+00,  ...,  2.8000e+01,
           6.1000e+01,  6.9500e+01],
         [-8.0363e-01,  3.6814e+00, -8.2284e-01,  ...,  3.4000e+01,
           6.1000e+01,  6.9500e+01],
         [ 3.0186e+00,  6.6033e-01, -3.2347e+00,  ...

In [41]:
# Average the last feature (the prediction) over the sequence axis for each
# row of `result`, then sort ascending; returns (values, indices).
torch.sort(result[:, :, -1].mean(dim=1))

torch.return_types.sort(
values=tensor([59.9289, 60.6686, 61.0417, 61.1568, 61.4216, 61.7422, 61.7814, 62.2696,
        62.3797, 62.5344, 62.7035, 62.7887, 62.8551, 63.1496, 63.1560, 63.2308,
        63.3817, 63.4926, 63.8163, 63.8956, 63.9015, 64.0165, 64.2625, 64.2814,
        64.3437, 64.3838, 64.4945, 64.6487, 64.9693, 65.0097, 65.0913, 65.1294,
        65.1479, 65.2286, 65.2395, 65.3331, 65.3834, 65.3942, 65.6068, 65.7494,
        65.8309, 65.8876, 65.9305, 65.9683, 66.0157, 66.0728, 66.0906, 66.1231,
        66.2375, 66.2414, 66.3191, 66.3523, 66.3549, 66.3758, 66.3767, 66.4564,
        66.4579, 66.5024, 66.5610, 66.5840, 66.6087, 66.6112, 66.6407, 66.7213,
        66.8230, 66.8258, 66.8761, 66.9046, 66.9613, 67.0419, 67.0946, 67.1464,
        67.1967, 67.2035, 67.2436, 67.3543, 67.4605, 67.5085, 67.5421, 67.5828,
        67.5988, 67.6794, 67.7839, 67.7843, 67.8342, 67.8476, 67.8658, 67.8695,
        67.9225, 67.9510, 68.0032, 68.0077, 68.0884, 68.1077, 68.1580, 68.1682,
        

In [32]:
# Display the per-batch transformer outputs collected by the prediction loop.
# This shows every stored array, so the rendered output can be very large.
transformer_out_list

[array([[[-4.61942  ,  2.0066621, -2.2555742, ...,  2.2759867,
          -3.2986228, -3.3162155],
         [-3.6630504,  5.9614162, -2.329934 , ...,  1.1795375,
          -2.45038  , -2.614767 ],
         [-3.5863874,  4.2331123, -2.0247777, ...,  1.9624201,
          -3.970718 , -3.3078952],
         [-1.8103979,  6.242693 , -2.4902532, ...,  0.8695152,
          -1.8642982, -1.4703243],
         [-3.4960458,  4.2379827, -2.3841267, ...,  2.1828809,
          -2.6673858, -2.1641216]],
 
        [[-4.61942  ,  2.0066621, -2.2555742, ...,  2.2759867,
          -3.2986228, -3.3162155],
         [-3.6630504,  5.9614162, -2.329934 , ...,  1.1795375,
          -2.45038  , -2.614767 ],
         [-3.5863874,  4.2331123, -2.0247777, ...,  1.9624201,
          -3.970718 , -3.3078952],
         [-1.8103979,  6.242693 , -2.4902532, ...,  0.8695152,
          -1.8642982, -1.4703243],
         [-2.9568756,  3.8971765, -2.6201565, ...,  2.1591845,
          -2.7125907, -1.7826046]],
 
        [[-4.6

In [33]:
# Keep only the entries of predictions_list that are NumPy arrays.
arrays_only = list(filter(lambda item: isinstance(item, np.ndarray), predictions_list))

# Show the result
# print(f"추출된 배열 개수: {len(arrays_only)}")
arrays_only

[array([[61.735977],
        [61.880894],
        [60.956524],
        [60.880585],
        [61.553844],
        [61.758446],
        [61.625885],
        [61.16127 ],
        [61.797943],
        [61.987427],
        [62.270073],
        [62.414986],
        [61.49062 ],
        [61.41468 ],
        [62.08794 ],
        [62.29254 ],
        [62.15998 ],
        [61.69536 ],
        [62.332035],
        [62.521526],
        [63.267136],
        [63.41205 ],
        [62.487686],
        [62.411743],
        [63.085007],
        [63.289608],
        [63.157043],
        [62.692425],
        [63.3291  ],
        [63.518585],
        [62.86074 ],
        [63.00566 ],
        [62.08129 ],
        [62.005356],
        [62.67861 ],
        [62.883213],
        [62.75065 ],
        [62.286034],
        [62.922707],
        [63.11219 ],
        [62.998272],
        [63.14319 ],
        [62.21882 ],
        [62.14288 ],
        [62.81614 ],
        [63.020737],
        [62.888184],
        [62.4