In [1]:
import pandas as pd
import numpy as np
import json
import os
from tqdm import tqdm
import ast
import random
from concurrent.futures import ProcessPoolExecutor, as_completed
import multiprocessing

In [2]:
from google.colab import auth
from google.cloud import storage

# Authenticate the Colab session so the storage client below can be created.
auth.authenticate_user()

# GCP project / bucket configuration.
project_id = 'poised-epigram-454811-q4'  # GCP project ID
bucket_name = 'capteam4stone_real'  # GCS bucket name
file_names = ['rules_f_v12(str).csv','rules_c_v11(str).csv','output_utf8_v11(val).csv','style_dmlp.pth','mlb.pkl']  # objects to download

# Open the bucket that holds the data files.
client = storage.Client(project=project_id)
bucket = client.get_bucket(bucket_name)

for file_name in file_names:
  blob = bucket.blob(file_name)
  # Save each object locally under the same (relative) name as in the bucket.
  blob.download_to_filename(file_name)
  print(f"Downloaded {file_name} from bucket {bucket_name}.")

Downloaded rules_f_v12(str).csv from bucket capteam4stone_real.
Downloaded rules_c_v11(str).csv from bucket capteam4stone_real.
Downloaded output_utf8_v11(val).csv from bucket capteam4stone_real.
Downloaded style_dmlp.pth from bucket capteam4stone_real.
Downloaded mlb.pkl from bucket capteam4stone_real.


In [3]:
# Load the validation outfits. ITEMS is parsed with ast.literal_eval, i.e. each
# cell is expected to hold the text of a Python literal (a list of item strings).
validation = pd.read_csv('output_utf8_v11(val).csv', encoding='utf-8')
validation['ITEMS'] = validation['ITEMS'].apply(ast.literal_eval)

# Keep only outfits whose items span more than one garment category, where the
# category is the text before the first '_' of each item string.
# The mask is built from the ITEMS column directly, so it does not depend on the
# frame having a default RangeIndex (the previous iterrows-label + iloc lookup did).
has_multiple_categories = [
    len({item.split('_')[0] for item in items}) > 1
    for items in tqdm(validation['ITEMS'], total=len(validation))
]
# reset_index returns a new frame; assign it so the re-indexing actually takes effect.
validation = validation.loc[has_multiple_categories].reset_index(drop=True)
def categorize_items(item_list):
    """Group item strings by the garment category named in their prefix.

    Args:
        item_list: iterable of strings such as '상의_색상_화이트'.

    Returns:
        dict with the keys '상의', '하의', '아우터', '원피스'. Each value is the
        list (in input order) of items whose text before the first '_' equals
        that key. Items matching no key are dropped.
    """
    categories = {'상의': [], '하의': [], '아우터': [], '원피스': []}
    for item in item_list:
        prefix, separator, _ = item.partition('_')
        # An item without any '_' can never match "<key>_", so require the separator.
        if separator and prefix in categories:
            categories[prefix].append(item)
    return categories

# Replace each outfit's flat item list with the per-category dict built by categorize_items.
validation['ITEMS'] = validation['ITEMS'].apply(categorize_items)
def sample_tpo_items(df, sample_count, random_state=None):
    """Draw a random subset of rows from the outfit DataFrame.

    Args:
        df (pd.DataFrame): frame to sample from.
        sample_count (int): number of rows wanted; capped at len(df) so asking
            for more rows than exist returns every row (in shuffled order).
        random_state: forwarded to DataFrame.sample. Pass an int (or a numpy
            random generator) to make the draw reproducible across runs; the
            default None keeps the previous unseeded behaviour.

    Returns:
        pd.DataFrame: the sampled rows with a fresh 0..n-1 index.
    """
    sample_count = min(sample_count, len(df))
    sampled = df.sample(n=sample_count, random_state=random_state)
    return sampled.reset_index(drop=True)
# Work on a random sample of at most 200 outfits.
validation = sample_tpo_items(validation, 200)

# Build, per garment category, one entry [outfit ID, feature, feature, ...] for
# every sampled outfit that has at least one item in that category.
cat = ['아우터', '상의', '원피스', '하의']
user_clothes = {name: [] for name in cat}
for row_pos in range(len(validation['ITEMS'])):
    outfit_items = validation['ITEMS'].iloc[row_pos]
    for name in cat:
        if len(outfit_items[name]) == 0:
            continue
        user_clothes[name].append([validation['ID'].iloc[row_pos], *outfit_items[name]])

120523it [00:09, 12426.75it/s]


In [4]:
# Load the two rule tables downloaded earlier. Later cells read the columns
# 'antecedents', 'consequents', 'confidence', 'lift' and 'leverage' from both.
color_rule=pd.read_csv('rules_c_v11(str).csv',encoding='utf-8')
feature_rule=pd.read_csv('rules_f_v12(str).csv',encoding='utf-8')

In [5]:
from sklearn.preprocessing import RobustScaler

# Metrics that are robust-scaled and then combined into the rule score.
metrics = ['confidence', 'log_lift', 'leverage']


def _score_rules(rules):
    """Return a frame of (antecedents, consequents, score) for one rule table.

    The same pipeline is applied to the color and feature rules, so it lives in
    one place instead of two hand-copied blocks:
      1. log_lift = log(lift)
      2. RobustScaler (less sensitive to outliers) fitted on this table's
         confidence / log_lift / leverage columns
      3. score = 0.3 * confidence + 0.6 * log_lift + 0.1 * leverage

    Args:
        rules (pd.DataFrame): must contain 'antecedents', 'consequents',
            'confidence', 'lift' and 'leverage'. The input frame is not modified.

    Returns:
        pd.DataFrame: columns ['antecedents', 'consequents', 'score'].
    """
    scored = rules.copy()
    scored['log_lift'] = np.log(scored['lift'])
    # A fresh scaler per table: each table is scaled by its own median / IQR.
    scored[metrics] = RobustScaler().fit_transform(scored[metrics])
    scored['score'] = (
        0.3 * scored['confidence'] +   # confidence
        0.6 * scored['log_lift'] +     # log of lift (can be positive or negative)
        0.1 * scored['leverage']       # leverage (can be positive or negative)
    )
    return scored[['antecedents', 'consequents', 'score']]


color_rule = _score_rules(color_rule)
feature_rule = _score_rules(feature_rule)

In [6]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from collections import defaultdict

# Alias the current feature_rule frame. Cells that read `df` after feature_rule
# is rebound further down still see this frame, not the rebound one.
df=feature_rule

# Attribute kinds (the middle token of a '<garment>_<kind>_<value>' string) that are processed.
criteria_list = ['기장', '카테고리', '소재', '핏', '소매기장', '넥라인', '옷깃']

def extract_kinds(row):
    """Collect the attribute kinds mentioned on either side of a rule row.

    Args:
        row: mapping / Series with 'antecedents' and 'consequents'. Each value
            is either a comma-separated string or an iterable of item strings.

    Returns:
        set[str]: the middle token of every item that splits into exactly three
        '_'-separated parts; other items are ignored.
    """
    found = set()
    for column in ('antecedents', 'consequents'):
        value = row[column]
        if isinstance(value, str):
            tokens = [token.strip() for token in value.split(',')]
        else:
            tokens = value
        for token in tokens:
            pieces = token.split('_')
            if len(pieces) == 3:
                found.add(pieces[1])
    return found

# Maps each criterion (attribute kind) to the index labels of the rows that mention it.
criterion_rows = defaultdict(list)

# Assign every rule row to each criterion it mentions. A row that mentions
# several listed kinds is recorded under each of them; a row that mentions none
# of the kinds in criteria_list is recorded nowhere and is therefore dropped below.
for idx, row in df.iterrows():
    kinds = extract_kinds(row)
    for kind in kinds:
        if kind in criteria_list:
            criterion_rows[kind].append(idx)  # store the row's index label

# Per-criterion frames, collected for the final concat.
normalized_rows = []

# Standardize the score separately within each criterion group
# (StandardScaler fitted on that group's rows only).
for kind, indices in criterion_rows.items():
    subset = df.loc[indices].copy()
    scaler = StandardScaler()
    subset['score'] = scaler.fit_transform(subset[['score']])
    subset['기준'] = kind
    normalized_rows.append(subset)

# Stack all groups; rows recorded under several criteria appear once per criterion.
normalized_df = pd.concat(normalized_rows, ignore_index=True)

# Rebind feature_rule to the standardized rows, keeping only the columns used later.

feature_rule=normalized_df[['antecedents', 'consequents', 'score']]

In [7]:
import pandas as pd
from collections import defaultdict

# Attribute kinds to report on (same values as the list assigned in the previous cell).
criteria_list = ['기장', '카테고리', '소재', '핏', '소매기장', '넥라인', '옷깃']

def extract_kinds(row):
    """Return the set of attribute kinds found in a rule row.

    Both 'antecedents' and 'consequents' are inspected. A string value is split
    on ',' and each piece stripped; any other value is iterated as-is. Only
    items made of exactly three '_'-separated parts contribute, and what they
    contribute is their middle part.
    """
    kinds = set()
    for field in ('antecedents', 'consequents'):
        raw = row[field]
        if isinstance(raw, str):
            raw = [chunk.strip() for chunk in raw.split(',')]
        kinds.update(
            segments[1]
            for segments in (entry.split('_') for entry in raw)
            if len(segments) == 3
        )
    return kinds

# Scores collected per criterion (attribute kind).
score_dict = defaultdict(list)

# Walk the rows of `df` and file each row's score under every listed kind it mentions.
# NOTE(review): `df` is not assigned in this cell; in this notebook it was bound to
# feature_rule before the per-criterion standardization rebound that name, so these
# appear to be means of the pre-standardization scores — confirm that is intended.
for _, row in df.iterrows():
    kinds = extract_kinds(row)
    for kind in kinds:
        if kind in criteria_list:
            score_dict[kind].append(row['score'])

# Arithmetic mean per criterion (NaN for an empty list).
mean_scores = {
    kind: sum(scores) / len(scores) if scores else float('nan')
    for kind, scores in score_dict.items()
}

# Tabulate the means, highest first.
mean_scores_df = pd.DataFrame(list(mean_scores.items()), columns=['기준', 'score 평균']).sort_values(by='score 평균', ascending=False)

# Show the table.
print(mean_scores_df)


     기준  score 평균
5    옷깃  5.928946
2    소재  0.306526
3  카테고리  0.014893
0    기장  0.008949
1  소매기장 -0.032025
4     핏 -0.177341


In [8]:
from sklearn.preprocessing import StandardScaler

# Expects color_rule (with a 'score' column) to exist from the earlier cells.

# Work on a copy so the frame currently bound to color_rule is not modified in place.
color_df_normalized = color_rule.copy()

# Standardize the score over all color rules at once (a single StandardScaler fit).
scaler = StandardScaler()
color_df_normalized['score'] = scaler.fit_transform(color_rule[['score']])

# Rebind color_rule to the standardized copy.
color_rule=color_df_normalized

In [9]:
def rules_to_dict_sorted_key(rules_df, is_color=False):
    """Convert a rules DataFrame into an order-independent pair -> score lookup.

    Args:
        rules_df (pd.DataFrame): needs 'antecedents', 'consequents' and 'score'.
        is_color (bool): when True, only the text after the last '_' of each
            side is kept (e.g. '상의_색상_블랙' -> '블랙').

    Returns:
        dict[tuple[str, str], float]: keyed by the two sides sorted
        lexicographically, so (a, b) and (b, a) share one entry. If several rows
        map to the same key, the last row wins.
    """
    def _normalize(cell):
        text = str(cell).strip()
        return text.split('_')[-1] if is_color else text

    lookup = {}
    for _, rule in rules_df.iterrows():
        pair = sorted((_normalize(rule['antecedents']), _normalize(rule['consequents'])))
        lookup[tuple(pair)] = float(rule['score'])
    return lookup
# Lookup tables used by the scorer: color rules keyed by bare color names,
# feature rules keyed by the full feature strings.
color_dict = rules_to_dict_sorted_key(color_rule, is_color=True)
feature_dict = rules_to_dict_sorted_key(feature_rule, is_color=False)

In [12]:
def score_recommendation_dict_sorted_key(item, cat, color_dict, feature_dict, user_clothes, color_rate=1):
    """Score every wardrobe item in the other categories against one input item.

    Args:
        item: list laid out as [ID, <second element, ignored here>, feature, ...];
            features are strings of the form '<garment>_<kind>_<value>'.
        cat: category of the input item; that category is not scored.
        color_dict: {sorted (color, color) tuple: score}.
        feature_dict: {sorted (feature, feature) tuple: score}.
        user_clothes: {category: [item, ...]} with items laid out like `item`.
        color_rate: multiplier applied to every color-pair score.

    Returns:
        dict with 'input_ID', 'input_cat' and 'recommendation'. The latter maps
        each compared category to {'<category>_<ID>': score}, ordered by score
        descending. It is empty when the input item has no features.
    """
    current_id = item[0]
    current_features = set(item[2:])
    if len(current_features) == 0:
        return {'input_ID': current_id, 'input_cat': cat, 'recommendation': {}}

    other_categories = [name for name in ['상의', '하의', '아우터', '원피스'] if name != cat]

    recommendation = {}
    for category in other_categories:
        scored = {}
        for candidate in user_clothes.get(category, []):
            candidate_key = f"{category}_{candidate[0]}"
            candidate_features = candidate[2:]

            total = 0
            for own_feature in current_features:
                own_kind = own_feature.split('_')[1]
                for other_feature in candidate_features:
                    # Only features of the same kind are compared with each other.
                    if other_feature.split('_')[1] != own_kind:
                        continue
                    if own_kind == '색상':
                        # Color rules are keyed by the bare color names.
                        pair = tuple(sorted([own_feature.split('_')[-1], other_feature.split('_')[-1]]))
                        total += color_dict.get(pair, 0) * color_rate
                    else:
                        pair = tuple(sorted([own_feature, other_feature]))
                        total += feature_dict.get(pair, 0)

            scored[candidate_key] = total

        ranked = sorted(scored.items(), key=lambda entry: entry[1], reverse=True)
        recommendation[category] = dict(ranked)

    return {'input_ID': current_id, 'input_cat': cat, 'recommendation': recommendation}

In [13]:
from concurrent.futures import ProcessPoolExecutor, as_completed
from tqdm import tqdm
import multiprocessing

def recommend_clothes_parallel_sorted(validation_input_dict, color_dict, feature_dict, user_clothes, color_rate=1):
    """Score every input item against the wardrobe using a process pool.

    Args:
        validation_input_dict: {category: [item, ...]} of input items.
        color_dict, feature_dict, user_clothes, color_rate: forwarded unchanged
            to score_recommendation_dict_sorted_key for every item.

    Returns:
        pd.DataFrame with columns 'input_ID', 'input_cat', 'recommendation',
        one row per input item. Rows follow the iteration order of
        validation_input_dict (category by category, item by item), so
        positional access such as `.loc[0]` always refers to the first input
        item. An empty input yields an empty frame with the same columns.
    """
    args = [
        (item, cat, color_dict, feature_dict, user_clothes, color_rate)
        for cat, items in validation_input_dict.items()
        for item in items
    ]

    # Nothing to score: skip spawning worker processes altogether.
    if not args:
        return pd.DataFrame(columns=['input_ID', 'input_cat', 'recommendation'])

    max_workers = min(multiprocessing.cpu_count(), len(args))
    with ProcessPoolExecutor(max_workers=max_workers) as executor:
        futures = [executor.submit(score_recommendation_dict_sorted_key, *arg) for arg in args]
        # as_completed yields in completion order; it is used only to drive the progress bar.
        for _ in tqdm(as_completed(futures), total=len(futures), desc="Processing recommendations"):
            pass
        # Collect in submission order so the output row order is deterministic.
        results = [future.result() for future in futures]

    return pd.DataFrame(results)


In [15]:
import pickle
# Load the object stored in mlb.pkl; predict_top3_tpo later calls its .transform()
# (presumably a fitted MultiLabelBinarizer — TODO confirm).
# NOTE(review): pickle.load can execute arbitrary code from the file; only load trusted files.
with open('mlb.pkl', 'rb') as f:
    mlb = pickle.load(f)

In [16]:
import pandas as pd
from sklearn.preprocessing import MultiLabelBinarizer
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
# Style label -> class index. predict_top3_tpo inverts this mapping to name the
# positions of the model's output vector, so the order below must not change.
label_to_idx = {
    label: idx
    for idx, label in enumerate([
        '스트리트',
        '페미닌',
        '모던',
        '클래식',
        '로맨틱',
        '아방가르드',
        '리조트',
        '소피스트케이티드',
        '웨스턴',
        '키치',
        '톰보이',
        '매니시',
        '레트로',
        '컨트리',
        '힙합',
        '스포티',
        '젠더리스',
        '프레피',
        '밀리터리',
        '히피',
        '섹시',
        '펑크',
        '오리엔탈',
    ])
}

class DMLP(nn.Module):
    """Fully connected classifier: input -> 512 -> 256 -> 128 -> num_classes.

    The first two hidden blocks are Linear + BatchNorm1d + ReLU + Dropout(0.3);
    the third is Linear + ReLU; the final Linear outputs raw logits. All layers
    live in `self.net` (an nn.Sequential) in that order, so parameter names are
    `net.0.*`, `net.1.*`, ...
    """

    def __init__(self, input_size, num_classes):
        super().__init__()
        layers = []
        width_in = input_size
        for width_out in (512, 256):
            layers += [
                nn.Linear(width_in, width_out),
                nn.BatchNorm1d(width_out),
                nn.ReLU(),
                nn.Dropout(0.3),
            ]
            width_in = width_out
        layers += [nn.Linear(width_in, 128), nn.ReLU(), nn.Linear(128, num_classes)]
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Return the logits for a batch `x`."""
        logits = self.net(x)
        return logits

In [17]:
import torch
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Load from the working directory, i.e. the same relative name the download cell
# saved the file under, instead of an absolute path that only matches when the
# working directory happens to be /content.
# NOTE(review): weights_only=False unpickles a whole model object and can execute
# arbitrary code from the file; only load checkpoints from a trusted source.
model = torch.load('style_dmlp.pth', weights_only=False, map_location=device)
def predict_top3_tpo(items, model, mlb, label_to_idx, device='cpu'):
    """Return the three most probable labels for one list of item features.

    Args:
        items: feature strings of a single garment/outfit.
        model: torch module producing logits of shape (1, num_classes); it is
            switched to eval mode by this call.
        mlb: encoder whose .transform([items]) yields the model's input row.
        label_to_idx: {label: class index}.
        device: device the encoded input is moved to.

    Returns:
        dict {label: probability rounded to 4 decimals}, ordered from most to
        least probable.
    """
    # Encode the features and move them to the target device.
    encoded = torch.tensor(mlb.transform([items]), dtype=torch.float32).to(device)

    # Inference only: no gradients, eval-mode layers.
    model.eval()
    with torch.no_grad():
        logits = model(encoded)
        probs = torch.softmax(logits, dim=1).cpu().numpy()[0]

    # Invert the mapping, then walk the three largest probabilities, best first.
    idx_to_label = {idx: label for label, idx in label_to_idx.items()}
    top3_labels = {}
    for class_idx in probs.argsort()[-3:][::-1]:
        top3_labels[idx_to_label[class_idx]] = round(probs[class_idx], 4)
    return top3_labels
# Insert the top-3 style prediction as the second element of every wardrobe
# entry: [ID, feature, ...] -> [ID, {label: prob}, feature, ...].
for category, clothes_list in user_clothes.items():
    for i, item in enumerate(clothes_list):
        # Entries that already carry a prediction dict are left alone, so running
        # this cell twice does not insert a second dict or pass the first one to
        # the encoder as if it were a feature.
        if len(item) > 1 and isinstance(item[1], dict):
            continue
        item_features = item[1:]  # everything after the ID is a feature string
        prediction = predict_top3_tpo(
            items=item_features,
            model=model,
            mlb=mlb,
            label_to_idx=label_to_idx,
            device=device
        )
        user_clothes[category][i] = [item[0], prediction] + item_features

In [18]:
user_clothes

{'아우터': [[np.int64(1062492),
   {'스트리트': np.float32(0.7875),
    '컨트리': np.float32(0.0493),
    '로맨틱': np.float32(0.0307)},
   '아우터_기장_노말',
   '아우터_색상_핑크',
   '아우터_카테고리_재킷',
   '아우터_소매기장_긴팔',
   '아우터_넥라인_브이넥',
   '아우터_핏_노멀'],
  [np.int64(874966),
   {'스트리트': np.float32(0.6117),
    '리조트': np.float32(0.0697),
    '로맨틱': np.float32(0.0648)},
   '아우터_기장_하프',
   '아우터_색상_블랙',
   '아우터_카테고리_재킷',
   '아우터_소매기장_긴팔',
   '아우터_핏_노멀'],
  [np.int64(982022),
   {'스트리트': np.float32(0.8534),
    '클래식': np.float32(0.0381),
    '소피스트케이티드': np.float32(0.0352)},
   '아우터_기장_하프',
   '아우터_색상_네온',
   '아우터_카테고리_코트',
   '아우터_소매기장_긴팔',
   '아우터_넥라인_후드',
   '아우터_핏_노멀'],
  [np.int64(371876),
   {'스트리트': np.float32(0.761),
    '로맨틱': np.float32(0.0496),
    '리조트': np.float32(0.0348)},
   '아우터_기장_롱',
   '아우터_색상_블랙',
   '아우터_카테고리_코트',
   '아우터_소매기장_긴팔',
   '아우터_핏_노멀'],
  [np.int64(183539),
   {'스트리트': np.float32(0.6502),
    '리조트': np.float32(0.0619),
    '클래식': np.float32(0.0569)},
   '아우터_기장_하프',
   '아우터_색상_그레이',
   '아

In [19]:
tpo_score_table = {'데일리': {'스트리트': np.float32(0.52858794),
  '클래식': np.float32(0.29832992),
  '로맨틱': np.float32(0.530706),
  '펑크': np.float32(0.25430354),
  '페미닌': np.float32(0.4762796),
  '스포티': np.float32(0.46903458),
  '힙합': np.float32(0.32017642),
  '모던': np.float32(0.61891097),
  '프레피': np.float32(0.59357345),
  '톰보이': np.float32(0.4324854),
  '레트로': np.float32(0.56764644),
  '키치': np.float32(0.47647268),
  '아방가르드': np.float32(0.22979008),
  '오리엔탈': np.float32(0.32786918),
  '웨스턴': np.float32(0.23968673),
  '히피': np.float32(0.44711334),
  '젠더리스': np.float32(0.35417756),
  '밀리터리': np.float32(0.17381348),
  '매니시': np.float32(0.5283391),
  '컨트리': np.float32(0.3614927),
  '리조트': np.float32(0.32551143),
  '소피스트케이티드': np.float32(0.5927358),
  '섹시': np.float32(0.5265901)},
 '직장': {'스트리트': np.float32(0.2307991),
  '클래식': np.float32(0.277169),
  '로맨틱': np.float32(0.373245),
  '펑크': np.float32(0.110792525),
  '페미닌': np.float32(0.34261927),
  '스포티': np.float32(0.19779),
  '힙합': np.float32(0.15050335),
  '모던': np.float32(0.47148114),
  '프레피': np.float32(0.30647242),
  '톰보이': np.float32(0.19858037),
  '레트로': np.float32(0.32886535),
  '키치': np.float32(0.3238792),
  '아방가르드': np.float32(0.1940976),
  '오리엔탈': np.float32(0.28934458),
  '웨스턴': np.float32(0.15596609),
  '히피': np.float32(0.2015844),
  '젠더리스': np.float32(0.28316513),
  '밀리터리': np.float32(0.07486073),
  '매니시': np.float32(0.4094499),
  '컨트리': np.float32(0.22943243),
  '리조트': np.float32(0.16133589),
  '소피스트케이티드': np.float32(0.47308594),
  '섹시': np.float32(0.31044325)},
 '데이트': {'스트리트': np.float32(0.35422868),
  '클래식': np.float32(0.24003245),
  '로맨틱': np.float32(0.69649774),
  '펑크': np.float32(0.21289736),
  '페미닌': np.float32(0.5060767),
  '스포티': np.float32(0.35738122),
  '힙합': np.float32(0.26824525),
  '모던': np.float32(0.4868198),
  '프레피': np.float32(0.53625184),
  '톰보이': np.float32(0.38813046),
  '레트로': np.float32(0.46007064),
  '키치': np.float32(0.5099761),
  '아방가르드': np.float32(0.24406748),
  '오리엔탈': np.float32(0.26468188),
  '웨스턴': np.float32(0.14807768),
  '히피': np.float32(0.3811553),
  '젠더리스': np.float32(0.352549),
  '밀리터리': np.float32(0.050598018),
  '매니시': np.float32(0.4704031),
  '컨트리': np.float32(0.30839613),
  '리조트': np.float32(0.2530147),
  '소피스트케이티드': np.float32(0.65943706),
  '섹시': np.float32(0.5963634)},
 '경조사': {'스트리트': np.float32(0.36375412),
  '클래식': np.float32(0.30381358),
  '로맨틱': np.float32(0.52054816),
  '펑크': np.float32(0.14737882),
  '페미닌': np.float32(0.36699504),
  '스포티': np.float32(0.37927377),
  '힙합': np.float32(0.18956085),
  '모던': np.float32(0.5019663),
  '프레피': np.float32(0.40365452),
  '톰보이': np.float32(0.26966545),
  '레트로': np.float32(0.52741235),
  '키치': np.float32(0.4126282),
  '아방가르드': np.float32(0.1792833),
  '오리엔탈': np.float32(0.3156517),
  '웨스턴': np.float32(0.23063555),
  '히피': np.float32(0.3652409),
  '젠더리스': np.float32(0.26723626),
  '밀리터리': np.float32(0.0898062),
  '매니시': np.float32(0.3378259),
  '컨트리': np.float32(0.3745199),
  '리조트': np.float32(0.3581507),
  '소피스트케이티드': np.float32(0.51044554),
  '섹시': np.float32(0.39378038)},
 '여행': {'스트리트': np.float32(0.5842646),
  '클래식': np.float32(0.15974745),
  '로맨틱': np.float32(0.39738858),
  '펑크': np.float32(0.3726854),
  '페미닌': np.float32(0.28424317),
  '스포티': np.float32(0.5145872),
  '힙합': np.float32(0.41938052),
  '모던': np.float32(0.44138741),
  '프레피': np.float32(0.5156857),
  '톰보이': np.float32(0.39823353),
  '레트로': np.float32(0.56900585),
  '키치': np.float32(0.4954515),
  '아방가르드': np.float32(0.3834904),
  '오리엔탈': np.float32(0.408062),
  '웨스턴': np.float32(0.41594252),
  '히피': np.float32(0.5748364),
  '젠더리스': np.float32(0.27390137),
  '밀리터리': np.float32(0.16488421),
  '매니시': np.float32(0.37509647),
  '컨트리': np.float32(0.46930322),
  '리조트': np.float32(0.70026326),
  '소피스트케이티드': np.float32(0.48646387),
  '섹시': np.float32(0.4108982)},
 '파티': {'스트리트': np.float32(0.4990375),
  '클래식': np.float32(0.22124353),
  '로맨틱': np.float32(0.4567237),
  '펑크': np.float32(0.446158),
  '페미닌': np.float32(0.42028332),
  '스포티': np.float32(0.51005644),
  '힙합': np.float32(0.4711032),
  '모던': np.float32(0.4456517),
  '프레피': np.float32(0.5910596),
  '톰보이': np.float32(0.45082673),
  '레트로': np.float32(0.58568376),
  '키치': np.float32(0.55756074),
  '아방가르드': np.float32(0.40107894),
  '오리엔탈': np.float32(0.3259839),
  '웨스턴': np.float32(0.19204624),
  '히피': np.float32(0.5364033),
  '젠더리스': np.float32(0.3558746),
  '밀리터리': np.float32(0.098348536),
  '매니시': np.float32(0.44707882),
  '컨트리': np.float32(0.40599796),
  '리조트': np.float32(0.42181265),
  '소피스트케이티드': np.float32(0.65604776),
  '섹시': np.float32(0.56462836)},
 '운동': {'스트리트': np.float32(0.379994),
  '클래식': np.float32(0.043412022),
  '로맨틱': np.float32(0.16206),
  '펑크': np.float32(0.24390033),
  '페미닌': np.float32(0.1992781),
  '스포티': np.float32(0.62818235),
  '힙합': np.float32(0.5319789),
  '모던': np.float32(0.29742286),
  '프레피': np.float32(0.3966062),
  '톰보이': np.float32(0.6161824),
  '레트로': np.float32(0.314254),
  '키치': np.float32(0.2673113),
  '아방가르드': np.float32(0.2065803),
  '오리엔탈': np.float32(0.25593814),
  '웨스턴': np.float32(0.12673365),
  '히피': np.float32(0.3309636),
  '젠더리스': np.float32(0.1496136),
  '밀리터리': np.float32(0.124842584),
  '매니시': np.float32(0.34297353),
  '컨트리': np.float32(0.2250949),
  '리조트': np.float32(0.28023916),
  '소피스트케이티드': np.float32(0.27504063),
  '섹시': np.float32(0.3095897)}}

In [20]:

# Occasion used for the TPO scoring below; must be a top-level key of tpo_score_table.
selected_tpo = '데이트'

In [21]:
def calculate_tpo_scores(user_clothes, tpo_score_table, selected_tpo):
    """Score every wardrobe item for the selected occasion (TPO).

    Args:
        user_clothes: {category: [item, ...]} where item[0] is the ID and
            item[1] is a {style: probability} dict.
        tpo_score_table: {tpo: {style: weight}}.
        selected_tpo: key of tpo_score_table to use.

    Returns:
        {category: {'<category>_<ID>': score}} where score is the sum over the
        item's styles of probability * weight of that style for the occasion.
    """
    style_weights = tpo_score_table[selected_tpo]

    def _item_score(style_probs):
        # Accumulate in dict order, starting from 0, one style at a time.
        total = 0
        for style, prob in style_probs.items():
            total += float(prob) * float(style_weights[style])
        return total

    return {
        category: {f"{category}_{item[0]}": _item_score(item[1]) for item in items}
        for category, items in user_clothes.items()
    }

In [22]:
# Occasion score of every wardrobe item for the currently selected TPO.
result_tpo = calculate_tpo_scores(user_clothes, tpo_score_table, selected_tpo)

In [23]:
result_tpo

{'아우터': {'아우터_1062492': 0.3155415001360399,
  '아우터_874966': 0.27944986097164604,
  '아우터_982022': 0.3346561695517733,
  '아우터_371876': 0.31291921498364417,
  '아우터_183539': 0.2596389450280489,
  '아우터_801799': 0.33336599326402316,
  '아우터_1129354': 0.31760454507203817,
  '아우터_1223964': 0.35517663102413577,
  '아우터_32894': 0.2993217334563929,
  '아우터_665095': 0.2800760844936756,
  '아우터_925635': 0.3290662990557598,
  '아우터_605389': 0.3001320049360814,
  '아우터_435108': 0.28617219003114647,
  '아우터_1005223': 0.2730412618526332,
  '아우터_153293': 0.2471125150798651,
  '아우터_819372': 0.3095594750764139,
  '아우터_613978': 0.29057373951923193,
  '아우터_1684': 0.3487341278616194,
  '아우터_953141': 0.31356757552040415,
  '아우터_1173062': 0.29617875304065033,
  '아우터_1107378': 0.30568362053824516,
  '아우터_894989': 0.32197353230663395,
  '아우터_1054030': 0.2624427979249431,
  '아우터_1286060': 0.30829348306295146,
  '아우터_705888': 0.2180519147212956,
  '아우터_353077': 0.3022426641194458,
  '아우터_43229': 0.26103629032382325,
  '아

In [25]:
user_clothes

{'아우터': [[np.int64(1062492),
   {'스트리트': np.float32(0.7875),
    '컨트리': np.float32(0.0493),
    '로맨틱': np.float32(0.0307)},
   '아우터_기장_노말',
   '아우터_색상_핑크',
   '아우터_카테고리_재킷',
   '아우터_소매기장_긴팔',
   '아우터_넥라인_브이넥',
   '아우터_핏_노멀'],
  [np.int64(874966),
   {'스트리트': np.float32(0.6117),
    '리조트': np.float32(0.0697),
    '로맨틱': np.float32(0.0648)},
   '아우터_기장_하프',
   '아우터_색상_블랙',
   '아우터_카테고리_재킷',
   '아우터_소매기장_긴팔',
   '아우터_핏_노멀'],
  [np.int64(982022),
   {'스트리트': np.float32(0.8534),
    '클래식': np.float32(0.0381),
    '소피스트케이티드': np.float32(0.0352)},
   '아우터_기장_하프',
   '아우터_색상_네온',
   '아우터_카테고리_코트',
   '아우터_소매기장_긴팔',
   '아우터_넥라인_후드',
   '아우터_핏_노멀'],
  [np.int64(371876),
   {'스트리트': np.float32(0.761),
    '로맨틱': np.float32(0.0496),
    '리조트': np.float32(0.0348)},
   '아우터_기장_롱',
   '아우터_색상_블랙',
   '아우터_카테고리_코트',
   '아우터_소매기장_긴팔',
   '아우터_핏_노멀'],
  [np.int64(183539),
   {'스트리트': np.float32(0.6502),
    '리조트': np.float32(0.0619),
    '클래식': np.float32(0.0569)},
   '아우터_기장_하프',
   '아우터_색상_그레이',
   '아

In [26]:
# Score the wardrobe against the first and the second outer item.
# The request dicts get their own names: binding them to `input` would shadow
# the builtin input() for every later cell of the notebook.
first_outer_input = {'아우터': [user_clothes['아우터'][0]]}
df_recommendations_d = recommend_clothes_parallel_sorted(first_outer_input, color_dict, feature_dict, user_clothes)
second_outer_input = {'아우터': [user_clothes['아우터'][1]]}
df_recommendations_d2 = recommend_clothes_parallel_sorted(second_outer_input, color_dict, feature_dict, user_clothes)

Processing recommendations: 100%|██████████| 1/1 [00:00<00:00, 113.75it/s]
Processing recommendations: 100%|██████████| 1/1 [00:00<00:00, 120.90it/s]


In [27]:
df_recommendations_d

Unnamed: 0,input_ID,input_cat,recommendation
0,1062492,아우터,"{'상의': {'상의_1062492': 2.8871147605178837, '상의_..."


In [None]:
import numpy as np

def flatten_scores(score_dict):
    """Merge per-category score dicts into a single {item_id: score} dict.

    Categories are merged in iteration order; if the same item_id occurs in
    more than one category, the later category's score wins.
    """
    merged = {}
    for per_category_scores in score_dict.values():
        merged.update(per_category_scores)
    return merged

def standardize_all_items(score_dict):
    """Z-score all item scores together, ignoring category boundaries.

    Args:
        score_dict: {category: {item_id: score}}.

    Returns:
        {item_id: (score - mean) / std} using the population standard deviation
        over every item. Returns {} for empty input. When the scores are
        (numerically) all equal, every item gets 0.0.
    """
    flat = flatten_scores(score_dict)
    if not flat:
        # mean()/std() of an empty array emit warnings and produce NaN.
        return {}

    values = np.array(list(flat.values()), dtype=float)
    mean = values.mean()
    std = values.std()
    # Equal scores can leave a round-off-sized std instead of exactly 0 (the mean
    # of identical floats is not always that float). Dividing by it would turn
    # pure rounding noise into scores of magnitude ~1, so compare with a tolerance.
    if std <= 1e-12 * max(1.0, abs(mean)):
        return {key: 0.0 for key in flat}

    return {key: (value - mean) / std for key, value in flat.items()}

def merge_scores_global_standardization(score_dict1, score_dict2, weight1=0.5, weight2=0.5):
    """Standardize two score tables and combine the items present in both.

    Args:
        score_dict1, score_dict2: {category: {item_id: score}}.
        weight1, weight2: weights of the standardized scores of each table.

    Returns:
        {item_id: weight1 * z1 + weight2 * z2} for every item_id found in both
        tables. Keys follow the order in which they appear in score_dict1, so
        the result (and any stable sort applied to it afterwards) does not
        depend on set iteration order / string hash randomization.
    """
    norm1 = standardize_all_items(score_dict1)
    norm2 = standardize_all_items(score_dict2)

    return {
        key: weight1 * norm1[key] + weight2 * norm2[key]
        for key in norm1
        if key in norm2
    }

In [None]:
# Combine the compatibility scores of the first recommendation row with the TPO scores (default 0.5 / 0.5 weights).
result = merge_scores_global_standardization(df_recommendations_d['recommendation'].loc[0], result_tpo)

In [None]:
result

In [None]:
def count_total_items(score_dict):
    """Return the total number of items across all categories of score_dict."""
    total = 0
    for items in score_dict.values():
        total += len(items)
    return total


In [None]:
# Sanity check: number of items in the compatibility scores, in the TPO scores,
# and in the merged result (which keeps only items present in both).
print(count_total_items(df_recommendations_d['recommendation'].loc[0]))
print(count_total_items(result_tpo))
print(len(result))

In [None]:
def sort_items_by_score(score_dict, descending=True):
    """Return the (item_id, score) pairs of score_dict ordered by score.

    The sort is stable: items with equal scores keep their order from
    score_dict. `descending=False` gives lowest score first.
    """
    ranked = list(score_dict.items())
    ranked.sort(key=lambda pair: pair[1], reverse=descending)
    return ranked

# Merged scores as (item_id, score) pairs, best first.
sorted_result = sort_items_by_score(result)

In [None]:
from collections import defaultdict
import numpy as np

def get_top_items_by_category(sorted_result):
    """Pick the best-scoring item ID(s) of each category.

    The category of an item is the text before the first '_' of its ID. Ties
    for the maximum score are all kept, in their input order.

    Args:
        sorted_result (List[Tuple[str, float]]): e.g. [('상의_123', 1.2), ('하의_456', 2.3), ...]

    Returns:
        Dict[str, List[str]]: item IDs with the highest score, per category.
    """
    # Bucket the (item_id, score) pairs by category, preserving input order.
    by_category = {}
    for item_id, score in sorted_result:
        by_category.setdefault(item_id.split('_')[0], []).append((item_id, score))

    # Within each bucket keep every item that reaches the bucket's maximum.
    winners = {}
    for category, entries in by_category.items():
        best = max(entry[1] for entry in entries)
        winners[category] = [entry[0] for entry in entries if entry[1] == best]
    return winners


In [None]:
def run_recommendation(selected_tpo, selected_clothing):
    """Recommend the best-matching wardrobe item(s) per category for one garment.

    Uses the notebook-level color_dict, feature_dict, user_clothes and
    tpo_score_table.

    Args:
        selected_tpo: occasion key of tpo_score_table.
        selected_clothing: wardrobe entry; its category is read from the prefix
            of the element at index 2.

    Returns:
        {category: [item_id, ...]} holding the top-scoring item(s) of every
        category that may be paired with the selected garment's category.
    """
    # Category of the selected garment, taken from its first feature string.
    clothing_type = selected_clothing[2].split('_')[0]

    # Compatibility scores and occasion scores, merged with the default weights.
    compatibility = recommend_clothes_parallel_sorted(
        {clothing_type: [selected_clothing]}, color_dict, feature_dict, user_clothes
    )
    tpo_scores = calculate_tpo_scores(user_clothes, tpo_score_table, selected_tpo)
    merged = merge_scores_global_standardization(compatibility['recommendation'].loc[0], tpo_scores)
    top_items_by_category = get_top_items_by_category(sort_items_by_score(merged))

    # Categories that may be recommended for each kind of selected garment.
    category_filter_map = {
        "아우터": {"상의", "하의", "원피스"},
        "상의": {"아우터", "하의"},
        "하의": {"아우터", "상의"},
        "원피스": {"아우터"},
    }
    allowed_categories = category_filter_map.get(clothing_type, set())

    filtered_result = {}
    for cat, items in top_items_by_category.items():
        if cat in allowed_categories:
            filtered_result[cat] = items
    return filtered_result


In [None]:
# End-to-end example: recommendations for the second top ('상의') in the wardrobe
# on the '데일리' occasion. Note this rebinds the notebook-level selected_tpo and result.
selected_tpo = '데일리'
selected_clothing = user_clothes['상의'][1]

result = run_recommendation(selected_tpo, selected_clothing)
print(result)

In [28]:
user_clothes['상의'][0]

[np.int64(1216062),
 {'스트리트': np.float32(0.6033),
  '페미닌': np.float32(0.0803),
  '로맨틱': np.float32(0.0581)},
 '상의_색상_화이트',
 '상의_카테고리_티셔츠',
 '상의_소매기장_반팔',
 '상의_소재_저지',
 '상의_핏_노멀']

In [None]:
# Select the second top ('상의') entry of the wardrobe.
selected_clothing = user_clothes['상의'][1]