In [119]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
from sklearn.preprocessing import MultiLabelBinarizer
import re
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
import keras
from keras.utils import to_categorical

In [57]:
# csv 파일을 dataframe으로 변환
df_outfit = pd.read_csv('../data/outfit(male)/outfit(male).csv')
df_weather = pd.read_csv('../data/2022-08-01_to_2024-04-30.csv', encoding='cp949')
# 필요한 columns만 추출
df_outfit = df_outfit[['userId', '상의', '아우터', '하의', '신발', '액세서리', '작성일', '체중', '키', '스타일']].copy()
df_weather = df_weather[['평균기온(°C)', '최저기온(°C)', '최고기온(°C)', '강수 계속시간(hr)', '평균 풍속(m/s)', '평균 상대습도(%)', '일시']].copy()

# '작성일'과 '일시' 열을 datetime 형식으로 변환
df_outfit['작성일'] = pd.to_datetime(df_outfit['작성일'], format='%Y년 %m월 %d일')
df_weather['일시'] = pd.to_datetime(df_weather['일시'])

# 두 dataframe을 날짜를 기준으로 병합
df_merged = pd.merge(df_outfit, df_weather, left_on='작성일', right_on='일시')
df_merged = df_merged.drop(['작성일', '일시'], axis=1)
df_merged

Unnamed: 0,userId,상의,아우터,하의,신발,액세서리,체중,키,스타일,평균기온(°C),최저기온(°C),최고기온(°C),강수 계속시간(hr),평균 풍속(m/s),평균 상대습도(%)
0,1,"반팔 티, 셔츠/블라우스",재킷,반바지,구두/로퍼,,67,172,"캐주얼, 프렌치시크",13.2,11.0,16.5,7.00,3.0,80.1
1,1,반팔 티,재킷,반바지,운동화,기타 모자,67,172,"캐주얼, 프렌치시크",17.6,11.5,24.3,,2.1,51.8
2,1,반팔 티,재킷,반바지,구두/로퍼,장목양말,67,172,"캐주얼, 프렌치시크",16.0,11.7,20.0,12.17,2.5,77.4
3,1,반팔 티,,나일론 팬츠,구두/로퍼,,67,172,"캐주얼, 프렌치시크",15.3,10.6,20.8,,3.5,32.4
4,1,반팔 티,집업,면바지,구두/로퍼,,67,172,"캐주얼, 프렌치시크",14.0,10.2,18.8,,2.5,51.3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1333,14,반팔 티,,반바지,운동화,"기타 모자, 장목양말",65,178,캐주얼,17.3,13.2,24.2,3.33,3.6,55.0
1334,14,반팔 티,,카고바지,운동화,기타 모자,65,178,캐주얼,13.2,11.0,16.5,7.00,3.0,80.1
1335,14,반팔 티,집업,나일론 팬츠,스니커즈/캔버스,기타 모자,65,178,캐주얼,14.4,10.3,20.8,,1.9,75.6
1336,14,"반팔 티, 셔츠/블라우스",,반바지,구두/로퍼,장목양말,65,178,캐주얼,17.8,11.4,26.0,,1.9,61.1


In [58]:
# 강수 계속시간을 비 옴, 비 오지 않음으로 변경
df_merged_sky = df_merged.copy()
df_merged_sky['강수 여부'] = np.where(df_merged_sky['강수 계속시간(hr)'] > 0, 1, 0)
df_merged_sky.drop(['강수 계속시간(hr)'], axis=1, inplace=True)
df_merged_sky

Unnamed: 0,userId,상의,아우터,하의,신발,액세서리,체중,키,스타일,평균기온(°C),최저기온(°C),최고기온(°C),평균 풍속(m/s),평균 상대습도(%),강수 여부
0,1,"반팔 티, 셔츠/블라우스",재킷,반바지,구두/로퍼,,67,172,"캐주얼, 프렌치시크",13.2,11.0,16.5,3.0,80.1,1
1,1,반팔 티,재킷,반바지,운동화,기타 모자,67,172,"캐주얼, 프렌치시크",17.6,11.5,24.3,2.1,51.8,0
2,1,반팔 티,재킷,반바지,구두/로퍼,장목양말,67,172,"캐주얼, 프렌치시크",16.0,11.7,20.0,2.5,77.4,1
3,1,반팔 티,,나일론 팬츠,구두/로퍼,,67,172,"캐주얼, 프렌치시크",15.3,10.6,20.8,3.5,32.4,0
4,1,반팔 티,집업,면바지,구두/로퍼,,67,172,"캐주얼, 프렌치시크",14.0,10.2,18.8,2.5,51.3,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1333,14,반팔 티,,반바지,운동화,"기타 모자, 장목양말",65,178,캐주얼,17.3,13.2,24.2,3.6,55.0,1
1334,14,반팔 티,,카고바지,운동화,기타 모자,65,178,캐주얼,13.2,11.0,16.5,3.0,80.1,1
1335,14,반팔 티,집업,나일론 팬츠,스니커즈/캔버스,기타 모자,65,178,캐주얼,14.4,10.3,20.8,1.9,75.6,0
1336,14,"반팔 티, 셔츠/블라우스",,반바지,구두/로퍼,장목양말,65,178,캐주얼,17.8,11.4,26.0,1.9,61.1,0


In [59]:
# '상의', '아우터', '하의', '신발', '액세서리' 열의 결측값을 '~ 없음'으로 대체
columns = ['상의', '아우터', '하의', '신발', '액세서리']
df_notnull = df_merged_sky.copy()
for column in columns:
    df_notnull[column] = df_merged[column].fillna(column + ' 없음')
df_notnull['평균 풍속(m/s)'].fillna(df_merged_sky['평균 풍속(m/s)'].mean(), inplace=True)
df_notnull.isna().sum()

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df_notnull['평균 풍속(m/s)'].fillna(df_merged_sky['평균 풍속(m/s)'].mean(), inplace=True)


userId        0
상의            0
아우터           0
하의            0
신발            0
액세서리          0
체중            0
키             0
스타일           0
평균기온(°C)      0
최저기온(°C)      0
최고기온(°C)      0
평균 풍속(m/s)    0
평균 상대습도(%)    0
강수 여부         0
dtype: int64

In [60]:
# 2가 붙은 단어를 두 번 반복하는 함수
def duplicate_word(text):
    words = text.split(', ')
    for i, word in enumerate(words):
        if '2' in word:
            words[i] = word.replace('2', '') + ', ' + word.replace('2', '')
    return ', '.join(words)

In [61]:
# 2가 붙은 단어를 두 번 반복한 dataframe df_dup 생성
df_dup = df_notnull
for column in columns:
    df_dup[columns] = df_notnull[columns].map(duplicate_word)

In [62]:
'''df_dup.iloc[168]'''

'df_dup.iloc[168]'

In [63]:
# 옷의 조합 컬럼 생성 (상의, 아우터, 하의, 신발, 액세서리의 각 값들을 하나의 문자열로 조합하여 하나의 컬럼으로 만듦)
df_combination = df_dup.copy()
df_combination['옷 조합'] = df_dup['상의'] + ', ' + df_dup['아우터'] + ', ' + df_dup['하의'] + ', ' + df_dup['신발'] + ', ' + df_dup['액세서리']
df_combination.drop(columns=['상의', '아우터', '하의', '신발', '액세서리'], inplace=True)

In [64]:
# 쉼표를 기준으로 텍스트를 나누는 함수
def comma_tokenizer(s):
    return s.split(', ')

O_vectorizer = CountVectorizer(tokenizer=comma_tokenizer)

O = O_vectorizer.fit_transform(df_combination['옷 조합'])



In [65]:
# multi-hot encoding된 데이터를 numpy array로 변환
df_encoded_O = pd.DataFrame(O.toarray().tolist(), columns=O_vectorizer.get_feature_names_out())
npa_O = np.array(df_encoded_O)
npa_O

array([[0, 0, 1, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 1, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 1, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]])

In [66]:
# 스타일 칼럼의 공백 제거
df_combination['스타일'] = df_combination['스타일'].str.replace(' ', '')

In [81]:
# 쉼표를 기준으로 텍스트를 나누는 함수
def comma_tokenizer(s):
    return s.split(',')

S_vectorizer = CountVectorizer(tokenizer=comma_tokenizer)

S = S_vectorizer.fit_transform(df_combination['스타일'])

# multi-hot encoding된 데이터를 numpy array로 변환
df_encoded_S = pd.DataFrame(S.toarray().tolist(), columns=S_vectorizer.get_feature_names_out())
df_encoded_S
df_encoded = pd.concat([df_combination.drop(columns=['옷 조합', '스타일']), df_encoded_S, df_encoded_O], axis=1)
df_encoded



Unnamed: 0,userId,체중,키,평균기온(°C),최저기온(°C),최고기온(°C),평균 풍속(m/s),평균 상대습도(%),강수 여부,댄디,...,집업,카고바지,코트,털 모자,트레이닝/조거 팬츠,패딩,패딩슈즈,패딩조끼,하의 없음,후드티
0,1,67,172,13.2,11.0,16.5,3.0,80.1,1,0,...,0,0,0,0,0,0,0,0,0,0
1,1,67,172,17.6,11.5,24.3,2.1,51.8,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,67,172,16.0,11.7,20.0,2.5,77.4,1,0,...,0,0,0,0,0,0,0,0,0,0
3,1,67,172,15.3,10.6,20.8,3.5,32.4,0,0,...,0,0,0,0,0,0,0,0,0,0
4,1,67,172,14.0,10.2,18.8,2.5,51.3,0,0,...,1,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1333,14,65,178,17.3,13.2,24.2,3.6,55.0,1,0,...,0,0,0,0,0,0,0,0,0,0
1334,14,65,178,13.2,11.0,16.5,3.0,80.1,1,0,...,0,1,0,0,0,0,0,0,0,0
1335,14,65,178,14.4,10.3,20.8,1.9,75.6,0,0,...,1,0,0,0,0,0,0,0,0,0
1336,14,65,178,17.8,11.4,26.0,1.9,61.1,0,0,...,0,0,0,0,0,0,0,0,0,0


In [82]:
df_comb = df_encoded.copy()
df_comb['combination'] = df_encoded[clothes].apply(lambda x: ', '.join(map(str, x)), axis=1)

In [84]:
# 각 사용자마다 combination 값이 같은 행의 개수를 세서 각 행에 저장
df_count = df_encoded.copy()
df_count['count'] = df_comb.groupby('combination')['combination'].transform('count')
df_count

Unnamed: 0,userId,체중,키,평균기온(°C),최저기온(°C),최고기온(°C),평균 풍속(m/s),평균 상대습도(%),강수 여부,댄디,...,카고바지,코트,털 모자,트레이닝/조거 팬츠,패딩,패딩슈즈,패딩조끼,하의 없음,후드티,count
0,1,67,172,13.2,11.0,16.5,3.0,80.1,1,0,...,0,0,0,0,0,0,0,0,0,1
1,1,67,172,17.6,11.5,24.3,2.1,51.8,0,0,...,0,0,0,0,0,0,0,0,0,1
2,1,67,172,16.0,11.7,20.0,2.5,77.4,1,0,...,0,0,0,0,0,0,0,0,0,3
3,1,67,172,15.3,10.6,20.8,3.5,32.4,0,0,...,0,0,0,0,0,0,0,0,0,1
4,1,67,172,14.0,10.2,18.8,2.5,51.3,0,0,...,0,0,0,0,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1333,14,65,178,17.3,13.2,24.2,3.6,55.0,1,0,...,0,0,0,0,0,0,0,0,0,4
1334,14,65,178,13.2,11.0,16.5,3.0,80.1,1,0,...,1,0,0,0,0,0,0,0,0,4
1335,14,65,178,14.4,10.3,20.8,1.9,75.6,0,0,...,0,0,0,0,0,0,0,0,0,1
1336,14,65,178,17.8,11.4,26.0,1.9,61.1,0,0,...,0,0,0,0,0,0,0,0,0,7


In [97]:
# 평균기온(°C) column의 최대값과 최솟값
avg_max_temp = df_count['평균기온(°C)'].max()
avg_min_temp = df_count['평균기온(°C)'].min()
min_max_temp = df_count['최저기온(°C)'].max()
min_min_temp = df_count['최저기온(°C)'].min()
max_max_temp = df_count['최고기온(°C)'].max()
max_min_temp = df_count['최고기온(°C)'].min()

In [98]:
df_limit = df_count.copy()
# 평균기온(°C) column을 5도 간격으로 범주화하여 0, 1, 2, ...로 변환
avg_bins=np.round(np.arange(avg_min_temp -5, avg_max_temp+5, 5), 1)
avg_labels=np.arange(0, (avg_max_temp-avg_min_temp)//5+2)
min_bins=np.round(np.arange(min_min_temp -5, min_max_temp+5, 5), 1)
min_labels=np.arange(0, (min_max_temp-min_min_temp)//5+2)
max_bins=np.round(np.arange(max_min_temp -5, max_max_temp+5, 5), 1)
max_labels=np.arange(0, (max_max_temp-max_min_temp)//5+2)
df_limit['평균기온(°C)'] = pd.cut(df_limit['평균기온(°C)'], bins=avg_bins, labels=avg_labels)
df_limit['최저기온(°C)'] = pd.cut(df_limit['최저기온(°C)'], bins=min_bins, labels=min_labels)
df_limit['최고기온(°C)'] = pd.cut(df_limit['최고기온(°C)'], bins=max_bins, labels=max_labels)

In [99]:
df_limit

Unnamed: 0,userId,체중,키,평균기온(°C),최저기온(°C),최고기온(°C),평균 풍속(m/s),평균 상대습도(%),강수 여부,댄디,...,카고바지,코트,털 모자,트레이닝/조거 팬츠,패딩,패딩슈즈,패딩조끼,하의 없음,후드티,count
0,1,67,172,5.0,6.0,6.0,3.0,80.1,1,0,...,0,0,0,0,0,0,0,0,0,1
1,1,67,172,6.0,6.0,7.0,2.1,51.8,0,0,...,0,0,0,0,0,0,0,0,0,1
2,1,67,172,6.0,6.0,6.0,2.5,77.4,1,0,...,0,0,0,0,0,0,0,0,0,3
3,1,67,172,6.0,6.0,6.0,3.5,32.4,0,0,...,0,0,0,0,0,0,0,0,0,1
4,1,67,172,6.0,6.0,6.0,2.5,51.3,0,0,...,0,0,0,0,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1333,14,65,178,6.0,7.0,7.0,3.6,55.0,1,0,...,0,0,0,0,0,0,0,0,0,4
1334,14,65,178,5.0,6.0,6.0,3.0,80.1,1,0,...,1,0,0,0,0,0,0,0,0,4
1335,14,65,178,6.0,6.0,6.0,1.9,75.6,0,0,...,0,0,0,0,0,0,0,0,0,1
1336,14,65,178,6.0,6.0,7.0,1.9,61.1,0,0,...,0,0,0,0,0,0,0,0,0,7


In [100]:
inputs_coulumns = ['체중', '키', '평균 풍속(m/s)', '평균 상대습도(%)']

input_scaler = StandardScaler()

df_stand = df_limit.copy()

df_stand[inputs_coulumns] = input_scaler.fit_transform(df_encoded[inputs_coulumns])

In [101]:
# '평균기온(°C)'의 각 범주를 고려하여 데이터를 분할
train_data = []
val_data = []
test_data = []
# 각 user별로 온도 범주의 데이터가 적은 경우 기록
user_category_valid = {}

for user in df_stand['userId'].unique():
    for avg_category in avg_labels:
        category_data = df_limit[(df_limit['평균기온(°C)'] == avg_category) & (df_limit['userId'] == user)]
        
        if category_data.shape[0] >= 20:
            if user not in user_category_valid:
                user_category_valid[user] = [avg_category]
            else:
                user_category_valid[user].append(avg_category)
        else:
            train_data.append(category_data)
            continue
        
        # 먼저 전체 데이터의 50%를 훈련 데이터로 분할
        train, temp = train_test_split(category_data, test_size=0.5, random_state=42)
        
        # 남은 데이터를 반으로 나누어 검증 데이터와 테스트 데이터로 분할
        val, test = train_test_split(temp, test_size=0.5, random_state=42)
        
        train_data.append(train)
        val_data.append(val)
        test_data.append(test)

print(user_category_valid)
# 각 데이터 세트를 하나의 DataFrame으로 병합
train_data_df = pd.concat(train_data)
val_data_df = pd.concat(val_data)
test_data_df = pd.concat(test_data)

train_data_df['평균기온(°C)'] = train_data_df['평균기온(°C)'].astype('float64')
val_data_df['평균기온(°C)'] = val_data_df['평균기온(°C)'].astype('float64')
test_data_df['평균기온(°C)'] = test_data_df['평균기온(°C)'].astype('float64')

{1: [4.0, 5.0, 6.0], 2: [6.0, 8.0], 3: [3.0, 4.0, 6.0, 7.0, 8.0], 8: [8.0], 11: [5.0, 6.0, 7.0, 8.0], 12: [3.0, 4.0], 14: [3.0, 4.0, 5.0, 6.0, 7.0, 8.0]}


In [102]:
shuffled_train = train_data_df.sample(frac=1, random_state=42).reset_index(drop=True)
shuffled_val = train_data_df.sample(frac=1, random_state=42).reset_index(drop=True)
shuffled_test = train_data_df.sample(frac=1, random_state=42).reset_index(drop=True)

In [103]:
df_train_input = shuffled_train.drop(['평균기온(°C)', '최저기온(°C)', '최고기온(°C)'], axis=1).copy()
df_val_input = shuffled_val.drop(['평균기온(°C)', '최저기온(°C)', '최고기온(°C)'], axis=1).copy()
df_test_input = shuffled_test.drop(['평균기온(°C)', '최저기온(°C)', '최고기온(°C)'], axis=1).copy()

In [104]:
df_train_label = shuffled_train[['평균기온(°C)']].copy()
df_val_label = shuffled_val[['평균기온(°C)']].copy()
df_test_label = shuffled_test[['평균기온(°C)']].copy()

In [106]:
X_train = np.array(df_train_input)
X_val = np.array(df_val_input)
X_test = np.array(df_test_input)
Y_train = np.array(df_train_label)
Y_val = np.array(df_val_label)
Y_test = np.array(df_test_label)

In [108]:
Y_train.shape

(965, 1)

In [109]:
X_train.shape

(965, 62)

In [110]:
input = keras.Input(shape=(X_train.shape[1],))
x = keras.layers.Dense(512, activation='relu')(input)
x = keras.layers.Dropout(0.5)(x)
x = keras.layers.Dense(512, activation='relu')(x)
x = keras.layers.Dropout(0.5)(x)
x = keras.layers.Dense(256, activation='relu')(x)
x = keras.layers.Dense(256, activation='relu')(x)
x = keras.layers.Dense(128, activation='relu')(x)
x = keras.layers.Dense(64, activation='relu')(x)
x = keras.layers.Dropout(0.5)(x)
output = keras.layers.Dense(Y_train.shape[1])(x)

In [111]:
model = keras.Model(input, output)
model.compile(optimizer='adam', loss='mse', metrics=['mae'])

In [112]:
train_data_df['userId'].unique()

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [113]:
X_val

array([[ 14.,  65., 178., ...,   0.,   0.,   1.],
       [  8.,  64., 171., ...,   0.,   0.,   2.],
       [  3.,  70., 180., ...,   0.,   1.,   1.],
       ...,
       [ 14.,  65., 178., ...,   0.,   0.,   1.],
       [  7.,  73., 180., ...,   0.,   0.,   4.],
       [  2.,  63., 173., ...,   0.,   0.,   7.]])

In [118]:
shuffled_val[shuffled_val['userId'] == 14]

Unnamed: 0,userId,체중,키,평균기온(°C),최저기온(°C),최고기온(°C),평균 풍속(m/s),평균 상대습도(%),강수 여부,댄디,...,카고바지,코트,털 모자,트레이닝/조거 팬츠,패딩,패딩슈즈,패딩조끼,하의 없음,후드티,count
0,14,65,178,5.0,6.0,5.0,5.4,85.6,1,0,...,0,0,0,0,0,0,0,0,0,1
19,14,65,178,8.0,8.0,8.0,1.3,76.9,0,0,...,0,0,0,0,0,0,0,0,0,6
24,14,65,178,2.0,3.0,3.0,2.0,60.5,1,0,...,0,0,0,0,0,0,0,0,1,7
29,14,65,178,5.0,5.0,6.0,1.9,39.9,0,0,...,0,0,0,0,0,0,0,0,0,1
37,14,65,178,4.0,5.0,4.0,1.9,85.0,1,0,...,1,0,0,0,1,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
911,14,65,178,8.0,9.0,8.0,2.2,77.8,1,0,...,0,0,0,0,0,0,0,0,0,11
916,14,65,178,4.0,4.0,5.0,2.4,28.6,0,0,...,0,0,0,0,0,0,0,0,0,2
928,14,65,178,3.0,3.0,3.0,2.7,66.6,0,0,...,0,0,0,1,0,0,0,0,1,1
947,14,65,178,4.0,4.0,4.0,1.5,72.9,0,0,...,0,0,0,1,0,0,0,0,0,1


In [115]:
X_val[X_val[:, 0] == 14, -1]

array([ 1.,  6.,  7.,  1.,  1.,  4.,  1., 13., 11.,  6.,  7.,  2.,  1.,
        3., 13.,  3.,  1., 12.,  1.,  4.,  1.,  1.,  6.,  1.,  2.,  4.,
        2.,  2.,  4.,  4.,  3.,  1.,  1., 12., 13.,  4.,  6.,  8.,  4.,
        2.,  2., 11.,  7.,  1., 12.,  2.,  3.,  4.,  4.,  1.,  1.,  1.,
        1.,  1.,  4.,  1.,  4.,  1.,  1.,  1.,  2., 13.,  1., 19.,  4.,
        1., 12., 12.,  4.,  3.,  4., 19.,  7.,  1., 13.,  4.,  1.,  8.,
       11., 12.,  1.,  1.,  4.,  3.,  1.,  4.,  4.,  3.,  1.,  1.,  1.,
        1.,  1.,  4.,  7.,  4.,  1., 12.,  3.,  1.,  3.,  1.,  1.,  1.,
        6.,  7.,  1.,  1.,  1.,  1.,  4.,  1.,  2.,  7., 19.,  1.,  1.,
       11.,  2.,  1.,  1.,  1.])

In [49]:
def metrics(count_weight, train_df, avg_labels, user_category_valid, isTrain=False) :
    # user_category_not_valid에 해당하지 않는 경우에 대해 precision, recall, f1_score 계산
    # 평균을 위한 초기화
    precision_m, recall_m, f1_score_m, count_m = 0, 0, 0, 0
    
    for user in train_df['userId'].unique():
        for category in avg_labels:
            # 실제 온도
            # 평균을 적용하고 temp를 빼서 값이 작을수록 실제 온도에 가깝도록 함. 이 때 각 user-item의 사용 횟수를 가중하여 많이 사용한 item이 추천되도록 함
            
            pred = model.predict(X_val[X_val[:, 0] == user])
            diff = np.power(pred - category, 2) - count_weight * X_val[X_val[:, 0] == user, -1]

            # sort predictions
            
            ix = tf.argsort(diff, direction='ASCENDING')

            df_predict = shuffled_val[shuffled_val['userId'] == user].iloc[ix]
            # df_predict의 
            
            if not isTrain:
                # user i에 대한 예측을 파일로 저장
                os.makedirs(f'../data/predictions/CBF/male/user_{i+1}', exist_ok=True)
                # Save predictions to file in user's directory
                with open(f'../data/predictions/CBF/male/user_{i+1}/predictions_{category}.txt', 'w') as f:
                    for item in predict:
                        f.write("%s\n" % item)
            
            if user+1 in user_category_valid and category in user_category_valid[i+1]:
            
                label = df[(df['userId'] == i+1) & (df['평균기온(°C)'] == category)]['옷 조합'].astype(str)
                # label이 UI_temp의 column에 포함되지 않는다면 제외
                label = label[label.isin(UI_temp.columns)]
                # label에 어떠한 옷 조합도 포함되지 않을 시 지표를 측정하지 않음
                if label.shape[0] == 0:
                    '''print(f'{i+1}번 user, {category}도 label 데이터가 부족하여 제외합니다.')'''
                    continue
            
            count_m += 1
            precision = len(set(predict) & set(label)) / len(set(predict))
            '''print(f'{i+1}번 user, {category}도 prediction: {predict}')
            print(f'{i+1}번 user, {category}도 label (개수: {len(set(label))}): {label}')'''
            recall = len(set(predict) & set(label)) / len(set(label))
            if precision + recall == 0:
                '''print(f'0인 경우')
                print(f'{i+1}번 user, {category}도 예측 결과: {predict}, 실제 결과: {label} ')'''  
                f1_score = 0
            else:
                '''print(f'0이 아닌 경우')
                print(f'{i+1}번 user, {category}도 예측 결과: {predict}, 실제 결과: {label} ')'''  
                f1_score = 2 * (precision * recall) / (precision + recall)
            precision_m += precision
            recall_m += recall
            f1_score_m += f1_score
            '''print(precision, recall, f1_score)'''
    precision_m /= count_m
    recall_m /= count_m
    f1_score_m /= count_m
    return precision_m, recall_m, f1_score_m

In [None]:
class CustomCallback(tf.keras.callbacks.Callback):
    def __init__(self, message):
        super(CustomCallback, self).__init__()
        self.message = message

    def on_epoch_end(self, epoch, logs=None):
        # 여기에 원하는 동작을 추가합니다.
        print(f"Epoch {epoch} has ended! {self.message}")

# 모델 훈련 시 Callback 추가
model.fit(X_train, Y_train, epochs=10, callbacks=[CustomCallback(message="This is a custom message.")])

In [125]:
history = model.fit(X_train, Y_train, epochs=20, batch_size=32, validation_data=(X_val, Y_val))

Epoch 1/20


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [133]:
result = model.predict(X_test)
for i in range(X_test.shape[0]):
    print(f'예측 결과: {result[i]} 실제 결과: {Y_test[i]} ')

예측 결과: [2.953619] 실제 결과: [4.] 
예측 결과: [3.7020125] 실제 결과: [4.] 
예측 결과: [3.7118845] 실제 결과: [4.] 
예측 결과: [4.333366] 실제 결과: [4.] 
예측 결과: [3.789955] 실제 결과: [4.] 
예측 결과: [3.2723036] 실제 결과: [4.] 
예측 결과: [5.371829] 실제 결과: [5.] 
예측 결과: [4.153729] 실제 결과: [5.] 
예측 결과: [5.4307775] 실제 결과: [5.] 
예측 결과: [3.6339269] 실제 결과: [5.] 
예측 결과: [4.759771] 실제 결과: [5.] 
예측 결과: [4.8162484] 실제 결과: [5.] 
예측 결과: [3.8313713] 실제 결과: [5.] 
예측 결과: [3.902329] 실제 결과: [6.] 
예측 결과: [5.2782283] 실제 결과: [6.] 
예측 결과: [4.889947] 실제 결과: [6.] 
예측 결과: [3.855394] 실제 결과: [6.] 
예측 결과: [3.176681] 실제 결과: [6.] 
예측 결과: [5.4288645] 실제 결과: [6.] 
예측 결과: [3.5516448] 실제 결과: [6.] 
예측 결과: [3.8961651] 실제 결과: [6.] 
예측 결과: [5.4831743] 실제 결과: [6.] 
예측 결과: [5.0263224] 실제 결과: [6.] 
예측 결과: [5.16559] 실제 결과: [6.] 
예측 결과: [4.99413] 실제 결과: [6.] 
예측 결과: [4.063234] 실제 결과: [6.] 
예측 결과: [4.370169] 실제 결과: [6.] 
예측 결과: [4.5616183] 실제 결과: [6.] 
예측 결과: [5.9524364] 실제 결과: [8.] 
예측 결과: [7.4248314] 실제 결과: [8.] 
예측 결과: [7.944539] 실제 결과: [8.] 
예측 결과: [7.6504602] 실제 결과:

In [131]:
X_test.shape[0]

194