In [11]:
import os
print(os.path.abspath(os.path.join(os.getcwd(), '..')))

c:\SKN_3_MyProject\SKN_03_FINAL


In [12]:
import sys
sys.path.append(os.path.join(os.getcwd(), 'utils'))
sys.path.append(os.path.join(os.getcwd(), '..'))

In [13]:
from tensorflow.keras.models import load_model
import sys
import os
import config

# 사용자 정의 레이어와 손실 함수 등록
from utils.DeepFM import FMInteraction, weighted_loss  # FMInteraction과 weighted_loss가 정의된 파일에서 가져오기

# 모델 로드 시 custom_objects에 사용자 정의 객체 추가
model = load_model(config.save_model_path, 
                   custom_objects={'FMInteraction': FMInteraction, 'weighted_loss': weighted_loss})




In [14]:
from sklearn.preprocessing import LabelEncoder
import pandas as pd

# 범주형 자료만 (코드)

# 연속형 자료 (모델)

In [15]:
class MusicalRecommender:
    def __init__(self):
        self.data = None
        self.original_data = None
        self.model = None
        self.label_encoders = {}
    
    def load_and_preprocess_data(self):
        # 데이터 로드 및 전처리
        self.data = pd.read_json(config.df_with_negatives_path, lines=True)  # Update the path to a relative one if necessary
        self.original_data = self.data.copy()
        
        categorical_features = ['title', 
                                'cast', 
                                'genre'
                                ]
        
        # 범주형 변수 레이블 인코딩
        for feature in categorical_features:
            self.label_encoders[feature] = LabelEncoder()
            self.data[feature] = self.label_encoders[feature].fit_transform(self.data[feature].astype(str))


    def recommend_for_cast(self, cast_name, top_n=10):
        # 1. 캐스트 이름을 인코딩
        if cast_name not in self.label_encoders['cast'].classes_:
            print(f"Error: {cast_name} is not in the dataset.")
            return []
        
        cast_encoded = self.label_encoders['cast'].transform([cast_name])[0]
        
        # 2. 전체 뮤지컬 후보 생성
        musical_candidates = self.original_data[['title', 
                                                 'genre',
                                                 'percentage', 
                                                #  'ticket_price'
                                                ]].drop_duplicates()
        musical_candidates['cast'] = cast_encoded  # 모든 뮤지컬에 해당 캐스트를 넣음
        
        # 3. 모델 입력 데이터 준비
        input_data = {
            'title': self.label_encoders['title'].transform(musical_candidates['title']),
            'cast': musical_candidates['cast'],
            'genre': self.label_encoders['genre'].transform(musical_candidates['genre']),
            'percentage' : musical_candidates['percentage'],
            # 'ticket_price' : musical_candidates['ticket_price']
        }
        
        # 4. 예측 점수 계산
        predictions = model.predict([
            input_data['title'],
            input_data['cast'],
            input_data['genre'],
            input_data['percentage'],
            # input_data['ticket_price']
        ])
        
        # 5. 상위 N개 추천
        musical_candidates['score'] = predictions
        top_recommendations = musical_candidates.sort_values(by='score', ascending=False).head(top_n)
        
        # 6. 디코딩된 값으로 반환
        top_recommendations['cast'] = self.label_encoders['cast'].inverse_transform(top_recommendations['cast'])
        
        return top_recommendations[['cast', 'title', 'score']]

In [16]:
recommender = MusicalRecommender()
recommender.load_and_preprocess_data()
# 학습된 모델 로드
# recommender.load_model("C:/SKN_3_MyProject/SKN_03_FINAL/Data/Model/Recommend.h5")

In [17]:
data = pd.read_json(config.df_with_negatives_path, lines=True)

In [18]:
# Step 1: cast 유니크 값
unique_cast_names = data['cast'].unique()

# Step 2: 담을 데이터
all_recommendations = []

In [19]:
# top_n 갯수 설정
top_n = 10

In [20]:
for cast_name in unique_cast_names:
    recommendations = recommender.recommend_for_cast(cast_name, top_n)
    if not recommendations.empty:
        all_recommendations.append(recommendations)

[1m27/51[0m [32m━━━━━━━━━━[0m[37m━━━━━━━━━━[0m [1m0s[0m 2ms/step  



[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0

In [21]:
recommendations_df = pd.concat(all_recommendations, ignore_index=True)

In [22]:
# 배우수
len(unique_cast_names)

3302

In [23]:
# Unique한 점수 값들
unique_scores = recommendations_df['score'].unique()
print("Unique scores:", unique_scores)

Unique scores: [0.99786395 0.9940649  0.9688289  0.95550454 0.9449338  0.934854
 0.9343383  0.9319011  0.92923826 0.914031   0.99947995 0.99854994
 0.9916986  0.9875934  0.98430943 0.98153615 0.98017037 0.9788042
 0.9772914  0.9771113  0.998734   0.99658954 0.98163646 0.9741742
 0.9671544  0.96169233 0.9601961  0.9593153  0.9575461  0.94633806
 0.99976975 0.99967355 0.999038   0.9986875  0.998251   0.9979082
 0.9978786  0.9976723  0.99739814 0.99735975 0.9995702  0.9993635
 0.9992903  0.9984195  0.99647754 0.9902884  0.98675317 0.98402
 0.9824168  0.9820949  0.997815   0.9935192  0.96916115 0.95658314
 0.9456356  0.93601775 0.93317354 0.9316849  0.9286897  0.91004395
 0.9993643  0.99801123 0.9903708  0.9869991  0.9832407  0.97996885
 0.978462   0.97837734 0.9766602  0.97153395 0.99975145 0.97058
 0.9697024  0.93651646 0.9340236  0.9268997  0.9261125  0.9125226
 0.91121846 0.8579878  0.9994963  0.847484   0.84212536 0.70439476
 0.695271   0.6799933  0.67923653 0.64167106 0.6291897  0.50

In [24]:
merged_df = recommendations_df.merge(data, on=['cast', 'title'], how='inner')

# Filter rows where target == 1 in the data DataFrame
target_1_count = merged_df[merged_df['target'] == 1].shape[0]

# Print the count of rows where target == 1
print("Number of matching rows where target is 1:", target_1_count)

Number of matching rows where target is 1: 78


In [25]:
merged_df

Unnamed: 0,cast,title,score,genre,percentage,ticket_price,target
0,김린,1025 일공이오,0.997864,역사,0.0222,0.8015,1
1,이지형,1025 일공이오,0.999480,역사,0.0222,0.8015,1
2,이지형,문나이트 [연천],0.991699,역사,0.7764,0.7087,1
3,오선혜,1025 일공이오,0.998734,역사,0.0222,0.8015,1
4,권민지,1025 일공이오,0.999770,역사,0.0222,0.8015,1
...,...,...,...,...,...,...,...
76,이성준,프랭크 와일드혼 콘서트,0.999950,대학로,0.1523,0.8785,1
77,박인화,실업자들 [부산],0.999948,대학로,0.0200,0.6712,1
78,박현석,실업자들 [부산],0.999948,대학로,0.0200,0.6712,1
79,차주원,실업자들 [부산],0.999948,대학로,0.0200,0.6712,1


In [26]:
# Step 7: 전체 Ground Truth (target=1) 개수
total_target_1_count = data[data['target'] == 1].shape[0]
total_target_1_count

8550

In [27]:
# Step 8: Recall@10 계산
recall_golbangE_10 = target_1_count / total_target_1_count

# Step 9: Precision@10 계산
precision_golbangE_10 = target_1_count / (10 * len(unique_cast_names))

golbange_K = target_1_count / (10 * len(unique_cast_names))

# 결과 출력
print(f"Recall@10: {recall_golbangE_10:.4f}")
print(f"Precision@10: {precision_golbangE_10:.4f}")
print(f'@K: {golbange_K: .4f}')

Recall@10: 0.0091
Precision@10: 0.0024
@K:  0.0024


In [28]:
# 10710 / 10 = a = 1071
# L = 10710개에서 target값이 1인 것 개수
# L / (10 * a) = 평가값

# 해당 Cast가 title에 Target 값이 1인 것 갯수/K(추천수 = 10)
# 전체 cast의 값을 저장
# 다 더한 값 / cast
# = 그거다
# 나 못해~ 머리로 짜

## Recall@4, Precision@4

- Recall@10= 
    추천된 10개 중 target=1인 실제 관련 항목 수 / 전체 target=1 항목 수 (Ground Truth)
 - Precision@10= 
    추천된 10개 중 target=1인 실제 관련 항목 수 / 10

In [29]:
recommender = MusicalRecommender()
recommender.load_and_preprocess_data()

In [30]:
# Step 1: actor와 genre의 조합 생성
unique_cast_names = data['cast'].unique()
# Step 2: 추천 결과를 담을 데이터
all_recommendations = []

In [31]:
top_n = 4
# Step 2: 추천 결과를 담을 데이터
all_recommendations = []
for cast_name in unique_cast_names:
    recommendations = recommender.recommend_for_cast(cast_name, top_n)
    if not recommendations.empty:
        all_recommendations.append(recommendations)

[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0

In [32]:
recommendations_df_4 = pd.concat(all_recommendations, ignore_index=True)

In [33]:
recommendations_df_4

Unnamed: 0,cast,title,score
0,김린,1025 일공이오,0.997864
1,김린,프리다 [코엑스],0.994065
2,김린,문나이트 [연천],0.968829
3,김린,컴프롬어웨이 [서울],0.955505
4,이지형,1025 일공이오,0.999480
...,...,...,...
13203,전지후,"갈라 콘서트, HAPPY VIBE(2차 공연)",0.999950
13204,권재훈,"10주년 기념공연, 레베카 [광주(앵콜)]",0.999979
13205,권재훈,"10주년 기념공연, 레베카 [성남(앵콜)]",0.999956
13206,권재훈,데스노트 [부산],0.999955


In [34]:
unique_scores = recommendations_df_4['score'].unique()
print("Unique scores:", unique_scores)

Unique scores: [0.99786395 0.9940649  0.9688289  0.95550454 0.99947995 0.99854994
 0.9916986  0.9875934  0.998734   0.99658954 0.98163646 0.9741742
 0.99976975 0.99967355 0.999038   0.9986875  0.9995702  0.9993635
 0.9992903  0.9984195  0.997815   0.9935192  0.96916115 0.95658314
 0.9993643  0.99801123 0.9903708  0.9869991  0.99975145 0.97058
 0.9697024  0.93651646 0.9994963  0.847484   0.84212536 0.70439476
 0.9997126  0.9473996  0.94530815 0.88025403 0.99940306 0.8218627
 0.81596375 0.6568346  0.9997623  0.9996674  0.99951327 0.9993945
 0.9996865  0.93171036 0.9282989  0.8462217  0.9996818  0.9395591
 0.93784636 0.8693214  0.999947   0.9997841  0.99977165 0.99969834
 0.99995303 0.99988484 0.9998842  0.999864   0.999975   0.999938
 0.9999379  0.99992883 0.9999776  0.9999405  0.99994016 0.9999308
 0.9999695  0.99991846 0.9999184  0.9999079  0.9999436  0.9998708
 0.9998582  0.9998551  0.9998797  0.9998108  0.9997409  0.9997053
 0.79723036 0.19545883 0.1354455  0.11179111 0.9991515  0.99

In [35]:
merged_df_4 = recommendations_df_4.merge(data, on=['cast', 'title'], how='inner')

# Filter rows where target == 1 in the data DataFrame
target_1_count_4 = merged_df_4[merged_df_4['target'] == 1].shape[0]

# Print the count of rows where target == 1
print("Number of matching rows where target is 1:", target_1_count_4)

Number of matching rows where target is 1: 40


In [36]:
# Step 7: 전체 Ground Truth (target=1) 개수
total_target_1_count_4 = data[data['target'] == 1].shape[0]
total_target_1_count_4

8550

In [37]:
# Step 8: Recall@10 계산
recall_golbangE_10 = target_1_count_4 / total_target_1_count_4

# Step 9: Precision@10 계산
precision_golbangE_10 = target_1_count_4 / (4 * len(unique_cast_names))

golbange_K = target_1_count_4 / (4 * len(unique_cast_names))

# 결과 출력
print(f"Recall@4: {recall_golbangE_10:.4f}")
print(f"Precision@4: {precision_golbangE_10:.4f}")
print(f'@K: {golbange_K: .4f}')

Recall@4: 0.0047
Precision@4: 0.0030
@K:  0.0030


## 유명 배우만 찍어보기

In [38]:
cast_name = ["정선아", "이정열", "민우혁", "양준모", 
             "이해준", "김도형", "윤형렬", "류정한",
             "박규원", "신영숙", "박우빈" ] # 예시 캐스트 이름
top_n = 4 # 추천할 상위 10개 뮤지컬

In [39]:
# 추천 결과를 저장할 딕셔너리
recommendations = {}

# 각 배우 이름에 대해 함수 호출
for name in cast_name:
    # recommender.recommend_for_cast를 각 이름에 대해 호출
    result = recommender.recommend_for_cast([name], top_n)
    
    # 반환된 결과를 적절히 처리 (리스트 형태로 변환)
    if isinstance(result, pd.DataFrame):  # 결과가 DataFrame인 경우
        recommendations[name] = result.to_dict(orient='records')  # 레코드 리스트로 변환
    else:
        recommendations[name] = result  # 리스트나 다른 형식일 경우 그대로 저장


[1m23/51[0m [32m━━━━━━━━━[0m[37m━━━━━━━━━━━[0m [1m0s[0m 2ms/step 

  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)


[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m29/51[0m [32m━━━━━━━━━━━[0m[37m━━━━━━━━━[0m [1m0s[0m 2ms/step 

  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)


[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step


  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)


[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step


  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)


[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step


  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)


[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step


  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)


[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step


  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)


[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
[1m29/51[0m [32m━━━━━━━━━━━[0m[37m━━━━━━━━━[0m [1m0s[0m 2ms/step 

  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)


[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m27/51[0m [32m━━━━━━━━━━[0m[37m━━━━━━━━━━[0m [1m0s[0m 2ms/step 

  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)


[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step


In [40]:
recommendation_list = []
for cast, musicals in recommendations.items():
    for musical in musicals:
        recommendation_list.append(musical)

df_recommendations = pd.DataFrame(recommendation_list)

In [41]:
unique_scores = df_recommendations['score'].unique()
print("Unique scores:", unique_scores)

Unique scores: [0.99997932 0.99995714 0.99995631 0.9999519  0.99995941 0.99994624
 0.99993783 0.99993759 0.99997002 0.99992573 0.99992549 0.99991488
 0.99995965 0.99994642 0.99993914 0.99993795 0.99997872 0.99995583
 0.99995506 0.99995053 0.99988794 0.99986464 0.99982941 0.99981552
 0.99997944 0.99995762 0.99995685 0.99995255 0.99994701 0.99978411
 0.99977165 0.99969834 0.99997896 0.99995637 0.99995559 0.99995112
 0.99997759 0.99994051 0.99994016 0.9999308  0.99995577 0.999955
 0.99995047]


In [42]:
merged_df_4 = df_recommendations.merge(data, on=['cast', 'title'], how='inner')

In [43]:
# Filter rows where target == 1 in the data DataFrame
target_1_count_4 = merged_df_4[merged_df_4['target'] == 1].shape[0]

In [44]:
# Print the count of rows where target == 1
print("Number of matching rows where target is 1:", target_1_count_4)

Number of matching rows where target is 1: 3


In [45]:
merged_df_4

Unnamed: 0,cast,title,score,genre,percentage,ticket_price,target
0,이정열,컴프롬어웨이 [서울],0.999938,역사,0.4546,0.9133,1
1,류정한,"10주년 기념공연, 레베카 [성남(앵콜)]",0.999784,대학로,0.7483,0.9412,1
2,신영숙,"10주년 기념공연, 레베카 [성남(앵콜)]",0.999941,대학로,0.7483,0.9412,1


In [46]:
# Step 8: Recall@10 계산
recall_golbangE_10 = target_1_count_4 / total_target_1_count_4

# Step 9: Precision@10 계산
precision_golbangE_10 = target_1_count_4 / (4 * len(unique_cast_names))

golbange_K = target_1_count_4 / (4 * len(unique_cast_names))

# 결과 출력
print(f"Recall@4: {recall_golbangE_10:.4f}")
print(f"Precision@4: {precision_golbangE_10:.4f}")
print(f'@K: {golbange_K: .4f}')

Recall@4: 0.0004
Precision@4: 0.0002
@K:  0.0002


## 개인 확인

In [47]:
cast_name = "양준모"  # 예시 캐스트 이름
top_n = 10 # 추천할 상위 10개 뮤지컬
recommendations = recommender.recommend_for_cast(cast_name, top_n)
print(recommendations)

[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
     cast                              title     score
0     양준모                          1025 일공이오  0.999960
8152  양준모                          프리다 [코엑스]  0.999946
7509  양준모                        컴프롬어웨이 [서울]  0.999939
3007  양준모                          문나이트 [연천]  0.999938
3466  양준모     베토벤: Beethoven Secret SEASON 2  0.999934
1569  양준모                     노트르담 드 파리 [창원]  0.999934
81    양준모                 15주년 기념공연, 영웅 [수원]  0.999928
2455  양준모                루드윅: 베토벤 더 피아노 [동해]  0.999927
468   양준모                     UNKNOW [인천 서구]  0.999925
7080  양준모  지저스 크라이스트 수퍼스타 50주년 기념 한국 공연 [안동]  0.999925


In [48]:
merged_df_4 = recommendations.merge(data, on=['cast', 'title'], how='inner')

In [49]:
# Filter rows where target == 1 in the data DataFrame
target_1_count_4 = merged_df_4[merged_df_4['target'] == 1].shape[0]

In [50]:
# Print the count of rows where target == 1
print("Number of matching rows where target is 1:", target_1_count_4)

Number of matching rows where target is 1: 2


In [51]:
merged_df_4

Unnamed: 0,cast,title,score,genre,percentage,ticket_price,target
0,양준모,노트르담 드 파리 [창원],0.999934,역사,0.4075,0.945,1
1,양준모,"15주년 기념공연, 영웅 [수원]",0.999928,역사,0.8948,0.9133,1
