In [64]:
import os
print(os.path.abspath(os.path.join(os.getcwd(), '..')))

c:\SKN_3_MyProject\SKN_03_FINAL


In [65]:
import sys
sys.path.append(os.path.join(os.getcwd(), 'utils'))
sys.path.append(os.path.join(os.getcwd(), '..'))

In [66]:
from tensorflow.keras.models import load_model
import sys
import os
import config

# 사용자 정의 레이어와 손실 함수 등록
from utils.DeepFM import FMInteraction, weighted_loss  # FMInteraction과 weighted_loss가 정의된 파일에서 가져오기

# 모델 로드 시 custom_objects에 사용자 정의 객체 추가
model = load_model(config.save_model_path, 
                   custom_objects={'FMInteraction': FMInteraction, 'weighted_loss': weighted_loss})




In [67]:
from sklearn.preprocessing import LabelEncoder
import pandas as pd

# 범주형 자료만 (코드)

# 연속형 자료 (모델)

In [68]:
class MusicalRecommender:
    def __init__(self):
        self.data = None
        self.original_data = None
        self.model = None
        self.label_encoders = {}
    
    def load_and_preprocess_data(self):
        # 데이터 로드 및 전처리
        self.data = pd.read_json(config.df_with_negatives_path, lines=True)  # Update the path to a relative one if necessary
        self.original_data = self.data.copy()
        
        categorical_features = ['title', 
                                'cast', 
                                'genre'
                                ]
        
        # 범주형 변수 레이블 인코딩
        for feature in categorical_features:
            self.label_encoders[feature] = LabelEncoder()
            self.data[feature] = self.label_encoders[feature].fit_transform(self.data[feature].astype(str))


    def recommend_for_cast(self, cast_name, top_n=10):
        # 1. 캐스트 이름을 인코딩
        if cast_name not in self.label_encoders['cast'].classes_:
            print(f"Error: {cast_name} is not in the dataset.")
            return []
        
        cast_encoded = self.label_encoders['cast'].transform([cast_name])[0]
        
        # 2. 전체 뮤지컬 후보 생성
        musical_candidates = self.original_data[['title', 
                                                 'genre',
                                                #  'percentage', 
                                                #  'ticket_price'
                                                ]].drop_duplicates()
        musical_candidates['cast'] = cast_encoded  # 모든 뮤지컬에 해당 캐스트를 넣음
        
        # 3. 모델 입력 데이터 준비
        input_data = {
            'title': self.label_encoders['title'].transform(musical_candidates['title']),
            'cast': musical_candidates['cast'],
            'genre': self.label_encoders['genre'].transform(musical_candidates['genre']),
            # 'percentage' : musical_candidates['percentage'],
            # 'ticket_price' : musical_candidates['ticket_price']
        }
        
        # 4. 예측 점수 계산
        predictions = model.predict([
            input_data['title'],
            input_data['cast'],
            input_data['genre'],
            # input_data['percentage'],
            # input_data['ticket_price']
        ])
        
        # 5. 점수 조정
        musical_candidates['score'] = predictions
        # 5. 상위 N개 추천
        top_recommendations = musical_candidates.sort_values(by='score', ascending=False).head(top_n)
        
        # 6. 디코딩된 값으로 반환
        top_recommendations['cast'] = self.label_encoders['cast'].inverse_transform(top_recommendations['cast'])
        
        return top_recommendations[['cast', 'title', 'score']]

In [69]:
recommender = MusicalRecommender()
recommender.load_and_preprocess_data()
# 학습된 모델 로드
# recommender.load_model("C:/SKN_3_MyProject/SKN_03_FINAL/Data/Model/Recommend.h5")

In [70]:
data = pd.read_json(config.df_with_negatives_path, lines=True)

In [71]:
# Step 1: cast 유니크 값
unique_cast_names = data['cast'].unique()

# Step 2: 담을 데이터
all_recommendations = []

In [72]:
# top_n 갯수 설정
top_n = 10

In [73]:
for cast_name in unique_cast_names:
    recommendations = recommender.recommend_for_cast(cast_name, top_n)
    if not recommendations.empty:
        all_recommendations.append(recommendations)

[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m 1/45[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 15ms/step



[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 756us/step
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 712us/step
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 870us/step
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 877us/step
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 975us/step
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 942us/step
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 991us/step
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 739us/step
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 880us/step
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m45/45[0m [32m━━━━━━━━━━━━

In [74]:
recommendations_df = pd.concat(all_recommendations, ignore_index=True)

In [75]:
# 배우수
len(unique_cast_names)

408

In [76]:
# # Unique한 점수 값들
# unique_scores = recommendations_df['score'].unique()
# print("Unique scores:", unique_scores)

In [77]:
merged_df = recommendations_df.merge(data, on=['cast', 'title'], how='inner')

# Filter rows where target == 1 in the data DataFrame
target_1_count = merged_df[merged_df['target'] == 1].shape[0]

# Print the count of rows where target == 1
print("Number of matching rows where target is 1:", target_1_count)

Number of matching rows where target is 1: 49


In [78]:
merged_df

Unnamed: 0,cast,title,score,genre,percentage,ticket_price,target
0,류정한,"10주년 기념공연, 레베카 [서울(앵콜)]",0.99996,가족,0.6673,0.734,1
1,오만석,"10주년 기념공연, 레베카 [서울(앵콜)]",0.999987,가족,0.6673,0.734,1
2,신영숙,"10주년 기념공연, 레베카 [서울(앵콜)]",0.999987,가족,0.6673,0.734,1
3,장은아,헤드윅,0.999966,대학로,0.7885,0.6749,1
4,홍지민,온스테이지 [정읍],0.999986,지역|창작,0.9855,0.0838,1
5,정민,라흐마니노프,0.999966,역사,0.3383,0.3419,1
6,홍지수,"제17회 DIMF 창작지원작, The Tempest [대구]",0.999823,신화,,0.2315,1
7,김태윤,나도 해피엔딩을 쓰고 싶어,0.999957,대학로,0.0763,0.3202,1
8,송원근,Via Air Mail (비아 에어 메일) [대학로],0.999944,대학로,0.3745,0.3498,1
9,성태준,Via Air Mail (비아 에어 메일) [대학로],0.999905,대학로,0.3745,0.3498,1


In [79]:
# Step 7: 전체 Ground Truth (target=1) 개수
total_target_1_count = data[data['target'] == 1].shape[0]
total_target_1_count

4182

In [80]:
# Step 8: Recall@10 계산
recall_golbangE_10 = target_1_count / total_target_1_count

# Step 9: Precision@10 계산
precision_golbangE_10 = target_1_count / (10 * len(unique_cast_names))

golbange_K = target_1_count / (10 * len(unique_cast_names))

# 결과 출력
print(f"Recall@10: {recall_golbangE_10:.4f}")
print(f"Precision@10: {precision_golbangE_10:.4f}")
print(f'@K: {golbange_K: .4f}')

Recall@10: 0.0117
Precision@10: 0.0120
@K:  0.0120


In [81]:
# 10710 / 10 = a = 1071
# L = 10710개에서 target값이 1인 것 개수
# L / (10 * a) = 평가값

# 해당 Cast가 title에 Target 값이 1인 것 갯수/K(추천수 = 10)
# 전체 cast의 값을 저장
# 다 더한 값 / cast
# = 그거다
# 나 못해~ 머리로 짜

## Recall@4, Precision@4

- Recall@10= 
    추천된 10개 중 target=1인 실제 관련 항목 수 / 전체 target=1 항목 수 (Ground Truth)
 - Precision@10= 
    추천된 10개 중 target=1인 실제 관련 항목 수 / 10

In [82]:
recommender = MusicalRecommender()
recommender.load_and_preprocess_data()

In [83]:
# Step 1: actor와 genre의 조합 생성
unique_cast_names = data['cast'].unique()
# Step 2: 추천 결과를 담을 데이터
all_recommendations = []

In [84]:
top_n = 4
# Step 2: 추천 결과를 담을 데이터
all_recommendations = []
for cast_name in unique_cast_names:
    recommendations = recommender.recommend_for_cast(cast_name, top_n)
    if not recommendations.empty:
        all_recommendations.append(recommendations)

[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 628us/step
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 615us/step
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 693us/step
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 646us/step
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 622us/step
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 598us/step
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 603us/step
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 585us/step
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 622us/step
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 683us/step
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 666us/step
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 654us/step
[1m45/45[0m [32m━━━━━━━

In [85]:
recommendations_df_4 = pd.concat(all_recommendations, ignore_index=True)

In [86]:
recommendations_df_4

Unnamed: 0,cast,title,score
0,류정한,여신님이 보고 계셔 10th Anniversary [부산],0.999967
1,류정한,원스 어게인,0.999966
2,류정한,키다리 아저씨 [대학로],0.999965
3,류정한,데미안,0.999965
4,민영기,"육영수, 그 시절의 아카시아 [대구]",0.999990
...,...,...,...
1627,박중리,"제18회 DIMF 창작지원작, 시지프스 [대구]",0.999966
1628,조흠,할란카운티,0.999948
1629,조흠,레드북 [부산],0.999946
1630,조흠,배니싱 [안양],0.999938


In [87]:
# unique_scores = recommendations_df_4['score'].unique()
# print("Unique scores:", unique_scores)

In [88]:
merged_df_4 = recommendations_df_4.merge(data, on=['cast', 'title'], how='inner')

# Filter rows where target == 1 in the data DataFrame
target_1_count_4 = merged_df_4[merged_df_4['target'] == 1].shape[0]

# Print the count of rows where target == 1
print("Number of matching rows where target is 1:", target_1_count_4)

Number of matching rows where target is 1: 16


In [89]:
# Step 7: 전체 Ground Truth (target=1) 개수
total_target_1_count_4 = data[data['target'] == 1].shape[0]
total_target_1_count_4

4182

In [90]:
# Step 8: Recall@10 계산
recall_golbangE_10 = target_1_count_4 / total_target_1_count_4

# Step 9: Precision@10 계산
precision_golbangE_10 = target_1_count_4 / (4 * len(unique_cast_names))

golbange_K = target_1_count_4 / (4 * len(unique_cast_names))

# 결과 출력
print(f"Recall@4: {recall_golbangE_10:.4f}")
print(f"Precision@4: {precision_golbangE_10:.4f}")
print(f'@K: {golbange_K: .4f}')

Recall@4: 0.0038
Precision@4: 0.0098
@K:  0.0098


## 유명 배우만 찍어보기

In [91]:
cast_name = ["정선아", "이정열", "민우혁", "양준모", 
             "이해준", "김도형", "윤형렬", "류정한",
             "박규원", "신영숙", "박우빈" ] # 예시 캐스트 이름
top_n = 4 # 추천할 상위 10개 뮤지컬

In [92]:
# 추천 결과를 저장할 딕셔너리
recommendations = {}

# 각 배우 이름에 대해 함수 호출
for name in cast_name:
    # recommender.recommend_for_cast를 각 이름에 대해 호출
    result = recommender.recommend_for_cast([name], top_n)
    
    # 반환된 결과를 적절히 처리 (리스트 형태로 변환)
    if isinstance(result, pd.DataFrame):  # 결과가 DataFrame인 경우
        recommendations[name] = result.to_dict(orient='records')  # 레코드 리스트로 변환
    else:
        recommendations[name] = result  # 리스트나 다른 형식일 경우 그대로 저장


[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 651us/step
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 678us/step


  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)


[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 909us/step
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 645us/step
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 651us/step


  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)


[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 641us/step
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 622us/step
[1m22/45[0m [32m━━━━━━━━━[0m[37m━━━━━━━━━━━[0m [1m0s[0m 2ms/step 

  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)


[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 842us/step
[1m 1/45[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 15ms/step

  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)


[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 665us/step
Error: ['박우빈'] is not in the dataset.


In [93]:
recommendation_list = []
for cast, musicals in recommendations.items():
    for musical in musicals:
        recommendation_list.append(musical)

df_recommendations = pd.DataFrame(recommendation_list)

In [94]:
# unique_scores = df_recommendations['score'].unique()
# print("Unique scores:", unique_scores)

In [95]:
merged_df_4 = df_recommendations.merge(data, on=['cast', 'title'], how='inner')

In [96]:
# Filter rows where target == 1 in the data DataFrame
target_1_count_4 = merged_df_4[merged_df_4['target'] == 1].shape[0]

In [97]:
# Print the count of rows where target == 1
print("Number of matching rows where target is 1:", target_1_count_4)

Number of matching rows where target is 1: 1


In [98]:
merged_df_4

Unnamed: 0,cast,title,score,genre,percentage,ticket_price,target
0,이해준,틱틱붐: LOUDER THAN WORDS! 쇼케이스,0.999963,대학로,0.9735,0.1133,1


In [99]:
# Step 8: Recall@10 계산
recall_golbangE_10 = target_1_count_4 / total_target_1_count_4

# Step 9: Precision@10 계산
precision_golbangE_10 = target_1_count_4 / (4 * len(unique_cast_names))

golbange_K = target_1_count_4 / (4 * len(unique_cast_names))

# 결과 출력
print(f"Recall@4: {recall_golbangE_10:.4f}")
print(f"Precision@4: {precision_golbangE_10:.4f}")
print(f'@K: {golbange_K: .4f}')

Recall@4: 0.0002
Precision@4: 0.0006
@K:  0.0006


## 개인 확인

In [100]:
cast_name = "양준모"  # 예시 캐스트 이름
top_n = 10 # 추천할 상위 10개 뮤지컬
recommendations = recommender.recommend_for_cast(cast_name, top_n)
print(recommendations)

[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 636us/step
     cast                                              title     score
3477  양준모  창작뮤지컬어워드 NEXT, 니에블라 & 죽거나, 죽이거나 & Behind the Moon  0.999974
2796  양준모                                       연남동 빙굴빙굴 빨래방  0.999974
3836  양준모                       틱틱붐: LOUDER THAN WORDS! 쇼케이스  0.999967
3459  양준모                                  집들이 콘서트, #38. 로기수  0.999965
615   양준모                                     나도 해피엔딩을 쓰고 싶어  0.999964
207   양준모                      Via Air Mail (비아 에어 메일) [대학로]  0.999964
4065  양준모                                                헤드윅  0.999964
3347  양준모                    제8회 한국뮤지컬어워즈, 뮤이어(MU:Year) 스테이지  0.999962
1638  양준모                                              바톤콘서트  0.999959
1059  양준모                                   라이브 콘서트, The 슈또풍  0.999959


In [101]:
merged_df_4 = recommendations.merge(data, on=['cast', 'title'], how='inner')

In [102]:
# Filter rows where target == 1 in the data DataFrame
target_1_count_4 = merged_df_4[merged_df_4['target'] == 1].shape[0]

In [103]:
# Print the count of rows where target == 1
print("Number of matching rows where target is 1:", target_1_count_4)

Number of matching rows where target is 1: 0


In [104]:
merged_df_4

Unnamed: 0,cast,title,score,genre,percentage,ticket_price,target
