In [None]:
# Mount Google Drive at /content/drive so later cells can read files stored there.
# This relies on the google.colab package, i.e. the notebook is expected to run on Colab.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import pandas as pd
import numpy as np
# Load the movie review table from the mounted Drive folder.
# The path is absolute, so the Drive mount must have succeeded before this cell runs.
data = pd.read_csv('/content/drive/MyDrive/기학팀플/movie.csv')

In [None]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error
import numpy as np
from tqdm import tqdm

# Critic x movie rating matrix: one row per critic, one column per movie_id.
# Cells without a review are filled with 0.
user_movie_matrix = (
    data
    .pivot_table(values='review_score_cleaned', index='critic_name', columns='movie_id')
    .fillna(0)
)

# Pairwise cosine similarity between critics, labelled with the critic names
# on both axes so it can be looked up by name.
user_similarity = cosine_similarity(user_movie_matrix)
user_similarity_df = pd.DataFrame(
    user_similarity,
    index=user_movie_matrix.index,
    columns=user_movie_matrix.index,
)

Recommended Movies for Ben McEachen:
    movie_id                                    movie_title     score
0       4489                              Fantastic Mr. Fox  0.132506
1       7061                                       Kick-Ass  0.117671
2        195                             500 Days of Summer  0.113480
3       6460                                      Inception  0.108684
4       6597                                     Iron Man 2  0.106882
5       1332                                         Avatar  0.096764
6       6577                                       Invictus  0.094177
7       5017                                   Funny People  0.091779
8      13761                         The Invention of Lying  0.089653
9       5658  Harry Potter and the Deathly Hallows - Part 1  0.088599
10      6497                           Inglourious Basterds  0.087991
11     16995                                     Zombieland  0.087462
12     14598                      The Princess and th

Computing Precision, Recall, and F1: 100%|██████████| 4415/4415 [11:05<00:00,  6.63it/s]

Precision: 0.9243140180717898
Recall: 0.10727342697710217
F1 Score: 0.1922364076791117





In [None]:
def recommend_movies_user_based(user_name, user_movie_matrix, user_similarity_df, movie_titles, n_recommendations=1500, top_n_users=1500):
    """Recommend unseen movies to ``user_name`` from similar users' ratings.

    Each movie is scored with the mean of all users' ratings weighted by the
    squared similarity to ``user_name``; only the ``top_n_users`` highest
    similarities are kept, every other user gets weight 0. The ranking is
    taken straight from ``user_similarity_df[user_name]``; the target user is
    not removed from it.

    Parameters
    ----------
    user_name : label of a row in ``user_movie_matrix``.
    user_movie_matrix : DataFrame, users as rows, movies as columns.
    user_similarity_df : DataFrame of user-to-user similarities, indexable by
        ``user_similarity_df[user_name]``.
    movie_titles : DataFrame with ``movie_id`` and ``movie_title`` columns.
    n_recommendations : maximum number of movies returned.
    top_n_users : number of most similar users whose ratings are used.

    Returns
    -------
    DataFrame with columns ``movie_id``, ``movie_title`` and ``score``,
    ordered by descending score.

    Raises
    ------
    ValueError
        If ``user_name`` is not a row of ``user_movie_matrix``.
    """
    if user_name not in user_movie_matrix.index:
        raise ValueError(f"User '{user_name}' does not exist in the user-movie matrix.")

    own_ratings = user_movie_matrix.loc[user_name]

    # Keep the top-N similarities, then spread them back over the full user
    # index (weight 0 for everyone else) so they line up with the matrix rows.
    ranked_similarity = user_similarity_df[user_name].sort_values(ascending=False)
    neighbour_weights = ranked_similarity.iloc[:top_n_users]
    neighbour_weights = neighbour_weights.reindex(user_movie_matrix.index).fillna(0)

    # Squaring the similarity emphasises the closest neighbours.
    squared_weights = neighbour_weights ** 2
    movie_scores = user_movie_matrix.T.dot(squared_weights).div(squared_weights.sum())

    # Drop every movie the user already rated above 0.
    rated_movies = own_ratings[own_ratings > 0].index
    not_yet_rated = ~own_ratings.index.isin(rated_movies)
    best_scores = movie_scores[not_yet_rated].nlargest(n_recommendations)

    # Attach titles; title rows are de-duplicated first so the left merge
    # cannot multiply recommendation rows.
    unique_titles = movie_titles.drop_duplicates(subset='movie_id', keep='first')
    result = (
        best_scores
        .reset_index()
        .rename(columns={0: 'score'})
        .merge(unique_titles, how='left', left_on='movie_id', right_on='movie_id')
        .drop_duplicates(subset=['movie_id'])
    )

    return result[['movie_id', 'movie_title', 'score']]


In [None]:
# Hold out 20% of the (critic, movie, score) rows for evaluation.
# random_state is fixed so the split is the same on every run.
train_data, test_data = train_test_split(
    data.loc[:, ['critic_name', 'movie_id', 'review_score_cleaned']],
    test_size=0.2,
    random_state=42,
)

In [None]:
# Run the recommender for one critic and show the five best-scored movies.
user_name = "Ben McEachen"
recommended_movies = recommend_movies_user_based(
    user_name=user_name,
    user_movie_matrix=user_movie_matrix,
    user_similarity_df=user_similarity_df,
    movie_titles=data[['movie_id', 'movie_title']],
    n_recommendations=1500,
    top_n_users=1500,
)
print(recommended_movies.head(5))


   movie_id           movie_title     score
0      6460             Inception  0.304690
1     14938    The Social Network  0.278743
2     11693             Star Trek  0.270233
3     16096                    Up  0.268978
4      6497  Inglourious Basterds  0.261622


In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error
import numpy as np

# Precision, recall, F1, RMSE and MAE for the user-based predictions
def precision_recall_f1_rmse_mae(user_movie_matrix, user_similarity_df, test_data, threshold=0.01, n_recommendations=1500, top_n_users=1500):
    """Score similarity-weighted rating predictions against held-out ratings.

    For every critic in ``test_data`` that is known to both
    ``user_movie_matrix`` and ``user_similarity_df``, a score per movie is
    computed as the similarity-weighted mean of all critics' ratings, using
    only the ``top_n_users`` highest similarities (weight 0 for the rest).
    Each held-out rating is then compared with that score (RMSE / MAE) and
    with membership in the critic's ``n_recommendations`` best-scored movies
    (precision / recall / F1).

    Parameters
    ----------
    user_movie_matrix : DataFrame, critics as rows, movie ids as columns.
    user_similarity_df : DataFrame of critic-to-critic similarities,
        indexable by ``user_similarity_df[critic]``.
    test_data : DataFrame with ``critic_name``, ``movie_id`` and
        ``review_score_cleaned`` columns.
    threshold : a held-out rating counts as relevant when it is >= this value.
    n_recommendations : size of the per-critic recommendation list.
    top_n_users : number of most similar critics used for the prediction.

    Returns
    -------
    tuple ``(precision, recall, f1, rmse, mae)``. Precision, recall and F1
    fall back to 0 when their denominator is 0; RMSE and MAE are NaN when no
    test row could be evaluated.

    Notes
    -----
    * Weights are the raw similarities (not squared).
    * The neighbour set comes straight from ``user_similarity_df[user]``; the
      critic being evaluated is not removed from it. If ``user_movie_matrix``
      was built from data that contains the ``test_data`` rows, each held-out
      rating contributes to its own prediction. Pass a matrix built from the
      training split to avoid that.
    * Movies missing from ``user_movie_matrix`` are predicted as 0.
    """
    true_positives = 0
    false_positives = 0
    false_negatives = 0

    all_actuals = []
    all_predictions = []

    for user in tqdm(test_data['critic_name'].unique(), desc="Computing Precision, Recall, F1, RMSE, and MAE"):
        # Critics unknown to the model cannot be scored.
        if user not in user_movie_matrix.index or user not in user_similarity_df.columns:
            continue

        # Held-out ratings of this critic
        user_actual_ratings = test_data[test_data['critic_name'] == user]

        # Top-N similarities, aligned with the matrix rows (weight 0 elsewhere)
        similar_users = user_similarity_df[user].sort_values(ascending=False).iloc[:top_n_users]
        similar_users = similar_users.reindex(user_movie_matrix.index).fillna(0)

        weight_total = similar_users.sum()
        if weight_total == 0:
            # 0/0 would turn every prediction into NaN, which the sklearn
            # metrics below reject. Treat it as "no prediction": score 0 and
            # nothing recommended.
            weighted_ratings = None
            recommended = frozenset()
        else:
            weighted_ratings = user_movie_matrix.T.dot(similar_users).div(weight_total)
            # The recommendation list is the same for every row of this
            # critic, so build it once instead of once per comparison.
            recommended = frozenset(weighted_ratings.nlargest(n_recommendations).index)

        for movie_id, actual in zip(user_actual_ratings['movie_id'], user_actual_ratings['review_score_cleaned']):
            # A movie without a predicted score is treated as 0
            predicted = 0 if weighted_ratings is None else weighted_ratings.get(movie_id, 0)

            all_actuals.append(actual)
            all_predictions.append(predicted)

            is_recommended = movie_id in recommended
            if actual >= threshold and is_recommended:
                true_positives += 1
            elif actual < threshold and is_recommended:
                false_positives += 1
            elif actual >= threshold and not is_recommended:
                false_negatives += 1

    precision = true_positives / (true_positives + false_positives) if (true_positives + false_positives) > 0 else 0
    recall = true_positives / (true_positives + false_negatives) if (true_positives + false_negatives) > 0 else 0
    f1 = (2 * precision * recall) / (precision + recall) if (precision + recall) > 0 else 0

    # RMSE and MAE; the sklearn metrics raise on empty input, so guard it
    if all_actuals:
        rmse = np.sqrt(mean_squared_error(all_actuals, all_predictions))
        mae = mean_absolute_error(all_actuals, all_predictions)
    else:
        rmse = float('nan')
        mae = float('nan')

    return precision, recall, f1, rmse, mae

# Run the evaluation.
# The rating matrix and the similarity table are rebuilt from train_data only.
# user_movie_matrix / user_similarity_df are derived from the full `data`
# frame, which still contains the ratings held out in test_data; because each
# critic is part of their own neighbour set, scoring with them would let every
# held-out rating contribute to its own prediction.
train_user_movie_matrix = train_data.pivot_table(index='critic_name', columns='movie_id', values='review_score_cleaned').fillna(0)
train_user_similarity_df = pd.DataFrame(
    cosine_similarity(train_user_movie_matrix),
    index=train_user_movie_matrix.index,
    columns=train_user_movie_matrix.index,
)

precision, recall, f1, rmse, mae = precision_recall_f1_rmse_mae(train_user_movie_matrix, train_user_similarity_df, test_data, threshold=0.01, n_recommendations=1500, top_n_users=1500)
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 Score: {f1}")
print(f"RMSE: {rmse}")
print(f"MAE: {mae}")


Computing Precision, Recall, F1, RMSE, and MAE: 100%|██████████| 4415/4415 [16:34<00:00,  4.44it/s]

Precision: 0.9975970023622689
Recall: 0.5472874337126854
F1 Score: 0.7068131318475198
RMSE: 0.5663638289579201
MAE: 0.5287731515047402





In [None]:
# Every review row written by the critic "Ben McEachen"
ben_movies = data.loc[data['critic_name'].eq('Ben McEachen')]
ben_movies

Unnamed: 0,critic_name,top_critic,review_type,review_content,movie_title,authors,actors,original_release_date,runtime,tomatometer_rating,...,production_company_category,directors_category,movie_info_keyword,author_id,actor_id,actors_split,actor_ids,movie_id,authors_split,predicted_emotion
0,Ben McEachen,0,1,Whether audiences will get behind The Lightnin...,Percy Jackson & the Olympians: The Lightning T...,"Craig Titley, Chris Columbus, Rick Riordan","Logan Lerman, Brandon T. Jackson, Alexandra Da...",2010-02-12,119.0,0.49,...,1,1,life war mother son gets,0,0,"['Logan Lerman', 'Brandon T. Jackson', 'Alexan...","[76058, 130933, 53933, 107930, 33787, 182302, ...",9737,"['Craig Titley', 'Chris Columbus', 'Rick Riord...",worry
2525,Ben McEachen,0,0,"Another disappointment from M, despite some di...",The Happening,M. Night Shyamalan,"Mark Wahlberg, Zooey Deschanel, John Leguizamo...",2008-06-11,89.0,0.17,...,1,1,love young soon wife john,80,84,"['Mark Wahlberg', 'Zooey Deschanel', 'John Leg...","[133589, 170858, 46348, 55762, 14460, 188220, ...",13538,['M. Night Shyamalan'],sadness
3859,Ben McEachen,0,1,"Hardly a massive gamble, Cameron Diaz and Asht...",What Happens in Vegas,Dana Fox,"Cameron Diaz, Ashton Kutcher, Jason Sudeikis, ...",2008-04-22,99.0,0.26,...,1,1,love group woman make work,115,123,"['Cameron Diaz', 'Ashton Kutcher', 'Jason Sude...","[172115, 125668, 170060, 199862, 106990, 60798...",16499,['Dana Fox'],neutral
4891,Ben McEachen,0,1,Kelly's most accessible ambiguous odyssey to d...,The Box,Richard Kelly,"Cameron Diaz, James Marsden, Frank Langella, J...",2009-11-06,113.0,0.44,...,1,2,lives love group young time,134,144,"['Cameron Diaz', 'James Marsden', 'Frank Lange...","[172115, 82968, 44219, 111295, 152515, 136387,...",12666,['Richard Kelly'],neutral
5255,Ben McEachen,0,1,Leonardo DiCaprio and Russell Crowe fire up Ri...,Body of Lies,William Monahan,"Leonardo DiCaprio, Russell Crowe, Mark Strong ...",2008-10-08,128.0,0.55,...,1,1,life finds help time secret,141,151,"['Leonardo DiCaprio', 'Russell Crowe', 'Mark S...","[24863, 110382, 111749, 46435, 150842, 165900,...",2099,['William Monahan'],fun
5945,Ben McEachen,0,1,"A bleak, depressing odyssey you can firmly adm...",The Road,Joe Penhall,"Viggo Mortensen, Kodi Smit-McPhee, Charlize Th...",2009-11-25,110.0,0.73,...,1,1,help young world man son,153,163,"['Viggo Mortensen', 'Kodi Smit-McPhee', 'Charl...","[14928, 56520, 105444, 132918, 123444, 44003, ...",14746,['Joe Penhall'],sadness
9239,Ben McEachen,0,0,Not going to live forever. Not going to light ...,Fame,"Aline Brosh McKenna, Michael Gore, Allison Bur...","Collins Pennie, Kristy Flores, Asher Book, Pau...",2009-09-25,107.0,0.24,...,1,2,years school york high men,221,236,"['Collins Pennie', 'Kristy Flores', 'Asher Boo...","[139775, 109796, 197977, 89029, 29440, 194305,...",4470,"['Aline Brosh McKenna', 'Michael Gore', 'Allis...",worry
41525,Ben McEachen,0,1,A fun-tastic adventure that virtually guarante...,Ice Age: Dawn of the Dinosaurs (Ice Age 3),Unknown,Unknown,2009-07-01,94.0,0.46,...,1,0,help young world named john,59,211,['Unknown'],[168214],6314,['Unknown'],happiness
41914,Ben McEachen,0,1,Despite massive production troubles including ...,The Wolfman,"David Self, Andrew Kevin Walker","Benicio Del Toro, Anthony Hopkins, Emily Blunt...",2010-02-12,102.0,0.34,...,1,1,young family home years save,1483,1645,"['Benicio Del Toro', 'Anthony Hopkins', 'Emily...","[127214, 149970, 188493, 202537, 172652, 12360...",15383,"['David Self', 'Andrew Kevin Walker']",joy
43406,Ben McEachen,0,1,While Scorsese overcooks the final stages of e...,Shutter Island,"Laeta Kalogridis, Dennis Lehane","Leonardo DiCaprio, Mark Ruffalo, Ben Kingsley,...",2010-02-19,138.0,0.68,...,1,1,love help new woman make,1507,1672,"['Leonardo DiCaprio', 'Mark Ruffalo', 'Ben Kin...","[24863, 119476, 159106, 88277, 136448, 136072,...",11212,"['Laeta Kalogridis', 'Dennis Lehane']",joy


In [None]:
# The critic's five highest-scored reviews
top_ben_movies = (
    ben_movies
    .sort_values(by='review_score_cleaned', ascending=False)
    .head(5)
)

# Keep only the columns needed to read the result
top_ben_movies_filtered = top_ben_movies.loc[:, ['critic_name', 'movie_title', 'review_score_cleaned']]

# Plain-text display of the result
print(top_ben_movies_filtered)

         critic_name        movie_title  review_score_cleaned
637741  Ben McEachen      Up in the Air                   1.0
622467  Ben McEachen        Toy Story 3                   1.0
576314  Ben McEachen  The King's Speech                   1.0
66966   Ben McEachen         The A-Team                   1.0
43406   Ben McEachen     Shutter Island                   0.9
