# 🔗 문장 임베딩 기반 리뷰 유사도 분석 (SentenceBERT)

In [None]:

# 설치 (최초 1회 필요)
# !pip install sentence-transformers pandas scikit-learn


In [None]:
# Install the sentence-transformers package (imported later as `sentence_transformers`).
!pip install sentence-transformers

In [None]:

import pandas as pd

# Load the labelled review data and keep only the text and label columns,
# discarding rows where either one is missing.
df = pd.read_csv("36000_reviews_label.csv")
df = df[['sentence', 'label']].dropna()

# Keep only rows whose label is one of the three sentiment classes.
df = df[df['label'].isin(['긍정', '부정', '중립'])]

# Small practice sample. The index is reset *after* all filtering so that it
# is a contiguous 0..n-1 range: later cells look rows up by integer, and a
# gap left by the label filter would make those lookups miss or hit the
# wrong row.
df = df.head(300).reset_index(drop=True)
df.head()


In [None]:
# Install PyTorch; no version is pinned here.
!pip install torch

In [None]:
# Remove the listed packages; -y skips pip's confirmation prompt.
!pip uninstall torch torchvision torchaudio sentence-transformers triton -y

In [None]:
# Install pinned versions: torch/torchvision/torchaudio together, then
# sentence-transformers, then triton.
# NOTE(review): triton is pinned separately from torch -- confirm that
# triton==2.0.0 is compatible with torch==2.2.2.
!pip install torch==2.2.2 torchvision==0.17.2 torchaudio==2.2.2
!pip install sentence-transformers==2.2.2
!pip install triton==2.0.0

In [None]:
!pip uninstall torch torchvision torchaudio
!pip uninstall sentence-transformers

In [None]:
from sentence_transformers import SentenceTransformer

# Load the pretrained multilingual sentence-embedding model.
model = SentenceTransformer('sentence-transformers/distiluse-base-multilingual-cased-v1')

# encode() is documented to take a string or a list of strings. Passing the
# pandas Series directly makes its integer lookups go through the Series
# index (label-based), which fails when the index is not 0..n-1, so hand it
# a plain list instead.
review_sentences = df['sentence'].tolist()
embeddings = model.encode(review_sentences, convert_to_tensor=True)

print("임베딩 완료! 리뷰 개수:", len(embeddings))


In [None]:

from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

# Pick one review and print the 5 reviews most similar to it.
target_idx = 0
top_k = 5

# scikit-learn operates on NumPy arrays, so move the embedding tensor to the
# CPU and convert it once (a tensor living on the GPU cannot be converted
# implicitly).
embedding_matrix = embeddings.detach().cpu().numpy()
target_vec = embedding_matrix[target_idx].reshape(1, -1)

# Cosine similarity between the target review and every review (itself included).
cos_scores = cosine_similarity(target_vec, embedding_matrix)[0]

# Rank from most to least similar and drop the target explicitly; skipping
# the first rank would drop the wrong row when a duplicate review ties with it.
ranked_indices = np.argsort(cos_scores)[::-1]
top_indices = [i for i in ranked_indices if i != target_idx][:top_k]

# The review text is in the 'sentence' column. Use positional lookup so the
# rows line up with the rows of the embedding matrix regardless of df's index.
review_texts = df['sentence']

print("🔍 기준 문장:")
print(review_texts.iloc[target_idx])
print("\n📍 유사 리뷰 Top 5:")
for i in top_indices:
    print(f"- ({cos_scores[i]:.3f})", review_texts.iloc[i])
