In [None]:
import os
import queue
import threading
import time, csv, pickle, warnings
import tkinter as tk
from tkinter import ttk, messagebox
from tkinter.filedialog import askdirectory

import pandas as pd
import plotly.graph_objects as go
import pyLDAvis
import pyLDAvis.gensim as gensimvis
from bs4 import BeautifulSoup
from gensim import corpora
from gensim.models import LdaModel, Word2Vec
from konlpy.tag import Hannanum
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.feature_extraction.text import CountVectorizer
from webdriver_manager.chrome import ChromeDriverManager

# Install a global "ignore" filter: with no category given, every warning
# raised after this point is suppressed for the rest of the session.
warnings.filterwarnings('ignore')


In [1]:
class YouTubeAnalyzerApp:
    """Tkinter window for entering a YouTube URL and an output folder.

    The window holds a URL entry, a save-directory entry with a browse
    button, a start button and a read-only log pane. Pressing the start
    button runs ``self.analyze`` on a background thread so the UI stays
    responsive.

    Threading: ``log_write`` may be called from any thread. It only puts
    the message on a thread-safe queue; the Text widget itself is updated
    exclusively on the Tk event-loop thread by ``_drain_log_queue``.
    """

    # Interval, in milliseconds, at which the UI thread flushes queued log lines.
    _LOG_POLL_MS = 100

    def __init__(self, root):
        """Build all widgets on ``root`` and start the log-flush timer.

        Args:
            root: The ``tk.Tk`` (or Toplevel) window that hosts the UI.
        """
        self.root = root
        self.root.title("YouTube 댓글 분석기")
        self.root.geometry("600x500")
        self.root.configure(bg="#f7f7f7")

        style = ttk.Style()
        style.theme_use("clam")

        self.url_var = tk.StringVar()
        self.path_var = tk.StringVar()

        # Messages produced by the worker thread wait here until the UI
        # thread picks them up.
        self._log_queue = queue.Queue()
        # The most recently started analysis thread, or None before the first run.
        self._worker = None

        # --- UI layout ---
        ttk.Label(root, text="YouTube URL", font=('Arial', 12)).pack(pady=10)
        ttk.Entry(root, textvariable=self.url_var, width=60).pack()

        ttk.Label(root, text="저장 경로 선택", font=('Arial', 12)).pack(pady=10)
        frame = ttk.Frame(root)
        frame.pack()
        ttk.Entry(frame, textvariable=self.path_var, width=50).pack(side=tk.LEFT)
        ttk.Button(frame, text="탐색", command=self.browse).pack(side=tk.LEFT, padx=5)

        ttk.Button(root, text="분석 시작", command=self.run_analysis, width=20).pack(pady=20)

        self.log = tk.Text(root, height=15, width=70)
        self.log.pack(pady=10)
        # Kept disabled so the user cannot type into the log pane.
        self.log.configure(state='disabled')

        self.root.after(self._LOG_POLL_MS, self._drain_log_queue)

    def browse(self):
        """Open a directory chooser and store the selection in the path entry."""
        path = askdirectory()
        # askdirectory() returns an empty value when the dialog is cancelled;
        # keep the previous entry in that case.
        if path:
            self.path_var.set(path)

    def log_write(self, msg):
        """Queue ``msg`` (plus a newline) for display in the log pane.

        Safe to call from any thread: no Tk call is made here. The line
        becomes visible the next time ``_drain_log_queue`` runs on the UI
        thread.

        Args:
            msg: Any object; it is formatted with ``str()`` semantics.
        """
        self._log_queue.put(f"{msg}\n")

    def _drain_log_queue(self):
        """Append all queued log lines to the Text widget, then reschedule itself.

        Runs on the Tk event-loop thread via ``root.after``.
        """
        pending = []
        try:
            while True:
                pending.append(self._log_queue.get_nowait())
        except queue.Empty:
            pass

        if pending:
            self.log.configure(state='normal')
            self.log.insert(tk.END, "".join(pending))
            self.log.see(tk.END)
            self.log.configure(state='disabled')

        self.root.after(self._LOG_POLL_MS, self._drain_log_queue)

    def run_analysis(self):
        """Start ``self.analyze`` on a background thread.

        If a previously started analysis thread is still alive, no new
        thread is started; a notice is logged instead, so repeated clicks
        cannot launch several runs writing to the same output files.
        """
        if self._worker is not None and self._worker.is_alive():
            self.log_write("⚠ 이미 분석이 진행 중입니다.")
            return
        self._worker = threading.Thread(target=self.analyze)
        self._worker.start()


In [3]:
    def analyze(self):
        """Collect the video's comments and run the text-mining pipeline.

        Steps, in order:
          1. Load the URL in Chrome, scroll until the page height stops
             growing, try to expand reply threads, and parse the comment
             texts with BeautifulSoup. Saved as ``total.csv``.
          2. Extract nouns of length >= 2 from each comment with Hannanum.
          3. Build the term-term matrix (``X.T @ X`` of the count matrix)
             and save it as ``ttm.csv``.
          4. Train a 5-topic LDA model; save ``lda_model.pkl`` and
             ``LDA시각화.html``.
          5. Train a Word2Vec model (``sg=1``); save ``w2v.model`` and
             ``w2v_vectors.csv``.
          6. Reduce the word vectors to 3 PCA components, colour them by
             K-Means cluster, and save ``Word2vec3D시각화.html``.

        All files are written into the directory from ``self.path_var``.
        Progress is reported through ``self.log_write``. Any exception is
        logged and shown in an error dialog rather than propagated.

        ``run_analysis`` runs this method on a worker thread, so dialogs are
        scheduled onto the Tk event loop with ``self.root.after`` instead of
        being opened directly from here.

        Returns:
            None.
        """
        url = self.url_var.get().strip()
        save_path = self.path_var.get().strip()
        if not url or not save_path:
            self.root.after(
                0,
                lambda: messagebox.showwarning("입력 오류", "URL과 저장 경로를 모두 입력해주세요."),
            )
            return

        def out_file(name):
            """Return the path of ``name`` inside the chosen save directory."""
            return os.path.join(save_path, name)

        try:
            # Fail before the (slow) scrape if the output directory cannot be used.
            os.makedirs(save_path, exist_ok=True)

            self.log_write("▶ 댓글 수집 시작...")
            driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
            try:
                driver.get(url)
                time.sleep(2)

                # Keep scrolling to the bottom until the document height stops
                # changing, i.e. no further content was appended.
                last_height = driver.execute_script("return document.documentElement.scrollHeight")
                while True:
                    driver.execute_script("window.scrollTo(0, document.documentElement.scrollHeight);")
                    time.sleep(1.5)
                    new_height = driver.execute_script("return document.documentElement.scrollHeight")
                    if new_height == last_height:
                        break
                    last_height = new_height

                try:
                    driver.find_element(By.CSS_SELECTOR, "#dismiss-button > a").click()
                except Exception:
                    # Best effort: the dismiss button is not always present.
                    pass

                time.sleep(1.5)
                buttons = driver.find_elements(By.CSS_SELECTOR, "#more-replies > a")
                for button in buttons:
                    try:
                        button.send_keys(Keys.ENTER)
                        time.sleep(1)
                        button.click()
                    except Exception:
                        # Best effort: skip reply buttons that cannot be activated.
                        continue

                html_source = driver.page_source
            finally:
                # Always close the browser, even when scraping fails midway.
                driver.quit()

            soup = BeautifulSoup(html_source, 'html.parser')
            comment_list = soup.select("ytd-comment-thread-renderer #content-text")

            comment_final = [
                comment.text.replace('\n', '').replace('\t', '').replace('\r', '').strip()
                for comment in comment_list
            ]

            df = pd.DataFrame({"Comment": comment_final})
            df.to_csv(out_file("total.csv"), encoding="utf-8-sig", index=False)
            if not comment_final:
                raise ValueError("수집된 댓글이 없습니다. URL을 확인해주세요.")
            self.log_write("✅ 댓글 수집 완료!")

            # Text preprocessing: keep nouns of two or more characters.
            self.log_write("▶ 명사 추출 및 전처리 중...")
            hannanum = Hannanum()
            documents = [
                ' '.join(w for w in hannanum.nouns(line) if len(w) >= 2)
                for line in comment_final
            ]
            if not any(documents):
                raise ValueError("댓글에서 분석할 명사를 추출하지 못했습니다.")
            self.log_write("✅ 전처리 완료")

            # Document-term matrix -> term-term matrix.
            vectorizer = CountVectorizer()
            X = vectorizer.fit_transform(documents)
            terms = vectorizer.get_feature_names_out()
            ttm = pd.DataFrame(X.T.dot(X).toarray(), index=terms, columns=terms)
            ttm.to_csv(out_file('ttm.csv'), encoding='utf-8-sig')

            # Tokenised documents are shared by the LDA and Word2Vec steps.
            sentences = [doc.split() for doc in documents]

            # LDA topic model.
            self.log_write("▶ LDA 모델 학습 중...")
            dictionary = corpora.Dictionary(sentences)
            corpus = [dictionary.doc2bow(tokens) for tokens in sentences]
            lda_model = LdaModel(corpus, id2word=dictionary, num_topics=5, random_state=42)
            with open(out_file('lda_model.pkl'), 'wb') as f:
                pickle.dump(lda_model, f)
            lda_vis = gensimvis.prepare(lda_model, corpus, dictionary)
            pyLDAvis.save_html(lda_vis, out_file('LDA시각화.html'))
            self.log_write("✅ LDA 시각화 저장 완료")

            # Word2Vec embeddings.
            self.log_write("▶ Word2Vec 학습 중...")
            w2v_model = Word2Vec(sentences=sentences, vector_size=100, window=4, min_count=1, epochs=10, sg=1)
            w2v_model.save(out_file("w2v.model"))

            word_vectors = w2v_model.wv
            vectors = word_vectors.vectors
            words = word_vectors.index_to_key
            with open(out_file("w2v_vectors.csv"), "w", encoding="utf-8-sig", newline='') as f:
                writer = csv.writer(f)
                writer.writerow(["word"] + [f"dim_{i}" for i in range(vectors.shape[1])])
                for word, vector in zip(words, vectors):
                    writer.writerow([word] + list(vector))

            # PCA to 3 components needs at least 3 samples, and K-Means cannot
            # form more clusters than there are words.
            n_words = len(words)
            if n_words < 3:
                raise ValueError("단어 수가 너무 적어 3D 시각화를 만들 수 없습니다.")
            pca = PCA(n_components=3)
            reduced_vectors = pca.fit_transform(vectors)
            kmeans = KMeans(n_clusters=min(5, n_words), random_state=42)
            labels = kmeans.fit_predict(vectors)

            fig = go.Figure(data=[go.Scatter3d(
                x=reduced_vectors[:, 0],
                y=reduced_vectors[:, 1],
                z=reduced_vectors[:, 2],
                mode='markers',
                marker=dict(size=5, color=labels, colorscale='Viridis', opacity=0.8),
                text=words,
                hovertemplate='%{text}'
            )])
            fig.update_layout(scene=dict(xaxis_title='PCA1', yaxis_title='PCA2', zaxis_title='PCA3'))
            fig.write_html(out_file("Word2vec3D시각화.html"))
            self.log_write("✅ Word2Vec 3D 시각화 완료")

            self.log_write("🎉 전체 분석 완료! 결과 파일이 저장되었습니다.")
            self.root.after(0, lambda: messagebox.showinfo("완료", "분석이 완료되었습니다!"))

        except Exception as e:
            # Copy the text now: the name bound by ``except ... as`` is
            # cleared when this block ends, before the scheduled dialog runs.
            error_text = str(e)
            self.log_write(f"❌ 오류 발생: {error_text}")
            self.root.after(0, lambda: messagebox.showerror("오류", error_text))


In [5]:
if __name__ == "__main__":
    # Create the Tk root window, build the analyzer UI on it, and block in
    # Tk's event loop until the window is closed.
    # NOTE(review): this cell needs the import cell at the top to have been
    # executed first; the recorded output below it is a NameError for `tk`.
    root = tk.Tk()
    app = YouTubeAnalyzerApp(root)
    root.mainloop()


NameError: name 'tk' is not defined