In [None]:
%pip install google-api-python-client langchain_openai langchain langchain_community pandas langchain-core langdetect

## Retrieve and process YouTube comments

In [None]:
import getpass
import os

# Keep an existing, non-empty YOUTUBE_API_KEY; otherwise ask for it
# interactively with getpass and store the answer in the environment.
os.environ["YOUTUBE_API_KEY"] = os.environ.get("YOUTUBE_API_KEY") or getpass.getpass(
    "Enter API key for Youtube Data API: "
)

In [None]:
from googleapiclient.discovery import build
import json


# Client for version 3 of the YouTube Data API, authenticated with the
# developer key stored in the environment by the previous cell.
youtube = build(
    serviceName='youtube',
    version='v3',
    developerKey=os.environ['YOUTUBE_API_KEY'],
)

def get_request(video_id, pageToken=None):
    """Build (without executing) a ``commentThreads.list`` request for one page.

    Args:
        video_id: ID of the YouTube video whose comment threads are listed.
        pageToken: Token of the page to fetch, as found under
            ``nextPageToken`` in a previous response; ``None`` asks for the
            first page.

    Returns:
        The request object created by the module-level ``youtube`` client.
        The caller runs it with ``.execute()``.
    """
    return youtube.commentThreads().list(
        # 'replies' has to be requested explicitly: with 'snippet' alone the
        # response items carry no 'replies' key, so reply text would never be
        # collected by get_comments.
        # NOTE(review): per the API docs a thread may embed only a subset of
        # its replies; comments.list is needed to retrieve all of them.
        part='snippet,replies',
        videoId=video_id,
        textFormat='plainText',
        maxResults=100,
        pageToken=pageToken,
    )
    
def get_comments(video_id):
    """Collect the display text of the comments on a video, page by page.

    Pages are requested through ``get_request`` until a response carries no
    ``nextPageToken``. For every thread the top-level comment text is stored,
    followed by the text of each reply listed under the thread's ``replies``
    key when that key is present.

    Any exception raised while executing a request or reading its response is
    printed and ends the loop; whatever was gathered up to that point is
    still returned.

    Args:
        video_id: ID of the video whose comments are fetched.

    Returns:
        list: ``textDisplay`` values in the order they were encountered.
    """
    collected = []
    next_token = None

    while True:
        page_request = get_request(video_id, next_token)
        try:
            page = page_request.execute()

            for thread in page['items']:
                top_level = thread['snippet']['topLevelComment']
                collected.append(top_level['snippet']['textDisplay'])
                if 'replies' not in thread:
                    continue
                for reply in thread['replies']['comments']:
                    collected.append(reply['snippet']['textDisplay'])

            # No token in the response means this was the last page.
            if 'nextPageToken' not in page:
                break
            next_token = page['nextPageToken']

        except Exception as exc:
            print(f"Error fetching comments: {exc}")
            break

    return collected


In [None]:
# Video whose comments are analysed in the rest of the notebook.
video_id = "OY2x0TyKzIQ"
# Fetch the comment texts for that video through the helper defined above.
comments = get_comments(video_id=video_id)


In [None]:
# Sanity check: number of comment texts that were collected.
len(comments)

## Using LangChain to classify the comments with a Pydantic structured-output schema


In [None]:
import getpass
import os

# Keep an existing, non-empty OPENAI_API_KEY; otherwise ask for it
# interactively with getpass and store the answer in the environment.
os.environ["OPENAI_API_KEY"] = os.environ.get("OPENAI_API_KEY") or getpass.getpass(
    "Enter API key for OpenAI: "
)

In [None]:
from typing import List
from pydantic import BaseModel, Field

# Sentence shared by all three field descriptions. The first two fields put a
# space before it; the suggestions field appends it directly after the period,
# exactly as its description has always read.
_TRANSLATE_NOTE = (
    "If the feedback is not in English, please translate it into English "
    "to ensure clarity for all users."
)


# Schema the model fills in for a batch of comments: three lists of comment
# texts, each defaulting to an empty list.
# NOTE(review): documented with `#` comments on purpose -- pydantic exposes a
# class docstring as the schema description, which would presumably change
# what is sent to the LLM. Confirm before converting this to a docstring.
class Classification(BaseModel):
    # Comments whose sentiment is positive.
    pos_comments: List[str] = Field(
        description=(
            "If the sentiment is positive, provide the actual positive comments without changing it. "
            + _TRANSLATE_NOTE
        ),
        default_factory=list,
    )
    # Comments whose sentiment is negative.
    neg_comments: List[str] = Field(
        description=(
            "If the sentiment is negative, provide the actual negative comments without changing it. "
            + _TRANSLATE_NOTE
        ),
        default_factory=list,
    )
    # Comments that contain suggestions.
    suggestions: List[str] = Field(
        description=(
            "If the comments involve suggestions, provide the actual comments without changing it."
            + _TRANSLATE_NOTE
        ),
        default_factory=list,
    )

In [None]:
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate


# Instruction text used for every classification call; `{comments}` is the
# only template variable and is filled in when the prompt is invoked.
_TAGGING_TEMPLATE = """
Extract the desired information from the following comments.

Only extract the properties mentioned in the 'Classification' function. If a property is not present, leave it blank.

Comments:
{comments}

Classification:
"""

tagging_prompt = ChatPromptTemplate.from_template(_TAGGING_TEMPLATE)

# gpt-4o-mini at temperature 0, wrapped so that its answers follow the
# `Classification` schema.
_classifier_model = ChatOpenAI(model="gpt-4o-mini", temperature=0)
llm = _classifier_model.with_structured_output(Classification)

In [None]:
# Peek at the first five comments before sending them to the model.
comments[:5]

In [None]:
# Smoke-test the classifier on the first five comments. They are routed
# through `tagging_prompt`, the same way the batch loop further down does, so
# the model receives the extraction instructions and not just the bare
# comment strings.
res = llm.invoke(tagging_prompt.invoke({"comments": comments[:5]})).model_dump()

In [None]:
# Show the classification produced for the five-comment sample.
res

In [None]:
# Number of comments sent to the model in a single classification request.
chunk_size = 500

In [None]:
# Classify the comments batch by batch; `result` ends up with one dict
# (the dumped structured output) per batch of at most `chunk_size` comments.
result = []

for start in range(0, len(comments), chunk_size):
    batch = comments[start:start + chunk_size]
    batch_prompt = tagging_prompt.invoke({"comments": batch})
    result.append(llm.invoke(batch_prompt).model_dump())

In [None]:
# Inspect the classification of the first batch.
result[0]

## Summarize the reviews

In [None]:
from langchain_core.documents import Document
    
def _as_documents(rows, field):
    """Return one Document per row, holding the row's `field` items joined by single spaces."""
    return [
        Document(page_content=' '.join(str(item) for item in row[field]))
        for row in rows
    ]


# One Document per classified batch, for each of the three categories.
pos_documents = _as_documents(result, 'pos_comments')

neg_documents = _as_documents(result, 'neg_comments')

suggestion_documents = _as_documents(result, 'suggestions')



In [None]:
# Inspect the documents built from the positive comments.
pos_documents

In [None]:
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

# Summarisation prompt; `{context}` receives the documents passed to the chain.
# The separator is written as real newlines ("\n\n"): an escaped "\\n\\n"
# would reach the model as literal backslash-n characters.
summary_prompt = ChatPromptTemplate.from_messages(
    [("system", "Write a concise summary of the reviews:\n\n{context}")]
)
# Dedicated names on purpose: `llm` stays bound to the structured-output
# classifier, so the classification cells keep working when they are re-run
# after this cell.
summary_llm = ChatOpenAI(temperature=0, model="gpt-4o-mini")

chain = create_stuff_documents_chain(summary_llm, summary_prompt)



In [None]:
# Summarise the positive comments.
chain.invoke({"context": pos_documents})


In [None]:
# Summarise the negative comments.
chain.invoke({"context": neg_documents})


In [None]:
# Summarise the comments that contain suggestions.
chain.invoke({"context": suggestion_documents})
