# YT Comments analysis

In [67]:
from googleapiclient.discovery import build
import pandas as pd
from api import API_KEY

channel_id = "UCWeg2Pkate69NFdBeuRFTAw" #Squeezie channel

youtube = build('youtube', 'v3', developerKey=API_KEY)

exemple_video = "br-Ao33SfMw"

In [68]:
def get_channel_stats(youtube, channel_id):
    request = youtube.channels().list(
        part="snippet,contentDetails,statistics",
        id=channel_id
    )
    response = request.execute()
    data = {
        "channel_name": response['items'][0]['snippet']['title'],
        "channel_id": response['items'][0]['id'],
        "stats": response['items'][0]['statistics']
    }
    return data

In [69]:
def get_video_comments(youtube,video_Id):
    request = youtube.commentThreads().list(
        part="snippet",
        videoId=video_Id,
        maxResults=1000,
    )
    response = request.execute()
    return response

In [70]:
def get_video_comments_texts(youtube, video_Id):
    request = youtube.commentThreads().list(
        part="snippet",
        videoId=video_Id,
        maxResults=1150,
    )
    response = request.execute()
    
    # Extract textOriginal from each item
    comments_texts = []
    for item in response.get("items", []):
        if "snippet" in item and "topLevelComment" in item["snippet"]:
            comment_snippet = item["snippet"]["topLevelComment"]["snippet"]
            if "textOriginal" in comment_snippet:
                comments_texts.append(comment_snippet["textOriginal"])
    
    return comments_texts

In [71]:
get_channel_stats(youtube, channel_id)
video = get_video_comments_texts(youtube, exemple_video)

In [72]:
pd.set_option('display.max_rows', False)
df = pd.DataFrame({"Com":video})

df

Unnamed: 0,Com
0,Thank you for enjoying Kamakura! Kamakura is a...
1,You didn't tell us if you liked your first mas...
2,I've never wanted to have my own child so badl...
3,"I absolutely love Mushroom farms, there is a s..."
4,Bro is legit making a documentary at this poin...
5,Bjorn is such a happy baby!
6,I am so happy for Felix. He worked so hard for...
7,PewDiePie reminds me that if I want to have ch...
8,That’s a cute baby omg❤❤
...,...


In [73]:
from langdetect import detect, LangDetectException

def detect_language(text):
    try:
        return detect(text)
    except LangDetectException:
        return "unknown"

df["Language"] = df["Com"].apply(detect_language)
df = df.loc[df['Language'] == 'en']
df

Unnamed: 0,Com,Language
0,Thank you for enjoying Kamakura! Kamakura is a...,en
1,You didn't tell us if you liked your first mas...,en
2,I've never wanted to have my own child so badl...,en
3,"I absolutely love Mushroom farms, there is a s...",en
4,Bro is legit making a documentary at this poin...,en
5,Bjorn is such a happy baby!,en
6,I am so happy for Felix. He worked so hard for...,en
7,PewDiePie reminds me that if I want to have ch...,en
8,That’s a cute baby omg❤❤,en
...,...,...


In [74]:
import nltk

from nltk.sentiment.vader import SentimentIntensityAnalyzer

from nltk.corpus import stopwords

from nltk.tokenize import word_tokenize

from nltk.stem import WordNetLemmatizer

import re

In [75]:
### A faire la premiere fois ###
#nltk.download('all')

In [76]:
def remove_punctuation(text):
    # Utilisation d'une expression régulière pour supprimer les caractères de ponctuation
    return re.sub(r'[^\w\s]', '', text)
df['No_ponctuation'] = df['Com'].apply(remove_punctuation)
df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['No_ponctuation'] = df['Com'].apply(remove_punctuation)


Unnamed: 0,Com,Language,No_ponctuation
0,Thank you for enjoying Kamakura! Kamakura is a...,en,Thank you for enjoying Kamakura Kamakura is an...
1,You didn't tell us if you liked your first mas...,en,You didnt tell us if you liked your first mass...
2,I've never wanted to have my own child so badl...,en,Ive never wanted to have my own child so badly...
3,"I absolutely love Mushroom farms, there is a s...",en,I absolutely love Mushroom farms there is a sw...
4,Bro is legit making a documentary at this poin...,en,Bro is legit making a documentary at this poin...
5,Bjorn is such a happy baby!,en,Bjorn is such a happy baby
6,I am so happy for Felix. He worked so hard for...,en,I am so happy for Felix He worked so hard for ...
7,PewDiePie reminds me that if I want to have ch...,en,PewDiePie reminds me that if I want to have ch...
8,That’s a cute baby omg❤❤,en,Thats a cute baby omg
...,...,...,...


In [77]:
def preprocess(text):
    emoji_pattern = re.compile("["
        #u"\U0001F600-\U0001F64F"  # emoticons
        u"\U0001F300-\U0001F5FF"  # symbols & pictographs
        u"\U0001F680-\U0001F6FF"  # transport & map symbols
        u"\U0001F1E0-\U0001F1FF"  # flags (iOS)
                           "]+", flags=re.UNICODE)
    text_no_emojis = emoji_pattern.sub(r'', text)
    tokens = word_tokenize(text_no_emojis.lower())
    filtered_tokens = [token for token in tokens if token not in stopwords.words('english')]
    lemmatizer = WordNetLemmatizer()
    lemmatized_tokens = [lemmatizer.lemmatize(token) for token in filtered_tokens]
    processed_text = ' '.join(lemmatized_tokens)
    return processed_text

df["Preprocessing"] = df["No_ponctuation"].apply(preprocess)
df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["Preprocessing"] = df["No_ponctuation"].apply(preprocess)


Unnamed: 0,Com,Language,No_ponctuation,Preprocessing
0,Thank you for enjoying Kamakura! Kamakura is a...,en,Thank you for enjoying Kamakura Kamakura is an...,thank enjoying kamakura kamakura ancient city ...
1,You didn't tell us if you liked your first mas...,en,You didnt tell us if you liked your first mass...,didnt tell u liked first massage massage revie...
2,I've never wanted to have my own child so badl...,en,Ive never wanted to have my own child so badly...,ive never wanted child badly make tear see pur...
3,"I absolutely love Mushroom farms, there is a s...",en,I absolutely love Mushroom farms there is a sw...,absolutely love mushroom farm swedish company ...
4,Bro is legit making a documentary at this poin...,en,Bro is legit making a documentary at this poin...,bro legit making documentary point true king y...
5,Bjorn is such a happy baby!,en,Bjorn is such a happy baby,bjorn happy baby
6,I am so happy for Felix. He worked so hard for...,en,I am so happy for Felix He worked so hard for ...,happy felix worked hard many year beautiful li...
7,PewDiePie reminds me that if I want to have ch...,en,PewDiePie reminds me that if I want to have ch...,pewdiepie reminds want child make sure give wo...
8,That’s a cute baby omg❤❤,en,Thats a cute baby omg,thats cute baby omg
...,...,...,...,...
