## YouTube videos link comments using web scrapping

In [1]:
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from bs4 import BeautifulSoup
import time

import spacy
import string
from nltk.probability import FreqDist

import warnings
warnings.filterwarnings("ignore")

In [2]:
url = "https://www.youtube.com/watch?v=Gs15V79cauo"
chromedriver_path = r"C:\Users\ricky\Downloads\chromedriver_win32 (1)\chromedriver.exe" 

chrome_options = Options().add_argument("--headless")  # Run Chrome in headless mode

# To create a new Chrome web driver
driver = webdriver.Chrome(executable_path=chromedriver_path, options=chrome_options)
driver.get(url)

# Scroll down to load comments
last_height = driver.execute_script("return document.documentElement.scrollHeight")
print(last_height)

while True:
    driver.execute_script("window.scrollTo(0, document.documentElement.scrollHeight);")
    time.sleep(20)
    
    new_height = driver.execute_script("return document.documentElement.scrollHeight")
    if new_height == last_height:
        break
    last_height = new_height
    
html = driver.page_source
driver.quit()

# Parse the HTML content with BeautifulSoup
soup = BeautifulSoup(html, 'html.parser')

# Extract the comment elements
comment_elements = soup.find_all('yt-formatted-string', class_='style-scope ytd-comment-renderer')
comments = [comment.text for comment in comment_elements]

df = pd.DataFrame({'Comment': comments})
df.to_csv('datasets/Youtube_video_comments.csv', header=True, index=False)
df.head()

765


Unnamed: 0,Comment
0,Hi @KrishNaik Can you make a playlist that say...
1,Hi @KrishNaik Can you make a playlist which sa...
2,"Hi Krish,\nThanks a lot for this. This is just..."
3,@Krish Naik : Yes please we need an entire ser...
4,The amount of help you are providing is just a...


In [3]:
df.shape

(31, 1)

In [4]:
# To define the most demanding topic in that videos comments section

nlp = spacy.load('en_core_web_sm')
stop_words = spacy.lang.en.stop_words.STOP_WORDS

def preprocess_text(text):
    tokens = nlp(text)
    tokens = [word.lemma_.lower().strip() if word.lemma_ != '-PRON-' else word.lower() for word in tokens]
    tokens = [word for word in tokens if word not in stop_words]
    tokens = [word for word in tokens if word not in string.punctuation]
    tokens = ' '.join(tokens)
    
    return tokens

df['Comment'] = df['Comment'].apply(preprocess_text)
df.head()

Unnamed: 0,Comment
0,hi @krishnaik playlist day deployment cover en...
1,hi @krishnaik playlist day deployment cover en...
2,hi krish thank lot amazing stuff happy action ...
3,@krish naik yes need entire series deployment ...
4,help provide amazing rock educate ️


In [5]:
df['Comment'] = df['Comment'].str.replace('@', '')
df['Comment'] = df['Comment'].str.replace('krishnaik ', 'krish naik ')
df.head()

Unnamed: 0,Comment
0,hi krish naik playlist day deployment cover en...
1,hi krish naik playlist day deployment cover en...
2,hi krish thank lot amazing stuff happy action ...
3,krish naik yes need entire series deployment u...
4,help provide amazing rock educate ️


In [6]:
tokens = ' '.join(df['Comment']).split()
tokens

['hi',
 'krish',
 'naik',
 'playlist',
 'day',
 'deployment',
 'cover',
 'end',
 'end',
 'deployment',
 'series',
 'include',
 'ml',
 'model',
 'dl',
 'nlp',
 'model',
 'use',
 'docker',
 'fast',
 'api',
 'thank',
 'amazing',
 'series',
 'hi',
 'krish',
 'naik',
 'playlist',
 'day',
 'deployment',
 'cover',
 'end',
 'end',
 'deployment',
 'series',
 'include',
 'ml',
 'model',
 'dl',
 'nlp',
 'model',
 'use',
 'docker',
 'fast',
 'api',
 'thank',
 'amazing',
 'series',
 'hi',
 'krish',
 'thank',
 'lot',
 'amazing',
 'stuff',
 'happy',
 'action',
 'happen',
 'automatically',
 'thankfully',
 '1',
 'error',
 'resolve',
 'rerun',
 'workflow',
 'github',
 ':-)',
 'video',
 'explain',
 'exactly',
 'happen',
 'main.yaml',
 'file',
 'line',
 'line',
 'like',
 'black',
 'box',
 'brief',
 'info',
 'abt',
 'appreciated',
 'krish',
 'naik',
 'yes',
 'need',
 'entire',
 'series',
 'deployment',
 'use',
 'docker',
 'container',
 'youtube',
 'allow',
 '1000like',
 'continue',
 'nlp',
 'series',
 'ber

In [7]:
for token in tokens:
    print(token, end =" ")

hi krish naik playlist day deployment cover end end deployment series include ml model dl nlp model use docker fast api thank amazing series hi krish naik playlist day deployment cover end end deployment series include ml model dl nlp model use docker fast api thank amazing series hi krish thank lot amazing stuff happy action happen automatically thankfully 1 error resolve rerun workflow github :-) video explain exactly happen main.yaml file line line like black box brief info abt appreciated krish naik yes need entire series deployment use docker container youtube allow 1000like continue nlp series bert gpt literally love help provide amazing rock educate ️ hello try replicate deployment wonder template yaml decide choose thank thank great service nation .. create employment contribute nation build directly .. yes dockers github actions series thank share knowledge <3 role github integration gitubaction docker content exactly need appreciate create video create production api form tes

In [8]:
most_common_words = FreqDist(tokens).most_common(10)

for word, freq in most_common_words:
    print(f'{word}: {freq}')

video: 11
thank: 10
deployment: 9
series: 8
use: 8
sir: 7
krish: 6
docker: 6
amazing: 6
end: 5
