In [1]:
import time
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
from selenium.common.exceptions import NoSuchElementException

In [2]:
driver = webdriver.Chrome()
driver.get("https://www.youtube.com/")

In [3]:
search_box = driver.find_element(By.XPATH, '//*[@id="center"]/yt-searchbox/div[1]/form/input')
search_box.send_keys("Deepseek")
search_box.send_keys(Keys.RETURN)
time.sleep(5)

In [23]:
max_scrolls = 1
csv_filename = "youtube.csv"
unique_movies = set()
for _ in range(max_scrolls):
    print("Scrolling down...")
    driver.execute_script("window.scrollTo(0, document.documentElement.scrollHeight);")
    time.sleep(5)  

    main_div = driver.find_element(By.XPATH, "//*[@id='contents']")
    movies = main_div.find_elements(By.XPATH, "//*[@id='contents']/ytd-video-renderer")

    print(len(movies))

    for movie in movies:
        try:
            movie_anchor = movie.find_element(By.ID, "video-title")
            mov_url = movie_anchor.get_attribute("href")
            movie_title = movie_anchor.text.strip()
            views = movie.find_element(By.XPATH, ".//*[@id='metadata-line']/span[1]").text.strip()
            channel = movie.find_element(By.XPATH, ".//*[@id='text']/a").text.strip()
            date = movie.find_element(By.XPATH, ".//*[@id='metadata-line']/span[2]").text.strip()
            duration_element = movie.find_elements(By.XPATH, ".//*[@id='overlays']/ytd-thumbnail-overlay-time-status-renderer/div[1]/badge-shape/div")
            duration = duration_element[0].text.strip() if duration_element else "N/A"

            movie_entry = (movie_title, mov_url, views, channel, date, duration)

            if movie_entry not in unique_movies:
                unique_movies.add(movie_entry)

        except Exception as e:
            print(f"Skipping movie due to error: {e}")

df = pd.DataFrame(list(unique_movies), columns=["Title", "URL", "Views", "Channel", "Upload Date", "Duration"])
df.to_csv(csv_filename, index=False)
print(f"Final CSV saved: {csv_filename} with {len(unique_movies)} unique movies.")


Scrolling down...
530
Final CSV saved: youtube.csv with 519 unique movies.


In [24]:
data = pd.read_csv("youtube.csv")
data


Unnamed: 0,Title,URL,Views,Channel,Upload Date,Duration
0,How To Earn 200$ with Deepseek AI (FREE) | Cre...,https://www.youtube.com/watch?v=6Y287V4PPUI&pp...,18K views,,4 days ago,
1,รู้จัก DeepSeek AI น้องใหม่ ทำไมถึงมาแรงยิ่งกว...,https://www.youtube.com/watch?v=OMi23SYvfO4&pp...,87K views,,2 weeks ago,
2,DeepSeek AI Kya Hai? #deepseek #sscutkarsh #sh...,https://www.youtube.com/shorts/i3RQV3mK3KY,28K views,,2 weeks ago,
3,Should you sell Nvidia’s stock? #nvidia #ai #d...,https://www.youtube.com/shorts/KqmSIIISTDw,25K views,,2 weeks ago,
4,Deepseek R1 671b Local Ai Takes How Much Power...,https://www.youtube.com/shorts/8bPn0Yl-PK4,101K views,,2 weeks ago,
...,...,...,...,...,...,...
514,China caught LYING about DeepSeek Ai,https://www.youtube.com/watch?v=ZuMPPf3D5R8&pp...,347K views,,2 weeks ago,
515,DeepSeek Vs ChatGPT For AI Content Creators | ...,https://www.youtube.com/watch?v=3Ga8hu4PXhw&pp...,24K views,,2 days ago,9:51
516,Deepseek R1 | एक चाइनीज़ App ने Donald Trump औ...,https://www.youtube.com/watch?v=k_kjMc-bZEQ&pp...,277K views,,2 weeks ago,
517,DeepSeek-R1 Paper Explained - A New RL LLMs Er...,https://www.youtube.com/watch?v=DCqqCLlsIBU&pp...,74K views,,3 weeks ago,


In [25]:
input_csv = "youtube.csv"
output_csv = "youtube_details.csv"

df = pd.read_csv(input_csv)
driver = webdriver.Chrome()
movie_details = []

for index, row in df.iterrows():
    movie_url = row["URL"]
    video_name = row["Title"]
    print(f"Scraping details for: {video_name}")
    driver.get(movie_url)
    time.sleep(5)

    try:
        description_elements = driver.find_elements(By.XPATH, '//*[@id="attributed-snippet-text"]/span/span')
        description = " ".join([desc.text.strip() for desc in description_elements]) if description_elements else "N/A"

        try:
            likes = driver.find_element(By.XPATH, '//*[@id="top-level-buttons-computed"]/segmented-like-dislike-button-view-model/yt-smartimation/div/div/like-button-view-model/toggle-button-view-model/button-view-model/button/yt-touch-feedback-shape/div/div[2]').text.strip()
        except:
            likes = "N/A"

        try:
            channel = driver.find_element(By.XPATH, '//*[@id="text"]/a').text.strip()
        except:
            channel = "N/A"

        movie_details.append({
            "Title": video_name,
            "URL": movie_url,
            "Channel": channel,
            "Likes": likes,
            "Description": description
        })

    except Exception as e:
        print(f"Skipping due to error: {e}")

df_details = pd.DataFrame(movie_details)
df_details.to_csv(output_csv, index=False)

print(f"Video details saved in {output_csv}")

Scraping details for: How To Earn 200$ with Deepseek AI (FREE) | Create Tool Website Using Deepseek AI | Deepseek R1
Scraping details for: รู้จัก DeepSeek AI น้องใหม่ ทำไมถึงมาแรงยิ่งกว่าม้ามืด I Ceemeagain
Scraping details for: DeepSeek AI Kya Hai? #deepseek #sscutkarsh #shorts
Scraping details for: Should you sell Nvidia’s stock? #nvidia #ai #deepseek #stockmarket
Scraping details for: Deepseek R1 671b Local Ai Takes How Much Power?! 👀⚡💸
Scraping details for: 【DeepSeek】米テックを抜く？AI専門家「超すごい」開発コストの安さとド根性に驚愕…スターゲート計画は？東大で7千人受講希望の研究者が解説｜アベヒル
Scraping details for: DeepSeek stole our tech... says OpenAI
Scraping details for: HOW TO MAKE MONEY WITH GOOGLE & DEEPSEEK (Step By Step Tutorial)
Scraping details for: DeepSeek-R1 Logo Design is INSANE 🐳💥
Scraping details for: DEEPSEEK AI CHINA KALAHKAN ChatGPT DAN BIKIN SAHAM AMERIKA ANJLOK RIBUAN TRILIUN
Scraping details for: Deepseek DEEP Agent: This AI Agent CAN CONTROL 1000s OF BROWSERS AT ONCE! (Deep Research)
Scraping details for: How Did Chin

InvalidSessionIdException: Message: invalid session id
Stacktrace:
	GetHandleVerifier [0x00007FF613BC6F15+28773]
	(No symbol) [0x00007FF613B32600]
	(No symbol) [0x00007FF6139C8DDC]
	(No symbol) [0x00007FF613A0F17F]
	(No symbol) [0x00007FF613A47112]
	(No symbol) [0x00007FF613A41AA9]
	(No symbol) [0x00007FF613A40B59]
	(No symbol) [0x00007FF613995595]
	GetHandleVerifier [0x00007FF613F1425D+3490733]
	GetHandleVerifier [0x00007FF613F2BA43+3586963]
	GetHandleVerifier [0x00007FF613F2147D+3544525]
	GetHandleVerifier [0x00007FF613C8C9DA+838442]
	(No symbol) [0x00007FF613B3D04F]
	(No symbol) [0x00007FF6139941AE]
	GetHandleVerifier [0x00007FF613F9EDA8+4058872]
	BaseThreadInitThunk [0x00007FFC3672E8D7+23]
	RtlUserThreadStart [0x00007FFC374FBF2C+44]
