<a href="https://colab.research.google.com/github/ANDIECOOLER2/YoutubeVideoAnalysis/blob/main/YoutubeAnalysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# import pandas as pd
# import requests
# from bs4 import BeautifulSoup
# import re

# # Define your niche keywords for topic research
# niche_keywords = ["calisthenics skills", "gymnastics rings training", "bodyweight strength", "advanced push-ups", "muscle-ups"]

# # Function to scrape YouTube search results for top-performing videos
# def get_top_videos(search_query, max_results=10):
#     search_url = f"https://www.youtube.com/results?search_query={search_query.replace(' ', '+')}"
#     response = requests.get(search_url)
#     soup = BeautifulSoup(response.text, 'html.parser')

#     video_data = []
#     for video in soup.find_all('a', href=True):
#         if '/watch?v=' in video['href']:
#             title = video.text.strip()
#             link = "https://www.youtube.com" + video['href']
#             video_data.append({"title": title, "link": link})
#             if len(video_data) >= max_results:
#                 break

#     return video_data

# # Function to extract trending topics from popular videos
# def analyze_video_titles(videos):
#     topics = []
#     for video in videos:
#         words = re.findall(r"\b\w+\b", video["title"].lower())
#         topics.extend(words)

#     # Get the most common words used in video titles (note: stop words are NOT filtered out here)
#     common_words = pd.Series(topics).value_counts().head(10)
#     return common_words

# # Function to analyze thumbnails using OpenAI Vision API (Placeholder for future integration)
# def analyze_thumbnails(video_links):
#     thumbnail_data = {}
#     for link in video_links:
#         # Placeholder for thumbnail analysis (e.g., extracting color, text, faces)
#         thumbnail_data[link] = "Analysis Pending"
#     return thumbnail_data

# # Scrape top-performing videos for each niche keyword
# all_videos = {}
# for keyword in niche_keywords:
#     videos = get_top_videos(keyword)
#     all_videos[keyword] = videos

# # Analyze top video titles to find trending topics
# trending_topics = {}
# for keyword, videos in all_videos.items():
#     trending_topics[keyword] = analyze_video_titles(videos)

# # Extract video links for thumbnail analysis
# video_links = [video["link"] for videos in all_videos.values() for video in videos]
# thumbnail_analysis = analyze_thumbnails(video_links)

# # Convert results to DataFrame and save for analysis
# df_topics = pd.DataFrame(trending_topics)
# df_topics.to_csv("trending_topics.csv", index=False)
# df_thumbnails = pd.DataFrame(list(thumbnail_analysis.items()), columns=["Video Link", "Thumbnail Analysis"])
# df_thumbnails.to_csv("thumbnail_analysis.csv", index=False)

# print("Trending topics and thumbnail analysis complete! Check CSV files for insights.")


In [2]:
import html
import os
import time

import googleapiclient.discovery
import pandas as pd
from google.colab import userdata

# Load the API key from Colab user secrets (google.colab.userdata), stored under
# the secret name 'ytdataAPIKEY'. It is NOT read from an environment variable.
youtube_api_key = userdata.get('ytdataAPIKEY')
if not youtube_api_key:
    # Point the reader at the place the key is actually looked up.
    raise ValueError("Missing YouTube API key. Add a Colab secret named 'ytdataAPIKEY'.")

# Shared YouTube Data API v3 client used by every helper defined below.
youtube = googleapiclient.discovery.build("youtube", "v3", developerKey=youtube_api_key)

# Define your niche keywords for topic research; each entry is searched separately.
niche_keywords = ["calisthenics skills progression"]

# Function to fetch video search results
def search_videos(search_query, max_results=10):
    """Search YouTube for videos matching a query, ordered by view count.

    Uses the module-level ``youtube`` client. Only a single page is requested;
    no pagination is performed, so at most one page of results is returned.

    Args:
        search_query: Free-text query sent as the ``q`` parameter.
        max_results: Forwarded unchanged as ``maxResults``. The value is not
            validated here (the API reference lists the accepted range).

    Returns:
        A list of dicts with keys ``video_id``, ``title`` and ``channel``, in
        the order the API returned them. ``title`` and ``channel`` have HTML
        character references decoded to plain text.

    Raises:
        Whatever ``request.execute()`` raises; errors are not caught here.
    """
    request = youtube.search().list(
        q=search_query,
        part="snippet",
        maxResults=max_results,
        type="video",
        order="viewCount"
    )
    response = request.execute()

    video_results = []
    for item in response.get("items", []):
        snippet = item["snippet"]
        video_results.append({
            "video_id": item["id"]["videoId"],
            # NOTE(review): search snippets have been observed to arrive
            # HTML-escaped (ampersands, apostrophes, quotes as entities).
            # Decode them so saved titles read as plain text; unescape() is a
            # no-op for text that contains no character references.
            "title": html.unescape(snippet["title"]),
            "channel": html.unescape(snippet["channelTitle"])
        })

    return video_results

# Function to fetch video statistics in batches
def get_video_statistics(video_ids, batch_size=50):
    """Fetch view and like counts for the given video IDs.

    IDs are sent to ``youtube.videos().list`` in comma-joined groups of at most
    ``batch_size`` so that long ID lists are split across several requests
    instead of one oversized request.

    Args:
        video_ids: Iterable of YouTube video ID strings.
        batch_size: Maximum number of IDs per request. Defaults to 50
            (presumably the API's per-request ID cap; confirm against the
            videos.list reference).

    Returns:
        A dict mapping video ID to ``{"views": int, "likes": int}``. A count
        missing from the response is reported as 0, so a video whose like
        count is absent is indistinguishable from one with zero likes. IDs the
        API does not return are simply absent. Empty input yields ``{}``
        without issuing any request.

    Raises:
        ValueError: If ``batch_size`` is less than 1.
        Whatever ``execute()`` raises; errors are not caught here.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    id_list = list(video_ids)
    stats_data = {}

    # range() over an empty list produces no iterations, so no request is made.
    for start in range(0, len(id_list), batch_size):
        batch = id_list[start:start + batch_size]
        stats_request = youtube.videos().list(
            part="statistics",
            id=",".join(batch)
        )
        stats_response = stats_request.execute()

        for item in stats_response.get("items", []):
            statistics = item.get("statistics", {})
            stats_data[item["id"]] = {
                "views": int(statistics.get("viewCount", 0)),
                "likes": int(statistics.get("likeCount", 0))
            }

    return stats_data

# Function to fetch and store popular videos
def fetch_popular_videos():
    """Collect search hits plus statistics for every entry in ``niche_keywords``.

    For each keyword, ``search_videos`` is called; when it returns any hits,
    ``get_video_statistics`` is called once with all of their IDs. Each hit
    dict is then extended in place with ``views``, ``likes`` (0 when no
    statistics came back for that ID) and a watch-page ``link``.

    Returns:
        A single flat list of the enriched hit dicts, keyword by keyword, in
        search order.
    """
    collected = []

    for term in niche_keywords:
        hits = search_videos(term)

        if hits:
            stats_by_id = get_video_statistics([hit["video_id"] for hit in hits])

            for hit in hits:
                vid = hit["video_id"]
                counts = stats_by_id.get(vid, {})
                # Keyword order fixes the column order: views, likes, link.
                hit.update(
                    views=counts.get("views", 0),
                    likes=counts.get("likes", 0),
                    link=f"https://www.youtube.com/watch?v={vid}",
                )

            collected.extend(hits)

        # Pause after every keyword (including the last) to pace API calls.
        time.sleep(1)

    return collected

# Run the search + statistics pipeline for every keyword in niche_keywords.
video_data = fetch_popular_videos()

# Add timestamp for cache versioning: the local time at this point is embedded
# in the file name so that runs made at different times get different files.
timestamp = time.strftime("%Y%m%d-%H%M%S")
filename = f"popular_videos_cache_{timestamp}.csv"

# One row per video; written to the working directory without the row index.
# Note: the file is only written here; nothing in this cell reads a cache back.
df_videos = pd.DataFrame(video_data)
df_videos.to_csv(filename, index=False)

print(f"Popular videos data saved for caching: {filename}")


Popular videos data saved for caching: popular_videos_cache_20250226-210507.csv


In [3]:
# Display the fetched videos; as the cell's last expression the DataFrame is rendered as a table.
df_videos

Unnamed: 0,video_id,title,channel,views,likes,link
0,vczr0WuYK9g,Calisthenics for Beginners | In Depth Step-by-...,Leo Wang,5788562,319956,https://www.youtube.com/watch?v=vczr0WuYK9g
1,1mlN0yuxoLE,"Calisthenics for Complete Beginners (Tips, Exe...",yvguo,5649123,330467,https://www.youtube.com/watch?v=1mlN0yuxoLE
2,lo792RZ245Y,5 Calisthenics Skills Beginners Can Learn at H...,Ashton Fitness,5462206,240039,https://www.youtube.com/watch?v=lo792RZ245Y
3,geUKI-2uWaY,BEGINNER CALISTHENICS WORKOUT,Meli,4174799,198243,https://www.youtube.com/watch?v=geUKI-2uWaY
4,BeGUXwZmATQ,All Planche Progressions from 0 to Full,Vitaly Pavlenko,3303909,118207,https://www.youtube.com/watch?v=BeGUXwZmATQ
5,WhLialqwJDM,Skills that are easier than they look #calisth...,Wan Aesthenix,3280175,215054,https://www.youtube.com/watch?v=WhLialqwJDM
6,TKYZTbyQQHY,Top 10 Rules of Calisthenics (FOLLOW OR FAIL),FitnessFAQs,3258964,96252,https://www.youtube.com/watch?v=TKYZTbyQQHY
7,kdB-UJg3s-Q,First what you should learn in Calisthenics | ...,Andry Strong,3205262,0,https://www.youtube.com/watch?v=kdB-UJg3s-Q
8,hByjG7mymdw,Pull-Ups LVL 1-10 (How To Progress Faster),CHRIS HERIA,2787819,91878,https://www.youtube.com/watch?v=hByjG7mymdw
9,_iYvlSMgUGE,HOW TO MUSCLE UP - THE BEST WAY,CHRIS HERIA,2738874,80064,https://www.youtube.com/watch?v=_iYvlSMgUGE
