<a href="https://colab.research.google.com/github/PoojaDD-18/EnglishToFrenchTranslation/blob/main/videoRecommend2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
from getpass import getpass

# Never hard-code the API key in the notebook: anything committed here is
# public, and a key that has leaked must be revoked and replaced.
# Reuse a key already present in the environment; otherwise ask for it
# interactively without echoing the value.
api_key = os.environ.get('YOUTUBE_API_KEY', '')
if not api_key:
    try:
        api_key = getpass("Enter your YouTube Data API key: ").strip()
    except EOFError:
        # Non-interactive run (no stdin available): leave the key unset.
        api_key = ''

if api_key:
    os.environ['YOUTUBE_API_KEY'] = api_key
    print("YOUTUBE_API_KEY has been set as an environment variable.")
else:
    print("No API key entered; YOUTUBE_API_KEY was not set.")

YOUTUBE_API_KEY has been set as an environment variable.


In [None]:
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from datetime import datetime, timedelta
from sklearn.preprocessing import MinMaxScaler
import os

# --- Configuration ---
# The argument to os.environ.get() is the *name* of the environment variable,
# never the secret value itself. API_KEY is None when the variable is unset.
API_KEY = os.environ.get('YOUTUBE_API_KEY')
MAX_SEARCH_RESULTS = 50      # default number of results requested per search
TOP_N = 10                   # default number of ranked videos printed
MIN_VIEW_COUNT = 1000        # videos with fewer views are left out of the ranking
RELEVANCE_LANGUAGE = "en"    # passed to the search request as relevanceLanguage
RECENCY_BOOST_DAYS = 30      # videos at most this many days old get the recency boost
RECENCY_BOOST_FACTOR = 0.1   # recency factor awarded to those recent videos

def calculate_engagement_rate(likes, comments, views):
    """Return interactions (likes plus comments) per view, or 0 without views."""
    # Guard first: a non-positive view count cannot yield a meaningful ratio.
    if not views > 0:
        return 0
    interactions = likes + comments
    return interactions / views

def calculate_recency_factor(published_at):
    """Return the recency bonus for a video.

    Args:
        published_at: Publication timestamp string whose first ten characters
            are ``YYYY-MM-DD``, or ``None``/empty when the date is unknown.

    Returns:
        ``RECENCY_BOOST_FACTOR`` when the video is at most
        ``RECENCY_BOOST_DAYS`` days old, otherwise 0. A missing publication
        date yields 0 instead of raising.

    Raises:
        ValueError: If the leading ten characters are not a valid date.
    """
    # The caller reads this value with dict.get(), so it may be absent.
    if not published_at:
        return 0

    from datetime import timezone

    published_date = datetime.strptime(published_at[:10], '%Y-%m-%d').date()
    # Compare against the current UTC date rather than the machine-local one
    # so the result does not depend on where the code runs.
    # NOTE(review): assumes the API reports publishedAt in UTC - confirm.
    today = datetime.now(timezone.utc).date()
    age = (today - published_date).days
    if age <= RECENCY_BOOST_DAYS:
        return RECENCY_BOOST_FACTOR
    return 0

def normalize_data(data):
    """Min-max scale a list of numbers and return the scaled values as a list.

    An empty input produces an empty list without touching the scaler.
    """
    if data:
        # MinMaxScaler expects a 2-D input, so wrap each value in its own row.
        column = [[value] for value in data]
        scaled = MinMaxScaler().fit_transform(column)
        return [row[0] for row in scaled]
    return []

def search_youtube_videos(youtube, topic, max_results=MAX_SEARCH_RESULTS):
    """Run a relevance-ordered video search and return the matching video IDs.

    Args:
        youtube: API client exposing ``search().list(...).execute()``.
        topic: Query string sent as the ``q`` parameter.
        max_results: Value sent as ``maxResults``.

    Returns:
        A list of video ID strings, or an empty list when the request fails
        with an ``HttpError`` (the error is printed).
    """
    try:
        request = youtube.search().list(
            q=topic,
            type="video",
            maxResults=max_results,
            part="snippet",
            relevanceLanguage=RELEVANCE_LANGUAGE,
            order="relevance",
        )
        response = request.execute()

        found_ids = []
        for entry in response.get('items', []):
            found_ids.append(entry['id']['videoId'])
        return found_ids
    except HttpError as e:
        print(f"An HTTP error {e.resp.status} occurred:\n{e.content}")
        return []

def fetch_video_stats(youtube, video_ids):
    """Fetch title, channel, counters and caption flag for the given video IDs.

    Args:
        youtube: API client exposing ``videos().list(...).execute()``.
        video_ids: Iterable of video ID strings.

    Returns:
        A dict mapping each returned video ID to a dict with the keys
        ``title``, ``channelName``, ``viewCount``, ``likeCount``,
        ``commentCount``, ``publishedAt`` and ``hasEnglishCaptions``.
        Counters missing from the response default to 0. An empty dict is
        returned for empty input (no request is made) or when any request
        fails with an ``HttpError`` (the error is printed).
    """
    video_ids = list(video_ids)
    if not video_ids:
        # Nothing to look up: skip the request instead of sending an empty id.
        return {}

    # Send the IDs in batches; a single videos.list call is limited in how
    # many IDs it accepts (50 per request).
    batch_size = 50
    videos_data = {}
    try:
        for start in range(0, len(video_ids), batch_size):
            batch = video_ids[start:start + batch_size]
            video_response = youtube.videos().list(
                id=','.join(batch),
                part="statistics,snippet,contentDetails"
            ).execute()

            for item in video_response.get('items', []):
                snippet = item.get('snippet', {})
                statistics = item.get('statistics', {})
                content_details = item.get('contentDetails', {})

                videos_data[item['id']] = {
                    'title': snippet.get('title'),
                    'channelName': snippet.get('channelTitle'),
                    # Counters arrive as strings; absent ones count as 0.
                    'viewCount': int(statistics.get('viewCount', 0)),
                    'likeCount': int(statistics.get('likeCount', 0)),
                    'commentCount': int(statistics.get('commentCount', 0)),
                    'publishedAt': snippet.get('publishedAt'),
                    # NOTE(review): derived only from contentDetails.caption,
                    # which as read here carries no language information -
                    # confirm it really means *English* captions.
                    'hasEnglishCaptions': content_details.get('caption') == 'true',
                }
    except HttpError as e:
        print(f"An HTTP error {e.resp.status} occurred:\n{e.content}")
        return {}
    return videos_data

def rank_videos(videos_data):
    """Score the videos and return them sorted from best to worst.

    Args:
        videos_data: Dict mapping video ID to a dict with at least the keys
            ``title``, ``channelName``, ``viewCount``, ``likeCount``,
            ``commentCount``, ``publishedAt`` and ``hasEnglishCaptions``.

    Returns:
        A list of dicts (one per video with at least ``MIN_VIEW_COUNT``
        views) carrying the video's details, its ``engagementRate`` and its
        ``score``, sorted by ``score`` in descending order.
    """
    # Normalisation is computed over *all* videos, including those that are
    # later dropped by the minimum-view filter.
    view_counts = [data['viewCount'] for data in videos_data.values()]
    like_counts = [data['likeCount'] for data in videos_data.values()]
    normalized_views = normalize_data(view_counts)
    normalized_likes = normalize_data(like_counts)

    ranked_videos = []
    # The normalised lists follow the dict's iteration order, so walking them
    # in lock-step avoids a linear index lookup for every video.
    for (video_id, data), normalized_view_count, normalized_like_count in zip(
        videos_data.items(), normalized_views, normalized_likes
    ):
        if data['viewCount'] < MIN_VIEW_COUNT:
            continue

        engagement_rate = calculate_engagement_rate(
            data['likeCount'], data['commentCount'], data['viewCount']
        )
        recency_factor = calculate_recency_factor(data['publishedAt'])

        # Weighted sum: views 40%, likes 20%, engagement 20%, recency 20%.
        score = (
            0.4 * normalized_view_count +
            0.2 * normalized_like_count +
            0.2 * engagement_rate +
            0.2 * recency_factor
        )
        ranked_videos.append({
            'videoId': video_id,
            'title': data['title'],
            'channelName': data['channelName'],
            'viewCount': data['viewCount'],
            'likeCount': data['likeCount'],
            'engagementRate': engagement_rate,
            'score': score,
            'publishedAt': data['publishedAt'],
            'hasEnglishCaptions': data['hasEnglishCaptions'],
        })

    # Optional: Filter out videos without English captions
    # ranked_videos = [video for video in ranked_videos if video['hasEnglishCaptions']]

    ranked_videos.sort(key=lambda item: item['score'], reverse=True)
    return ranked_videos

def output_top_videos(ranked_videos, top_n=TOP_N):
    """Print a numbered summary of the first ``top_n`` entries of ``ranked_videos``.

    Each entry is a dict with the keys ``videoId``, ``title``,
    ``channelName``, ``viewCount``, ``likeCount`` and ``engagementRate``.
    """
    print(f"\n--- Top {top_n} Ranked Videos ---")
    for rank, entry in enumerate(ranked_videos[:top_n], start=1):
        video_id = entry['videoId']
        print(f"\nRank {rank}:")
        print(f"  Title: {entry['title']}")
        print(f"  Video ID: {video_id}")
        print(f"  Channel Name: {entry['channelName']}")
        print(f"  View Count: {entry['viewCount']:,}")
        print(f"  Like Count: {entry['likeCount']:,}")
        print(f"  Engagement Rate: {entry['engagementRate']:.4f}")
        print(f"  URL: https://www.youtube.com/watch?v={video_id}")

if __name__ == "__main__":
    # Check the key before prompting so the user is not asked for a topic
    # when no request could be made anyway.
    if not API_KEY:
        print("Error: YOUTUBE_API_KEY environment variable not set.")
    else:
        topic = input("Enter the topic to search on YouTube: ").strip()

        if not topic:
            # A blank query would spend API quota on an unspecific search.
            print("Error: no topic entered.")
        else:
            # Top-level boundary: report any failure instead of a traceback.
            try:
                youtube = build("youtube", "v3", developerKey=API_KEY)
                video_ids = search_youtube_videos(youtube, topic)

                if video_ids:
                    videos_data = fetch_video_stats(youtube, video_ids)

                    if videos_data:
                        ranked_videos = rank_videos(videos_data)
                        output_top_videos(ranked_videos)
                    else:
                        print("No video statistics found for the searched videos.")
                else:
                    print("No videos found for the given topic.")

            except Exception as e:
                print(f"An error occurred: {e}")

Enter the topic to search on YouTube: Data Science
Error: API_KEY environment variable not set.
