#Final Project
Description:

This tool performs the following functions:

Retrieves information about a YouTube channel based on its handle.

Compares channels that I enter and recommends the best one among them.

Suggests a channel based on other channels that you input as ones you like.

##First try - All parts together

In [None]:
#Part 1 - retrives the information of the chanle or chanels i enterd

from googleapiclient.discovery import build
from collections import Counter
from datetime import datetime

# Replace with your own API key
API_KEY = 'AIzaSyCXEhvGzLjh6IjRogjjJ3CJ2g4J9P64Yho'

# Initialize the YouTube API client
youtube = build('youtube', 'v3', developerKey=API_KEY)

def get_channel_id(handle):
    """Find the channel ID by channel handle using YouTube Search API."""
    try:
        request = youtube.search().list(
            part='snippet',
            q=handle,
            type='channel'
        )
        response = request.execute()

        if 'items' in response and len(response['items']) > 0:
            channel_info = response['items'][0]
            return channel_info['id']['channelId']
        else:
            print(f"No channel found for handle {handle}")
            return None

    except Exception as e:
        print(f"Error finding channel ID for handle {handle}: {e}")
        return None

def get_all_video_categories(channel_id, max_results=50):
    """Fetch a limited number of videos from the channel and return a list of categories."""
    categories = []
    try:
        request = youtube.search().list(
            part='snippet',
            channelId=channel_id,
            maxResults=max_results,
            type='video'
        )
        response = request.execute()

        for item in response['items']:
            video_id = item['id']['videoId']

            # Get the category ID for each video
            video_request = youtube.videos().list(
                part='snippet',
                id=video_id
            )
            video_response = video_request.execute()

            if 'items' in video_response and len(video_response['items']) > 0:
                category_id = video_response['items'][0]['snippet']['categoryId']
                categories.append(category_id)

    except Exception as e:
        print(f"Error fetching video categories for channel ID {channel_id}: {e}")

    return categories

def get_category_name(category_id):
    """Fetch the category name from the category ID."""
    try:
        request = youtube.videoCategories().list(
            part='snippet',
            id=category_id
        )
        response = request.execute()

        if 'items' in response and len(response['items']) > 0:
            return response['items'][0]['snippet']['title']
        else:
            return None
    except Exception as e:
        print(f"Error fetching category name for category ID {category_id}: {e}")
        return None

def get_channel_data(channel_id):
    """Fetch channel data from YouTube API."""
    try:
        request = youtube.channels().list(
            part='snippet,statistics,contentDetails',
            id=channel_id
        )
        response = request.execute()

        if 'items' in response and len(response['items']) > 0:
            channel_info = response['items'][0]
            data = {
                'YouTube Handle': channel_info['snippet']['title'],
                'YouTube Channel Link': f"https://www.youtube.com/channel/{channel_id}",
                'YouTube Subscribers': channel_info['statistics'].get('subscriberCount', '0'),
                'YouTube Views': channel_info['statistics'].get('viewCount', '0'),
                'YouTube Videos': channel_info['statistics'].get('videoCount', '0')
            }
            return data
        else:
            print(f"No data found for channel ID {channel_id}")
            return None

    except Exception as e:
        print(f"Error fetching data for channel ID {channel_id}: {e}")
        return None

from datetime import datetime

def calculate_content_frequency(channel_id):
    """Calculate content upload frequency based on the last several video upload dates."""
    try:
        request = youtube.activities().list(
            part='snippet,contentDetails',
            channelId=channel_id,
            maxResults=50
        )
        response = request.execute()

        upload_dates = []
        for item in response.get('items', []):
            if 'publishedAt' in item['snippet']:
                # Adjust the format to account for the timezone offset
                upload_date = datetime.strptime(item['snippet']['publishedAt'], '%Y-%m-%dT%H:%M:%S%z')
                upload_dates.append(upload_date)

        # Calculate average frequency in days
        if len(upload_dates) > 1:
            upload_dates.sort(reverse=True)
            intervals = [(upload_dates[i] - upload_dates[i+1]).days for i in range(len(upload_dates)-1)]
            avg_interval = sum(intervals) / len(intervals)
            return avg_interval
        else:
            return None

    except Exception as e:
        print(f"Error calculating content frequency for channel ID {channel_id}: {e}")
        return None


def get_most_popular_video(channel_id):
    """Get the most popular video (highest view count) for a given channel."""
    try:
        request = youtube.search().list(
            part='snippet',
            channelId=channel_id,
            maxResults=1,
            type='video',
            order='viewCount'
        )
        response = request.execute()

        if 'items' in response and len(response['items']) > 0:
            popular_video = response['items'][0]
            video_id = popular_video['id']['videoId']
            video_title = popular_video['snippet']['title']
            return f"{video_title} (https://www.youtube.com/watch?v={video_id})"
        else:
            return None

    except Exception as e:
        print(f"Error fetching most popular video for channel ID {channel_id}: {e}")
        return None

def compare_channels(channel_handles):
    """Compare channels by retrieving their predominant categories and engagement rates."""
    comparison_data = []

    for handle in channel_handles:
        # Step 1: Get the channel ID
        channel_id = get_channel_id(handle)
        if not channel_id:
            continue

        # Step 2: Get all video categories of the channel
        categories = get_all_video_categories(channel_id)
        if not categories:
            print(f"Unable to retrieve categories for channel {handle}.")
            continue

        # Step 3: Find the most common category
        if categories:
            most_common_category_id, _ = Counter(categories).most_common(1)[0]
            category_name = get_category_name(most_common_category_id)

            # Step 4: Get channel data
            channel_data = get_channel_data(channel_id)
            if channel_data:
                # Step 5: Calculate engagement rate
                views = int(channel_data['YouTube Views'])
                subscribers = int(channel_data['YouTube Subscribers'])
                engagement_rate = (views / subscribers) * 100 if subscribers > 0 else 0

                # Step 6: Calculate content frequency
                content_frequency = calculate_content_frequency(channel_id)

                # Step 7: Get most popular video
                most_popular_video = get_most_popular_video(channel_id)

                channel_data['Most Common Category'] = category_name
                channel_data['Engagement Rate (%)'] = round(engagement_rate, 2)
                if content_frequency:
                    channel_data['Content Frequency'] = f"New content uploaded every {round(content_frequency, 2)} days"
                else:
                    channel_data['Content Frequency'] = "Not enough data to determine frequency"
                channel_data['Most Popular Video'] = most_popular_video or "No popular video found"
                comparison_data.append(channel_data)

    # Step 8: Display data
    print(f"\nChannel Data:\n")
    for channel in comparison_data:
        print(f"Handle: {channel['YouTube Handle']}")
        print(f"Channel Link: {channel['YouTube Channel Link']}")
        print(f"Subscribers: {channel['YouTube Subscribers']}")
        print(f"Views: {channel['YouTube Views']}")
        print(f"Videos: {channel['YouTube Videos']}")
        print(f"Engagement Rate: {channel['Engagement Rate (%)']}%")
        print(f"Overall Content Category: {channel['Most Common Category']}")
        print(f"Content Frequency: {channel['Content Frequency']}")
        print(f"Most Popular Video: {channel['Most Popular Video']}")
        print("-" * 50)

# Example part 1
print("Example Part 1: \n")
channel_handles = ['KaraandNate']  # Enter handles
compare_channels(channel_handles)



#Part 2 - compares channels i enter

import matplotlib.pyplot as plt

def compare_multiple_channels_and_recommend_with_graph(channel_handles):
    """Compare multiple channels and visualize the comparison with graphs."""
    comparison_data = []

    for handle in channel_handles:
        # Step 1: Get the channel ID
        channel_id = get_channel_id(handle)
        if not channel_id:
            continue

        # Step 2: Get the channel data (views, subscribers, videos)
        channel_data = get_channel_data(channel_id)
        if channel_data:
            # Step 3: Calculate engagement rate
            views = int(channel_data['YouTube Views'])
            subscribers = int(channel_data['YouTube Subscribers'])
            engagement_rate = (views / subscribers) * 100 if subscribers > 0 else 0

            # Step 4: Add the channel data to the comparison list
            channel_data['Engagement Rate (%)'] = round(engagement_rate, 2)
            comparison_data.append(channel_data)

    # Step 5: Sort by engagement rate and views for recommendation
    comparison_data_sorted_by_engagement = sorted(comparison_data, key=lambda x: x['Engagement Rate (%)'], reverse=True)
    comparison_data_sorted_by_views = sorted(comparison_data, key=lambda x: int(x['YouTube Views']), reverse=True)

    # Step 6: Display comparison results
    print("\nChannel Comparison based on Subscribers:")
    for i, channel in enumerate(comparison_data_sorted_by_subscribers, 1):
        print(f"{i}. {channel['YouTube Handle']} - {channel['YouTube Subscribers']} Subscribers")

    print("\nChannel Comparison based on Views:")
    for i, channel in enumerate(comparison_data_sorted_by_views, 1):
        print(f"{i}. {channel['YouTube Handle']} - {channel['YouTube Views']} Views")

    print("\nChannel Comparison based on Engagement Rate:")
    for i, channel in enumerate(comparison_data_sorted_by_engagement, 1):
        print(f"{i}. {channel['YouTube Handle']} - {channel['Engagement Rate (%)']}% Engagement Rate")

    print("-" * 50)

    # Step 7: Recommendation
    best_by_engagement = comparison_data_sorted_by_engagement[0]
    best_by_views = comparison_data_sorted_by_views[0]

    # Step 8: Visualize the comparison using bar charts
    handles = [channel['YouTube Handle'] for channel in comparison_data]
    subscribers = [int(channel['YouTube Subscribers']) for channel in comparison_data]
    views = [int(channel['YouTube Views']) for channel in comparison_data]
    engagement_rates = [channel['Engagement Rate (%)'] for channel in comparison_data]

    # Plot Subscribers
    plt.figure(figsize=(10, 6))
    plt.bar(handles, subscribers, color='skyblue')
    plt.xlabel('Channel')
    plt.ylabel('Subscribers')
    plt.title('Comparison of YouTube Channels by Subscribers')
    plt.xticks(rotation=45)
    plt.show()

    # Plot Views
    plt.figure(figsize=(10, 6))
    plt.bar(handles, views, color='lightgreen')
    plt.xlabel('Channel')
    plt.ylabel('Views')
    plt.title('Comparison of YouTube Channels by Views')
    plt.xticks(rotation=45)
    plt.show()

    # Plot Engagement Rate
    plt.figure(figsize=(10, 6))
    plt.bar(handles, engagement_rates, color='salmon')
    plt.xlabel('Channel')
    plt.ylabel('Engagement Rate (%)')
    plt.title('Comparison of YouTube Channels by Engagement Rate')
    plt.xticks(rotation=45)
    plt.show()

    # Step 9: Print Recommendation
    print("\n**Recommendation**:")
    print(f"Best Channel by Engagement Rate: {best_by_engagement['YouTube Handle']} with an engagement rate of {best_by_engagement['Engagement Rate (%)']}%")
    print(f"Best Channel by Views: {best_by_views['YouTube Handle']} with {best_by_views['YouTube Views']} views")

# Example Part 2
print("Example Part 2: \n")
channel_handles = ['KaraandNate', 'EamonAndBec', 'CaseyNeistat']  # Enter multiple channel handles
compare_multiple_channels_and_recommend_with_graph(channel_handles)


#Part 3 - compares a channel I enter and compares it with the top channels of the same  overall content category
nnel_data(channel_id)
    if not entered_channel_data:
        print("Unable to retrieve data for entered channel.")
        return

    entered_channel_data['Category'] = category_name
    entered_channel_views = int(entered_channel_data['YouTube Views'])
    entered_channel_subscribers = int(entered_channel_data['YouTube Subscribers'])
    entered_channel_engagement_rate = (entered_channel_views / entered_channel_subscribers) * 100 if entered_channel_subscribers > 0 else 0

    # Step 4: Display the entered channel
def compare_channel_with_top_in_category_and_recommend_with_graph(handle):
    """Compare an entered channel with top channels in the same content category and recommend the best with graphs."""
    # Step 1: Get the entered channel ID and category
    channel_id = get_channel_id(handle)
    if not channel_id:
        return

    # Get all video categories of the channel
    categories = get_all_video_categories(channel_id)
    if not categories:
        print(f"Unable to retrieve categories for channel {handle}.")
        return

    # Find the most common category
    most_common_category_id, _ = Counter(categories).most_common(1)[0]
    category_name = get_category_name(most_common_category_id)

    # Step 2: Find top channels in the same category
    print(f"\nTop channels in the category: {category_name}")
    top_channels = find_top_channels_in_category(category_name)

    # Step 3: Compare the entered channel with the top channels
    entered_channel_data = get_chaand the top channels with their data
    print(f"\nEntered Channel: {entered_channel_data['YouTube Handle']}")
    print(f"Subscribers: {entered_channel_data['YouTube Subscribers']}")
    print(f"Views: {entered_channel_data['YouTube Views']}")
    print(f"Engagement Rate: {entered_channel_engagement_rate:.2f}%")

    # Step 5: Display top channels and visualize the data
    top_channel_data = []
    handles = [entered_channel_data['YouTube Handle']]
    subscribers = [entered_channel_data['YouTube Subscribers']]
    views = [entered_channel_data['YouTube Views']]
    engagement_rates = [entered_channel_engagement_rate]

    for channel in top_channels:
        top_channel_id = get_channel_id(channel)
        top_channel_data = get_channel_data(top_channel_id)
        if top_channel_data:
            top_channel_views = int(top_channel_data['YouTube Views'])
            top_channel_subscribers = int(top_channel_data['YouTube Subscribers'])
            top_channel_engagement_rate = (top_channel_views / top_channel_subscribers) * 100 if top_channel_subscribers > 0 else 0

            handles.append(top_channel_data['YouTube Handle'])
            subscribers.append(top_channel_data['YouTube Subscribers'])
            views.append(top_channel_data['YouTube Views'])
            engagement_rates.append(top_channel_engagement_rate)

            print(f"\nTop Channel: {top_channel_data['YouTube Handle']}")
            print(f"Subscribers: {top_channel_data['YouTube Subscribers']}")
            print(f"Views: {top_channel_data['YouTube Views']}")
            print(f"Engagement Rate: {top_channel_engagement_rate:.2f}%")

    # Step 6: Plot the graphs for comparison
    # Plot Subscribers
    plt.figure(figsize=(10, 6))
    plt.bar(handles, subscribers, color='skyblue')
    plt.xlabel('Channel')
    plt.ylabel('Subscribers')
    plt.title(f'Comparison of YouTube Channels in {category_name} by Subscribers')
    plt.xticks(rotation=45)
    plt.show()

    # Plot Views
    plt.figure(figsize=(10, 6))
    plt.bar(handles, views, color='lightgreen')
    plt.xlabel('Channel')
    plt.ylabel('Views')
    plt.title(f'Comparison of YouTube Channels in {category_name} by Views')
    plt.xticks(rotation=45)
    plt.show()

    # Plot Engagement Rate
    plt.figure(figsize=(10, 6))
    plt.bar(handles, engagement_rates, color='salmon')
    plt.xlabel('Channel')
    plt.ylabel('Engagement Rate (%)')
    plt.title(f'Comparison of YouTube Channels in {category_name} by Engagement Rate')
    plt.xticks(rotation=45)
    plt.show()

    # Step 7: Recommend the best channel
    best_channel = max(zip(handles, subscribers, views, engagement_rates), key=lambda x: x[3])
    print(f"\n**Best Channel in {category_name}**: {best_channel[0]} with an engagement rate of {best_channel[3]:.2f}%")

# Example Part 3
print("Example Part 3: \n")
handle = 'KaraandNate'  # Enter your channel handle
compare_channel_with_top_in_category_and_recommend_with_graph(handle)


#Part 4 - suggest a channel based on other channels you input that you like

def suggest_channel_based_on_others(channel_handles):
    """Suggest a channel based on the similarities with other channels you like."""
    channel_data_list = []
    for handle in channel_handles:
        channel_id = get_channel_id(handle)
        if not channel_id:
            continue

        categories = get_all_video_categories(channel_id)
        if not categories:
            print(f"Unable to retrieve categories for channel {handle}.")
            continue

        if categories:
            most_common_category_id, _ = Counter(categories).most_common(1)[0]
            category_name = get_category_name(most_common_category_id)

            # Get the channel's data
            channel_data = get_channel_data(channel_id)
            if channel_data:
                # Calculate engagement rate for the channel
                views = int(channel_data['YouTube Views'])
                subscribers = int(channel_data['YouTube Subscribers'])
                engagement_rate = (views / subscribers) * 100 if subscribers > 0 else 0

                # Calculate content frequency for the channel
                content_frequency = calculate_content_frequency(channel_id)

                # Get the most popular video
                most_popular_video = get_most_popular_video(channel_id)

                channel_data['Engagement Rate (%)'] = round(engagement_rate, 2)
                if content_frequency:
                    channel_data['Content Frequency'] = f"New content uploaded every {round(content_frequency, 2)} days"
                else:
                    channel_data['Content Frequency'] = "Not enough data to determine frequency"
                channel_data['Most Popular Video'] = most_popular_video or "No popular video found"

                # Append channel data to list
                channel_data_list.append({
                    'channel_data': channel_data,
                    'category_name': category_name
                })

    # Suggest channels based on similar categories and engagement
    if channel_data_list:
        print("\nSuggested Channels Based on Your Preferences:\n")
        for data in channel_data_list:
            print(f"Suggested Channel: {data['channel_data']['YouTube Handle']}")
            print(f"Category: {data['category_name']}")
            print(f"Subscribers: {data['channel_data']['YouTube Subscribers']}")
            print(f"Views: {data['channel_data']['YouTube Views']}")
            print(f"Videos: {data['channel_data']['YouTube Videos']}")
            print(f"Engagement Rate: {data['channel_data']['Engagement Rate (%)']}%")
            print(f"Content Frequency: {data['channel_data']['Content Frequency']}")
            print(f"Most Popular Video: {data['channel_data']['Most Popular Video']}")
            print("-" * 50)

# Example part 4
print("Example Part 4: \n")
suggest_channel_based_on_others(['KaraandNate', 'EamonandBec'])  # Replace with channels you like


Example Part 1: 


Channel Data:

Handle: Kara and Nate
Channel Link: https://www.youtube.com/channel/UC4ijq8Cg-8zQKx8OH12dUSw
Subscribers: 3990000
Views: 875051058
Videos: 985
Engagement Rate: 21931.1%
Overall Content Category: Travel & Events
Content Frequency: New content uploaded every 2.45 days
Most Popular Video: WE LIVED IN A $650,000 EARTHROAMER (full tour) (https://www.youtube.com/watch?v=GvvA12oTQ94)
--------------------------------------------------
Example Part 2: 


Channel Comparison based on Subscribers:


NameError: name 'comparison_data_sorted_by_subscribers' is not defined

##Second try - All parts sepretlly

###Just part 1:

In [1]:
#Part 1 - retrives the information of the chanle or chanels i enterd

from googleapiclient.discovery import build
from collections import Counter
from datetime import datetime

# Replace with your own API key
API_KEY = 'AIzaSyCXEhvGzLjh6IjRogjjJ3CJ2g4J9P64Yho'

# Initialize the YouTube API client
youtube = build('youtube', 'v3', developerKey=API_KEY)

def get_channel_id(handle):
    """Find the channel ID by channel handle using YouTube Search API."""
    try:
        request = youtube.search().list(
            part='snippet',
            q=handle,
            type='channel'
        )
        response = request.execute()

        if 'items' in response and len(response['items']) > 0:
            channel_info = response['items'][0]
            return channel_info['id']['channelId']
        else:
            print(f"No channel found for handle {handle}")
            return None

    except Exception as e:
        print(f"Error finding channel ID for handle {handle}: {e}")
        return None

def get_all_video_categories(channel_id, max_results=50):
    """Fetch a limited number of videos from the channel and return a list of categories."""
    categories = []
    try:
        request = youtube.search().list(
            part='snippet',
            channelId=channel_id,
            maxResults=max_results,
            type='video'
        )
        response = request.execute()

        for item in response['items']:
            video_id = item['id']['videoId']

            # Get the category ID for each video
            video_request = youtube.videos().list(
                part='snippet',
                id=video_id
            )
            video_response = video_request.execute()

            if 'items' in video_response and len(video_response['items']) > 0:
                category_id = video_response['items'][0]['snippet']['categoryId']
                categories.append(category_id)

    except Exception as e:
        print(f"Error fetching video categories for channel ID {channel_id}: {e}")

    return categories

def get_category_name(category_id):
    """Fetch the category name from the category ID."""
    try:
        request = youtube.videoCategories().list(
            part='snippet',
            id=category_id
        )
        response = request.execute()

        if 'items' in response and len(response['items']) > 0:
            return response['items'][0]['snippet']['title']
        else:
            return None
    except Exception as e:
        print(f"Error fetching category name for category ID {category_id}: {e}")
        return None

def get_channel_data(channel_id):
    """Fetch channel data from YouTube API."""
    try:
        request = youtube.channels().list(
            part='snippet,statistics,contentDetails',
            id=channel_id
        )
        response = request.execute()

        if 'items' in response and len(response['items']) > 0:
            channel_info = response['items'][0]
            data = {
                'YouTube Handle': channel_info['snippet']['title'],
                'YouTube Channel Link': f"https://www.youtube.com/channel/{channel_id}",
                'YouTube Subscribers': channel_info['statistics'].get('subscriberCount', '0'),
                'YouTube Views': channel_info['statistics'].get('viewCount', '0'),
                'YouTube Videos': channel_info['statistics'].get('videoCount', '0')
            }
            return data
        else:
            print(f"No data found for channel ID {channel_id}")
            return None

    except Exception as e:
        print(f"Error fetching data for channel ID {channel_id}: {e}")
        return None

from datetime import datetime

def calculate_content_frequency(channel_id):
    """Calculate content upload frequency based on the last several video upload dates."""
    try:
        request = youtube.activities().list(
            part='snippet,contentDetails',
            channelId=channel_id,
            maxResults=50
        )
        response = request.execute()

        upload_dates = []
        for item in response.get('items', []):
            if 'publishedAt' in item['snippet']:
                # Adjust the format to account for the timezone offset
                upload_date = datetime.strptime(item['snippet']['publishedAt'], '%Y-%m-%dT%H:%M:%S%z')
                upload_dates.append(upload_date)

        # Calculate average frequency in days
        if len(upload_dates) > 1:
            upload_dates.sort(reverse=True)
            intervals = [(upload_dates[i] - upload_dates[i+1]).days for i in range(len(upload_dates)-1)]
            avg_interval = sum(intervals) / len(intervals)
            return avg_interval
        else:
            return None

    except Exception as e:
        print(f"Error calculating content frequency for channel ID {channel_id}: {e}")
        return None


def get_most_popular_video(channel_id):
    """Get the most popular video (highest view count) for a given channel."""
    try:
        request = youtube.search().list(
            part='snippet',
            channelId=channel_id,
            maxResults=1,
            type='video',
            order='viewCount'
        )
        response = request.execute()

        if 'items' in response and len(response['items']) > 0:
            popular_video = response['items'][0]
            video_id = popular_video['id']['videoId']
            video_title = popular_video['snippet']['title']
            return f"{video_title} (https://www.youtube.com/watch?v={video_id})"
        else:
            return None

    except Exception as e:
        print(f"Error fetching most popular video for channel ID {channel_id}: {e}")
        return None

def compare_channels(channel_handles):
    """Compare channels by retrieving their predominant categories and engagement rates."""
    comparison_data = []

    for handle in channel_handles:
        # Step 1: Get the channel ID
        channel_id = get_channel_id(handle)
        if not channel_id:
            continue

        # Step 2: Get all video categories of the channel
        categories = get_all_video_categories(channel_id)
        if not categories:
            print(f"Unable to retrieve categories for channel {handle}.")
            continue

        # Step 3: Find the most common category
        if categories:
            most_common_category_id, _ = Counter(categories).most_common(1)[0]
            category_name = get_category_name(most_common_category_id)

            # Step 4: Get channel data
            channel_data = get_channel_data(channel_id)
            if channel_data:
                # Step 5: Calculate engagement rate
                views = int(channel_data['YouTube Views'])
                subscribers = int(channel_data['YouTube Subscribers'])
                engagement_rate = (views / subscribers) * 100 if subscribers > 0 else 0

                # Step 6: Calculate content frequency
                content_frequency = calculate_content_frequency(channel_id)

                # Step 7: Get most popular video
                most_popular_video = get_most_popular_video(channel_id)

                channel_data['Most Common Category'] = category_name
                channel_data['Engagement Rate (%)'] = round(engagement_rate, 2)
                if content_frequency:
                    channel_data['Content Frequency'] = f"New content uploaded every {round(content_frequency, 2)} days"
                else:
                    channel_data['Content Frequency'] = "Not enough data to determine frequency"
                channel_data['Most Popular Video'] = most_popular_video or "No popular video found"
                comparison_data.append(channel_data)

    # Step 8: Display data
    print(f"\nChannel Data:\n")
    for channel in comparison_data:
        print(f"Handle: {channel['YouTube Handle']}")
        print(f"Channel Link: {channel['YouTube Channel Link']}")
        print(f"Subscribers: {channel['YouTube Subscribers']}")
        print(f"Views: {channel['YouTube Views']}")
        print(f"Videos: {channel['YouTube Videos']}")
        print(f"Engagement Rate: {channel['Engagement Rate (%)']}%")
        print(f"Overall Content Category: {channel['Most Common Category']}")
        print(f"Content Frequency: {channel['Content Frequency']}")
        print(f"Most Popular Video: {channel['Most Popular Video']}")
        print("-" * 50)

# Example part 1
print("Example Part 1: \n")
channel_handles = ['KaraandNate']  # Enter handles
compare_channels(channel_handles)

Example Part 1: 


Channel Data:

Handle: Kara and Nate
Channel Link: https://www.youtube.com/channel/UC4ijq8Cg-8zQKx8OH12dUSw
Subscribers: 3990000
Views: 875352834
Videos: 985
Engagement Rate: 21938.67%
Overall Content Category: Travel & Events
Content Frequency: New content uploaded every 2.45 days
Most Popular Video: WE LIVED IN A $650,000 EARTHROAMER (full tour) (https://www.youtube.com/watch?v=GvvA12oTQ94)
--------------------------------------------------


###Just part 2

In [3]:
#Part 2 - compares channels i enter

import matplotlib.pyplot as plt
from googleapiclient.discovery import build
import locale

# Set locale for number formatting
locale.setlocale(locale.LC_ALL, 'en_US.UTF-8')

# Replace with your own API key
API_KEY = 'AIzaSyCXEhvGzLjh6IjRogjjJ3CJ2g4J9P64Yho'  # Replace with your actual API key

# Initialize the YouTube API client
youtube = build('youtube', 'v3', developerKey=API_KEY)

def get_channel_id(handle):
    """Retrieve the channel ID using the YouTube Data API."""
    try:
        # Try using forUsername to get the channel ID
        request = youtube.channels().list(part="id,snippet", forUsername=handle)
        response = request.execute()

        if 'items' in response and len(response['items']) > 0:
            return response['items'][0]['id']
        else:
            # If no result, search for the channel using the handle (search API)
            search_request = youtube.search().list(part="snippet", q=handle, type="channel")
            search_response = search_request.execute()

            if 'items' in search_response and len(search_response['items']) > 0:
                return search_response['items'][0]['id']['channelId']
            else:
                print(f"Error: Could not find channel ID for {handle}.")
                return None
    except Exception as e:
        print(f"Error retrieving channel ID for {handle}: {e}")
        return None

def get_channel_data(channel_id):
    """Retrieve channel data (subscribers, views, videos) using the YouTube Data API."""
    try:
        request = youtube.channels().list(part="statistics,snippet", id=channel_id)
        response = request.execute()

        if 'items' in response and len(response['items']) > 0:
            channel = response['items'][0]
            subscribers = int(channel['statistics'].get('subscriberCount', 0))
            views = int(channel['statistics'].get('viewCount', 0))
            videos = int(channel['statistics'].get('videoCount', 0))
            handle = channel['snippet']['title']  # Channel handle is stored in title for simplicity
            return {
                'YouTube Handle': handle,
                'YouTube Subscribers': subscribers,
                'YouTube Views': views,
                'YouTube Videos': videos
            }
        else:
            print(f"Error: Could not retrieve data for channel ID {channel_id}.")
            return None
    except Exception as e:
        print(f"Error retrieving data for channel ID {channel_id}: {e}")
        return None

def compare_multiple_channels_and_recommend_with_graph(channel_handles):
    """Compare multiple channels and visualize the comparison with graphs."""
    comparison_data = []

    for handle in channel_handles:
        # Step 1: Get the channel ID
        channel_id = get_channel_id(handle)
        if not channel_id:
            continue

        # Step 2: Get the channel data (views, subscribers, videos)
        channel_data = get_channel_data(channel_id)
        if channel_data:
            # Step 3: Calculate engagement rate
            views = channel_data['YouTube Views']
            subscribers = channel_data['YouTube Subscribers']
            # Avoid division by zero or unrealistic engagement rates
            engagement_rate = (views / subscribers) * 100 if subscribers > 0 else 0
            engagement_rate = min(engagement_rate, 100)  # Cap engagement rate at 100%

            # Step 4: Add the channel data to the comparison list
            channel_data['Engagement Rate (%)'] = round(engagement_rate, 2)
            comparison_data.append(channel_data)

    if not comparison_data:
        print("No valid data found for any channel.")
        return

    # Step 5: Sort by subscribers, views, and engagement rate
    comparison_data_sorted_by_subscribers = sorted(comparison_data, key=lambda x: x['YouTube Subscribers'], reverse=True)
    comparison_data_sorted_by_views = sorted(comparison_data, key=lambda x: x['YouTube Views'], reverse=True)
    comparison_data_sorted_by_engagement = sorted(comparison_data, key=lambda x: x['Engagement Rate (%)'], reverse=True)

    # Step 6: Display comparison results in desired format
    print("\nChannel Comparison based on Subscribers:")
    for i, channel in enumerate(comparison_data_sorted_by_subscribers, 1):
        print(f"{i}. {channel['YouTube Handle']} - {locale.format_string('%d', channel['YouTube Subscribers'], grouping=True)} Subscribers")

    print("\nChannel Comparison based on Views:")
    for i, channel in enumerate(comparison_data_sorted_by_views, 1):
        print(f"{i}. {channel['YouTube Handle']} - {locale.format_string('%d', channel['YouTube Views'], grouping=True)} Views")

    print("\nChannel Comparison based on Engagement Rate:")
    for i, channel in enumerate(comparison_data_sorted_by_engagement, 1):
        print(f"{i}. {channel['YouTube Handle']} - {channel['Engagement Rate (%)']}% Engagement Rate")

    print("-" * 50)

    # Step 7: Recommendation
    best_by_engagement = comparison_data_sorted_by_engagement[0]
    best_by_views = comparison_data_sorted_by_views[0]

    # Step 8: Print Recommendation
    print("\n**Recommendation**:")
    print(f"Best Channel by Engagement Rate: {best_by_engagement['YouTube Handle']} with an engagement rate of {best_by_engagement['Engagement Rate (%)']}%")
    print(f"Best Channel by Views: {best_by_views['YouTube Handle']} with {locale.format_string('%d', best_by_views['YouTube Views'], grouping=True)} views")

# Example Part 2
print("Example Part 2: \n")
channel_handles = ['KaraandNate', 'EamonAndBec', 'CaseyNeistat']  # Enter multiple channel handles
compare_multiple_channels_and_recommend_with_graph(channel_handles)


Example Part 2: 


Channel Comparison based on Subscribers:
1. CaseyNeistat - 12,600,000 Subscribers
2. Kara and Nate - 3,990,000 Subscribers
3. Eamon & Bec - 1,280,000 Subscribers

Channel Comparison based on Views:
1. CaseyNeistat - 3,207,563,747 Views
2. Kara and Nate - 875,352,834 Views
3. Eamon & Bec - 189,795,673 Views

Channel Comparison based on Engagement Rate:
1. Kara and Nate - 100% Engagement Rate
2. Eamon & Bec - 100% Engagement Rate
3. CaseyNeistat - 100% Engagement Rate
--------------------------------------------------

**Recommendation**:
Best Channel by Engagement Rate: Kara and Nate with an engagement rate of 100%
Best Channel by Views: CaseyNeistat with 3,207,563,747 views


###Just part 3

In [29]:
#Part 3 - recommends a new channels based on other channels you enter

from googleapiclient.discovery import build

# Replace with your own API key
API_KEY = 'AIzaSyCXEhvGzLjh6IjRogjjJ3CJ2g4J9P64Yho'  # Make sure to replace this with your actual API key

# Initialize the YouTube API client
youtube = build('youtube', 'v3', developerKey=API_KEY)

def get_channel_id(handle):
    """Retrieve the channel ID from the channel handle."""
    try:
        # Perform a search query with the handle to get channel ID
        request = youtube.search().list(
            part="snippet",
            q=handle,  # Searching by the channel handle (which can also be a custom name)
            type="channel"
        )
        response = request.execute()

        if 'items' in response and len(response['items']) > 0:
            return response['items'][0]['snippet']['channelId']
        else:
            print(f"Channel '{handle}' not found.")
            return None
    except Exception as e:
        print(f"Error retrieving channel ID for {handle}: {e}")
        return None

def get_channel_data(youtube, channel_id):
    """Retrieve channel data using the YouTube API."""
    try:
        request = youtube.channels().list(
            part="statistics,snippet",
            id=channel_id
        )
        response = request.execute()

        if 'items' in response and len(response['items']) > 0:
            channel_info = response['items'][0]
            channel_data = {
                'YouTube Handle': channel_info['snippet']['title'],
                'YouTube Subscribers': channel_info['statistics']['subscriberCount'],
                'YouTube Views': channel_info['statistics']['viewCount'],
                'YouTube Videos': channel_info['statistics']['videoCount'],
                'YouTube Link': f"https://www.youtube.com/channel/{channel_id}"
            }
            return channel_data
        else:
            print(f"Channel data for {channel_id} not found.")
            return None
    except Exception as e:
        print(f"Error retrieving channel data for {channel_id}: {e}")
        return None

def suggest_channel_based_on_others(channel_handles, youtube):
    """Suggest one new channel based on similarities with other channels you like."""
    suggested_channel = None

    # Get channel IDs for input channels
    input_channel_ids = []
    for handle in channel_handles:
        channel_id = get_channel_id(handle)
        if channel_id:
            input_channel_ids.append(channel_id)

    # Perform a search for similar channels by comparing categories (or content)
    for channel_id in input_channel_ids:
        # Get the channel's data
        channel_data = get_channel_data(youtube, channel_id)
        if channel_data:
            # Search for similar channels based on category
            search_request = youtube.search().list(
                part="snippet",
                type="channel",
                q=channel_data['YouTube Handle'],  # Search by channel name (or content)
                maxResults=10  # Search for 10 channels
            )
            search_response = search_request.execute()

            for item in search_response['items']:
                # Exclude the channels that were inputted
                if item['snippet']['channelId'] not in input_channel_ids:
                    suggested_channel = item['snippet']
                    break
            if suggested_channel:
                break

    if suggested_channel:
        print("\nSuggested Channel Based on Your Preferences:\n")
        print(f"Channel Name: {suggested_channel['title']}")
        print(f"Channel Link: https://www.youtube.com/channel/{suggested_channel['channelId']}")
    else:
        print("No new similar channels found.")

# Example usage
suggest_channel_based_on_others(["Kara and Nate", "Eamon and Bec"], youtube)



Suggested Channel Based on Your Preferences:

Channel Name: Daily Drop
Channel Link: https://www.youtube.com/channel/UCneNuMawbfqgXCew2EdZMcQ
