## Collecting and Analyzing a YouTube Channel's Video Details with the Google API

In [18]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [4]:
import os
from getpass import getpass

# Never store the API key in the notebook itself: anything saved here ends up in
# version control and in shared copies. The key is taken from the environment when
# it is already set; otherwise it is requested interactively without being echoed.
# NOTE(review): the key that used to be hardcoded here should be treated as leaked
# and revoked in the Google Cloud console.
if not os.environ.get("GOOGLE_API_KEY"):
    os.environ["GOOGLE_API_KEY"] = getpass("Enter your Google API key: ")

# Function to retrieve data such as title, views, likes, dislikes, and comment counts for the videos of the required channel

In [5]:
from googleapiclient.discovery import build
def get_channel_videos(api_key, channel_id, max_results=40):
    """Fetch title, publish date and public statistics for a channel's videos.

    The channel is searched for videos (following result pages when more than
    one page is needed), then the matching video resources are requested in
    batches instead of one request per video.

    Args:
        api_key: Developer key passed to the YouTube Data API client.
        channel_id: ID of the channel whose videos are listed.
        max_results: Maximum number of videos to return.

    Returns:
        A list of dicts with the keys "title", "published_at", "views",
        "likes", "dislikes" and "comments", in search-result order. Statistic
        values are passed through exactly as the API returns them; a statistic
        that is missing from the response falls back to the integer 0.
        Search hits for which no video resource is returned are skipped.
    """
    youtube = build("youtube", "v3", developerKey=api_key)

    # Largest page size for search.list and largest ID batch for videos.list.
    page_limit = 50

    # Step 1: collect video IDs, paging until enough have been gathered.
    video_ids = []
    page_token = None
    while len(video_ids) < max_results:
        search_args = {
            "part": "snippet",
            "channelId": channel_id,
            "maxResults": min(page_limit, max_results - len(video_ids)),
            "type": "video",
        }
        if page_token:
            search_args["pageToken"] = page_token
        search_page = youtube.search().list(**search_args).execute()

        page_items = search_page.get("items", [])
        for item in page_items:
            video_id = item.get("id", {}).get("videoId")
            if video_id:
                video_ids.append(video_id)

        page_token = search_page.get("nextPageToken")
        if not page_token or not page_items:
            break
    video_ids = video_ids[:max_results]

    # Step 2: look the videos up in batches (one request per batch of IDs).
    resources_by_id = {}
    for start in range(0, len(video_ids), page_limit):
        batch = video_ids[start:start + page_limit]
        videos_page = youtube.videos().list(
            part="snippet,statistics",
            id=",".join(batch),
        ).execute()
        for resource in videos_page.get("items", []):
            resources_by_id[resource["id"]] = resource

    # Step 3: emit rows in search order; the batch response order is not relied on.
    video_details = []
    for video_id in video_ids:
        resource = resources_by_id.get(video_id)
        if resource is None:
            # The video was listed by search but no resource came back for it.
            continue

        snippet = resource["snippet"]
        statistics = resource.get("statistics", {})
        video_details.append({
            "title": snippet["title"],
            "published_at": snippet["publishedAt"],
            "views": statistics.get("viewCount", 0),
            "likes": statistics.get("likeCount", 0),
            "dislikes": statistics.get("dislikeCount", 0),
            "comments": statistics.get("commentCount", 0),
        })

    return video_details

In [None]:
# "UCdKS_mDSLUkS6vDK6u1mjOg"  ---> Channel ID (Lost Frequencies music channel)

In [6]:
# Ask for the channel to analyse. input() already returns a string; stripping
# removes the stray spaces/newlines that come along when an ID is pasted.
channel_id = input('enter channel Id: ').strip()
if not channel_id:
    raise ValueError("A channel ID is required.")

# Index os.environ directly so a missing key fails here with a clear KeyError
# instead of sending None to the API client.
videos = get_channel_videos(os.environ["GOOGLE_API_KEY"], channel_id)

enter channel Id: UCdKS_mDSLUkS6vDK6u1mjOg


In [None]:
# The data extracted from the required channel is loaded into a pandas DataFrame

In [7]:
import pandas as pd
# One row per video, one column per collected field.
df = pd.DataFrame(videos)

# Show a preview only; the whole frame is not needed to confirm the load worked.
df.head(10)

  from pandas.core import (


Unnamed: 0,title,published_at,views,likes,dislikes,comments
0,1 BILLION STREAMS!!!! 🫶🏻 #lostfrequencies #e...,2024-03-13T15:20:55Z,402759,11372,0,301
1,These lyrics are me this summer 🙂‍↕️ #electron...,2024-07-02T14:22:19Z,28368,1753,0,95
2,Lost Frequencies & Netsky - Leave You In The P...,2023-11-10T09:01:38Z,110895,2433,0,61
3,Lost Frequencies ft. James Blunt - Melody (Off...,2018-04-28T13:19:59Z,157242079,797503,0,10220
4,Lost Frequencies ft. Calum Scott - Where Are Y...,2021-08-18T15:00:11Z,172445381,1061075,0,9372
5,Lost Frequencies & Tom Odell - Black Friday (P...,2024-07-19T07:00:07Z,4519674,49383,0,549
6,Lost Frequencies & David Kushner - In My Bones...,2024-05-15T06:00:06Z,5523789,41772,0,463
7,Lost Frequencies ft. The NGHBRS - Like I Love ...,2018-09-14T13:59:58Z,68834181,395295,0,4313
8,"Lost Frequencies, Elley Duhé, X Ambassadors - ...",2023-01-19T17:00:09Z,23344580,141396,0,1856
9,Lost Frequencies & James Arthur - Questions (O...,2022-06-16T17:00:11Z,13121855,99073,0,925


In [13]:
# Check the column dtypes and non-null counts of the collected video data.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 40 entries, 0 to 39
Data columns (total 6 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   title         40 non-null     object
 1   published_at  40 non-null     object
 2   views         40 non-null     object
 3   likes         40 non-null     object
 4   dislikes      40 non-null     int64 
 5   comments      40 non-null     object
dtypes: int64(1), object(5)
memory usage: 2.0+ KB


In [8]:
# Parse the publish timestamps into pandas datetimes.
df['published_at'] = pd.to_datetime(df['published_at'])

# Convert the count columns to integers. Casting to str first keeps this cell
# re-runnable (the .str accessor fails on a column that is already int) and copes
# with a column that mixes API strings with the integer 0 fallback.
for count_column in ('views', 'likes', 'comments'):
    df[count_column] = (
        df[count_column]
        .astype(str)
        .str.replace(',', '', regex=False)
        .astype(int)
    )


In [None]:
# Data processing for analysis purposes

In [16]:

def analyze_youtube_data(df):
    """Return a copy of ``df`` enriched with engagement and calendar columns.

    Added columns:
        engagement_rate: (likes + comments) / views, as a percentage.
        like_to_dislike_ratio: likes / dislikes.
        year, month, day_of_week: derived from ``published_at``.

    A zero denominator (no views, or no dislikes) produces NaN rather than
    ``inf``, so the affected rows drop out of means, rankings and histograms
    instead of dominating them.

    Args:
        df: DataFrame with numeric ``views``, ``likes``, ``comments`` and
            ``dislikes`` columns and a datetime ``published_at`` column.

    Returns:
        A new DataFrame; the input frame is left unmodified.
    """
    df_analysis = df.copy()

    # Mask zero denominators to NaN before dividing.
    views = df_analysis['views']
    dislikes = df_analysis['dislikes']
    safe_views = views.where(views != 0)
    safe_dislikes = dislikes.where(dislikes != 0)

    interactions = df_analysis['likes'] + df_analysis['comments']
    df_analysis['engagement_rate'] = (interactions / safe_views) * 100
    df_analysis['like_to_dislike_ratio'] = df_analysis['likes'] / safe_dislikes

    published = df_analysis['published_at'].dt
    df_analysis['year'] = published.year
    df_analysis['month'] = published.month
    df_analysis['day_of_week'] = published.day_name()

    return df_analysis

def generate_channel_insights(df):
    """Summarise channel-level totals and averages.

    Args:
        df: DataFrame with numeric ``views``, ``likes`` and ``comments`` columns.

    Returns:
        A dict with the video count, the total and mean of views, likes and
        comments, and ``avg_engagement_rate``: the mean of per-video
        (likes + comments) / views, as a percentage. Videos with zero views are
        left out of that mean, because dividing by zero would otherwise turn
        the whole average into ``inf``.
    """
    views = df['views']
    likes = df['likes']
    comments = df['comments']

    # Zero-view rows become NaN and are skipped by mean().
    per_video_engagement = (likes + comments) / views.where(views != 0)

    insights = {
        'total_videos': len(df),
        'total_views': views.sum(),
        'avg_views': views.mean(),
        'total_likes': likes.sum(),
        'avg_likes': likes.mean(),
        'total_comments': comments.sum(),
        'avg_comments': comments.mean(),
        'avg_engagement_rate': per_video_engagement.mean() * 100
    }
    return insights

def plot_views_over_time(df):
    """Plot each video's view count against its publication date.

    Rows are ordered by ``published_at`` before plotting so the connecting line
    runs forward in time; plotting in the frame's own row order makes the line
    jump back and forth between dates.

    Args:
        df: DataFrame with ``published_at`` and ``views`` columns.

    Returns:
        The ``plt`` module, with the new figure as the current figure.
    """
    timeline = df.sort_values('published_at')

    plt.figure(figsize=(15, 6))
    plt.plot(timeline['published_at'], timeline['views'], marker='o')
    plt.title('Views Trend Over Time')
    plt.xlabel('Publication Date')
    plt.ylabel('Views')
    plt.xticks(rotation=45)
    plt.grid(True)
    return plt

def plot_engagement_metrics(df):
    """Draw a 2x2 grid of histograms for views, likes, comments and engagement rate.

    Args:
        df: DataFrame with ``views``, ``likes``, ``comments`` and
            ``engagement_rate`` columns.

    Returns:
        The ``plt`` module, with the new figure as the current figure.
    """
    _figure, axes = plt.subplots(2, 2, figsize=(15, 10))

    # (grid position, column to plot, panel title), in drawing order.
    panels = (
        ((0, 0), 'views', 'Views Distribution'),
        ((0, 1), 'likes', 'Likes Distribution'),
        ((1, 0), 'comments', 'Comments Distribution'),
        ((1, 1), 'engagement_rate', 'Engagement Rate Distribution'),
    )
    for position, column, heading in panels:
        panel = axes[position]
        sns.histplot(data=df, x=column, ax=panel)
        panel.set_title(heading)

    plt.tight_layout()
    return plt

def analyze_best_performing_content(df):
    """Return the five leading videos for each performance metric.

    Args:
        df: DataFrame with ``title``, ``views``, ``likes``, ``comments``,
            ``engagement_rate`` and ``published_at`` columns.

    Returns:
        A dict mapping 'most_viewed', 'most_liked', 'most_commented' and
        'highest_engagement' to a DataFrame of the top five rows for that
        metric, restricted to the columns relevant to it.
    """
    # category -> (ranking column, columns shown in the result)
    rankings = {
        'most_viewed': ('views', ['title', 'views', 'published_at']),
        'most_liked': ('likes', ['title', 'likes', 'views']),
        'most_commented': ('comments', ['title', 'comments', 'views']),
        'highest_engagement': ('engagement_rate', ['title', 'engagement_rate', 'views']),
    }

    top_videos = {}
    for category, (metric, shown_columns) in rankings.items():
        leaders = df.nlargest(5, metric)
        top_videos[category] = leaders[shown_columns]
    return top_videos

def perform_time_analysis(df):
    """Summarise performance by weekday and by calendar month.

    Args:
        df: DataFrame with ``day_of_week``, ``month``, ``views`` and
            ``engagement_rate`` columns.

    Returns:
        A dict with:
            'avg_views_by_day': mean views per weekday, highest first.
            'avg_engagement_by_month': mean engagement rate per month.
            'videos_per_month': number of videos per month.
    """
    by_weekday = df.groupby('day_of_week')
    by_month = df.groupby('month')

    weekday_views = by_weekday['views'].mean()
    monthly_engagement = by_month['engagement_rate'].mean()
    monthly_counts = by_month.size()

    return {
        'avg_views_by_day': weekday_views.sort_values(ascending=False),
        'avg_engagement_by_month': monthly_engagement,
        'videos_per_month': monthly_counts,
    }

# Main analysis execution
def main():
    """Run the whole analysis on the notebook-level ``df`` and report the results.

    Prints the channel insights, saves the two figures to 'views_trend.png' and
    'engagement_metrics.png', then prints the top-video tables and the
    time-based summaries.
    """
    enriched = analyze_youtube_data(df)

    # Channel-level totals and averages.
    insights = generate_channel_insights(enriched)
    print("\nChannel Insights:")
    for key, value in insights.items():
        print(f"{key}: {value:,.2f}")

    # Each figure is drawn, written to disk and closed before the next one starts.
    figure_jobs = (
        (plot_views_over_time, 'views_trend.png'),
        (plot_engagement_metrics, 'engagement_metrics.png'),
    )
    for draw_figure, output_path in figure_jobs:
        draw_figure(enriched)
        plt.savefig(output_path)
        plt.close()

    # Leaders per metric.
    top_content = analyze_best_performing_content(enriched)
    print("\nTop Performing Videos:")
    for category in top_content:
        print(f"\n{category.replace('_', ' ').title()}:")
        print(top_content[category])

    # Weekday / month breakdowns.
    time_analysis = perform_time_analysis(enriched)
    print("\nTime-Based Analysis:")
    for key in time_analysis:
        print(f"\n{key}:")
        print(time_analysis[key])

In [17]:
# Run the full analysis when this code is executed directly.
if __name__ == "__main__":
    main()


Channel Insights:
total_videos: 40.00
total_views: 678,589,542.00
avg_views: 16,964,738.55
total_likes: 4,341,654.00
avg_likes: 108,541.35
total_comments: 51,413.00
avg_comments: 1,285.33
avg_engagement_rate: 1.76


  with pd.option_context('mode.use_inf_as_na', True):
  with pd.option_context('mode.use_inf_as_na', True):
  with pd.option_context('mode.use_inf_as_na', True):
  with pd.option_context('mode.use_inf_as_na', True):



Top Performing Videos:

Most Viewed:
                                                title      views  \
4   Lost Frequencies ft. Calum Scott - Where Are Y...  172445381   
3   Lost Frequencies ft. James Blunt - Melody (Off...  157242079   
11  Lost Frequencies & Zonderling - Crazy (Officia...  118467844   
7   Lost Frequencies ft. The NGHBRS - Like I Love ...   68834181   
16  Lost Frequencies & Mathieu Koss - Don't Leave ...   23600674   

                published_at  
4  2021-08-18 15:00:11+00:00  
3  2018-04-28 13:19:59+00:00  
11 2017-11-24 14:52:54+00:00  
7  2018-09-14 13:59:58+00:00  
16 2020-08-11 10:00:04+00:00  

Most Liked:
                                                title    likes      views
4   Lost Frequencies ft. Calum Scott - Where Are Y...  1061075  172445381
3   Lost Frequencies ft. James Blunt - Melody (Off...   797503  157242079
11  Lost Frequencies & Zonderling - Crazy (Officia...   666256  118467844
7   Lost Frequencies ft. The NGHBRS - Like I Love ...   39