## Introduction  

This project automates the tracking of comments and engagement metrics for an artist's music video. By extracting and organizing user-generated comments alongside key metrics such as views and likes, it enables real-time monitoring of audience reactions. This helps artists and brands analyze audience sentiment, identify trends in engagement, and optimize their content strategies to align with audience preferences.

## Objective  

The objective of this project is to create an automated cloud function that tracks comments and engagement metrics for an artist's music video. This function is triggered daily to extract the latest comments and metrics, refreshing a Google Sheet with up-to-date data. 

In [1]:
import requests

# HTTP endpoint of the deployed Cloud Function triggered by this cell
cloud_function_url = "https://us-central1-linear-bounty-441500-b2.cloudfunctions.net/katy_perry_comments"

def test_cloud_function():
    """Trigger the Cloud Function once and print the outcome.

    Sends an HTTP POST to ``cloud_function_url`` and prints either a success
    banner or the error status, followed by the response body. Network
    failures (connection errors, timeouts) are reported with a printed
    message instead of an unhandled traceback.
    """
    try:
        # requests waits forever unless a timeout is given. The values are
        # (connect, read) seconds; the read timeout is generous because the
        # function pages through every comment before it responds.
        response = requests.post(cloud_function_url, timeout=(10, 600))
    except requests.exceptions.RequestException as e:
        print("Error: request to cloud function failed:", e)
        return

    # Check the response status code
    if response.status_code == 200:
        print("\033[1;34mCloud function executed successfully!\033[0m")
    else:
        print("Error:", response.status_code)
    print("Response:", response.text)

# Call the function to test
test_cloud_function()

Cloud function executed successfully!
Response: Comments updated successfully in Google Sheets!


## Deconstructing the Functions

These functions automate the tracking and storage of YouTube video metrics and comments:

1. **Retrieve Channel and Video Information**: Gets the YouTube channel ID and retrieves the latest music video's title, views, and likes.

2. **Fetch Comments**: Extracts all top-level comments for the video, including text and timestamps, sorted in reverse chronological order.

3. **Store Data in Google Sheets**: Updates a Google Sheet with video metrics, comments, and metadata such as execution date and time.

### *Step 1*: Retrieve Channel ID and Video Information

In [2]:
def get_channel_id(api_key, username, timeout=30):
    """Find a channel ID by searching YouTube for ``username``.

    Calls the YouTube Data API ``search`` endpoint restricted to channels and
    returns the ID of the first result.

    Args:
        api_key: YouTube Data API key, sent as the ``key`` query parameter.
        username: Search text used to locate the channel.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The ``channelId`` of the first matching channel.

    Raises:
        ValueError: If the search returns no channels.
        requests.exceptions.RequestException: On network failure, timeout,
            or a non-success HTTP status.
    """
    try:
        logging.info(f"Fetching channel ID for username: {username}")
        url = "https://www.googleapis.com/youtube/v3/search"
        params = {"part": "snippet", "q": username, "type": "channel", "key": api_key}
        # Without a timeout a stalled connection would block the whole run.
        response = requests.get(url, params=params, timeout=timeout)
        response.raise_for_status()
        items = response.json().get("items", [])
        if not items:
            raise ValueError(f"Channel not found for username: {username}")
        channel_id = items[0]["snippet"]["channelId"]
        logging.info(f"Successfully retrieved channel ID: {channel_id}")
        return channel_id
    except Exception as e:
        # requests error messages embed the request URL, which carries the
        # API key as a query parameter; keep the secret out of the logs.
        detail = str(e).replace(api_key, "[REDACTED]") if api_key else str(e)
        logging.error(f"Error retrieving channel ID for {username}: {detail}")
        raise

def get_latest_music_video_info(api_key, channel_id, timeout=30):
    """Return the newest Music-category video of a channel with its metrics.

    Requests the channel's 10 most recent videos (newest first) from the
    YouTube Data API ``search`` endpoint and returns the first one that
    ``fetch_video_category`` reports as ``"Music"``.

    Args:
        api_key: YouTube Data API key, sent as the ``key`` query parameter.
        channel_id: ID of the channel whose uploads are searched.
        timeout: Seconds to wait for the search request before giving up.

    Returns:
        A tuple ``(video_id, title, view_count, like_count)``.

    Raises:
        ValueError: If none of the returned videos is in the Music category.
        requests.exceptions.RequestException: On network failure, timeout,
            or a non-success HTTP status.
    """
    try:
        logging.info(f"Fetching latest music video for channel ID: {channel_id}")
        url = "https://www.googleapis.com/youtube/v3/search"
        params = {"part": "snippet", "channelId": channel_id, "order": "date", "maxResults": 10, "type": "video", "key": api_key}
        # Without a timeout a stalled connection would block the whole run.
        response = requests.get(url, params=params, timeout=timeout)
        response.raise_for_status()

        # A response without "items" is treated as "no videos", matching how
        # get_channel_id reads search results, instead of raising KeyError.
        for item in response.json().get("items", []):
            video_id = item["id"]["videoId"]
            if fetch_video_category(api_key, video_id) != "Music":
                continue
            title = item["snippet"]["title"]
            stats = fetch_youtube_video_stats(api_key, video_id)
            logging.info(f"Found music video: {title} (ID: {video_id}) with {stats['viewCount']} views and {stats['likeCount']} likes.")
            return video_id, title, stats["viewCount"], stats["likeCount"]

        raise ValueError("No music videos found on the channel.")
    except Exception as e:
        # requests error messages embed the request URL, which carries the
        # API key as a query parameter; keep the secret out of the logs.
        detail = str(e).replace(api_key, "[REDACTED]") if api_key else str(e)
        logging.error(f"Error fetching latest music video for channel ID {channel_id}: {detail}")
        raise

### *Step 2*: Fetch Video Statistics and Category

In [3]:
def fetch_video_category(api_key, video_id, timeout=30):
    """Report whether a video belongs to the Music category.

    Reads the video's ``snippet.categoryId`` from the YouTube Data API
    ``videos`` endpoint.

    Args:
        api_key: YouTube Data API key, sent as the ``key`` query parameter.
        video_id: ID of the video to inspect.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        ``"Music"`` when the category ID is ``"10"``, otherwise ``None``.

    Raises:
        ValueError: If the API returns no entry for ``video_id``.
        requests.exceptions.RequestException: On network failure, timeout,
            or a non-success HTTP status.
    """
    try:
        logging.info(f"Fetching video category for video ID: {video_id}")
        url = "https://www.googleapis.com/youtube/v3/videos"
        params = {"part": "snippet", "id": video_id, "key": api_key}
        # Without a timeout a stalled connection would block the whole run.
        response = requests.get(url, params=params, timeout=timeout)
        response.raise_for_status()
        items = response.json().get("items", [])
        if not items:
            # Fail with a descriptive error rather than an opaque IndexError.
            raise ValueError(f"No video data returned for video ID: {video_id}")
        category_id = items[0]["snippet"]["categoryId"]
        logging.info(f"Video ID {video_id} belongs to category ID: {category_id}")
        return "Music" if category_id == "10" else None
    except Exception as e:
        # requests error messages embed the request URL, which carries the
        # API key as a query parameter; keep the secret out of the logs.
        detail = str(e).replace(api_key, "[REDACTED]") if api_key else str(e)
        logging.error(f"Error fetching video category for video ID {video_id}: {detail}")
        raise

def fetch_youtube_video_stats(api_key, video_id, timeout=30):
    """Fetch view and like counts for a video, falling back to zeros.

    Reads the ``statistics`` part from the YouTube Data API ``videos``
    endpoint. This is best-effort: any failure is logged and zero counts are
    returned instead of raising.

    Args:
        api_key: YouTube Data API key, sent as the ``key`` query parameter.
        video_id: ID of the video to inspect.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A dict ``{"viewCount": int, "likeCount": int}``. A counter missing
        from the response is reported as 0; on any error both are 0.
    """
    try:
        logging.info(f"Fetching video statistics for video ID: {video_id}")
        url = "https://www.googleapis.com/youtube/v3/videos"
        params = {"part": "statistics", "id": video_id, "key": api_key}
        # Without a timeout a stalled connection would block the whole run.
        response = requests.get(url, params=params, timeout=timeout)
        response.raise_for_status()
        stats = response.json()["items"][0]["statistics"]
        logging.info(f"Retrieved statistics: {stats}")
        return {"viewCount": int(stats.get("viewCount", 0)), "likeCount": int(stats.get("likeCount", 0))}
    except Exception as e:
        # requests error messages embed the request URL, which carries the
        # API key as a query parameter; keep the secret out of the logs.
        detail = str(e).replace(api_key, "[REDACTED]") if api_key else str(e)
        logging.error(f"Error fetching video statistics for video ID {video_id}: {detail}")
        return {"viewCount": 0, "likeCount": 0}


### *Step 3*: Fetch Comments

In [4]:
def fetch_all_comments(api_key, video_id, timeout=30):
    """Collect every top-level comment on a video, newest first.

    Pages through the YouTube Data API ``commentThreads`` endpoint, 100
    threads per request, following ``nextPageToken`` until no further page
    is offered.

    Args:
        api_key: YouTube Data API key, sent as the ``key`` query parameter.
        video_id: ID of the video whose comments are fetched.
        timeout: Seconds to wait for each page request before giving up.

    Returns:
        A list of dicts ``{"Comment": textDisplay, "Time": publishedAt}``,
        sorted by the ``"Time"`` string in descending order.

    Raises:
        requests.exceptions.RequestException: On network failure, timeout,
            or a non-success HTTP status.
    """
    try:
        logging.info(f"Fetching all comments for video ID: {video_id}")
        url = "https://www.googleapis.com/youtube/v3/commentThreads"
        params = {"part": "snippet", "videoId": video_id, "maxResults": 100, "order": "time", "key": api_key}
        comments = []
        while True:
            # Without a timeout a stalled connection would block the whole run.
            response = requests.get(url, params=params, timeout=timeout)
            response.raise_for_status()
            # Decode the body once per page; every .json() call re-parses it.
            page = response.json()
            for item in page.get("items", []):
                comment = item["snippet"]["topLevelComment"]["snippet"]
                comments.append({"Comment": comment["textDisplay"], "Time": comment["publishedAt"]})
            next_page_token = page.get("nextPageToken")
            if not next_page_token:
                break
            params["pageToken"] = next_page_token
        logging.info(f"Retrieved {len(comments)} comments for video ID: {video_id}")
        return sorted(comments, key=lambda x: x["Time"], reverse=True)
    except Exception as e:
        # requests error messages embed the request URL, which carries the
        # API key as a query parameter; keep the secret out of the logs.
        detail = str(e).replace(api_key, "[REDACTED]") if api_key else str(e)
        logging.error(f"Error fetching comments for video ID {video_id}: {detail}")
        raise


### *Step 4*: Store Data in Google Sheets

In [5]:
def store_comments_to_google_sheets(comments, video_title, views, likes):
    """Replace the first worksheet of ``SHEET_NAME`` with the latest data.

    Writes a summary header (title, views, likes, comment count, run date
    and run time) followed by one row per comment with its date and time.

    Args:
        comments: List of dicts with ``"Comment"`` (text) and ``"Time"``
            (ISO-8601 timestamp string; a trailing ``Z`` is accepted).
        video_title: Title shown in the summary header.
        views: View count shown in the summary header.
        likes: Like count shown in the summary header.

    Raises:
        Exception: Any authentication, formatting, or Sheets API error is
            logged and re-raised.
    """
    try:
        logging.info(f"Authenticating with Google Sheets API to update sheet: {SHEET_NAME}")
        creds = service_account.Credentials.from_service_account_file(
            SERVICE_ACCOUNT_FILE, 
            scopes=["https://www.googleapis.com/auth/spreadsheets", "https://www.googleapis.com/auth/drive"]
        )
        sheet = gspread.authorize(creds).open(SHEET_NAME).sheet1

        # Take one timestamp so the run date and run time always describe the
        # same instant, even when the run straddles midnight.
        run_at = datetime.now()
        rows = [
            [], ["Video Title:", video_title], ["Views:", views], ["Likes:", likes], 
            ["Number of Comments:", len(comments)], ["Run Date:", run_at.strftime('%Y-%m-%d')],
            ["Run Time:", run_at.strftime('%H:%M:%S')], [], ["", "", "Comment", "Date", "Time"]
        ]
        for comment in comments:
            # fromisoformat on older Python versions rejects a trailing "Z".
            timestamp = datetime.fromisoformat(comment["Time"].replace("Z", "+00:00"))
            rows.append(["", "", comment["Comment"], timestamp.strftime('%Y-%m-%d'), timestamp.strftime('%H:%M:%S')])

        # Clear only after every row has been built, so a malformed comment
        # cannot leave the sheet wiped with nothing to replace it.
        logging.info("Clearing existing data in the sheet")
        sheet.clear()
        logging.info(f"Updating Google Sheet with {len(comments)} comments")
        sheet.update(rows)
        logging.info("Google Sheet updated successfully.")
    except Exception as e:
        logging.error(f"Error updating Google Sheets: {e}")
        raise

### *Step 5*: Main Function

In [6]:
def main(request):
    """Cloud Function entry point: refresh the sheet with the latest data.

    Resolves the channel for ``CHANNEL_USERNAME``, finds its newest music
    video, fetches all of its top-level comments, and writes everything to
    Google Sheets.

    Args:
        request: The incoming HTTP request object; it is not read here.

    Returns:
        A success message string, or a tuple ``(error_message, 500)`` when
        any step fails.
    """
    try:
        logging.info("Starting cloud function execution")
        channel_id = get_channel_id(YOUTUBE_API_KEY, CHANNEL_USERNAME)
        video_id, video_title, views, likes = get_latest_music_video_info(YOUTUBE_API_KEY, channel_id)
        comments = fetch_all_comments(YOUTUBE_API_KEY, video_id)
        store_comments_to_google_sheets(comments, video_title, views, likes)
        success_message = "Comments updated successfully in Google Sheets!"
        logging.info(success_message)
        return success_message
    except Exception as e:
        # requests error messages embed the request URL, including the
        # ``key=<API key>`` query parameter. Strip the key before the text is
        # logged or sent back to the HTTP caller.
        detail = str(e)
        if YOUTUBE_API_KEY:
            detail = detail.replace(YOUTUBE_API_KEY, "[REDACTED]")
        logging.error(f"Error in function execution: {detail}")
        return f"Error: {detail}", 500