# YouTube Video Popularity Prediction Project

## Notebook 1: Data Loading and Storage

In this notebook, we will collect metadata for YouTube videos using the YouTube Data API.
We will then store the collected data in an S3 bucket.

In [3]:
# Import necessary libraries
import requests
import pandas as pd
from io import StringIO


In [7]:
# YouTube Data API v3 key (replace with your own API key)
API_KEY = ''

# Function to search for videos based on keywords
def search_videos_by_keyword(keyword, max_results=500):
    """
    Searches for videos on YouTube based on a given keyword.
    """
    url = f'https://www.googleapis.com/youtube/v3/search?part=snippet&type=video&q={keyword}&maxResults={max_results}&key={API_KEY}'
    response = requests.get(url)
    data = response.json()
    video_ids = [item['id']['videoId'] for item in data.get('items', [])]
    return video_ids

# Function to get video metadata
def get_video_metadata(video_id):
    """
    Fetches metadata for a given YouTube video ID using the YouTube Data API.
    """
    url = f'https://www.googleapis.com/youtube/v3/videos?part=snippet,statistics,contentDetails,status&id={video_id}&key={API_KEY}'
    response = requests.get(url)
    data = response.json()
    if 'items' in data and data['items']:
        item = data['items'][0]
        snippet = item['snippet']
        statistics = item['statistics']
        content_details = item['contentDetails']
        status = item['status']
        return {
            'video_id': video_id,
            'title': snippet['title'],
            'description': snippet['description'],
            'tags': snippet.get('tags', []),
            'published_at': snippet['publishedAt'],
            'channel_id': snippet['channelId'],
            'channel_title': snippet['channelTitle'],
            'category_id': snippet['categoryId'],
            'live_broadcast_content': snippet['liveBroadcastContent'],
            'default_audio_language': snippet.get('defaultAudioLanguage', 'N/A'),
            'view_count': int(statistics.get('viewCount', 0)),
            'like_count': int(statistics.get('likeCount', 0)),
            'dislike_count': int(statistics.get('dislikeCount', 0)),
            'comment_count': int(statistics.get('commentCount', 0)),
            'duration': content_details['duration'],
            'dimension': content_details['dimension'],
            'definition': content_details['definition'],
            'caption': content_details['caption'],
            'licensed_content': content_details['licensedContent'],
            'projection': content_details['projection'],
            'upload_status': status['uploadStatus'],
            'privacy_status': status['privacyStatus'],
            'license': status['license'],
            'embeddable': status['embeddable'],
            'public_stats_viewable': status['publicStatsViewable'],
        }
    return None

# Example keyword (replace with your own keyword)
keyword = 'Electric Scooters'

# Search for videos by keyword and collect metadata
video_ids = search_videos_by_keyword(keyword)
data = []
for video_id in video_ids:
    metadata = get_video_metadata(video_id)
    if metadata:
        data.append(metadata)

# Display collected metadata
data


[{'video_id': 'YuG_AFUxgMY',
  'title': 'TOP 10 BEST ELECTRIC SCOOTERS (2022 - 2023)',
  'description': "What's the best electric scooter? Electric scooters are becoming more and more popular as a mode of transportation, and with good reason: they're cheap to operate, easy to maintain, and perfect for short commutes. Here are the 10 best electric scooters money can buy. From foldable to heavy-duty, we've got you covered. \n\nHere is the list:\n\n-Kaabo Wolf Warrior X \n-Apollo City Pro\n-EMOVE Cruiser\n-Dualtron Ultra 2 UP\xa0\n-Kaabo Wolf King GT Pro\n-Weped FF\n-Segway GT2\n-Inokim Light 2\xa0\n-Turboant V8\n-Hiboy S2 \n\n#electricscooter #scooter #trendmax \n\n---------------------------------------------------------------------------\nAbout Us:\n\nTrend Max is an education and entertainment channel dedicated to creating interesting Tops, Lists and more. Do not miss a single video SUBSCRIBE NOW.",
  'tags': ['best electric scooters 2022',
   'best electric scooter',
   'electric sco

In [8]:
# Create DataFrame
df = pd.DataFrame(data)

# Save to CSV
csv_buffer = StringIO()
df.to_csv('youtube_metadata.csv')
df.to_csv(csv_buffer, index=False)
df

Unnamed: 0,video_id,title,description,tags,published_at,channel_id,channel_title,category_id,live_broadcast_content,default_audio_language,...,dimension,definition,caption,licensed_content,projection,upload_status,privacy_status,license,embeddable,public_stats_viewable
0,YuG_AFUxgMY,TOP 10 BEST ELECTRIC SCOOTERS (2022 - 2023),What's the best electric scooter? Electric sco...,"[best electric scooters 2022, best electric sc...",2022-11-18T16:59:02Z,UCta3MuD7omvsJyb-SMQkKOQ,Trend Max,28,none,en-US,...,2d,hd,False,True,rectangular,processed,public,youtube,True,True
1,HSCyHyA-pQ8,Xiaomi Mi 1S Electric Scooter TOP SPEED (Envir...,#topspeed DOWNHILL,[],2021-05-18T20:49:29Z,UC-nMZSmRgMSNWXLinEDxekg,ozandoganbey,22,none,zxx,...,2d,hd,False,True,rectangular,processed,public,youtube,True,True
2,JbpLjkhxHqs,Electric Scooter WEPED Sonic Burnout,WEPED Director Mr. Jeon\nWhatsApp : +821033333...,"[WEPED Sonic, WEPED, Tesla Cybertruck, Cyberfo...",2023-03-31T08:52:10Z,UCFr4tLw88OQC1CZnjRb3Jwg,WEPED 연제혁,2,none,en,...,2d,hd,False,True,rectangular,processed,public,youtube,True,True
3,DJ53WymRJLc,The CYBERTRUCK of Electric Scooters,,[],2024-06-17T23:17:46Z,UCkSVnUZq3q14lVajaQAlLpA,Nathan Espinoza,28,none,en,...,2d,hd,False,True,rectangular,processed,public,youtube,True,True
4,gPhmhVL-g1g,#shorts | No Kick Scooter | E MOTORAD LIL E,E MOTORAD LIL E NO KICK SCOOTER Detailed Revie...,"[New, Electric, E MOTORAD, No kick scooter]",2022-10-16T17:08:53Z,UCukVAyWJuBN-GSxn3ziOdkw,GEAR UPDATE,2,none,hi,...,2d,hd,False,True,rectangular,processed,public,youtube,True,True
5,L8ZlmMh2J-E,Folding the Segway-Ninebot E22 electric kick s...,,[],2022-08-30T14:59:39Z,UCsTp7_ZWPP63DeMOS8fir6Q,Scooterz Malaysia,22,none,,...,2d,hd,False,False,rectangular,processed,public,youtube,True,True
6,jazv7mRGnw0,【SuperFun S10pro】How to Install the Seat On th...,"【Folding Design】One-step folding system, with ...","[Electric Scooter, best electric scooter, good...",2024-07-22T13:00:45Z,UCcIrWrJmaIEFsCbzDJKm01Q,SuperFun,2,none,,...,2d,hd,False,False,rectangular,processed,public,youtube,True,True
7,Mcqgwf_IaAw,Best Electric Scooters Of India - Who Promises...,The electric age for us two-wheeler enthusiast...,"[ZigWheels, Zigwheels.com, Zigwheels review, b...",2023-06-05T14:45:01Z,UCjmjWp38PCg15Z5ZS-tmpfw,ZigWheels,2,none,en,...,2d,hd,False,True,rectangular,processed,public,youtube,True,True
8,8sk3svkpwRw,2022 Pure EV EPluto 7G Top Speed! Electric Sco...,,[],2022-04-28T15:37:54Z,UCJ2ZPQOCIpw-WsQS6gv1Tqg,AutoRevzz Hindi,2,none,,...,2d,hd,False,True,rectangular,processed,public,youtube,True,True
9,SNLUr8lF0c4,Best Electric Scooter for Commuters,"This scooter has a lock, handle, passcode, and...",[],2023-04-15T14:00:26Z,UCZZ4ivUawsXRktLKej8d6Xw,Mike O'Brien,28,none,en,...,2d,hd,False,True,rectangular,processed,public,youtube,True,True


In [4]:
#Load data
csv_buffer = StringIO()
df = pd.read_csv('youtube_metadata.csv')
df.to_csv(csv_buffer, index=False)

Create AWS account, create an s3 bucket called "electric-scooters"
Create an API key and add details in the following

In [6]:
import boto3

# Upload to S3
client = boto3.client(
    's3',
    aws_access_key_id="",
    aws_secret_access_key="",
)
client.put_object(Bucket='electric-scooters', Key='youtube_metadata.csv', Body=csv_buffer.getvalue())

print("Data successfully uploaded to S3")

Data successfully uploaded to S3
