In [5]:

import os
from datetime import datetime, timedelta, timezone

import isodate
import requests
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError




class YoutubeAPIService:
    """Small wrapper around the YouTube Data API v3.

    Video search and video lookups go through the ``googleapiclient`` service
    object; channel lookups are plain HTTP calls made with ``requests``.

    Parameters:
        api_key (str): API key sent with every request.
        base_url (str): Root URL of the REST API, used for the channel
            lookup. A falsy value falls back to ``DEFAULT_BASE_URL``.
    """

    # Root of the REST API, used when no base_url is supplied.
    DEFAULT_BASE_URL = 'https://www.googleapis.com/youtube/v3/'
    # Seconds to wait for the channel endpoint before giving up; without a
    # timeout ``requests`` can block indefinitely.
    REQUEST_TIMEOUT = 10

    def __init__(self, api_key, base_url):
        self.api_key = api_key
        self.api_service = self.get_api_service()
        self.base_url = base_url

    def get_api_service(self):
        """Build the ``googleapiclient`` service object for YouTube v3."""
        return build('youtube', 'v3', developerKey=self.api_key)

    @staticmethod
    def _to_rfc3339(moment):
        """Format a date/datetime as ``YYYY-MM-DDTHH:MM:SSZ``.

        Timezone-aware values are converted to UTC first so that the literal
        ``Z`` suffix is truthful; naive values are formatted as they are.
        """
        has_tz = getattr(moment, 'tzinfo', None) is not None
        if has_tz and moment.utcoffset() is not None:
            moment = moment.astimezone(timezone.utc)
        return moment.strftime('%Y-%m-%dT%H:%M:%SZ')

    def get_channel_info(self, channel_id: str):
        """
        Fetch channel information for a given channel ID.

        Parameters:
            channel_id (str): The YouTube channel ID.

        Returns:
            dict: The first channel item (``snippet`` and ``statistics``
            parts), or None if the request fails, the response is not valid
            JSON, or no channel is returned.
        """
        params = {
            'key': self.api_key,
            'id': channel_id,
            'part': 'snippet,statistics'
        }
        root_url = (self.base_url or self.DEFAULT_BASE_URL).rstrip('/')
        try:
            response = requests.get(
                f'{root_url}/channels',
                params=params,
                timeout=self.REQUEST_TIMEOUT,
            )
            if response.status_code != 200:
                return None
            items = response.json().get('items') or []
        except (requests.RequestException, ValueError) as e:
            # Network failure, timeout or malformed body: honour the
            # documented "None on failure" contract instead of raising.
            print(f'An error occurred: {e}')
            return None
        return items[0] if items else None

    def search_videos(self, keywords, start_date, end_date, max_results=5):
        """Search videos matching ``keywords`` published inside a time window.

        Parameters:
            keywords (str): Search query.
            start_date: Lower bound of the publication window (date/datetime).
            end_date: Upper bound of the publication window (date/datetime).
            max_results (int): Maximum number of search results requested.

        Returns:
            list[dict]: One ``get_video_info`` record per video found. Videos
            whose details could not be fetched are skipped. An empty list is
            returned when the search request itself fails.
        """
        try:
            search_response = self.api_service.search().list(
                q=keywords,
                type='video',
                part='id,snippet',
                publishedAfter=self._to_rfc3339(start_date),
                publishedBefore=self._to_rfc3339(end_date),
                maxResults=max_results
            ).execute()
        except HttpError as e:
            print(f'An error occurred: {e}')
            return []

        videos = []
        for search_result in search_response.get('items', []):
            video_id = search_result.get('id', {}).get('videoId')
            if not video_id:
                continue
            video_info = self.get_video_info(video_id, update_task=False)
            # get_video_info returns None when the lookup fails; keep such
            # placeholders out of the result so callers get only records.
            if video_info is not None:
                videos.append(video_info)
        return videos

    def get_video_info(self, video_id, update_task):
        """Fetch statistics (and optionally channel metadata) for one video.

        Parameters:
            video_id (str): The YouTube video ID.
            update_task (bool): When true, return only the statistics record
                (``video_id``, ``views``, ``likeCount``, ``CommentCount``).
                Otherwise the record is extended with channel details, the
                duration in hours, the title and the publication timestamp.

        Returns:
            dict | None: The record described above, or None when the API
            call fails or returns no item for ``video_id``.
        """
        try:
            video_response = self.api_service.videos().list(
                part='snippet,statistics,contentDetails',
                id=video_id
            ).execute()
        except HttpError as e:
            print(f'An error occurred: {e}')
            return None

        items = video_response.get('items') or []
        if not items:
            return None

        video_info = items[0]
        statistics = video_info.get('statistics', {})
        stats_info = {
            'video_id': video_info['id'],
            # Any counter may be missing from the response; default to 0
            # like the like/comment counters do.
            'views': statistics.get('viewCount', 0),
            'likeCount': statistics.get('likeCount', 0),
            'CommentCount': statistics.get('commentCount', 0)
        }
        if update_task:
            return stats_info

        # Duration is an ISO 8601 string; it is stored in hours.
        duration_iso = video_info['contentDetails']['duration']
        video_duration = round(isodate.parse_duration(duration_iso).total_seconds() / 3600, 5)

        snippet = video_info['snippet']
        channel_id = snippet['channelId']
        # The channel lookup may fail (None); degrade to empty channel fields
        # instead of raising on a None subscript.
        channel_info = self.get_channel_info(channel_id) or {}
        channel_snippet = channel_info.get('snippet', {})
        # subscriberCount can be absent (e.g. channels hiding the figure).
        subscribers = channel_info.get('statistics', {}).get('subscriberCount')

        additional_info = {
            'channel_subscribers': int(subscribers) if subscribers is not None else None,
            'channel_tracker': channel_id,
            'channel_name': channel_snippet.get('title'),
            'channel_country': channel_snippet.get('country', 'world'),
            'channel_description': channel_snippet.get('description', None),
            'duration': video_duration,
            'title': snippet['title'],
            'publishedAt': snippet['publishedAt']
        }
        return {**stats_info, **additional_info}

In [6]:
# Example usage.
# The API key is read from the YOUTUBE_API_KEY environment variable so that it
# is never stored in (or leaked through) the notebook. A key that was ever
# committed in plain text should be treated as compromised and revoked.
api_key = os.environ['YOUTUBE_API_KEY']
youtube_service = YoutubeAPIService(api_key, 'https://www.googleapis.com/youtube/v3/')

# Example: search for videos published between two UTC datetimes.
from datetime import date, datetime, timezone
retrieve_from_datetime = datetime(2023, 12, 1, 0, 0, 0, tzinfo=timezone.utc)
retrieve_to_datetime = datetime(2023, 12, 10, 0, 0, 0, tzinfo=timezone.utc)
videos = youtube_service.search_videos('hugo decrypte', retrieve_from_datetime, retrieve_to_datetime, 1)

In [7]:
# Display the raw records returned by search_videos in the previous cell.
videos

[{'video_id': 'TvvGptmjmuM',
  'views': '235181',
  'likeCount': '7187',
  'CommentCount': '787',
  'channel_subscribers': 2640000,
  'channel_tracker': 'UCAcAnMF0OrCtUep3Y4M-ZPw',
  'channel_name': 'HugoDécrypte - Actus du jour',
  'channel_country': 'FR',
  'channel_description': "Chaque jour du lundi au vendredi, sur cette chaîne, un résumé de l'actualité du jour. Bienvenue. 👋",
  'duration': 0.2075,
  'title': 'La réalité sur les menaces qui ciblent la France',
  'publishedAt': '2023-12-04T21:00:12Z'}]

In [8]:
import pandas as pd

# Tabular view of the search results: one row per video record.
pd.DataFrame(data=videos)

Unnamed: 0,video_id,views,likeCount,CommentCount,channel_subscribers,channel_tracker,channel_name,channel_country,channel_description,duration,title,publishedAt
0,TvvGptmjmuM,235181,7187,787,2640000,UCAcAnMF0OrCtUep3Y4M-ZPw,HugoDécrypte - Actus du jour,FR,"Chaque jour du lundi au vendredi, sur cette ch...",0.2075,La réalité sur les menaces qui ciblent la France,2023-12-04T21:00:12Z


In [9]:
# Name the export after the window that was actually queried. Deriving the
# name from datetime.now() labelled the file with a date range unrelated to
# the data it contains and produced a different name on every run.
start_datetime = retrieve_from_datetime
end_datetime = retrieve_to_datetime
file_name = (
    f'{start_datetime.strftime("%Y-%m-%d")}_'
    f'{end_datetime.strftime("%Y-%m-%d")}_videos.csv'
)
pd.DataFrame(videos).to_csv(file_name, index=False)

In [10]:


# Project label.
# NOTE(review): `project` is not referenced by any other cell visible in this
# notebook (the upload cell uses `project_name`) — confirm it is still needed.
project = 'My Fist Project'




# Disabled credential loading, kept for reference. The upload cell expects a
# `credentials` object, which stays undefined while these lines are commented out.
#from google.oauth2 import service_account
#credentials = service_account.Credentials.from_service_account_file('../epita-google-cloud-ytb-app/credentials.json')




In [11]:
# Upload the CSV written by the export cell to the Cloud Storage bucket.
project_name = 'vocal-eon-416516'
bucket_name = 'test-youtube2'
# Reuse the name produced by the export cell instead of a hardcoded,
# date-specific file name that only exists on the day it was generated.
source_file_name = file_name
destination_blob_name = source_file_name

# TODO(review): `upload_blob` and `credentials` are not defined in any cell
# visible in this notebook, so this call raises NameError on a fresh kernel;
# define or import both before running this cell.
upload_blob(bucket_name, source_file_name, destination_blob_name, project_name, credentials)






NameError: name 'upload_blob' is not defined