cosa raccogliere:
- video_id
- titolo
- descrizione
- data
- statistiche: visualizzazioni, like, commenti, durata...
  (nota: dislikeCount non è più disponibile pubblicamente tramite l'API da dicembre 2021)

commenti:
- nome utente
- testo commento
- data commento

In [None]:
#!pip install google-api-python-client

# Codice finale

In [None]:
from googleapiclient.discovery import build
import json
import time

In [None]:
api_key =  ''    # insert your own YouTube Data API key
max_post = 600  # upper bound on the number of videos collected per channel
#max_comment = 1000
# Shared YouTube Data API v3 client; the helper functions below use it as a global.
youtube = build('youtube', 'v3', developerKey=api_key)

In [None]:
# Maps YouTube video category IDs (as returned in snippet.categoryId, i.e.
# strings) to their human-readable names. IDs 23 and 34 are both "Comedy".
CATEGORY_MAPPING = {
    "1": "Film & Animation",
    "2": "Autos & Vehicles",
    "10": "Music",
    "15": "Pets & Animals",
    "17": "Sports",
    "18": "Short Movies",
    "19": "Travel & Events",
    "20": "Gaming",
    "21": "Videoblogging",
    "22": "People & Blogs",
    "23": "Comedy",
    "24": "Entertainment",
    "25": "News & Politics",
    "26": "Howto & Style",
    "27": "Education",
    "28": "Science & Technology",
    "29": "Nonprofits & Activism",  # was missing: such videos ended up as "Unknown"
    "30": "Movies",
    "31": "Anime/Animation",
    "32": "Action/Adventure",
    "33": "Classics",
    "34": "Comedy",
    "35": "Documentary",
    "36": "Drama",
    "37": "Family",
    "38": "Foreign",
    "39": "Horror",
    "40": "Sci-Fi/Fantasy",
    "41": "Thriller",
    "42": "Shorts",
    "43": "Shows",
    "44": "Trailers",
}

In [None]:
#VARIE FUNZIONI
def get_channel_id_from_handle(handle):
    """Return the channel ID of the first channel matching *handle*.

    The lookup is a channel search (``search().list`` with ``type='channel'``
    and ``maxResults=1``) using *handle* as the query string, so the result is
    the top search hit for that text.

    Args:
        handle: Text to search for (the channel handle or name).

    Returns:
        The ``channelId`` string of the first search result.

    Raises:
        IndexError: If the search returns no channel.
    """
    res = youtube.search().list(
        part='snippet',
        q=handle,
        type='channel',
        maxResults=1
    ).execute()

    items = res.get('items') or []
    if not items:
        # Same exception type the bare [0] lookup used to raise, but with a
        # message that says what actually went wrong.
        raise IndexError(f"Nessun canale trovato per l'handle {handle!r}")
    return items[0]['snippet']['channelId']

def get_channel_stats(channel_id):
    """Fetch the title and subscriber count of a channel.

    Args:
        channel_id: The YouTube channel ID to look up.

    Returns:
        A dict with keys ``"channel_title"`` (str) and ``"subscribers"``
        (int, 0 when the API response carries no ``subscriberCount``), or
        ``None`` when the response contains no channel.
    """
    res = youtube.channels().list(
        part="snippet,statistics",
        id=channel_id
    ).execute()

    # Use .get(): a response without an 'items' key must take the same
    # "not found" path as an empty list instead of raising KeyError.
    items = res.get('items')
    if not items:
        return None

    item = items[0]
    return {
        "channel_title": item['snippet']['title'],
        "subscribers": int(item['statistics'].get('subscriberCount', 0))
    }


def get_video_ids_from_channel(channel_id, max_post):
    """Collect up to *max_post* video IDs from a channel, ordered by date.

    Pages through ``search().list`` results (50 per page, ``order='date'``,
    ``type='video'``) until enough IDs are gathered or there are no more pages.

    Args:
        channel_id: The YouTube channel ID whose videos are listed.
        max_post: Maximum number of video IDs to return.

    Returns:
        A list of video ID strings, at most *max_post* long.
    """
    # Parameters that stay the same for every page request.
    fixed_params = dict(
        part='id',
        channelId=channel_id,
        maxResults=50,
        order='date',
        type='video',
    )

    collected = []
    page_token = None
    while len(collected) < max_post:
        page = youtube.search().list(pageToken=page_token, **fixed_params).execute()

        for entry in page['items']:
            if len(collected) >= max_post:
                break
            collected.append(entry['id']['videoId'])

        page_token = page.get('nextPageToken')
        if not page_token:
            # Last page reached.
            break

    return collected

def get_video_info(video_id):
    """Fetch metadata and statistics for a single video.

    Args:
        video_id: The YouTube video ID to look up.

    Returns:
        A dict with the keys ``channel_name``, ``video_id``, ``title``,
        ``description``, ``published_at``, ``views``, ``likes``,
        ``comments_count``, ``duration``, ``tags`` and ``categoryId``, or
        ``None`` when the API returns no item for the ID. The statistics
        values are passed through as returned by the API (``None`` when a
        counter is absent); ``tags`` defaults to an empty list.
    """
    response = youtube.videos().list(
        part="snippet,contentDetails,statistics",
        id=video_id
    ).execute()

    if not response['items']:
        return None

    video = response['items'][0]
    snippet = video['snippet']

    # Built step by step so the fields are read in a fixed order.
    info = {
        "channel_name": snippet['channelTitle'],
        "video_id": video_id,
        "title": snippet['title'],
        "description": snippet['description'],
        "published_at": snippet['publishedAt'],
    }

    stats = video['statistics']
    info["views"] = stats.get('viewCount')
    info["likes"] = stats.get('likeCount')
    info["comments_count"] = stats.get('commentCount')

    info["duration"] = video['contentDetails']['duration']
    info["tags"] = snippet.get('tags', [])
    info["categoryId"] = snippet.get('categoryId')
    return info
    
def get_comments(video_id, total_expected_comments):
    """Download unique top-level comments of a video up to a computed target.

    The target is 1000 when *total_expected_comments* is ``None``, the full
    count when it is below 1000, and 55% of the count otherwise. Comment
    threads are fetched 100 per page with a 0.3 s pause between pages.

    Args:
        video_id: The YouTube video ID whose comments are fetched.
        total_expected_comments: Expected number of comments (int, numeric
            string, or ``None`` when unknown).

    Returns:
        A list of dicts with keys ``"author"``, ``"text"`` and
        ``"published_at"``, or ``None`` if any error occurs while fetching
        (including disabled comments); comments gathered before the error are
        discarded in that case.
    """
    # Decide how many comments to aim for.
    if total_expected_comments is None:
        comment_target = 1000
    else:
        expected = int(total_expected_comments)
        comment_target = expected if expected < 1000 else int(expected * 0.55)

    collected = []
    seen_keys = set()
    page_token = None

    while len(collected) < comment_target:
        try:
            page = youtube.commentThreads().list(
                part="snippet",
                videoId=video_id,
                maxResults=100,
                textFormat="plainText",
                pageToken=page_token
            ).execute()

            for thread in page['items']:
                top = thread['snippet']['topLevelComment']['snippet']
                record = {
                    "author": top['authorDisplayName'],
                    "text": top['textDisplay'],
                    "published_at": top['publishedAt']
                }

                # (author, text, timestamp) identifies a comment for de-duplication.
                dedup_key = (record["author"], record["text"], record["published_at"])
                if dedup_key in seen_keys:
                    continue

                seen_keys.add(dedup_key)
                collected.append(record)
                if len(collected) >= comment_target:
                    break

            page_token = page.get('nextPageToken')
            if not page_token:
                break
            time.sleep(0.3)

        except Exception as err:
            # Either way the video yields no comments; only the message differs.
            if "commentsDisabled" in str(err):
                print(f"Commenti disabilitati per il video {video_id}")
            else:
                print(f"Errore nei commenti per {video_id}: {err}")
            return None

    return collected


In [None]:
channel_handle = 'esempio_canale'   # ENTER THE CHANNEL NAME - the part after the @

In [None]:
import os

# Resolve the channel ID with a channel search on the handle (first hit wins).
request = youtube.search().list(
    part="snippet",
    q=channel_handle,  # may also be the handle
    type="channel",
    maxResults=1
)
response = request.execute()
channel_hits = response.get('items') or []
if not channel_hits:
    # Fail with a clear message instead of an opaque IndexError on [0].
    raise ValueError(f"Nessun canale trovato per '{channel_handle}'")
channel_id = channel_hits[0]['snippet']['channelId']
channel_stats = get_channel_stats(channel_id)
subscriber_count = channel_stats["subscribers"] if channel_stats else 0


video_ids = get_video_ids_from_channel(channel_id, max_post)

output_json = {}
# Reset on every run: it is only set once a video is actually kept, and a
# stale value from a previous execution must never name the output file.
channel_name = None

for vid in video_ids:
    try:
        info = get_video_info(vid)
        if not info:
            continue

        comments = get_comments(vid, info["comments_count"])

        # Skip videos whose comments are disabled, failed to download, or are empty.
        if not comments:
            print(f"Video {vid} ignorato: nessun commento disponibile.")
            continue

        channel_name = info["channel_name"]  # taken from the video itself, not from the handle
        if channel_name not in output_json:
            output_json[channel_name] = {
                "subscribers": subscriber_count,
                "posts": {}
            }

        # Posts are keyed by their insertion index, as a string.
        post_index = str(len(output_json[channel_name]["posts"]))
        category_name = CATEGORY_MAPPING.get(info['categoryId'], "Unknown")

        output_json[channel_name]["posts"][post_index] = {
            "title": info["title"],
            "description": info["description"],
            "post_id": info["video_id"],
            "data": info["published_at"],
            "number_comments": int(info["comments_count"]) if info["comments_count"] else 0,
            "number_likes": int(info["likes"]) if info["likes"] else 0,
            "views": int(info["views"]) if info["views"] else 0,
            "tags": info["tags"],
            "category": category_name,
            "interactions_post": comments
        }

    except Exception as e:
        # Best effort: one failing video must not abort the whole collection.
        print(f"Errore con video {vid}: {e}")

# Save JSON
path = r" "  # OUTPUT DIRECTORY (leave blank to save in the current working directory)
if channel_name is None:
    # No video was kept, so there is neither data nor a channel name to save under.
    print("Nessun video con commenti disponibili: nessun file salvato.")
else:
    # Replace characters that are not allowed in file names on common file
    # systems, since the name comes from the channel title.
    file_name = "".join("_" if ch in '\\/:*?"<>|' else ch for ch in channel_name)
    output_path = os.path.join(path.strip(), file_name + ".json")
    with open(output_path, "w", encoding='utf-8') as f:
        json.dump(output_json, f, ensure_ascii=False, indent=2)

    print(f"Dati salvati in {output_path}")