In [11]:
import os
from dotenv import load_dotenv
from googleapiclient.discovery import build

load_dotenv() # Load the .env file so the API keys stored there can be read from the environment.

# API key and channel name
api_key = os.getenv("YOUTUBE_API_KEY")
channel_name = 'TelekomHilfe-Videos'

def get_channel_id(api_key, channel_name):
    """Search YouTube for a channel by name and return the top hit's channel ID.

    Args:
        api_key: Developer key passed to the YouTube Data API v3 client.
        channel_name: Free-text query used for the channel search.

    Returns:
        The ``channelId`` of the first search result, or ``None`` when the
        search returns no items.
    """
    client = build('youtube', 'v3', developerKey=api_key)

    search_response = client.search().list(
        part='snippet',
        q=channel_name,
        type='channel',
        maxResults=1
    ).execute()

    matches = search_response['items']
    if not matches:
        return None

    # Only one result is requested, so the first item is the one to use.
    return matches[0]['id']['channelId']

# Look up the channel ID for the configured channel name and show it
# (prints "Channel ID: None" when the search returned no items).
channel_id = get_channel_id(api_key, channel_name)
print(f'Channel ID: {channel_id}')


Channel ID: UC0YoG8q-PWNERhe_h5l8uPw


## YouTube videolarında transkript varsa bunları youtube-transcript-api üzerinden alıyor; ses üzerinden ilerlemiyor.

In [28]:
import os
import googleapiclient.discovery
from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api.formatters import SRTFormatter
from pathlib import Path

# Enter your API key and the channel ID
API_KEY = os.getenv("YOUTUBE_API_KEY")
CHANNEL_ID = 'UC0YoG8q-PWNERhe_h5l8uPw'
# Directory that receives one .srt file per video
OUTPUT_DIR = 'transcripts'
# Text file listing which videos did / did not yield a transcript
SUMMARY_FILE = 'summary.txt'

def get_youtube_service():
    """Create a YouTube Data API v3 client that authenticates with API_KEY."""
    service = googleapiclient.discovery.build(
        'youtube',
        'v3',
        developerKey=API_KEY,
    )
    return service

def get_channel_video_ids(youtube, channel_id):
    """Collect the ID and title of every item in a channel's uploads playlist.

    The playlist ID is derived by replacing the first two characters of
    ``channel_id`` with ``'UU'`` (the channel's uploads playlist).

    Args:
        youtube: YouTube Data API client exposing ``playlistItems().list()``.
        channel_id: Channel ID the uploads playlist ID is derived from.

    Returns:
        A list of ``{'videoId': ..., 'title': ...}`` dicts in the order the
        API returned them, across all result pages.
    """
    base_params = {
        'part': 'contentDetails,snippet',
        'playlistId': f'UU{channel_id[2:]}',
        'maxResults': 50,
    }

    collected = []
    page = youtube.playlistItems().list(**base_params).execute()

    while page:
        collected.extend(
            {
                'videoId': entry['contentDetails']['videoId'],
                'title': entry['snippet']['title'],
            }
            for entry in page['items']
        )

        # No continuation token means this was the last page.
        if 'nextPageToken' not in page:
            break

        page = youtube.playlistItems().list(
            **base_params,
            pageToken=page['nextPageToken'],
        ).execute()

    return collected

def get_video_transcript(video_id, languages=('de',)):
    """Fetch the transcript of a video, returning None when that fails.

    Args:
        video_id: YouTube video ID.
        languages: Language code(s) to request, in order of preference.
            Accepts a single code or an iterable of codes. Defaults to
            German only, which is what this function previously hard-coded.

    Returns:
        Whatever ``YouTubeTranscriptApi.get_transcript`` returns, or ``None``
        if it raised. The error is printed, not re-raised, so that one video
        without captions does not stop a batch run.
    """
    # A bare string would otherwise be split into single characters.
    if isinstance(languages, str):
        languages = [languages]

    try:
        return YouTubeTranscriptApi.get_transcript(video_id, languages=list(languages))
    except Exception as e:
        print(f"Error retrieving transcript for video {video_id}: {e}")
        return None

def save_transcript_to_file(video_id, title, transcript):
    """Write a transcript as ``<OUTPUT_DIR>/<video_id>.srt`` with a title/URL header.

    Args:
        video_id: YouTube video ID; used for the file name and the URL line.
        title: Video title written on the first line of the file.
        transcript: Transcript data handed to ``SRTFormatter``; a falsy value
            (``None`` or empty) means there is nothing to save.

    Returns:
        ``True`` when a file was written, ``False`` when ``transcript`` was falsy.
    """
    if not transcript:
        return False

    srt_body = SRTFormatter().format_transcript(transcript)
    file_path = os.path.join(OUTPUT_DIR, f"{video_id}.srt")

    # The title and link precede the SRT cues at the top of the file.
    header_lines = (
        f"Title: {title}\n",
        f"URL: https://www.youtube.com/watch?v={video_id}\n\n",
    )

    Path(OUTPUT_DIR).mkdir(parents=True, exist_ok=True)
    with open(file_path, 'w', encoding='utf-8') as out:
        out.writelines((*header_lines, srt_body))

    print(f"Saved transcript to {file_path}")
    return True

def write_summary(succeeded, failed):
    """Write SUMMARY_FILE listing videos with and without a saved transcript.

    Args:
        succeeded: Video dicts (``'title'``, ``'videoId'``) whose transcript was saved.
        failed: Video dicts for which no transcript was saved.
    """
    divider = "-------------------------------\n"
    # Heading text and the videos listed under it, in output order.
    sections = (
        ("Transkripti Başarıyla Alınan Videolar:\n", succeeded),
        ("\nTranskripti Alınamayan Videolar:\n", failed),
    )

    with open(SUMMARY_FILE, 'w', encoding='utf-8') as out:
        for heading, videos in sections:
            out.write(heading)
            out.write(divider)
            for entry in videos:
                out.write(f"Title: {entry['title']}\n")
                out.write(f"URL: https://www.youtube.com/watch?v={entry['videoId']}\n\n")
        print(f"Summary written to {SUMMARY_FILE}")

def main():
    """Fetch every channel video's transcript, save it, and write the summary.

    Each video ends up in exactly one of two lists, depending on whether
    ``save_transcript_to_file`` reported that a file was written; both lists
    are then passed to ``write_summary``.
    """
    service = get_youtube_service()
    videos = get_channel_video_ids(service, CHANNEL_ID)

    succeeded, failed = [], []

    for video in videos:
        transcript = get_video_transcript(video['videoId'])
        saved = save_transcript_to_file(video['videoId'], video['title'], transcript)
        (succeeded if saved else failed).append(video)

    write_summary(succeeded, failed)

# Run the caption-download pipeline when this code is executed directly.
if __name__ == '__main__':
    main()


Saved transcript to transcripts/fzFwFvo2o1Q.srt
Error retrieving transcript for video ukYElyDp5jk: 
Could not retrieve a transcript for the video https://www.youtube.com/watch?v=ukYElyDp5jk! This is most likely caused by:

Subtitles are disabled for this video

If you are sure that the described cause is not responsible for this error and that a transcript should be retrievable, please create an issue at https://github.com/jdepoix/youtube-transcript-api/issues. Please add which version of youtube_transcript_api you are using and provide the information needed to replicate the error. Also make sure that there are no open issues which already describe your problem!
Error retrieving transcript for video PXtsOdcjOMg: 
Could not retrieve a transcript for the video https://www.youtube.com/watch?v=PXtsOdcjOMg! This is most likely caused by:

Subtitles are disabled for this video

If you are sure that the described cause is not responsible for this error and that a transcript should be retriev

In [31]:
import whisper
import yt_dlp
import os
from pytube import Channel
from pathlib import Path

# Load the Whisper "tiny" model.
# NOTE(review): the previous comment said 'base', but the code loads "tiny".
model = whisper.load_model("tiny")

# Channel URL (handle form)
channel_url = "https://www.youtube.com/@TelekomHilfe-Videos"

# Open the channel with pytube and report how many video URLs it lists
channel = Channel(channel_url)
print(f'Kanal adı: {channel.channel_name}')
print(f'Videolar indiriliyor: {len(channel.video_urls)} adet video bulundu.')

# Create the folder that receives the transcripts.
# NOTE(review): this is the same "transcripts" directory the caption-based cell
# writes <video_id>.srt into, so files with the same name are overwritten here.
output_dir = Path("transcripts")
output_dir.mkdir(exist_ok=True)

def _format_srt_timestamp(seconds):
    """Convert an offset in seconds to an SRT timestamp (``HH:MM:SS,mmm``)."""
    total_ms = max(0, int(round(float(seconds) * 1000)))
    hours, remainder = divmod(total_ms, 3600 * 1000)
    minutes, remainder = divmod(remainder, 60 * 1000)
    secs, millis = divmod(remainder, 1000)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"


# For every video of the channel: download the audio, transcribe it with
# Whisper, and write <video_id>.txt and <video_id>.srt into output_dir.
for url in channel.video_urls:
    print(f"İşlem yapılıyor: {url}")

    # The video ID is whatever follows "watch?v=" in the URL.
    video_id = url.split("watch?v=")[-1]

    # Download the audio track and let FFmpeg convert it to WAV.
    ydl_opts = {
        'format': 'm4a/bestaudio/best',
        'outtmpl': f'{video_id}.%(ext)s',
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'wav',
        }]
    }

    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.download([url])

    # Path of the WAV file produced by the post-processor.
    audio_path = Path(f"{video_id}.wav")

    if not audio_path.exists():
        print(f"Ses dosyası bulunamadı: {video_id}")
        continue

    try:
        result = model.transcribe(
            str(audio_path),
            language="de",  # Forced to German; drop this argument to let Whisper detect the language.
            task="transcribe",
            temperature=0.0,  # Deterministic decoding (no sampling).
            beam_size=5,  # Beam-search width.
            fp16=False  # Full precision; half precision is only useful on a GPU.
        )

        # Plain-text transcript. UTF-8 is explicit so umlauts do not depend on
        # the platform's default encoding.
        with open(output_dir / f"{video_id}.txt", "w", encoding="utf-8") as f:
            f.write(result['text'])

        # SRT transcript: cue number, "HH:MM:SS,mmm --> HH:MM:SS,mmm", text.
        with open(output_dir / f"{video_id}.srt", "w", encoding="utf-8") as f:
            for index, segment in enumerate(result['segments'], start=1):
                start = _format_srt_timestamp(segment['start'])
                end = _format_srt_timestamp(segment['end'])
                text = segment['text'].strip()
                f.write(f"{index}\n{start} --> {end}\n{text}\n\n")

        print(f"Transkript tamamlandı: {video_id}")
    finally:
        # Remove the temporary WAV even when transcription or writing failed.
        os.remove(audio_path)


URLError: <urlopen error [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: self-signed certificate in certificate chain (_ssl.c:1000)>