# In [None]:  (Jupyter cell marker left over from the notebook export)
import json
from youtube_transcript_api import YouTubeTranscriptApi
import os

# JSON files used to persist data between runs.
STATE_FILE = "state.json"
LINKS_FILE = "links.json"

# Name of the .txt file containing the YouTube links.
LINKS_TXT_FILE = "video18_links.txt"

# Name of the folder in which transcripts are stored.
TRANSCRIPTS_FOLDER = "youtube_transcript"

def load_state():
    """Return the saved state, or a fresh one when no state file exists.

    Returns:
        The JSON-decoded content of ``STATE_FILE`` when that path exists,
        otherwise ``{'last_index': 0}``.
    """
    # Guard clause: nothing saved yet, so start counting from zero.
    if not os.path.exists(STATE_FILE):
        return {'last_index': 0}

    with open(STATE_FILE, mode='r') as state_fh:
        raw_state = state_fh.read()
    return json.loads(raw_state)

def save_state(state):
    """Write *state* to ``STATE_FILE`` as JSON indented by four spaces.

    Args:
        state: JSON-serialisable object to persist. Any previous content of
            the file is replaced.
    """
    with open(STATE_FILE, mode='w') as state_fh:
        json.dump(obj=state, fp=state_fh, indent=4)

def load_links():
    """Return the saved links mapping, or an empty dict when none exists.

    Returns:
        The JSON-decoded content of ``LINKS_FILE`` when that path exists,
        otherwise ``{}``.
    """
    links_file_present = os.path.exists(LINKS_FILE)
    if not links_file_present:
        return {}

    with open(LINKS_FILE, mode='r') as links_fh:
        return json.load(fp=links_fh)

def save_links(links):
    """Write *links* to ``LINKS_FILE`` as JSON indented by four spaces.

    Args:
        links: JSON-serialisable object to persist. Any previous content of
            the file is replaced.
    """
    with open(LINKS_FILE, mode='w') as links_fh:
        json.dump(obj=links, fp=links_fh, indent=4)

def get_youtube_transcript(video_id, language='hi'):
    """Fetch the transcript of one YouTube video in the requested language.

    Args:
        video_id: Identifier handed to ``YouTubeTranscriptApi.get_transcript``.
        language: Language code to request; defaults to ``'hi'``.

    Returns:
        Whatever ``YouTubeTranscriptApi.get_transcript`` returns on success,
        or ``None`` when the call raises.
    """
    # NOTE(review): newer youtube-transcript-api releases reportedly replace
    # the static ``get_transcript`` with an instance-level ``fetch`` -- confirm
    # against the installed version.
    try:
        return YouTubeTranscriptApi.get_transcript(video_id, languages=[language])
    except Exception as e:
        # Deliberately broad: a single failing video must not abort the whole
        # batch. Report which video failed and why instead of hiding both.
        print(f"Error: not able to fetch transcript for {video_id}: {e}")
        return None

def save_transcript_to_file(transcript, filename):
    """Write the text of every transcript entry to *filename*, one per line.

    Args:
        transcript: Iterable of entries that can be indexed with ``'text'``.
        filename: Path of the UTF-8 file to create or overwrite.
    """
    with open(filename, mode='w', encoding='utf-8') as out:
        out.writelines(f"{segment['text']}\n" for segment in transcript)

def load_links_from_txt(file_path):
    """Read YouTube video links from a text file, one link per line.

    Args:
        file_path: Path of the text file to read.

    Returns:
        The whitespace-stripped, non-empty lines of the file in order, or an
        empty list (after printing an error) when the path does not exist.
    """
    if os.path.exists(file_path):
        with open(file_path, 'r') as source:
            stripped_lines = [raw_line.strip() for raw_line in source]
        # Blank and whitespace-only lines carry no link.
        return [entry for entry in stripped_lines if entry]

    print(f"Error: {file_path} not found.")
    return []

def _extract_video_id(url):
    """Return the YouTube video id contained in *url*.

    Understands ``watch?v=ID`` links (with or without extra query parameters),
    ``youtu.be/ID`` short links and ``/shorts/ID``, ``/embed/ID``, ``/live/ID``
    and ``/v/ID`` paths. Anything else falls back to the text after the last
    ``v=``, which for a bare id is the string itself.
    """
    from urllib.parse import parse_qs, urlparse

    try:
        # Give scheme-less links ("youtu.be/ID") a netloc so they parse like
        # full URLs.
        parsed = urlparse(url if '://' in url else f'//{url}')
        host = parsed.hostname or ''
    except ValueError:
        # Malformed URL: fall back to the plain string split.
        return url.split('v=')[-1]

    ids_in_query = parse_qs(parsed.query).get('v')
    if ids_in_query:
        return ids_in_query[0]

    segments = [part for part in parsed.path.split('/') if part]
    if host in ('youtu.be', 'www.youtu.be') and segments:
        return segments[0]
    if len(segments) >= 2 and segments[0] in ('shorts', 'embed', 'live', 'v'):
        return segments[1]

    return url.split('v=')[-1]


def main(links):
    """Fetch and store the transcript of every link, then persist progress.

    Files are numbered continuously across runs: numbering starts at the saved
    ``last_index`` plus one, and every link consumes a number whether or not
    its transcript could be fetched. Successfully saved links are recorded
    under ``yt<number>`` in the links mapping.

    Args:
        links: YouTube video URLs to process.

    Returns:
        None. When *links* is empty nothing is loaded, created or saved.
    """
    # Nothing to do: return before touching any state. Previously this path
    # fell through to ``state['last_index'] = idx`` with ``idx`` unbound.
    if not links:
        print("No links to process.")
        return

    state = load_state()
    links_dict = load_links()
    last_index = state['last_index']

    # Create the transcripts folder if it doesn't exist
    os.makedirs(TRANSCRIPTS_FOLDER, exist_ok=True)

    # Pre-bind so the index is defined even if the iterable yields nothing.
    idx = last_index
    for idx, url in enumerate(links, start=last_index + 1):
        video_id = _extract_video_id(url)
        transcript = get_youtube_transcript(video_id)

        if not transcript:
            print(f"Could not fetch the transcript for {url}")
            continue

        # Save the transcript in the youtube_transcript folder
        filename = os.path.join(TRANSCRIPTS_FOLDER, f'transcript_yt{idx}.txt')
        save_transcript_to_file(transcript, filename)
        print(f"Transcript for {url} saved to {filename}")
        links_dict[f'yt{idx}'] = url

    state['last_index'] = idx
    save_state(state)
    save_links(links_dict)

if __name__ == "__main__":
    # Read the links from the file named by the module-level LINKS_TXT_FILE
    # setting instead of repeating the file name here, so the two cannot
    # drift apart.
    new_links = load_links_from_txt(LINKS_TXT_FILE)

    # Process the links only when the file yielded at least one.
    if new_links:
        main(new_links)
    else:
        print("No links found in the text file.")
