# YouTube to Book Summary Converter

## Generates both Book Summary AND Transcript as Word files

Prerequisites:
- **Groq API Key**: https://console.groq.com/
- **YouTube URL** with captions

---


In [None]:
# CELL 1: Install Dependencies
!pip install -q youtube-transcript-api groq python-docx
print('Dependencies installed!')

In [None]:
# CELL 2: Configure API Key
import os

# @title Enter Groq API Key
# Form field: paste the Groq API key between the quotes (or into the form input).
# NOTE(review): avoid saving or sharing the notebook with a real key filled in.
GROQ_API_KEY = ""  # @param {type:"string"}
# Export the value (even when empty) as the GROQ_API_KEY environment variable.
os.environ['GROQ_API_KEY'] = GROQ_API_KEY

# An empty string means the key has not been entered yet.
if GROQ_API_KEY:
    print('API Key configured!')
else:
    print('Please enter your Groq API Key above!')

In [None]:
# CELL 3: Enter YouTube URL
# @title Enter YouTube URL
# Form field: URL of the video to process; left empty until the user fills it in.
YOUTUBE_URL = ""  # @param {type:"string"}

# Only confirm when a URL has been entered; an empty value prints nothing.
if YOUTUBE_URL:
    print(f'Ready to process: {YOUTUBE_URL}')

In [None]:
# CELL 4: Import Libraries
import re
from datetime import datetime
from youtube_transcript_api import YouTubeTranscriptApi
from groq import Groq
from docx import Document
from docx.shared import Pt
from docx.enum.text import WD_ALIGN_PARAGRAPH
from IPython.display import display, Markdown
print('Libraries imported!')

In [None]:
# CELL 5: Extract Transcript
def get_video_id(url):
    """Return the 11-character video ID found in *url*, or ``None``.

    The ID is the first run of 11 characters from ``[0-9A-Za-z_-]`` that
    directly follows either ``v=`` or a ``/`` anywhere in the string.
    """
    found = re.search(r'(?:v=|/)([0-9A-Za-z_-]{11})', url)
    if found is None:
        return None
    return found.group(1)

def extract_transcript(url):
    """Fetch the English transcript of a YouTube video as a single string.

    A manually created English transcript is preferred; if looking it up
    fails, the auto-generated English transcript is used instead.

    Args:
        url: YouTube video URL; the video ID is extracted with
            ``get_video_id``.

    Returns:
        A 2-tuple. On success ``(text, video_id)``, where ``text`` is all
        caption snippets joined by single spaces. On failure
        ``(None, message)``, where ``message`` is ``'Invalid URL'`` or the
        string form of the exception that was raised.
    """
    video_id = get_video_id(url)
    if not video_id:
        return None, 'Invalid URL'
    try:
        transcripts = YouTubeTranscriptApi().list(video_id)
        try:
            chosen = transcripts.find_manually_created_transcript(['en'])
        except Exception:
            # Not a bare ``except:`` -- that would also swallow
            # KeyboardInterrupt/SystemExit and make the cell uninterruptible.
            chosen = transcripts.find_generated_transcript(['en'])
        text = ' '.join(snippet.text for snippet in chosen.fetch())
        return text, video_id
    except Exception as e:
        # Best-effort boundary: report the failure to the caller as text
        # instead of raising, so the notebook cell can print it.
        return None, str(e)

print('Functions ready!')

In [None]:
# CELL 6: Generate Summary
def create_summary(text, api_key):
    """Ask the Groq chat API for a book-style summary of a transcript.

    Only the first 15000 characters of *text* are included in the prompt.

    Args:
        text: Transcript text to summarise.
        api_key: Groq API key used to construct the client.

    Returns:
        The message content of the first choice in the completion response.
    """
    groq_client = Groq(api_key=api_key)

    # Fixed instructions; the transcript excerpt is appended right after
    # the "TRANSCRIPT:" line.
    instructions = '''You are an expert author. Create a book-style summary with:
1. Executive Overview (2-3 paragraphs)
2. Introduction with background
3. Chapter-by-Chapter Summary
4. Key Concepts
5. Key Takeaways (numbered)
6. Memorable Quotations (3-5)
7. Practical Applications
8. Critical Analysis
9. Conclusion

TRANSCRIPT:
'''
    excerpt = text[:15000]
    conversation = [
        {'role': 'system', 'content': 'Expert author'},
        {'role': 'user', 'content': instructions + excerpt},
    ]

    completion = groq_client.chat.completions.create(
        model='llama-3.3-70b-versatile',
        messages=conversation,
        max_tokens=8000,
        temperature=0.5,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

print('Summary function ready!')

In [None]:
# CELL 7: Save Word Files
def _start_document(heading, video_id):
    """Create a Document with a centered title plus video-ID/timestamp lines."""
    doc = Document()
    title_para = doc.add_heading(heading, 0)
    title_para.alignment = WD_ALIGN_PARAGRAPH.CENTER
    doc.add_paragraph(f'Video ID: {video_id}')
    doc.add_paragraph(f'Generated: {datetime.now().isoformat()}')
    doc.add_paragraph()
    return doc


def _add_summary_sections(doc, summary):
    """Append *summary* to *doc*, turning Markdown ``#`` lines into headings."""
    # Split right before every line that starts with 1-6 '#' and whitespace.
    # The original split on the two-character text backslash+'n', which never
    # occurs in the summary, so everything ended up in one giant heading.
    for section in re.split(r'\n(?=#{1,6}\s)', summary):
        section = section.strip()
        if not section:
            continue
        if not section.startswith('#'):
            # Text before the first heading is body text, not a heading.
            doc.add_paragraph(section)
            continue
        first_line, _, body = section.partition('\n')
        marker_count = len(first_line) - len(first_line.lstrip('#'))
        section_title = first_line.strip().strip('#').strip()
        if section_title:
            # python-docx accepts heading levels 0-9; 0 is the document title.
            doc.add_heading(section_title, level=min(marker_count, 9))
        body = body.strip()
        if body:
            doc.add_paragraph(body)


def save_word_files(summary, transcript, video_id, title):
    """Write the summary and the transcript to two timestamped .docx files.

    Both files are saved in the current working directory and start with a
    centered title followed by the video ID and generation time.

    Args:
        summary: Markdown-style summary text; lines starting with ``#``
            become Word headings and the text under them becomes paragraphs.
        transcript: Full transcript text; written in 25000-character chunks,
            each chunk after the first preceded by a "Part N" heading.
        video_id: Video ID shown in each document's header.
        title: Document title; also used (sanitised and truncated to 40
            characters) as the file-name prefix.

    Returns:
        ``(book_file, trans_file)``: the two file names that were written.
    """
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    # Keep only alphanumerics, spaces, '-' and '_' for the file-name prefix.
    safe_title = ''.join(c for c in title if c.isalnum() or c in ' -_').strip()[:40]

    # Save Book Summary
    book_file = f'{safe_title}_book_{timestamp}.docx'
    doc = _start_document(title, video_id)
    _add_summary_sections(doc, summary)
    doc.save(book_file)

    # Save Transcript
    trans_file = f'{safe_title}_transcript_{timestamp}.docx'
    doc2 = _start_document(f'Transcript: {title}', video_id)

    max_chars = 25000
    for i in range(0, len(transcript), max_chars):
        chunk = transcript[i:i+max_chars]
        if i > 0:
            doc2.add_heading(f'Part {i//max_chars + 1}', level=2)
        doc2.add_paragraph(chunk)

    doc2.save(trans_file)

    return book_file, trans_file

print('Word file functions ready!')

In [None]:
# CELL 8: RUN EVERYTHING
from google.colab import files

# Run the full pipeline: fetch the transcript, summarise it, write both Word
# files, show the summary inline and offer the files for download.
if YOUTUBE_URL and GROQ_API_KEY:
    print('Processing...')

    transcript, result = extract_transcript(YOUTUBE_URL)

    # On failure extract_transcript returns (None, error message).
    if transcript is None:
        print(f'Error: {result}')
    else:
        video_id = result
        print(f'Transcript: {len(transcript)} chars')

        summary = create_summary(transcript, GROQ_API_KEY)
        print('Summary generated!')

        title = f'YouTube Video {video_id}'
        book_file, trans_file = save_word_files(summary, transcript, video_id, title)

        # Real newlines and emoji: the original strings held a literal
        # backslash+'n' and mis-decoded emoji bytes.
        print('\n📁 Files saved:')
        print(f'  - {book_file}')
        print(f'  - {trans_file}')

        # The blank line after the heading keeps the summary out of the
        # Markdown heading itself.
        display(Markdown('# BOOK SUMMARY\n\n' + summary))

        print('\n📥 Download files:')
        files.download(book_file)
        files.download(trans_file)
else:
    print('Enter API Key and YouTube URL above!')

---
## Complete!

Two Word files generated:
1. **_book.docx** - AI-generated book summary
2. **_transcript.docx** - Full transcript

Download links above!

*Powered by Groq LLM*