In [None]:
# NOTE(review): no notebook path is passed here; nbconvert normally expects the
# file(s) to operate on after the flags — confirm this cell does what was intended.
!jupyter nbconvert --clear-output

In [None]:
# Python dependencies for the PDF -> text -> summary pipeline.
# NOTE(review): pypdf is not imported by any cell visible in this notebook — confirm it is needed.
%pip install pypdf
# Provides AutoTokenizer / AutoModelForSeq2SeqLM used by the summarization cell.
%pip install transformers
# Provides pdfminer.high_level.extract_text used for text-based PDFs.
%pip install pdfminer.six
# Provides convert_from_path used to rasterize scanned PDFs before OCR.
%pip install pdf2image
# Python wrapper used for OCR via pytesseract.image_to_string.
%pip install pytesseract
# NOTE(review): presumably meant to supply Poppler for pdf2image; the apt cell
# below installs poppler-utils — verify this pip package is actually required.
%pip install poppler

In [None]:
!apt-get install poppler-utils

In [None]:
# Hugging Face Hub client with the optional hf_xet extra.
# NOTE(review): presumably here to speed up the model download in the
# summarization cell — confirm it is required.
%pip install huggingface_hub[hf_xet]

In [None]:
from pdfminer.high_level import extract_text
from pdf2image import convert_from_path
import pytesseract

def is_pdf_text_based(pdf_path):
    """Report whether the first page of a PDF yields any selectable text.

    Only the first page is sampled (``maxpages=1``); a result consisting
    solely of whitespace counts as "no text".

    Args:
        pdf_path: Path to the PDF file, passed straight to ``extract_text``.

    Returns:
        True if the first page contains at least one non-whitespace
        character, otherwise False.
    """
    first_page_text = extract_text(pdf_path, maxpages=1)
    return len(first_page_text.strip()) > 0

def extract_text_from_pdf(pdf_path, output_path):
    """Write the text of a PDF to ``output_path``, falling back to OCR.

    If ``is_pdf_text_based`` reports selectable text, the whole document is
    extracted with pdfminer's ``extract_text`` and written as-is. Otherwise
    every page is rendered to an image and run through Tesseract; each
    page's text is preceded by a ``--- Page N ---`` marker line.

    Args:
        pdf_path: Path of the PDF to read.
        output_path: Path of the UTF-8 text file to create or overwrite.

    Returns:
        None. The result is the file written at ``output_path``.
    """
    if not is_pdf_text_based(pdf_path):
        print("PDF appears to be scanned. Using OCR.")
        page_images = convert_from_path(pdf_path)
        with open(output_path, "w", encoding="utf-8") as out_file:
            # Pages are written as they are recognised, one marker per page.
            for i, page_image in enumerate(page_images):
                text = pytesseract.image_to_string(page_image)
                out_file.write(f"\n--- Page {i+1} ---\n{text}")
        return

    print("PDF has selectable text. Using pdfminer.")
    text = extract_text(pdf_path)
    with open(output_path, "w", encoding="utf-8") as out_file:
        out_file.write(text)

# Example: convert the uploaded book and store its text for the later cells.
extract_text_from_pdf(pdf_path='/content/Ikigai.pdf', output_path="final_output.txt")

In [None]:
def load_book_text(file_path):
    """Return the full contents of a UTF-8 encoded text file.

    Args:
        file_path: Path of the file to read.

    Returns:
        The file's contents as a single string.
    """
    with open(file_path, encoding="utf-8") as book_file:
        return book_file.read()

def chunk_text(text, max_words=800):
    """Split text into consecutive chunks of at most ``max_words`` words.

    Words are whatever ``str.split()`` yields, so runs of whitespace
    (including newlines) collapse to single spaces inside each chunk.

    Args:
        text: The text to split.
        max_words: Maximum number of words per chunk.

    Returns:
        A list of chunk strings in document order; empty when ``text``
        contains no words.
    """
    tokens = text.split()
    return [
        " ".join(tokens[start:start + max_words])
        for start in range(0, len(tokens), max_words)
    ]

# Load and chunk the text
book_text = load_book_text("final_output.txt")
# The summarizer truncates each chunk to 1024 tokens, and English prose
# typically tokenizes to more tokens than words, so 1000-word chunks would
# lose their tail silently. 700 words leaves headroom under that limit.
text_chunks = chunk_text(book_text, max_words=700)

print(f" Total chunks created: {len(text_chunks)}")
# Guard the preview: an empty extraction yields no chunks to index into.
if text_chunks:
    print(f"\n First 300 characters of chunk 1:\n{text_chunks[0][:300]}")
else:
    print(" No text found in final_output.txt; nothing to chunk.")

In [None]:
  import re

  def load_clean_text(file_path):
      """Read a UTF-8 text file and collapse OCR page markers.

      Every ``--- Page N ---`` marker, together with the newlines directly
      around it, is replaced by a single newline.

      Args:
          file_path: Path of the text file to read.

      Returns:
          The file contents with the page markers removed.
      """
      with open(file_path, encoding="utf-8") as source:
          raw_text = source.read()
      return re.sub(r"\n*--- Page \d+ ---\n*", "\n", raw_text)

  # Load and clean
  cleaned_book_text = load_clean_text("final_output.txt")

  # Then chunk. The summarizer truncates each chunk to 1024 tokens, and
  # English prose typically tokenizes to more tokens than words, so
  # 1000-word chunks would lose their tail silently; 700 leaves headroom.
  text_chunks = chunk_text(cleaned_book_text, max_words=700)

  print(f"✅ Total cleaned chunks: {len(text_chunks)}")
  # Guard the preview: an empty extraction yields no chunks to index into.
  if text_chunks:
      print(f"\n🔹 First 300 characters of cleaned chunk 1:\n{text_chunks[0][:300]}")
  else:
      print("⚠️ No text found in final_output.txt; nothing to chunk.")

In [None]:
import torch
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM

# Checkpoint used for abstractive summarization.
model_name = "facebook/bart-large-cnn"

# Run on CUDA when a GPU is visible, otherwise stay on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Load the tokenizer first, then the seq2seq model, and place the model on `device`.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
model = model.to(device)

# Summarization function without pipeline (manual for more control)
def safe_summarize(text, max_input_tokens=1024, max_output_tokens=600, min_output_tokens=300):
    """Summarize one chunk of text with the module-level BART model.

    The input is tokenized and truncated to ``max_input_tokens``; anything
    beyond that limit is dropped before generation.

    Args:
        text: The chunk to summarize.
        max_input_tokens: Truncation limit applied by the tokenizer.
        max_output_tokens: Upper bound on the generated length.
        min_output_tokens: Requested lower bound on the generated length.
            It is capped at the input length and at ``max_output_tokens``
            so a short chunk is never forced to produce a "summary" longer
            than its own input.

    Returns:
        The decoded summary string with special tokens removed.
    """
    # Tokenize and move the tensors to the selected device (GPU or CPU).
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=max_input_tokens).to(device)
    input_ids = inputs["input_ids"]

    # Cap the minimum length so short inputs are not padded out with filler.
    input_length = input_ids.shape[-1]
    min_length = min(min_output_tokens, max_output_tokens, input_length)

    # Generate summary; the attention mask is passed explicitly so generate()
    # does not have to infer it from the input ids.
    summary_ids = model.generate(
        input_ids,
        attention_mask=inputs.get("attention_mask"),
        max_length=max_output_tokens,
        min_length=min_length,
        do_sample=False,
        no_repeat_ngram_size=2,
        early_stopping=True
    )
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)

# Summarize all text chunks
summaries = []
for i, chunk in enumerate(text_chunks):
    print(f"⏳ Summarizing chunk {i+1}/{len(text_chunks)}...")
    try:
        summary = safe_summarize(chunk)
        summaries.append(summary)
    except Exception as e:
        # Best-effort: record an empty summary so numbering stays aligned
        # with the chunk index and the remaining chunks are still processed.
        print(f" Chunk {i+1} failed: {e}")
        summaries.append("")

# Save summaries as "Summary N:" sections separated by blank lines.
summary_path = "summarized_output.txt"
with open(summary_path, "w", encoding="utf-8") as f:
    for i, summary in enumerate(summaries, start=1):
        f.write(f"Summary {i}:\n{summary}\n\n")

# Report the path that was actually written (the old message named a different file).
print(f" All done! Summaries saved to '{summary_path}'.")


In [None]:
!pip install colab-xterm
%load_ext colabxterm

In [None]:
# Opens the terminal provided by the colabxterm extension.
# NOTE(review): presumably used to install/start Ollama by hand — confirm.
%xterm

In [None]:
!ollama serve
!ollama pull mistral
!ollama generate "Hello, world!"

In [None]:
import requests
import json

def prompt_ollama_for_dialogue(summary_text, model="mistral",
                               url='http://localhost:11434/api/generate',
                               timeout=None):
    """Ask an Ollama server to rewrite a summary as a two-host dialogue.

    Args:
        summary_text: The summary to turn into a conversation.
        model: Name of the Ollama model to use.
        url: Address of the Ollama ``/api/generate`` endpoint.
        timeout: Seconds to wait for the HTTP response; ``None`` waits
            indefinitely, which was the previous behavior.

    Returns:
        The generated dialogue with surrounding whitespace removed, or an
        empty string when the server answers with a non-200 status (the
        error body is printed in that case).

    Raises:
        requests.RequestException: If the server cannot be reached.
    """
    prompt = f"""Turn the following book summary into a podcast-style dialogue between two co-hosts, Host 1 and Host 2.
Keep the tone conversational, engaging, and informative. Host 1 can lead with a topic, and Host 2 can ask questions,
react, or add insights. Avoid reading like a summary — make it sound like a natural discussion.

---
"{summary_text}"
"""

    payload = {
        "model": model,
        "prompt": prompt,
        # Request one complete JSON body instead of a streamed response.
        "stream": False
    }
    response = requests.post(url, json=payload, timeout=timeout)

    if response.status_code != 200:
        print("Error from Ollama:", response.text)
        return ""

    # Tolerate a body without the expected key instead of raising KeyError.
    return response.json().get('response', '').strip()

def generate_podcast_dialogue(input_file='summarized_output.txt', output_file='podcast_dialogue_output.txt'):
    """Turn every summary in ``input_file`` into a dialogue in ``output_file``.

    The input is split on blank lines into sections. A leading
    ``Summary N:`` header is removed from each section so only the summary
    text reaches the model, and sections that are empty after that (for
    example a chunk whose summarization failed) are skipped instead of
    being sent as a header-only prompt.

    Args:
        input_file: UTF-8 text file holding the summaries.
        output_file: UTF-8 text file to create or overwrite with the
            dialogues, each under a ``--- Dialogue N ---`` heading where N
            is the section's position in the input.

    Returns:
        None. The result is the file written at ``output_file``.
    """
    import re  # local import keeps this cell runnable on its own

    with open(input_file, 'r', encoding='utf-8') as infile:
        sections = infile.read().split("\n\n")  # split multiple summaries if any

    header = re.compile(r"^Summary \d+:[ \t]*\n?")
    all_dialogues = []

    for i, section in enumerate(sections):
        summary = header.sub("", section.strip(), count=1).strip()
        if not summary:
            continue
        print(f"Generating dialogue for section {i + 1}...")
        dialogue = prompt_ollama_for_dialogue(summary)
        all_dialogues.append(f"--- Dialogue {i + 1} ---\n{dialogue}\n")

    with open(output_file, 'w', encoding='utf-8') as outfile:
        outfile.write("\n".join(all_dialogues))

    print(f"\n Podcast dialogues saved to {output_file}")

# Run the generator with its defaults: reads summarized_output.txt and
# writes podcast_dialogue_output.txt in the current working directory.
generate_podcast_dialogue()

In [None]:
!pip install edge-tts pydub


In [None]:
from pydub import AudioSegment
import asyncio
import edge_tts
import os

# Read uploaded dialogue and pair every spoken line with its host's voice.
# Lines are stripped first so indented "Host N:" lines are still recognised,
# only the leading speaker tag is removed (a tag mentioned later in the
# sentence stays intact), and empty utterances are skipped so no empty
# string is queued for speech synthesis.
dialogue_lines = []
with open("/content/podcast_dialogue_output.txt", "r", encoding="utf-8") as f:
    for line in f:
        line = line.strip()
        for prefix, voice in (("Host 1:", "en-US-GuyNeural"),      # Voice A
                              ("Host 2:", "en-US-JennyNeural")):   # Voice B
            if line.startswith(prefix):
                text = line[len(prefix):].strip()
                if text:
                    dialogue_lines.append((voice, text))
                break

# Output folder for the per-line audio clips.
os.makedirs("edge_tts_audio", exist_ok=True)


In [None]:
# Rebuild the (voice, text) list from the dialogue file.
# Only the leading speaker tag is stripped, so a "Host 1:" / "Host 2:" that
# appears later inside a sentence is kept; blank lines, lines without a
# speaker tag, and empty utterances are skipped.
dialogue_lines = []
with open("/content/podcast_dialogue_output.txt", "r", encoding="utf-8") as f:
    for line in f:
        line = line.strip()
        for prefix, voice in (("Host 1:", "en-US-GuyNeural"),
                              ("Host 2:", "en-US-JennyNeural")):
            if line.startswith(prefix):
                text = line[len(prefix):].strip()
                if text:
                    dialogue_lines.append((voice, text))
                break


In [None]:
# Sanity check before synthesis: 0 means no line in the dialogue file
# started with "Host 1:" or "Host 2:".
print(f"Loaded {len(dialogue_lines)} lines for TTS.")

In [None]:
async def generate_tts(max_concurrency=5):
    """Synthesize every entry of ``dialogue_lines`` to its own MP3 file.

    Files are written as ``edge_tts_audio/line_NNN.mp3`` with NNN being the
    1-based position of the line. Syntheses run concurrently, but at most
    ``max_concurrency`` at a time, so a long dialogue does not launch one
    request per line all at once.

    Args:
        max_concurrency: Maximum number of simultaneous syntheses; values
            below 1 are treated as 1.

    Returns:
        None. Completes once every clip has been saved.
    """
    limiter = asyncio.Semaphore(max(1, max_concurrency))

    async def _synthesize(index, voice, text):
        # One clip per dialogue line; the semaphore bounds in-flight requests.
        filename = f"edge_tts_audio/line_{index:03d}.mp3"
        async with limiter:
            await edge_tts.Communicate(text=text, voice=voice).save(filename)

    await asyncio.gather(*(
        _synthesize(i, voice, text)
        for i, (voice, text) in enumerate(dialogue_lines, start=1)
    ))

# Top-level await relies on the notebook kernel running cells inside an event
# loop (IPython autoawait); in a plain script use asyncio.run(generate_tts()).
await generate_tts()


In [None]:
# Stitch the per-line clips together in order, with a 300 ms pause after each.
pause = AudioSegment.silent(duration=300)
final_audio = AudioSegment.empty()
for i, _line in enumerate(dialogue_lines):
    clip = AudioSegment.from_file(f"edge_tts_audio/line_{i+1:03d}.mp3")
    final_audio = final_audio + (clip + pause)

# Write the assembled episode as a single MP3.
final_audio.export("final_podcast_edge_tts.mp3", format="mp3")
print(" Final podcast saved as final_podcast_edge_tts.mp3")