In [None]:
!pip install PyPDF2 gtts requests transformers datasets torch moviepy pillow paddleocr paddlepaddle llama-cpp-python[server] llama-cpp-python pdf2image edge_tts


Collecting PyPDF2
  Downloading pypdf2-3.0.1-py3-none-any.whl.metadata (6.8 kB)
Collecting gtts
  Downloading gTTS-2.5.4-py3-none-any.whl.metadata (4.1 kB)
Collecting datasets
  Downloading datasets-3.2.0-py3-none-any.whl.metadata (20 kB)
Collecting paddleocr
  Downloading paddleocr-2.9.1-py3-none-any.whl.metadata (8.5 kB)
Collecting paddlepaddle
  Downloading paddlepaddle-2.6.2-cp311-cp311-manylinux1_x86_64.whl.metadata (8.6 kB)
Collecting llama-cpp-python
  Downloading llama_cpp_python-0.3.7.tar.gz (66.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m66.7/66.7 MB[0m [31m10.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Installing backend dependencies ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting pdf2image
  Downloading pdf2image-1.17.0-py3-none-any.whl.metadata (6.2 kB)
Collecting edge_tts
  Downloading edge_tts-7

In [None]:
import os
import PyPDF2
from gtts import gTTS
from transformers import AutoTokenizer
from moviepy.editor import ImageClip, CompositeVideoClip, AudioFileClip
from PIL import Image, ImageDraw, ImageFont
import paddleocr
from llama_cpp import Llama

# Path to the local GGUF model file. Change it here only: the loader below
# reads this variable, so the two can no longer drift apart.
model_path = "llama-3.2-1b-instruct-q8_0.gguf"

# n_ctx is the context window in tokens; every prompt plus the tokens generated
# for it must fit inside this budget.
llm = Llama(
    model_path=model_path,
    n_ctx=2048,
)

In [None]:
import os
import PyPDF2
import paddleocr
from gtts import gTTS
from PIL import Image, ImageDraw, ImageFont
from moviepy.editor import concatenate_audioclips, AudioFileClip
from llama_cpp import Llama
from pdf2image import convert_from_path
from collections import defaultdict

# Voice profiles available to each academic role.
# Every variant holds the two gTTS settings used when speech is generated:
#   "tld"   - Google Translate domain, used here to vary the accent
#   "speed" - forwarded to gTTS as its `slow` flag (True = slower speech)
# Roles listed more than once in a conversation cycle through their variants.
ROLES = {
    role_name: {
        "variants": [{"tld": domain, "speed": slow} for domain, slow in voices]
    }
    for role_name, voices in (
        # American English, then British English
        ("Scientist", (("com", False), ("co.uk", False))),
        ("Historian", (("co.uk", False), ("com.au", True))),
        # "fr" was annotated as French-accented English by the original author
        ("Philosopher", (("com.au", True), ("fr", False))),
    )
}

def extract_text(pdf_path, min_chars=1000):
    """Hybrid text extraction for research papers.

    First reads the PDF's embedded text layer with PyPDF2. If that yields fewer
    than ``min_chars`` characters (typical for scanned papers), every page is
    rendered to an image and run through PaddleOCR, and the recognised text is
    appended.

    Args:
        pdf_path: Path to the PDF file.
        min_chars: Minimum length of embedded text below which the OCR
            fallback is attempted.

    Returns:
        The extracted text. Extraction is best-effort: errors are printed and
        whatever text was gathered so far (possibly "") is returned.
    """
    text = ""

    # Pass 1: embedded text layer.
    try:
        with open(pdf_path, "rb") as f:
            reader = PyPDF2.PdfReader(f)
            for page in reader.pages:
                text += page.extract_text() or ""
    except OSError as e:
        # The file itself cannot be read, so OCR has nothing to work with either.
        print(f"Extraction error: {str(e)}")
        return text
    except Exception as e:
        # A PDF that PyPDF2 cannot parse may still be renderable; fall through to OCR.
        print(f"Extraction error: {str(e)}")

    if len(text) >= min_chars:
        return text

    # Pass 2: OCR fallback for scanned / image-only PDFs.
    try:
        import numpy as np  # local import: only needed on the OCR path

        ocr = paddleocr.PaddleOCR()
        for img in convert_from_path(pdf_path):
            # PaddleOCR accepts arrays, paths or bytes, not PIL images.
            result = ocr.ocr(np.array(img))
            page_lines = result[0] if result else None
            if not page_lines:
                # Pages with no detected text come back as None/empty.
                continue
            page_text = "\n".join(line[-1][0] for line in page_lines)
            # Separate pages so the last word of one does not fuse with the next.
            text += ("\n" if text else "") + page_text
    except Exception as e:
        print(f"Extraction error: {str(e)}")

    return text

def generate_summary(text, llm):
    """Ask the LLM for a three-point summary of the paper.

    Args:
        text: Full paper text; only the first 3000 characters are sent.
        llm: Callable invoked as ``llm(prompt, max_tokens=...)`` that returns a
            completion dict with a ``choices`` list.

    Returns:
        The ``text`` field of the first completion choice.
    """
    excerpt = text[:3000]  # keep the prompt bounded
    completion = llm(
        f"Summarize this research paper in 3 key points:\n{excerpt}",
        max_tokens=512,
    )
    first_choice = completion['choices'][0]
    return first_choice['text']

def generate_dialogue(text, selected_roles, llm):
    """Ask the LLM to script a conversation about the paper.

    Args:
        text: Full paper text; only the first 2000 characters are sent.
        selected_roles: Role names, one per participant. A role that appears
            more than once is numbered from its second occurrence onwards
            ("Scientist", "Scientist 2", ...).
        llm: Callable invoked as ``llm(prompt, max_tokens=...)`` that returns a
            completion dict with a ``choices`` list.

    Returns:
        The ``text`` field of the first completion choice.
    """
    # Build a display label for every participant.
    occurrences = {}
    participants = []
    for role in selected_roles:
        nth = occurrences.get(role, 0) + 1
        occurrences[role] = nth
        label = role if nth == 1 else f"{role} {nth}"
        participants.append(label)

    headcount = len(participants)
    excerpt = text[:2000]
    roster = ", ".join(participants)

    prompt = f"""Generate a natural conversation between {headcount} academics discussing this research paper:
    Paper content: {excerpt}

    Participants: {roster}
    Requirements:
    - Alternate speakers naturally
    - Include questions and responses
    - Show different perspectives
    - 6-8 exchanges total
    - Format as "[Speaker]: [Message]" exactly"""

    completion = llm(prompt, max_tokens=1024)
    return completion['choices'][0]['text']

def _parse_dialogue_lines(script):
    """Split a "Speaker: message" script into (speaker, message) pairs."""
    turns = []
    for line in script.split('\n'):
        if ':' not in line:
            continue
        speaker, content = line.split(':', 1)
        # Tolerate "[Speaker]:" and markdown-bold "**Speaker:**" output.
        speaker = speaker.strip().strip('[]*').strip()
        content = content.strip().strip('*').strip()
        if speaker and content:  # gTTS rejects empty text
            turns.append((speaker, content))
    return turns


def create_audio(script, selected_roles):
    """Generate multi-voice audio with distinct voices for same roles.

    Each "Speaker: message" line of ``script`` is synthesised with gTTS and the
    results are concatenated, in order, into ``dialogue.mp3``. Nothing is
    written when the script has no speakable lines.

    A speaker's role is the first word of their label ("Scientist 2" ->
    "Scientist"). Distinct speakers sharing a role are assigned that role's
    voice variants in order of first appearance, and each speaker keeps the
    same voice for the whole dialogue. Unknown roles use a default voice.

    Args:
        script: Dialogue text, one "Speaker: message" turn per line.
        selected_roles: Accepted for interface compatibility; voices are
            derived from the speaker labels found in ``script``.

    Raises:
        Whatever gTTS or moviepy raise on synthesis/encoding failure; the
        temporary files are removed either way.
    """
    default_config = {"variants": [{"tld": "com", "speed": False}]}
    voice_by_speaker = {}
    speakers_per_role = defaultdict(int)

    clips = []
    temp_files = []
    try:
        for idx, (speaker, content) in enumerate(_parse_dialogue_lines(script)):
            # Fix the voice the first time a speaker appears.
            if speaker not in voice_by_speaker:
                base_role = speaker.split(' ')[0]
                variants = ROLES.get(base_role, default_config)["variants"]
                voice_by_speaker[speaker] = variants[speakers_per_role[base_role] % len(variants)]
                speakers_per_role[base_role] += 1
            variant = voice_by_speaker[speaker]

            # Generate speech
            tts = gTTS(text=content, lang='en',
                       tld=variant["tld"], slow=variant["speed"])
            # One file per line: reusing a per-speaker name would overwrite audio
            # that an earlier clip still reads from.
            temp_file = f"temp_line_{idx:03d}.mp3"
            temp_files.append(temp_file)
            tts.save(temp_file)
            clips.append(AudioFileClip(temp_file))

        # Combine audio clips
        if clips:
            final_audio = concatenate_audioclips(clips)
            final_audio.write_audiofile("dialogue.mp3")
    finally:
        # Release file handles before deleting the temporary files.
        for clip in clips:
            clip.close()
        for f in temp_files:
            try:
                os.remove(f)
            except OSError:
                pass  # best-effort cleanup

def create_summary_visual(summary, output_path="summary_visual.png"):
    """Generate summary visualization.

    Renders ``summary`` as black text on a white 1920x1080 image and saves it.
    Long lines are word-wrapped so they stay on the canvas, and text that
    would run past the bottom margin is dropped.

    Args:
        summary: Text to render; newlines start new lines in the image.
        output_path: Destination image file.
    """
    import textwrap  # local import: only this function needs it

    width, height = 1920, 1080
    margin, line_height = 50, 30
    # Rough character budget per line for a 24pt font on this canvas width.
    max_chars = 140
    max_lines = (height - 2 * margin) // line_height

    img = Image.new("RGB", (width, height), "white")
    draw = ImageDraw.Draw(img)

    # Arial is usually missing on Linux/Colab, so try DejaVu Sans before
    # falling back to Pillow's built-in font.
    font = None
    for font_name in ("arial.ttf", "DejaVuSans.ttf"):
        try:
            font = ImageFont.truetype(font_name, 24)
            break
        except (OSError, ImportError):
            continue
    if font is None:
        font = ImageFont.load_default()

    wrapped = []
    for paragraph in summary.split('\n'):
        # textwrap returns [] for blank input; keep the blank line.
        wrapped.extend(textwrap.wrap(paragraph, width=max_chars) or [""])

    y = margin
    for line in wrapped[:max_lines]:
        draw.text((margin, y), line, font=font, fill="black")
        y += line_height

    img.save(output_path)

def research_paper_processor(pdf_path, selected_roles):
    """Run the whole paper-to-media pipeline for one PDF.

    Extracts the paper text, asks the module-level ``llm`` for a summary and
    then a dialogue, and renders both: the dialogue via ``create_audio`` and
    the summary via ``create_summary_visual``.

    Args:
        pdf_path: Path to the research paper PDF.
        selected_roles: Role names, one per dialogue participant.

    Returns:
        A ``(summary, dialogue)`` tuple of the generated texts.
    """
    paper_text = extract_text(pdf_path)

    # Both LLM passes work from the same extracted text.
    paper_summary = generate_summary(paper_text, llm)
    conversation = generate_dialogue(paper_text, selected_roles, llm)

    # Render the outputs: audio first, then the summary image.
    create_audio(conversation, selected_roles)
    create_summary_visual(paper_summary)

    return paper_summary, conversation

# Usage example: two participants who share the "Scientist" role discuss the paper.
selected_roles = ["Scientist"] * 2
pdf_path = "3-s2.0-B9781843346401500089-main.pdf"

# Runs extraction, both LLM passes, and writes the audio and image outputs.
summary, dialogue = research_paper_processor(pdf_path, selected_roles)

print("Summary for visuals:\n", summary)
print("\nDialogue script:\n", dialogue)