In [None]:
import os
import subprocess
import tempfile
import warnings

import pytest
import requests
import streamlit as st
from fpdf import FPDF
from gtts import gTTS
from paddleocr import PaddleOCR
from transformers import BartTokenizer, pipeline

# Test Case 1: YouTube Downloader Test
def test_youtube_downloader():
    """Download a known public video with ``yt-dlp`` and check a file appears.

    The command runs inside a throwaway directory so the downloaded media is
    deleted afterwards instead of being left in the working directory, and a
    timeout keeps a stalled transfer from hanging the whole test run.

    Requires the ``yt-dlp`` executable on PATH and network access.
    """
    url = "https://www.youtube.com/watch?v=dQw4w9WgXcQ"  # Example video URL

    with tempfile.TemporaryDirectory() as download_dir:
        result = subprocess.run(
            ["yt-dlp", url],
            capture_output=True,
            text=True,
            cwd=download_dir,  # relative output paths resolve inside the temp dir
            timeout=600,
        )
        # Snapshot the directory before the context manager deletes it.
        downloaded = os.listdir(download_dir)

    # Surface yt-dlp's own error output when the command fails.
    assert result.returncode == 0, result.stderr
    # NOTE(review): assumes no user-level yt-dlp config redirects output to an
    # absolute path outside the working directory - confirm on CI machines.
    assert downloaded, "yt-dlp exited successfully but produced no file"

# Test Case 2: Audio Extractor Test
def test_audio_extractor():
    """Extract the audio track of ``test_video.mp4`` to WAV using ``ffmpeg``.

    Requires the ``ffmpeg`` executable on PATH and a ``test_video.mp4`` fixture
    in the working directory.
    """
    video_path = "test_video.mp4"
    audio_output_path = "test_audio.wav"

    try:
        # "-y" overwrites a stale output file left by an interrupted run;
        # without it ffmpeg refuses to overwrite and the test can never pass.
        result = subprocess.run(
            ["ffmpeg", "-y", "-i", video_path, audio_output_path],
            capture_output=True,
            text=True,
        )

        # Show ffmpeg's diagnostics when the conversion fails.
        assert result.returncode == 0, result.stderr
        assert os.path.exists(audio_output_path)  # Verify that the audio file was created
    finally:
        # Clean up even when an assertion fails, so a partial file is not left behind.
        if os.path.exists(audio_output_path):
            os.remove(audio_output_path)

# Test Case 3: OCR Module Test
def test_ocr_module():
    """Run PaddleOCR on ``test_frame.jpg`` and sanity-check the result shape.

    Requires a ``test_frame.jpg`` fixture in the working directory.
    """
    frame_file = "test_frame.jpg"
    engine = PaddleOCR(use_angle_cls=True, lang='en')

    detections = engine.ocr(frame_file, cls=True)

    # The outer result must be non-empty ...
    assert len(detections) > 0
    # ... and its first entry must be a list.
    first_entry = detections[0]
    assert isinstance(first_entry, list)

# Test Case 4: Text Merger Test
def merge_texts(transcript, ocr_text):
    """Join the spoken transcript and the OCR text into one passage.

    Args:
        transcript: Text of the audio transcript; ``None`` or blank is
            treated as missing.
        ocr_text: Text recognised from video frames; ``None`` or blank is
            treated as missing.

    Returns:
        ``"<transcript>. <ocr_text>"`` when both pieces are present. If the
        transcript already ends with ``.``, ``!`` or ``?`` only a space is
        inserted, so the punctuation is not doubled. If one piece is missing
        the other is returned on its own, and ``""`` when both are missing.
        Surrounding whitespace of each piece is stripped.
    """
    cleaned = [
        "" if piece is None else str(piece).strip()
        for piece in (transcript, ocr_text)
    ]
    present = [piece for piece in cleaned if piece]

    # With zero or one usable piece there is nothing to separate.
    if len(present) < 2:
        return "".join(present)

    spoken, on_screen = present
    separator = " " if spoken.endswith((".", "!", "?")) else ". "
    return f"{spoken}{separator}{on_screen}"

def test_text_merger():
    """Check that ``merge_texts`` joins transcript and OCR text with ". "."""
    expected = "This is a transcript. This is extracted from an image"

    actual = merge_texts("This is a transcript", "This is extracted from an image")

    assert actual == expected

# Test Case 5: BART Summarizer Test
def test_bart_summarizer():
    """Summarise a short passage with ``facebook/bart-large-cnn``.

    Only checks that the pipeline yields a non-empty string under the
    ``summary_text`` key of its first result.
    """
    source_text = "This is a long text that needs to be summarized. It contains several sentences."
    bart = pipeline("summarization", model="facebook/bart-large-cnn")

    outputs = bart(source_text, max_length=50, min_length=25, do_sample=False)
    generated = outputs[0]['summary_text']

    assert len(generated) > 0  # a summary was produced
    assert isinstance(generated, str)

# Test Case 6: PDF Generator Test
def test_pdf_generator():
    """Render a one-line summary to ``test_summary.pdf`` and check it exists."""
    out_file = "test_summary.pdf"
    body_text = "This is a summary"

    # Build a single-page document containing the summary text.
    document = FPDF()
    document.add_page()
    document.set_font("Arial", size=12)
    document.multi_cell(0, 10, body_text)
    document.output(out_file)

    assert os.path.exists(out_file)  # the PDF was written to disk
    os.remove(out_file)  # remove the generated artefact

# Test Case 7: Text-to-Speech (gTTS) Test
def test_tts():
    """Synthesise a short English phrase with gTTS and check the saved file."""
    mp3_path = "test_audio.mp3"

    speech = gTTS(text="This is a test audio", lang='en')
    speech.save(mp3_path)

    assert os.path.exists(mp3_path)  # the audio file was written to disk
    os.remove(mp3_path)  # remove the generated artefact

# Test Case 8: Streamlit UI Test
def test_streamlit_ui():
    """Check that the URL text input and the download button are rendered.

    The original assertions tested membership on the ``st.text_input`` and
    ``st.button`` function objects, which raises ``TypeError``. The widgets
    are rendered through Streamlit's ``AppTest`` harness instead, and the
    labels of the resulting elements are inspected.
    """
    # Imported here so an older Streamlit without the testing module only
    # breaks this test rather than the import of the whole file.
    from streamlit.testing.v1 import AppTest

    def _app():
        # AppTest executes this function's source as a standalone script,
        # so it has to carry its own import.
        import streamlit as st

        st.text_input("Enter YouTube URL")
        st.button("Download Video")

    app = AppTest.from_function(_app).run(timeout=10)

    assert not app.exception  # the script ran without raising
    assert [w.label for w in app.text_input] == ["Enter YouTube URL"]  # Ensure text input field is present
    assert [w.label for w in app.button] == ["Download Video"]  # Ensure button exists

# Test Case 9: Summary Download Test
def test_summary_download():
    """Request the summary download endpoint and expect a PDF response.

    Requires the application to be serving on ``localhost:8501``.
    """
    url = "http://localhost:8501/download-summary"  # Example download URL for summary
    # Without a timeout, requests waits indefinitely on an unresponsive server.
    response = requests.get(url, timeout=10)

    assert response.status_code == 200  # Ensure the download is successful
    # .get() turns a missing header into an assertion failure, not a KeyError.
    assert "application/pdf" in response.headers.get("Content-Type", "")  # Verify the content type

# Test Case 10: Audio Playback Test
def test_audio_playback():
    """Request the audio playback endpoint and expect an MP3 response.

    Requires the application to be serving on ``localhost:8501``.
    """
    url = "http://localhost:8501/play-audio"  # Example URL for audio playback
    # Without a timeout, requests waits indefinitely on an unresponsive server.
    response = requests.get(url, timeout=10)

    assert response.status_code == 200  # Ensure the endpoint responds successfully

    # .get() turns a missing header into an assertion failure, not a KeyError.
    content_type = response.headers.get("Content-Type", "")
    # "audio/mpeg" is the registered media type for MP3; "audio/mp3" is the
    # non-standard spelling the original test expected, so both are accepted.
    assert any(mime in content_type for mime in ("audio/mpeg", "audio/mp3"))

# Test Case 11: Token Length Handling Test
def test_token_length():
    """Check that a short sample encodes to no more tokens than the tokenizer allows."""
    sample = "This is a sample input for testing the token length. It might be a bit long."
    bart_tokenizer = BartTokenizer.from_pretrained("facebook/bart-large-cnn")

    token_ids = bart_tokenizer.encode(sample)
    limit = bart_tokenizer.model_max_length

    # The encoded length must not exceed the tokenizer's reported maximum.
    assert len(token_ids) <= limit

# Test Case 12: Video Downloader (Edge Case: Invalid URL) Test
def test_invalid_youtube_url():
    """Check that ``yt-dlp`` fails and reports an error for an unusable URL.

    Requires the ``yt-dlp`` executable on PATH.
    """
    invalid_url = "https://invalid.url"
    result = subprocess.run(
        ["yt-dlp", invalid_url],
        capture_output=True,
        text=True,
        timeout=120,  # keep a stalled lookup from hanging the test run
    )

    assert result.returncode != 0  # Ensure the invalid URL returns an error
    # Only the "ERROR:" prefix is checked: the wording after it depends on
    # which stage fails (network lookup vs. page extraction), so pinning
    # "Unable to extract" makes the test brittle.
    assert "ERROR:" in result.stderr, result.stderr

# Running tests with pytest
if __name__ == "__main__":
    # pytest.main() returns the session's exit code; propagate it so that
    # running this file as a script exits non-zero when tests fail.
    raise SystemExit(pytest.main())


platform win32 -- Python 3.11.9, pytest-8.3.5, pluggy-1.5.0
rootdir: C:\Users\kandi\Documents\sem_6\NLP\youtube_video_summarizer
plugins: anyio-4.9.0
collected 12 items
test_project.py ............
C:\Users\kandi\Documents\sem_6\NLP\youtube_video_summarizer\test_project.py:3
    some_function()

