# In [None]:
from openai import OpenAI

# Shared OpenAI client: its api_key is reused for the raw transcription
# request and the client itself performs the chat-completion call.
# NOTE(review): the key below is a placeholder string — a real key must be
# supplied before running; avoid committing it to source control.
client = OpenAI(api_key="Your-OpenAI_API_Key")
import subprocess
import os
import pandas as pd
import requests

# Function to extract audio from YouTube
def extract_audio_from_youtube(video_url, output_file="audio.mp3"):
    """Download the best available audio stream of a video using yt-dlp.

    Args:
        video_url: URL of the video to download.
        output_file: Path passed to yt-dlp's ``-o`` option.

    Returns:
        ``output_file``, unchanged, so callers can chain on the path.

    Raises:
        subprocess.CalledProcessError: If yt-dlp exits with a non-zero status.
        FileNotFoundError: If the ``yt-dlp`` executable cannot be found.
    """
    # NOTE(review): "-f bestaudio" only selects a stream; nothing here
    # re-encodes it, so the ".mp3" default name presumably does not match the
    # real container. Confirm the downstream consumer accepts that, or add
    # yt-dlp's audio-extraction options.
    command = [
        "yt-dlp",
        "-f", "bestaudio",
        "-o", output_file,
        # End option parsing: the URL is external data, and a value starting
        # with "-" would otherwise be interpreted as a yt-dlp flag.
        "--",
        video_url,
    ]
    subprocess.run(command, check=True)
    return output_file

# Function to transcribe audio using OpenAI Whisper API (new API)
def transcribe_audio_with_openai(audio_file):
    """Transcribe an audio file via OpenAI's audio transcription endpoint.

    The file is uploaded as multipart form data together with the model name
    ``whisper-1``, authenticated with the module-level ``client``'s API key.

    Args:
        audio_file: Path of the audio file to upload.

    Returns:
        The ``"text"`` field of the JSON response, or ``""`` if it is absent.

    Raises:
        RuntimeError: If the API answers with a non-200 status code.
        OSError: If ``audio_file`` cannot be opened.
        requests.RequestException: On connection errors or timeouts.
    """
    url = "https://api.openai.com/v1/audio/transcriptions"
    headers = {
        "Authorization": f"Bearer {client.api_key}"
    }
    with open(audio_file, "rb") as file:
        files = {
            "file": file,
            # (None, value) makes requests send a plain form field, not a file.
            "model": (None, "whisper-1")
        }
        # Without a timeout a stalled connection would block the whole batch.
        # The read timeout is generous because the server only replies once
        # the transcription is finished.
        response = requests.post(
            url, headers=headers, files=files, timeout=(10, 600)
        )

    if response.status_code == 200:
        return response.json().get("text", "")  # Ensure "text" key exists

    # Error bodies are not guaranteed to be JSON (e.g. a proxy's HTML page);
    # fall back to the raw text so the real failure is not masked by a
    # JSON decode error.
    try:
        detail = response.json()
    except ValueError:
        detail = response.text
    raise RuntimeError(f"Transcription failed: {detail}")

# Function to analyze transcript using OpenAI GPT
def analyze_transcript_with_gpt(transcript):
    """Ask the chat model for SEO metadata derived from a video transcript.

    The prompt requests an SEO title, a meta description, a keyword list and
    a full description; the model's reply is returned verbatim.

    Args:
        transcript: Transcript text embedded into the prompt.

    Returns:
        The content of the first choice's message, as one raw string.

    Raises:
        RuntimeError: If building the request, calling the API or reading the
            response fails; the original exception is attached as
            ``__cause__``.
    """
    try:
        prompt = f"""
        You are an SEO expert. Based on the following video transcript, generate:
        1. An SEO-optimized title (under 60 characters).
        2. A meta description (under 160 characters).
        3. A list of relevant keywords.
        4. A full description optimized for SEO.

        Transcript: {transcript}
        """
        response = client.chat.completions.create(
            model="gpt-4",
            messages=[
                {"role": "system", "content": "You are an expert in SEO and video content analysis."},
                {"role": "user", "content": prompt}
            ])
        # Return the entire raw output as a single string
        return response.choices[0].message.content
    except Exception as e:
        # Chain explicitly so the underlying error stays reachable by callers.
        raise RuntimeError(f"GPT analysis failed: {e}") from e

# Main function to process videos
def process_videos(input_excel, output_excel):
    """Run the download/transcribe/analyze pipeline for every row of a workbook.

    Each row's "YouTube Video URL" is downloaded, transcribed and analyzed.
    One result row is written per input row; when any step fails, the
    "SEO Analysis" cell holds ``"Error: <message>"`` instead of the analysis
    and processing continues with the next row.

    Args:
        input_excel: Workbook to read. Must contain a "YouTube Video URL"
            column; "Page URL", "Page Title" and "Meta description" are
            copied through when present and default to "N/A" otherwise.
        output_excel: Path of the workbook the results are written to.

    Raises:
        ValueError: If the input has no "YouTube Video URL" column.
    """
    # Load video URLs from Excel
    df = pd.read_excel(input_excel)
    if "YouTube Video URL" not in df.columns:
        raise ValueError("Input Excel must have a 'YouTube Video URL' column.")

    results = []

    for _, row in df.iterrows():
        video_url = row["YouTube Video URL"]
        # Reset per row: the cleanup below must never see an unbound name
        # (download failed on the first row) or the previous row's path.
        audio_file = None
        try:
            print(f"Processing video: {video_url}")

            # Blank spreadsheet cells arrive as NaN; report them clearly
            # instead of letting the downloader fail on a non-string value.
            if pd.isna(video_url):
                raise ValueError("Missing YouTube video URL")

            # Step 1: Extract audio
            audio_file = extract_audio_from_youtube(video_url)

            # Step 2: Transcribe audio
            transcript = transcribe_audio_with_openai(audio_file)
            if not transcript:
                print(f"Warning: Empty transcript for video {video_url}")
                raise ValueError("Empty transcript")

            # Step 3: Analyze transcript with GPT
            analysis = analyze_transcript_with_gpt(transcript)
        except Exception as e:
            # Deliberate best-effort: one bad video must not abort the batch.
            print(f"Error processing {video_url}: {e}")
            analysis = f"Error: {e}"
        finally:
            # Cleanup: Delete temporary audio file
            if audio_file is not None and os.path.exists(audio_file):
                os.remove(audio_file)

        # Same row layout for successes and failures.
        results.append({
            "Page URL": row.get("Page URL", "N/A"),
            "Page Title": row.get("Page Title", "N/A"),
            "Meta description": row.get("Meta description", "N/A"),
            "YouTube Video URL": video_url,
            "SEO Analysis": analysis,
        })

    # Save results to Excel
    results_df = pd.DataFrame(results)
    results_df.to_excel(output_excel, index=False)
    print(f"Results saved to {output_excel}")

# Entry point: analyze every video listed in the input workbook.
input_excel = "input_videos.xlsx"  # Workbook holding the video URLs and page metadata
output_excel = "video_analysis_results.xlsx"  # Workbook the analysis is written to
process_videos(input_excel=input_excel, output_excel=output_excel)
