# In [None]:
import pandas as pd
import numpy as np
import google.generativeai as genai
import json
import textwrap
import re
import os 

# Location of the input CSV (athlete_events.csv).
# NOTE(review): the path has a space directly after "E:\" - confirm it matches
# the real folder name on disk.
file_path = r"E:\ NLP Project\surveyproject\DataFile\research_Work_5\athlete_events.csv"
df = None
try:
    df = pd.read_csv(file_path)
    # Normalise headers: coerce every column name to str and trim whitespace.
    df.columns = [str(col).strip() for col in df.columns]
    print("DataFrame loaded successfully.")
except FileNotFoundError:
    print(f"Error: The file was not found at the specified path: {file_path}")
    # Raise SystemExit directly: exit() is an interactive helper injected by
    # the site module (it may be absent), and calling it with no argument
    # would report success (status 0) for what is a failure.
    raise SystemExit(1)

# Configure the Gemini client. The key is taken from the GOOGLE_API_KEY
# environment variable when it is set, so the secret does not have to be
# committed with the source; the inline placeholder remains as the fallback
# to keep existing setups working.
try:
    genai.configure(api_key=os.environ.get("GOOGLE_API_KEY", "XXXXXXXXXXXXXXXXXX"))
except Exception as e:
    print(f"Error configuring Gemini API: {e}")
    # SystemExit is not an Exception subclass, so it propagates out of this
    # handler; a non-zero status marks the run as failed.
    raise SystemExit(1)

# Name of the JSON fact sheet that the script-generation step reads.
output_filename = 'olympic_analysis.json'
# Report whether the fact sheet is already on disk; nothing else happens here.
print(
    f"\nFound existing '{output_filename}'. Skipping analysis and proceeding to script generation."
    if os.path.exists(output_filename)
    else "\n'olympic_analysis.json' not found. Running data analysis..."
)


def generate_interview_script(json_filepath):
    """Turn a JSON fact sheet into a podcast interview script via Gemini.

    The file at ``json_filepath`` is parsed as JSON, pretty-printed, and
    embedded in a prompt that is sent to the ``gemini-1.5-flash`` model.

    Args:
        json_filepath: Path to the JSON fact sheet. The prompt refers to
            "question" and "answer" fields, so the file presumably holds
            question/answer entries; the structure is not validated here.

    Returns:
        The generated script text with surrounding whitespace stripped.
        Failures are not raised: if the file is missing, cannot be parsed,
        or the Gemini request fails, a message starting with ``"Error"`` is
        returned instead.
    """
    print("\nReading JSON data to generate the interview script...")

    try:
        # Read as UTF-8 explicitly instead of relying on the platform's
        # default text encoding.
        with open(json_filepath, 'r', encoding='utf-8') as f:
            fact_sheet = json.load(f)
    except FileNotFoundError:
        return "Error: JSON file not found. Please run the analysis first."
    except (json.JSONDecodeError, UnicodeDecodeError) as e:
        # Report unreadable content the same way as every other failure in
        # this function: as an error string, not an exception.
        return f"Error: JSON file could not be parsed: {e}"

    # ensure_ascii=False keeps non-ASCII text readable in the prompt instead
    # of turning it into \uXXXX escape sequences.
    facts_string = json.dumps(fact_sheet, indent=2, ensure_ascii=False)

    model = genai.GenerativeModel('gemini-1.5-flash')

    prompt = f"""
    You are a creative podcast scriptwriter. Your task is to write an engaging and natural-sounding interview script for a show called "Data Driven Athlete".

    **Characters:**
    - **Host:** Sarah Jenkins (Curious, engaging, and keeps the conversation flowing)
    - **Expert:** Dr. Alex Carter (A knowledgeable sports data scientist who explains the data)

    **Source Material:**
    Use the following list of questions and their data-driven answers as the factual basis for the entire conversation. Do NOT invent new data.

    ```json
    {facts_string}
    ```

    **Instructions:**
    1.  Create a complete script, including a brief introduction by Sarah and a short conclusion.
    2.  Sarah's questions should be conversational rephrasings of the "question" fields from the JSON.
    3.  Dr. Carter's answers should interpret and summarize the "answer" data from the JSON in an easy-to-understand way. He should not just read the raw data. For example, instead of reading a table, he should say "The data shows that..."
    4.  Weave the points together so the conversation flows naturally from one topic to the next.
    5.  The final output MUST be only the script text. Do not include any other explanations.
    6.  Format the script clearly with the character's name followed by a colon (e.g., "Sarah Jenkins:").
    """

    print("Sending facts to Gemini for script generation. This may take a moment...")
    try:
        response = model.generate_content(prompt)
        # Extract the text inside the try block: reading it can fail as well
        # (presumably when the response carries no usable text - confirm
        # against the client library), and success must not be announced
        # before it is known.
        script_text = response.text.strip()
    except Exception as e:
        return f"Error during script generation: {e}"

    print("✅ Script generated successfully!")
    return script_text

# Generate the script from the fact sheet and echo it to the console.
generated_script = generate_interview_script(output_filename)

print("\n" + "="*50)
print("--- GENERATED INTERVIEW SCRIPT ---")
print("="*50 + "\n")
print(generated_script)

script_filename = 'interview_script.txt'
# generate_interview_script reports failures as strings beginning with
# "Error" instead of raising. Do not persist such a message as the script,
# and do not announce success for it.
if generated_script.startswith(("Error:", "Error during script generation:")):
    print("\n" + "="*50)
    print(f"Script generation failed; '{script_filename}' was not written.")
else:
    # Write as UTF-8 explicitly: the generated text may contain characters
    # that the platform's default encoding cannot represent.
    with open(script_filename, 'w', encoding='utf-8') as f:
        f.write(generated_script)

    print("\n" + "="*50)
    print(f"✅ Final script has been saved to '{script_filename}'.")
    print("Next step: Use this script to generate the AI voices!")