In [None]:
# -------------------------------
# Base GT - ChatGPT
# -------------------------------

import os
import glob
import csv
import openai
import json
import sys
import time

# Read the key from the OPENAI_API_KEY environment variable when it is set, so the
# real secret does not have to be written into this file. When the variable is
# absent the literal fallback below is used, exactly as before.
openai.api_key = os.environ.get("OPENAI_API_KEY", "placeholder")  # Or replace the fallback with your actual API key

# Maximum number of attempts made for one API request before giving up
# (the first call counts as attempt 1).
MAX_RETRIES = 3

def get_gpt4_response(prompt, model="gpt-4o", temperature=1, max_tokens=10000):
    """
    Send `prompt` as a single user message and return the reply text.

    The request is made with ``openai.ChatCompletion.create`` and always
    carries the fixed system message "You are a helpful assistant.".

    Args:
        prompt: Text placed in the user message.
        model: Model name forwarded to the API call.
        temperature: Sampling temperature forwarded to the API call.
        max_tokens: Completion token limit forwarded to the API call.

    Returns:
        The message content of the first returned choice, with leading and
        trailing whitespace stripped.

    Raises:
        Any exception raised by the API call; nothing is caught here, so
        callers are responsible for retrying.
    """
    conversation = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt},
    ]
    completion = openai.ChatCompletion.create(
        model=model,
        messages=conversation,
        temperature=temperature,
        max_tokens=max_tokens,
    )
    # Only the first choice is used.
    first_choice = completion['choices'][0]
    return first_choice['message']['content'].strip()

# Prefix of the marker string stored in the results file when a CSV could not be
# processed. Entries carrying it are retried on the next run instead of skipped.
_ERROR_PREFIX = "Error after "


def _is_error_marker(value):
    """Return True when a stored result is a failure marker rather than a response."""
    return isinstance(value, str) and value.startswith(_ERROR_PREFIX)


def _load_results(output_json_file):
    """Return the previously saved results dict, or an empty dict if none is usable."""
    if not os.path.exists(output_json_file):
        print(f"No existing '{output_json_file}' found. Starting fresh.")
        return {}

    with open(output_json_file, "r", encoding="utf-8") as jf:
        try:
            loaded = json.load(jf)
        except json.JSONDecodeError:
            loaded = None

    # Results are addressed by string keys, so anything other than a JSON
    # object (empty file, invalid JSON, a list, ...) cannot be resumed from.
    if not isinstance(loaded, dict):
        print(
            f"Warning: '{output_json_file}' is empty or does not contain a JSON object. "
            "Starting fresh."
        )
        return {}

    print(f"Loaded existing results from '{output_json_file}'.")
    return loaded


def _save_results(results, output_json_file):
    """Write `results` as JSON via a temporary file and an atomic rename."""
    # Writing next to the target and renaming means an interruption mid-write
    # leaves the previous results file intact instead of a truncated one.
    tmp_path = f"{output_json_file}.tmp"
    with open(tmp_path, "w", encoding="utf-8") as json_file:
        json.dump(results, json_file, indent=4, ensure_ascii=False)
    os.replace(tmp_path, output_json_file)


def _read_descriptions(csv_file):
    """Return the "Description" column of `csv_file` as a list, in row order."""
    # utf-8-sig also reads plain UTF-8, but strips a leading BOM that would
    # otherwise become part of the first header name.
    with open(csv_file, newline='', encoding='utf-8-sig') as f:
        # Adjust delimiter if needed ("," or "\t", etc.)
        reader = csv.DictReader(f, delimiter=",")
        # DictReader gives every row the same keys (the header), so the column
        # check is done once per file rather than once per row.
        if "Description" not in (reader.fieldnames or []):
            print(f"Warning: 'Description' column not found in {csv_file}")
            return []
        # Rows shorter than the header yield None for missing cells; use an
        # empty string so the text "None" never ends up in the prompt.
        return [row["Description"] or "" for row in reader]


def _build_prompt(descriptions):
    """Return the full prompt: fixed instructions followed by numbered scenes."""
    instructions = (
        "I have a list of scenes describing a movie step by step. "
        "Please transform them into a single paragraph, preserving the chronological order. "
        "Include every detail from each scene without adding or omitting any information. "
        "Use only straightforward, factual wording, and avoid any new or descriptive language beyond what is provided in the scene notes.\n\n"
        "Input: \tDescription\n"
    )
    scene_lines = [f"{i}\t{desc}\n" for i, desc in enumerate(descriptions, start=1)]
    return instructions + "".join(scene_lines)


def _query_with_retries(prompt):
    """Call get_gpt4_response up to MAX_RETRIES times; re-raise the last failure."""
    for attempt in range(1, MAX_RETRIES + 1):
        try:
            return get_gpt4_response(prompt)
        except Exception:
            if attempt == MAX_RETRIES:
                raise
            time.sleep(3)  # short delay before retry
    # Only reachable when MAX_RETRIES < 1, i.e. no attempt was made at all.
    raise RuntimeError("MAX_RETRIES must be at least 1")


def process_csv_files(folder_path, output_json_file):
    """
    Summarise every CSV file in a folder with GPT-4o, saving progress as it goes.

    For each ``*.csv`` file in `folder_path` (processed in sorted path order):
      1) Read the "Description" column in order.
      2) Construct a single prompt listing the descriptions as numbered scenes.
      3) Call GPT-4o, retrying on error up to MAX_RETRIES attempts.
      4) Store the response under the file's 1-based position (as a string key)
         and rewrite `output_json_file`, so the script can be resumed.

    Resume behaviour: a file whose key already holds a response is skipped. A
    key holding a failure marker from an earlier run ("Error after ...") is
    retried rather than skipped. Because keys are positions in the sorted file
    list, adding or removing CSV files between runs shifts the mapping.

    Args:
        folder_path: Directory searched (non-recursively) for ``*.csv`` files.
        output_json_file: Path of the JSON file used to load and save results.

    Raises:
        SystemExit: With exit code 1 when a file still fails after MAX_RETRIES
            attempts. The failure marker is saved before exiting.
    """
    # 1) Load existing results (if any) so we can resume from partial progress.
    results = _load_results(output_json_file)

    # 2) Get all CSV files
    csv_files = sorted(glob.glob(os.path.join(folder_path, "*.csv")))
    print(f"Found {len(csv_files)} CSV file(s) in folder '{folder_path}'.")

    # 3) Process each CSV file
    for index, csv_file in enumerate(csv_files, start=1):
        key = str(index)

        # Resume logic: skip only entries that hold a real response. A stored
        # failure marker must not count as finished work.
        if key in results:
            if not _is_error_marker(results[key]):
                print(f"Skipping file {index} ('{csv_file}') - already in JSON results.")
                continue
            print(f"Retrying file {index} ('{csv_file}') - previous run recorded an error.")

        prompt = _build_prompt(_read_descriptions(csv_file))

        # 4) Call GPT-4o with retries
        try:
            response = _query_with_retries(prompt)
        except Exception as e:
            # Exceeded max retries: record the failure, save partial progress and exit.
            results[key] = f"{_ERROR_PREFIX}{MAX_RETRIES} attempts: {e}"
            _save_results(results, output_json_file)
            print(f"Max retries exceeded for file {index}. Partial progress saved; exiting.")
            sys.exit(1)

        # 5) Store result in dictionary
        results[key] = response

        # 6) Save partial progress to JSON after each file
        _save_results(results, output_json_file)
        print(f"Progress saved after file {index}.")

if __name__ == "__main__":
    # Script entry point: summarise every CSV in the folder below and keep the
    # results in a JSON file in the current working directory.
    folder_path, output_json_file = (
        "/home/jacks.local/hdubey/VADv6/experiment/rawGT3",  # Update to your folder
        "GT.json",
    )

    print("Starting CSV processing and GPT-4o querying...")
    process_csv_files(folder_path, output_json_file)
    print("Processing complete.")