In [None]:
import openai
from openai import OpenAI
import pandas as pd
from datetime import datetime
import os
import openpyxl
from dotenv import load_dotenv
import pandas as pd
import logging
from time import sleep
from anthropic import Anthropic

In [None]:
# Load variables from a local .env file into the process environment, then read the OpenAI key.
load_dotenv()
api_key = os.getenv('OPENAI_API_KEY')  # None when the variable is not set

# Module-level OpenAI client; chat_fn issues its requests through this name.
client = OpenAI(api_key=api_key)

In [None]:
# Anthropic client built from the CLAUDE_API_KEY environment variable (None when unset).
# NOTE(review): claude_client is not referenced by any other cell visible in this notebook — confirm it is still needed.
claude_api_key = os.getenv('CLAUDE_API_KEY')
claude_client = Anthropic(
    api_key=claude_api_key
)

In [None]:
# Load the "similar" constraints split; the path is relative to the notebook's working directory.
constraints_similar_df = pd.read_csv('../data/50_run1/constraints_similar_split.csv')
# A bare last expression uses the notebook's rich table display instead of plain-text print().
constraints_similar_df.head()


In [None]:
# Load the "dissimilar" constraints split; the path is relative to the notebook's working directory.
constraints_dissimilar_df = pd.read_csv('../data/50_run1/constraints_dissimilar_split.csv')
# A bare last expression uses the notebook's rich table display instead of plain-text print().
constraints_dissimilar_df.head()

In [None]:
# Functions to track API call cost. 

def log_usage(tokens, path="api_usage.txt"):
    """Append one timestamped token-usage record to the usage log.

    Each record is a single line of the form
    ``MM-DD-YYYY HH:MM:SS : <tokens>``, which is the format ``total_usage``
    parses.

    Parameters:
        tokens: Token count for one API call; written with its ``str()`` form.
        path (str): Log file to append to. Defaults to ``"api_usage.txt"``
            in the current working directory; the file is created if missing.
    """
    # Timestamp uses local time, month first.
    current_time = datetime.now().strftime("%m-%d-%Y %H:%M:%S")

    # Append mode keeps earlier records; explicit encoding avoids
    # platform-dependent defaults.
    with open(path, "a", encoding="utf-8") as file:
        file.write(f"{current_time} : {tokens}\n")


def total_usage(path="api_usage.txt", cost_per_1k_tokens=0.0015):
    """Print the total tokens recorded in the usage log and an estimated cost.

    The log is expected to contain lines of the form
    ``<timestamp> : <tokens>``. Lines that do not split into exactly two
    parts on ``" : "``, or whose token field is not an integer, are skipped.
    A missing log file is treated as "nothing logged yet" rather than an error.

    Parameters:
        path (str): Usage log to read. Defaults to ``"api_usage.txt"``.
        cost_per_1k_tokens (float): Price applied per 1000 tokens.
            NOTE(review): the default is a single flat rate that does not
            depend on the model used for the calls — confirm it matches the
            pricing of the model actually in use.

    Returns:
        None. The totals are printed to stdout.
    """
    total_tokens = 0
    try:
        with open(path, "r", encoding="utf-8") as file:
            for line in file:
                # Split the line into date-time and tokens.
                parts = line.split(" : ")
                if len(parts) != 2:
                    continue
                try:
                    total_tokens += int(parts[1])
                except ValueError:
                    # e.g. a record written with tokens=None; ignore it
                    # instead of aborting the whole summary.
                    continue
    except FileNotFoundError:
        # No API call has been logged yet; report zero usage.
        pass

    cost = (total_tokens * cost_per_1k_tokens) / 1000
    print("Total tokens used so far: ", total_tokens)
    print(f"Total cost so far: {cost}$")
    return

In [None]:
# Default system message for blog generation.
# chat_fn captures this value as its `system_prompt` default when its `def` cell runs,
# so rebinding `system_prompt` later does not change that default until chat_fn is re-defined.
system_prompt="""You are an experienced blog writer.
Your task is to write a blog post that fulfills the main writing objective provided by the user."""

# User-message template; chat_fn fills the single `{}` placeholder via user_input.format(instruction).
user_input = """
Main Task - {}
"""

In [None]:
def chat_fn(instruction, model="gpt-4.1-mini", system_prompt=system_prompt, log=False):
    """Send one instruction to the chat-completions endpoint and return the raw response.

    The instruction is inserted into the module-level ``user_input`` template
    and sent together with ``system_prompt`` through the module-level
    ``client``. Token usage of every call is appended to the usage log via
    ``log_usage``.

    Parameters:
        instruction: Text substituted into the ``user_input`` template.
        model (str): Model identifier passed to the API.
        system_prompt (str): Content of the system message.
        log (bool): When True, also print the call's total token count.

    Returns:
        The response object returned by ``client.chat.completions.create``.
    """
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_input.format(instruction)},
    ]
    completion = client.chat.completions.create(model=model, messages=messages)

    # Usage is always recorded; printing it is opt-in.
    used_tokens = completion.usage.total_tokens
    log_usage(tokens=used_tokens)
    if log:
        print("Total tokens used: ", used_tokens)

    return completion

In [None]:


def generate_base_blog(
    df: pd.DataFrame,
    chat_fn,
    system_prompt: str,
    model: str = "gpt-4.1-mini",
    output_path: str = "../data/base_blog.csv",
    retry_attempts: int = 3,
    delay: float = 1.0
):
    """
    Generates a base blog for each main task in `df` using an LLM chat function
    and saves the results as a CSV.

    Parameters:
        df (pd.DataFrame): Input DataFrame with a 'Main Task' column. If an
            'Instruction Number' column is present it is used to label log
            messages; otherwise the 1-based row position is used.
        chat_fn (callable): Function for LLM chat, called as
            chat_fn(main_task, model=model, system_prompt=system_prompt, log=True).
            Its return value must expose `.choices[0].message.content`.
        system_prompt (str): System-level prompt defining model behavior.
        model (str): LLM model identifier, default 'gpt-4.1-mini'.
        output_path (str): Path for saving the new CSV. Missing parent
            directories are created before any generation starts.
        retry_attempts (int): Maximum number of attempts per row.
        delay (float): Delay (seconds) between attempts.

    Returns:
        pd.DataFrame: The same `df` object that was passed in, with a new
        'base_blog' column added in place. Rows whose generation failed on
        every attempt get an empty string.

    Raises:
        ValueError: If `df` has no 'Main Task' column.
    """
    if 'Main Task' not in df.columns:
        raise ValueError("Input DataFrame must contain a 'Main Task' column.")

    logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")

    # Make sure the destination exists up front, so a missing directory cannot
    # discard the results after all the API calls have been made.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    has_instruction_number = 'Instruction Number' in df.columns
    base_blogs = []

    # Label rows by position rather than `index + 1`, which raises TypeError
    # for non-numeric index labels.
    for position, (_, row) in enumerate(df.iterrows(), start=1):
        main_task = row['Main Task']
        instruction_num = row['Instruction Number'] if has_instruction_number else position
        logging.info(f"Processing Instruction #{instruction_num}")

        # Stays empty if every attempt fails (best-effort: the run continues).
        base_blog = ""

        for attempt in range(1, retry_attempts + 1):
            try:
                # Generate blog from main task
                response = chat_fn(main_task, model=model, system_prompt=system_prompt, log=True)
                base_blog = response.choices[0].message.content.strip()
                break
            except Exception as e:
                logging.warning(f"Attempt {attempt}/{retry_attempts} failed for Instruction #{instruction_num}: {e}")
                if attempt < retry_attempts:
                    sleep(delay)
                else:
                    logging.error(f"Failed to generate base blog for Instruction #{instruction_num} after {retry_attempts} attempts.")

        base_blogs.append(base_blog)

    # Add base_blog column (mutates the caller's DataFrame)
    df["base_blog"] = base_blogs

    # Save to CSV
    df.to_csv(output_path, index=False)
    logging.info(f"Base blogs saved to {output_path}")

    return df


In [None]:
# Makes one chat_fn (API) call per row, adds a 'base_blog' column to constraints_similar_df in place,
# and writes the result to the CSV below. The returned frame is this cell's displayed output.
generate_base_blog(df=constraints_similar_df, output_path="../data/50_run1/base_blog_similar.csv", chat_fn=chat_fn, system_prompt=system_prompt)

In [None]:
# Makes one chat_fn (API) call per row, adds a 'base_blog' column to constraints_dissimilar_df in place,
# and writes the result to the CSV below. The returned frame is this cell's displayed output.
generate_base_blog(df=constraints_dissimilar_df, output_path="../data/50_run1/base_blog_dissimilar.csv", chat_fn=chat_fn, system_prompt=system_prompt)

In [None]:
# # Prompt to summarize the blog while keeping its main points intact

# system_prompt = """
# Given the blog post, rewrite a summarized version of the blog post that captures the main points.
# """

# def summarize_blog(df: pd.DataFrame, chat_fn, system_prompt: str, model: str = "gpt-4.1-mini", output_path: str = "../data/trials/summarized_blog.csv", retry_attempts: int = 3, delay: float = 1.0):
#     """
#     Summarizes each blog post in `df` using an LLM chat function
#     and saves the results as a CSV.

#     Parameters:
#         df (pd.DataFrame): Input DataFrame with 'blog' column.
#         chat_fn (callable): Function for LLM chat, e.g. chat_fn(prompt, model, system_prompt, log=True).
#         system_prompt (str): System-level prompt defining model behavior.
#         model (str): LLM model identifier, default 'gpt-4.1-mini'.
#         output_path (str): Path for saving the new CSV.
#     """
#     if 'base_blog' not in df.columns:
#         raise ValueError("Input DataFrame must contain a 'blog' column.")
    
#     summarized_blogs = []
    
    
#     for idx, row in df.iterrows():
#         base_blog = row['base_blog']
#         instruction_num = row.get('Instruction Number', idx + 1)
#         logging.info(f"Processing Instruction #{instruction_num}")
        
#         for attempt in range(1, retry_attempts + 1):
#             try:
#                 response = chat_fn(base_blog, model=model, system_prompt=system_prompt, log=True)
#                 summarized_blog = response.choices[0].message.content.strip()
#                 break
#             except Exception as e:
#                 logging.warning(f"Attempt {attempt}/{retry_attempts} failed for Instruction #{instruction_num}: {e}")
#                 if attempt < retry_attempts:
#                     sleep(delay)
#                 else:
#                     logging.error(f"Failed to summarize blog for Instruction #{instruction_num} after {retry_attempts} attempts.")
                    
#         summarized_blogs.append(summarized_blog)
        
#     df['summarized_blog'] = summarized_blogs
    
#     df.to_csv(output_path, index=False)
#     logging.info(f"Summarized blogs saved to {output_path}")
    
#     return df

# base_blog_df = pd.read_csv('../data/base_blog.csv')
# summarize_blog(df=base_blog_df, chat_fn=chat_fn, system_prompt=system_prompt)
    
