In [None]:
import openai
from openai import OpenAI
import os
import csv
import tiktoken
import pandas as pd

# --- Script configuration and one-time setup --------------------------------
# Placeholder credential: replace before running.
# NOTE(review): the assignment to os.environ below is unconditional, so it
# overwrites any OPENAI_API_KEY already present in the process environment
# with this literal. Avoid committing a real key here.
GPT_API_KEY = 'YOUR_API_KEY_HERE'
os.environ['OPENAI_API_KEY'] = GPT_API_KEY
openai.api_key = GPT_API_KEY
# Constructed with no arguments, after the environment variable is set.
# Presumably the client reads OPENAI_API_KEY from the environment; confirm
# against the openai client documentation.
client = OpenAI()

# Language name: used as the prefix of the input and output CSV file names
# and interpolated into the prompt sent to the model.
language = "Hindi"

# Input articles. The processing loop reads the 'Content' column and, when
# present, the 'cleaned_comments' column from each row.
data = pd.read_csv(f"{language}_Reference_Summary.csv")
# NOTE(review): not referenced anywhere else in the visible code.
tokenizer = tiktoken.get_encoding('cl100k_base')

def generate_summary(text, comments):
    """Generate a short news headline for an article via the chat model.

    The target language is taken from the module-level ``language`` and the
    request is sent through the module-level ``client``.

    Args:
        text: Article body. Anything that is not a non-blank string (for
            example the float NaN that stands in for an empty CSV cell) is
            skipped without calling the API.
        comments: Reader comments used as secondary context. ``None`` and
            float NaN are treated as "no comments" so that the literal text
            "nan" never reaches the prompt.

    Returns:
        The headline text returned by the model, or ``None`` when the
        article is skipped or the API call fails.
    """
    # Skip rows without usable article text before spending an API call;
    # a blank article can only yield a fabricated headline.
    if not isinstance(text, str) or not text.strip():
        return None

    # A missing comments cell arrives as None/NaN. Interpolating it as-is
    # would present the word "nan" to the model as if it were a comment.
    # (NaN is the only float that is not equal to itself.)
    if comments is None or (isinstance(comments, float) and comments != comments):
        comments = ""

    # Prepare the messages list
    messages = [
        {
            "role": "system",
            "content": "You are an expert news editor. Your primary task is to create headlines that accurately reflect the main content of news articles. User comments should be considered as supplementary information only."
        },
        {
            "role": "user",
            "content": f"""Craft a precise and engaging news headline in {language} that primarily summarizes the main point of the article. Use insights from user comments only as secondary, supplementary information. Follow these guidelines:

            1. Focus predominantly (about 80-90%) on the key information and main message from the article.
            2. If relevant, subtly incorporate a minor element (about 10-20%) from the user comments to reflect public sentiment or add context.
            3. Keep the headline concise, ideally under 15 words.
            4. Use active voice and clear, impactful language.
            5. Ensure the headline is factual and directly related to the article's main content.
            6. Avoid overemphasizing or sensationalizing elements from user comments.

            Text content: {text}

            User comments: {comments}"""
        }
    ]

    # Call the model with the prepared messages. This is deliberately
    # best-effort: any failure is reported and turned into None so that one
    # bad row does not abort a long batch run.
    try:
        completion = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=messages
        )

        # Retrieve the generated summary
        summary = completion.choices[0].message.content
        print("summary_generated")
        return summary
    except Exception as e:
        print(f"Error generating summary: {e}")
        return None

# Batch driver: send every article through generate_summary and stream the
# results into a new CSV, one output row per input row.
with open(f'{language}_Summary_With_Comments.csv', 'w', newline='', encoding='utf-8') as f:
    writer = csv.writer(f)

    # Header = original columns followed by the new headline column
    header = data.columns.tolist()
    header.append('Generated Summary')
    writer.writerow(header)

    total_rows = len(data)
    # iterrows() yields (index label, row Series); only the Series is needed,
    # and enumerate supplies a 1-based progress counter.
    for index, (_, record) in enumerate(data.iterrows(), start=1):
        # 'cleaned_comments' is optional; fall back to an empty string when
        # the column is absent
        summary = generate_summary(
            record['Content'],
            record.get('cleaned_comments', ''),
        )

        # Original cell values with the generated headline appended
        row_data = record.tolist()
        row_data.append(summary)
        writer.writerow(row_data)

        # Flush after every row so the rows written so far are pushed to
        # the file immediately
        f.flush()

        print(f"Processed article {index} of {total_rows}")

print(f"Summaries generated and saved to '{language}_Summary_With_Comments.csv'")