In [1]:
import openai
from openai import OpenAI
import os
import pandas as pd
import tiktoken
import csv

# Prefer a key that is already exported in the environment so the real secret
# never has to be pasted into the notebook. The placeholder is only a fallback;
# previously it unconditionally overwrote any OPENAI_API_KEY that was set.
GPT_API_KEY = os.environ.get('OPENAI_API_KEY') or 'YOUR_API_KEY_HERE'
os.environ['OPENAI_API_KEY'] = GPT_API_KEY
openai.api_key = GPT_API_KEY

# OpenAI() is constructed without arguments, so it relies on the
# OPENAI_API_KEY environment variable set just above.
client = OpenAI()

# Tokenizer for the cl100k_base encoding.
tokenizer = tiktoken.get_encoding('cl100k_base')

In [2]:
language = "Hindi"

# Both input tables are located by the language prefix; the file names are
# relative, so they resolve against the current working directory.
relevant_comments_path = f"{language}_Relevant_Comments.csv"
reference_summary_path = f"{language}_Reference_Summary.csv"

relevant_comments_data = pd.read_csv(relevant_comments_path)
reference_summary_data = pd.read_csv(reference_summary_path)

def _is_missing(value):
    """Return True for ``None`` or a float NaN (what pandas yields for an empty CSV cell)."""
    # NaN is the only float that is not equal to itself.
    return value is None or (isinstance(value, float) and value != value)


def generate_summary(text, comments, img_url):
    """Request a news summary for one article from the chat model.

    Relies on two module-level names: ``client`` (the OpenAI client used for
    the request) and ``language`` (interpolated into the prompt as the
    output language).

    Args:
        text: Article body. Anything that is not a ``str`` (e.g. NaN from an
            empty cell) makes the function return ``None`` without calling
            the API.
        comments: User-comment text to include as supplementary context.
            ``None``/NaN is treated as "no comments"; other values are
            converted with ``str()``.
        img_url: Optional image URL. Only a non-blank string is attached to
            the request; ``None``, NaN, ``''`` and other non-strings are
            ignored.

    Returns:
        The generated summary string, or ``None`` if ``text`` is not a
        string or the API call raised an exception (the error is printed).
    """
    # Ensure the text is a string
    if not isinstance(text, str):
        return None

    # An empty CSV cell arrives as NaN; without this it would be rendered as
    # the literal text "nan" inside the prompt.
    comments = "" if _is_missing(comments) else str(comments)

    # NaN is truthy, so a plain `if img_url:` would attach NaN as the image
    # URL. Accept only a non-blank string.
    image_url = img_url.strip() if isinstance(img_url, str) else ""

    # Prepare the messages for the API call
    messages = [
        {
            "role": "system",
            "content": "You are an expert news editor. Your primary task is to create summaries that accurately reflect the main content of news articles and images. User comments should be considered as supplementary information only."
        },
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": f"""Craft a precise and engaging news summary in {language} that primarily summarizes the main point of the article or image. Use insights from user comments only as secondary, supplementary information. Follow these guidelines:

1. Focus predominantly (about 80-90%) on the key information and main message from the article or image.
2. If relevant, subtly incorporate a minor element (about 10-20%) from the user comments to reflect public sentiment or add context.
3. Use active voice and clear, impactful language.
4. Ensure the summary is factual and directly related to the article's main content or the key elements of the image.
5. Avoid overemphasizing or sensationalizing elements from user comments.
6. If an image is provided, balance the summary to reflect both textual and visual information when applicable.

Article content: {text}

Brief summary of relevant user comment insights (use sparingly):
{comments}

summary:"""
                }
            ],
        }
    ]

    # Attach the image as a second content part of the user message.
    if image_url:
        messages[1]["content"].append({
            "type": "image_url",
            "image_url": {
                "url": image_url,
            },
        })

    try:
        # Call the model with the prepared messages
        completion = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=messages,
        )

        summary = completion.choices[0].message.content
        print("summary_generated")
        return summary
    except Exception as e:
        # Best effort: a failed request yields None for this article so the
        # caller's loop can continue with the next one.
        print(f"Error generating summary: {e}")
        return None

# Next cell: run generate_summary over every article row and store the results in reference_summary_data.

In [3]:
# Generate summaries for the relevant comments data.
#
# The summaries are attached to reference_summary_data positionally, so both
# tables must have the same number of rows. Check this before making any API
# calls; otherwise the column assignment below would raise a ValueError only
# after the whole loop had already run.
total_articles = len(relevant_comments_data)
if len(reference_summary_data) != total_articles:
    raise ValueError(
        f"Row count mismatch: {total_articles} rows in the relevant-comments data "
        f"but {len(reference_summary_data)} rows in the reference-summary data"
    )

generated_summaries = []
# enumerate() supplies the 1-based progress counter, so the message does not
# depend on the DataFrame index being 0-based integers.
for position, (index, row) in enumerate(relevant_comments_data.iterrows(), start=1):
    # row.get() tolerates a missing 'cleaned_comments' / 'Image URL' column.
    summary = generate_summary(row['Content'], row.get('cleaned_comments', ''), row.get('Image URL', ''))
    generated_summaries.append(summary)
    print(f"Processed article {position} of {total_articles}")

# Add the generated summaries to the reference summary data
# (entries are None for articles whose summary could not be generated).
reference_summary_data['Generated Summary'] = generated_summaries

# Save the updated reference summary data
reference_summary_data.to_csv(f'{language}_Summary_With_Image_Relevant_Comments.csv', index=False, encoding='utf-8')

print(f"Summaries generated and added to '{language}_Summary_With_Image_Relevant_Comments.csv'")

summary_generated
Processed article 1 of 543
summary_generated
Processed article 2 of 543
summary_generated
Processed article 3 of 543
summary_generated
Processed article 4 of 543
summary_generated
Processed article 5 of 543
summary_generated
Processed article 6 of 543
summary_generated
Processed article 7 of 543
summary_generated
Processed article 8 of 543
summary_generated
Processed article 9 of 543
summary_generated
Processed article 10 of 543
summary_generated
Processed article 11 of 543
summary_generated
Processed article 12 of 543
summary_generated
Processed article 13 of 543
summary_generated
Processed article 14 of 543
summary_generated
Processed article 15 of 543
summary_generated
Processed article 16 of 543
summary_generated
Processed article 17 of 543
summary_generated
Processed article 18 of 543
summary_generated
Processed article 19 of 543
summary_generated
Processed article 20 of 543
summary_generated
Processed article 21 of 543
summary_generated
Processed article 22 of 5