In [6]:
import openai
from openai import OpenAI
import os
import csv
import tiktoken
import pandas as pd
import langdetect

GPT_API_KEY = 'YOUR_API_KEY_HERE'
os.environ['OPENAI_API_KEY'] = GPT_API_KEY
openai.api_key = GPT_API_KEY
client = OpenAI()

In [7]:
data = pd.read_csv("Hindi_Summary_With_Image.csv")
tokenizer = tiktoken.get_encoding('cl100k_base')

In [8]:
def generate_summary(text, img_url):
    # Ensure the text is a string
    if not isinstance(text, str):
        return None
    
    # Prepare the messages list
    messages = [
        {
            "role": "system",
            "content": "Analyze the provided image and text, then generate a concise summary that incorporates key information from both. The summary should be in the same language as the original text."
        },
        {
            "role": "user",
            "content": [
                {"type": "text", "text": f"Examine the image and extract its key points. Combine this information with the following text content. Then, create a concise summary that integrates the important elements from both the image and the text. The summary should capture all crucial points while minimizing extraneous details. Provide only the final summary, in the same language as the original content.\n\nText content: {text}"}
            ]
        }
    ]
    
    # Try to add the image URL if it exists
    if img_url and isinstance(img_url, str):
        try:
            messages[1]["content"].append({
                "type": "image_url",
                "image_url": {
                    "url": img_url
                }
            })
        except Exception as e:
            print(f"Error processing image URL: {e}")
    
    # Call the model with the prepared messages
    try:
        completion = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=messages
        )
        
        # Retrieve the generated summary
        summary = completion.choices[0].message.content
        print("summary_generated")
        return summary
    except Exception as e:
        print(f"Error generating summary: {e}")
        return None

In [10]:
def is_english(text):
    if pd.isna(text):
        return False
    try:
        return langdetect.detect(str(text)) == 'en'
    except:
        return False

# Find rows with English summaries or empty summaries
mask = data['Generated Summary'].apply(lambda x: is_english(x) or pd.isna(x))

# Count of summaries to be generated
total_to_summarize = mask.sum()

# Iterate through the DataFrame and update rows that need summarization
for index, needs_summary in enumerate(mask):
    if needs_summary:
        # Generate new summary
        new_summary = generate_summary(data.loc[index, 'Content'], data.loc[index, 'Image URL'])
        if new_summary:
            data.loc[index, 'Generated Summary'] = new_summary
            print(f"Processed article {index + 1} of {total_to_summarize}")
        else:
            print(f"Failed to generate summary for article {index + 1}")

# Save the updated DataFrame back to CSV
data.to_csv('Hindi_Summary_With_Image.csv', index=False)

print(f"Updated {total_to_summarize} summaries in 'Hindi_Summary_With_Image.csv'")

summary_generated
Processed article 591 of 1
Updated 1 summaries in 'Bangla_Summary_With_Image.csv'
