In [23]:
!pip install groq




In [24]:
from groq import Groq
import re

In [25]:
def draft_message(content, role='user'):
    """Wrap ``content`` in a chat-message dict tagged with ``role``.

    Returns a new dict holding exactly two keys, ``"role"`` and ``"content"``
    (in that order). ``role`` defaults to ``'user'``.
    """
    message = dict(role=role, content=content)
    return message

def truncate_content(content, max_length=4000):
    """Return the leading ``max_length`` items of ``content``.

    This is a plain prefix slice, so input that is already shorter than
    ``max_length`` comes back with the same value.
    """
    prefix = slice(None, max_length)
    return content[prefix]

def truncate_comments(comments, max_length=480):
    """Keep the leading comments whose combined length fits in ``max_length``.

    Comments are consumed in order. Collection stops at the first comment
    that would push the running total of ``len(comment)`` past ``max_length``;
    later comments are never considered, even if they would still fit.
    """
    kept = []
    used = 0
    for text in comments:
        used += len(text)
        if used > max_length:
            # First overflow ends the scan entirely.
            return kept
        kept.append(text)
    return kept

def parse_comments(raw_comments):
    """Split a numbered comment string into a list of individual comments.

    The text is split on every ``<digits>.<whitespace>`` marker (for example
    ``"1. "``); each resulting piece is stripped and empty pieces are dropped.

    Args:
        raw_comments: All comments as a single string. Any non-string value
            (``None``, or the float ``NaN`` that pandas produces for an empty
            CSV cell) is treated as "no comments".

    Returns:
        list[str]: The non-empty, stripped comments in their original order.
    """
    # re.split raises TypeError on non-strings; a missing cell simply has no comments.
    if not isinstance(raw_comments, str):
        return []

    pieces = re.split(r'\d+\.\s', raw_comments)
    return [piece.strip() for piece in pieces if piece.strip()]

In [26]:
# Credentials must never be hard-coded in the notebook. The key is read from
# the GROQ_API_KEY environment variable, falling back to an interactive prompt
# that does not echo the input.
# NOTE: the key that was previously committed in this cell must be treated as
# compromised and revoked.
import os
from getpass import getpass

api_key = os.environ.get('GROQ_API_KEY') or getpass('Groq API key: ')
client = Groq(api_key=api_key)


In [27]:
# Mount Google Drive at /content/drive so the paths under
# /content/drive/MyDrive used later in the notebook resolve.
from google.colab import drive
drive.mount('/content/drive')
import pandas as pd

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [28]:
# Read the dataset CSV from the mounted Drive folder.
df = pd.read_csv('/content/drive/MyDrive/Dailyhunt_dataset/Bangla_Reference_Summary.csv')

# Plain Python lists of the 'Content' and 'Comments' columns, in row order.
news_text, comments = (list(df[column]) for column in ('Content', 'Comments'))

In [29]:
# Instruction text for the summarisation model: summarise the news text
# together with its user comments, answer in the language of the original
# news, and output nothing but the summary.
# Note: the second line of the literal keeps its leading indentation, so those
# spaces are part of the string.
prompt = '''You are an AI assistant tasked with analyzing text and user comments to create concise summaries. Your summaries should integrate key information from all sources, maintaining the original language of the text. Focus on crucial points and minimize extraneous details.
            The summary should be in the same language as that of the original news. Present only the summary, no additional commentary or captions or titles.'''


In [30]:
from tqdm import tqdm

# Accumulator for the generated summaries, one entry per processed article.
generated_summaries = list()

In [31]:
# Generate one summary per article (news text + reader comments) and write
# the results back to the dataset CSV as a new column.

MODEL_NAME = 'llama3-8b-8192'
ERROR_PLACEHOLDER = "Error generating summary"


def _build_messages(article, comment_text):
    """Return the chat messages: instructions as system turn, article + comments as user turn."""
    return [
        draft_message(prompt, role='system'),
        draft_message(f'news: {article} comments: {comment_text}'),
    ]


def _request_summary(messages):
    """Send one chat-completion request and return the text of the first choice."""
    chat_completion = client.chat.completions.create(
        temperature=1.0,
        n=1,
        model=MODEL_NAME,
        messages=messages,
    )
    return chat_completion.choices[0].message.content


# Start from an empty list on every run of this cell; otherwise re-running it
# keeps appending to the previous run's results and the column assignment
# below fails with a length mismatch.
generated_summaries = []

for i, (content, raw_comments) in enumerate(
        tqdm(zip(news_text, comments), total=len(news_text), desc="Generating Summaries")):
    # Empty CSV cells are read by pandas as float NaN rather than str.
    # Normalise them so parsing/truncation cannot raise TypeError and abort
    # the whole (long-running) loop.
    content = content if isinstance(content, str) else ''
    raw_comments = raw_comments if isinstance(raw_comments, str) else ''

    reader_comments = parse_comments(raw_comments)
    formatted_comments = ' '.join(reader_comments)

    try:
        generated_summary = _request_summary(_build_messages(content, formatted_comments))
    except Exception as e:
        # The API reports an over-long prompt through the error text.
        if "context_length_exceeded" in str(e):
            print(f"Error generating summary for article {i+1}: Context length exceeded. Truncating content and comments.")
            truncated_content = truncate_content(content)
            truncated_reader_comments = truncate_comments(reader_comments, max_length=480)
            # Retry once with the same message structure, only shorter inputs.
            retry_messages = _build_messages(truncated_content, ' '.join(truncated_reader_comments))
            try:
                generated_summary = _request_summary(retry_messages)
            except Exception as e2:
                print(f"Error generating summary for article {i+1} after truncation: {e2}")
                generated_summary = ERROR_PLACEHOLDER
        else:
            print(f"Error generating summary for article {i+1}: {e}")
            generated_summary = ERROR_PLACEHOLDER

    generated_summaries.append(generated_summary)


# Defensive padding: the loop appends exactly one entry per article, so this
# only triggers if `df` gained rows after `news_text` was extracted.
if len(generated_summaries) != len(df):
    missing_entries = len(df) - len(generated_summaries)
    generated_summaries.extend([ERROR_PLACEHOLDER] * missing_entries)

df['Generated_summary_comments'] = generated_summaries
# NOTE: this overwrites the input CSV in place, adding the new column.
df.to_csv('/content/drive/MyDrive/Dailyhunt_dataset/Bangla_Reference_Summary.csv', index=False)

print("done sal!")


Generating Headlines:   0%|          | 1/627 [00:00<09:55,  1.05it/s]

Error generating headline for article 2: Context length exceeded. Truncating content and comments.


Generating Headlines:  10%|█         | 64/627 [06:50<1:03:55,  6.81s/it]

Error generating headline for article 65: Context length exceeded. Truncating content and comments.


Generating Headlines:  17%|█▋        | 108/627 [12:08<1:09:12,  8.00s/it]

Error generating headline for article 109: Context length exceeded. Truncating content and comments.


Generating Headlines:  19%|█▉        | 121/627 [13:39<56:50,  6.74s/it]

Error generating headline for article 122: Context length exceeded. Truncating content and comments.


Generating Headlines:  20%|█▉        | 123/627 [13:53<52:58,  6.31s/it]  

Error generating headline for article 124: Context length exceeded. Truncating content and comments.


Generating Headlines:  21%|██        | 131/627 [14:44<45:16,  5.48s/it]

Error generating headline for article 132: Context length exceeded. Truncating content and comments.


Generating Headlines:  27%|██▋       | 167/627 [18:25<54:20,  7.09s/it]

Error generating headline for article 168: Context length exceeded. Truncating content and comments.


Generating Headlines:  30%|██▉       | 186/627 [20:26<45:42,  6.22s/it]

Error generating headline for article 187: Context length exceeded. Truncating content and comments.


Generating Headlines:  51%|█████     | 320/627 [33:11<26:26,  5.17s/it]

Error generating headline for article 321: Context length exceeded. Truncating content and comments.


Generating Headlines:  54%|█████▍    | 339/627 [35:15<30:25,  6.34s/it]

Error generating headline for article 340: Context length exceeded. Truncating content and comments.


Generating Headlines:  80%|████████  | 504/627 [53:19<11:12,  5.47s/it]

Error generating headline for article 505: Context length exceeded. Truncating content and comments.


Generating Headlines:  83%|████████▎ | 518/627 [54:52<12:19,  6.78s/it]

Error generating headline for article 519: Context length exceeded. Truncating content and comments.


Generating Headlines: 100%|██████████| 627/627 [1:07:22<00:00,  6.45s/it]

done sal!



