In [None]:
import os
import re
import json
import torch
import pandas as pd

from transformers import pipeline
from groq import Groq

In [None]:
# Constants
# Model identifier passed to the Groq chat-completions call that produces the ratings.
LLM_MODEL = "llama-3.2-90b-text-preview"
# Device for the summarization pipeline: CUDA when torch reports it available, else CPU.
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
# Shared summarization pipeline, created once and placed on DEVICE.
# No model name is passed here, so whichever checkpoint transformers selects
# by default for the "summarization" task is used.
summarizer = pipeline("summarization", device=DEVICE)
def get_summary(text, max_length=300, min_length=50):
    """Condense `text` with the module-level `summarizer` pipeline.

    Args:
        text: Text to summarize. Over-long input is truncated by the
            pipeline (`truncation=True`).
        max_length: Upper bound on the generated summary length, forwarded
            to the pipeline. Defaults to 300.
        min_length: Lower bound on the generated summary length, forwarded
            to the pipeline. Defaults to 50.

    Returns:
        The `summary_text` field of the first result returned by the pipeline.
    """
    # Sampling is enabled with a low temperature, so repeated calls on the
    # same text are not guaranteed to return the same summary.
    outputs = summarizer(
        text,
        truncation=True,
        max_length=max_length,
        min_length=min_length,
        do_sample=True,
        temperature=0.3,
    )
    return outputs[0]['summary_text']

In [None]:
# Shared Groq API client. The key is read from the GROQ_API_KEY environment
# variable; the lookup raises KeyError if that variable is not set.
client = Groq(api_key=os.environ['GROQ_API_KEY'])
def get_favourability_ratings(text):
    """Ask the chat model how favourable `text` is to each party.

    Sends a fixed system prompt followed by `text` as the user message,
    prints the token usage and the raw reply to stdout, and returns the
    reply unchanged.

    Args:
        text: Passage to assess; sent verbatim as the user message.

    Returns:
        The message content of the first completion choice. The prompt asks
        for a short explanation plus a JSON object with "democrats" and
        "republicans" scores, but the reply is neither parsed nor validated
        here.
    """
    system_prompt = (
        "You are an expert political analyst. Read the text provided by the user. "
        "Describe how favourable is it to democrats, and to republicans, in under 50 words. "
        "Also provide two scores on a scale of 1 to 5, quantifying this favourability to the two parties "
        "in a JSON format with two keys \"democrats\" & \"republicans\"."
    )
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": text},
    ]

    response = client.chat.completions.create(
        model=LLM_MODEL,
        messages=conversation,
        temperature=0.25,
        max_tokens=512,
        top_p=1,
        stream=False,
        stop=None,
    )
    reply = response.choices[0].message.content

    # Report token consumption for this request, then echo the raw reply.
    usage = response.usage
    for counter in ('completion_tokens', 'prompt_tokens', 'total_tokens'):
        print(f'{counter}: ', getattr(usage, counter))
    print(reply)
    return reply

In [None]:
def find_json_objects(input_string):
    """Extract every top-level JSON object embedded in free-form text.

    The text is scanned for `{` characters and a JSON decode is attempted
    at each one. Unlike a single-line, non-greedy regex, this handles
    objects that span several lines, contain nested objects, or have `}`
    inside string values.

    Args:
        input_string: Text that may contain zero or more JSON objects mixed
            with other content (e.g. an LLM reply).

    Returns:
        A list of the decoded objects (dicts) in order of appearance. Objects
        nested inside an already-decoded object are not listed separately.
        Returns an empty list when nothing decodes.
    """
    decoder = json.JSONDecoder()
    valid_jsons = []
    cursor = 0
    while True:
        start = input_string.find('{', cursor)
        if start == -1:
            break
        try:
            # raw_decode parses one JSON value beginning at `start` and
            # reports the index just past it, ignoring any trailing text.
            json_obj, end = decoder.raw_decode(input_string, start)
        except json.JSONDecodeError:
            # Not valid JSON at this brace; resume at the next character.
            cursor = start + 1
            continue
        valid_jsons.append(json_obj)
        cursor = end

    return valid_jsons

In [None]:
def generate_ratings(article):
    """Summarize one article and extract the model's favourability scores.

    Runs `get_summary`, passes the summary to `get_favourability_ratings`,
    and parses the reply with `find_json_objects`.

    Args:
        article: Full article text.

    Returns:
        The first JSON object found in the model's reply (the prompt asks
        for "democrats" and "republicans" keys), or None when `article` is
        not a non-blank string or the reply contains no parseable JSON
        object.
    """
    # pandas typically represents blank spreadsheet cells as NaN (a float);
    # skip those rather than handing a non-string to the summarizer.
    if not isinstance(article, str) or not article.strip():
        return None

    summary = get_summary(article)
    ratings_with_explanation = get_favourability_ratings(summary)
    parsed_objects = find_json_objects(ratings_with_explanation)
    # The model may reply without valid JSON; return None so one bad reply
    # does not abort a whole batch run with an IndexError.
    if not parsed_objects:
        return None
    return parsed_objects[0]

In [None]:
# Load the articles, rate each one in row order, and write the augmented
# table (original columns plus 'ratings') to a new workbook.
article_df = pd.read_excel('articles.xlsx')
article_df['ratings'] = [
    generate_ratings(article_text) for article_text in article_df['article']
]
article_df.to_excel('articles_with_ratings.xlsx', index=False)