In [None]:
!pip install openai
!pip install transformers accelerate huggingface_hub
!pip install llamaapi
!pip install -q -U google-generativeai
!pip install anthropic

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import transformers
from torch import cuda, bfloat16
import pandas as pd
import os
from groq import Groq
from google.api_core.exceptions import InternalServerError
from openai import OpenAI
from llamaapi import LlamaAPI
import google.generativeai as genai
import json


In [None]:
# OpenAI-compatible endpoint; the `openai` SDK client below is pointed at this URL.
base_url = "https://api.aimlapi.com/v1"

# Keep the credential out of the notebook: read it from the environment.
# Set it before running this cell, e.g. `os.environ["AIMLAPI_API_KEY"] = ...`
# populated from your platform's secret store.
api_key = os.environ.get("AIMLAPI_API_KEY", "")
if not api_key:
    # Fail fast: with a blank key every request is rejected, and the labelling
    # function swallows the error and returns [], silently producing a results
    # file full of empty predictions.
    raise RuntimeError(
        "No API key configured: set the AIMLAPI_API_KEY environment variable "
        "before creating the client."
    )

api = OpenAI(api_key=api_key, base_url=base_url)

In [None]:
# Load the dataset. Note: despite the variable name, the file is JSON, not CSV.
csv_file_path = '/kaggle/input/souvika-madam-dataset/NewsBlog_ProthomAlo_FinalDataset.json'

# Parse straight from the open file handle.
with open(csv_file_path, mode="r", encoding='utf-8') as f:
    data = json.load(f)

In [None]:
from transformers import AutoTokenizer

def extract_data_from_json(article, max_words=1000):
    """Pull the title, a length-capped body text and the gold labels from one article.

    Args:
        article: Mapping for a single article. The keys read are
            "Article Title", "Article Text" and "Article Topics".
        max_words: Maximum number of whitespace-separated words of the article
            text to keep (default 1000). ``None`` keeps the whole text.

    Returns:
        A ``(title, truncated_context, labels)`` tuple. ``truncated_context``
        is the kept words re-joined with single spaces, so runs of whitespace
        in the original text are collapsed. Missing or null fields come back
        as ``""`` (title, text) or ``[]`` (labels).
    """
    # `or` (rather than a .get default) also covers keys that are present but
    # null in the JSON, which would otherwise crash on `.split()` below.
    title = article.get("Article Title") or ""
    text = article.get("Article Text") or ""
    labels = article.get("Article Topics") or []

    words = text.split()
    truncated_context = " ".join(words[:max_words])
    return title, truncated_context, labels

def _parse_label_response(response_text, label_set):
    """Extract the labels of ``label_set`` mentioned in the model's free-text answer.

    The answer is expected to be a comma-separated list, but newlines and
    semicolons are accepted as separators too, and surrounding quotes, bullets,
    brackets and sentence punctuation are stripped from each item. Items that
    are not an exact member of ``label_set`` are dropped; duplicates are
    removed while keeping first-seen order.
    """
    if not response_text:
        return []

    # Characters that may wrap a label in the answer but are never part of one.
    padding = " \t\r\"'`*-•.।[]()“”‘’"

    # Labels themselves contain spaces, so only split on list separators.
    normalized = response_text.replace("\n", ",").replace(";", ",")

    selected = []
    for piece in normalized.split(","):
        candidate = piece.strip(padding)
        if candidate in label_set and candidate not in selected:
            selected.append(candidate)
    return selected


def generate_labels_with_llama(context, title, label_set):
    """Ask the hosted DBRX model which labels of ``label_set`` fit an article.

    Sends one chat-completion request through the module-level ``api`` client
    and parses the reply into labels.

    Args:
        context: Article body text placed in the prompt.
        title: Article title placed in the prompt.
        label_set: Iterable of candidate label strings; only exact members of
            it are ever returned.

    Returns:
        List of selected labels (possibly empty). Any exception raised by the
        request or while handling the reply is printed and ``[]`` is returned,
        so a failed call is indistinguishable from "no label selected".
    """
    # Upper bound on generated tokens. The input side is not token-limited
    # here; callers are expected to pass an already truncated context.
    reserved_output_tokens = 128

    # Define the prompt structure
    prompt_template = """
    Context:
    {context}

    Title:
    {title}

    Label Set:
    {label_set}

    Task:
    Based on the provided context and title, identify the most relevant labels from the label set. 
    Choose the labels that best represent the meaning or main idea of the context and title.
    Return the selected labels as a comma-separated list.
    """

    # Format the label set as a string
    label_set_str = ', '.join(label_set)
    prompt = prompt_template.format(context=context, title=title, label_set=label_set_str)

    try:
        # Real request to the DBRX instruct model via the OpenAI-compatible client.
        # NOTE(review): temperature=0.7 makes predictions vary between runs;
        # consider 0 if reproducible evaluation is wanted.
        outputs = api.chat.completions.create(
            model="databricks/dbrx-instruct",
            messages=[
                {"role": "system", "content": "You are a helpful assistant. You are a classifier that outputs the best associated labels from the label set. Please give only that output not anything else."},
                {"role": "user", "content": prompt},
            ],
            temperature=0.7,
            max_tokens=reserved_output_tokens,
            top_p=1.0
        )

        # The message content may be missing; the parser treats that as "no labels".
        response = outputs.choices[0].message.content
        selected_labels = _parse_label_response(response, label_set)
        print(selected_labels)
        return selected_labels
    except Exception as e:
        # Best effort: report the failure and let the caller continue with the next article.
        print("Error:", e)
        return []



# Closed set of candidate topic labels (21 entries) offered to the classifier.
label_set = [
    'চাকরিবাকরি',
    'করোনাভাইরাস',
    'চলচ্চিত্র ও তারকা',
    'স্বাস্থ্য',
    'ব্যাংক',
    'অর্থনীতি',
    'শিক্ষা',
    'প্রাকৃতিক দুর্যোগ',
    'আইন ও আদালত',
    'কূটনীতি',
    'শিল্প ও বাণিজ্য',
    'ভ্রমণ',
    'নকশা',
    'ফুটবল',
    'খাবারদাবার',
    'দেশ ও রাজনীতি',
    'আন্তর্জাতিক',
    'দেশের খবর',
    'রাশিয়া ইউক্রেন সংঘাত',
    'ক্রিকেট',
    'নারী',
]

# Classify every article, pairing its gold labels with the model's predictions.
results = []
for i, article in enumerate(data):
    # Progress marker once per 100 articles.
    if i % 100 == 0:
        print(f"Processing article {i}...")
    try:
        title, context, true_labels = extract_data_from_json(article)
        predicted_labels = generate_labels_with_llama(context, title, label_set)
    except Exception as e:
        # A failing article is reported and left out of the results.
        print(f"Error processing article {i}: {e}")
    else:
        record = {
            "title": title,
            "text": context,
            "true_labels": true_labels,
            "predicted_labels": predicted_labels
        }
        results.append(record)

# Write all records as indented UTF-8 JSON; non-ASCII text is stored unescaped.
output_file = "/kaggle/working/DBRX_results.json"
with open(output_file, mode='w', encoding='utf-8') as f:
    f.write(json.dumps(results, ensure_ascii=False, indent=4))