## Test Groq

In [3]:
from dotenv import load_dotenv
from groq import Groq

# Load variables from a local .env file into the process environment.
load_dotenv()

# No arguments are passed, so the client presumably picks up its API key
# from the environment populated above — TODO confirm.
client = Groq()

# Conversation sent to the model: a system instruction followed by the
# user message the assistant should respond to.
conversation = [
    {"role": "system", "content": "you are a helpful assistant."},
    {"role": "user", "content": "Explain the importance of fast language models"},
]

chat_completion = client.chat.completions.create(
    model="llama3-8b-8192",
    messages=conversation,
    # Upper bound on the number of tokens generated for the reply.
    # NOTE(review): the budget shared between prompt and completion depends
    # on the model's context window (the "-8192" suffix suggests 8,192
    # tokens) — confirm against the provider's model list.
    max_tokens=1024,
    top_p=1,
)

# Show the text of the first choice returned by the LLM.
print(chat_completion.choices[0].message.content)

Fast language models have gained significant attention in recent years due to their potential to revolutionize various aspects of natural language processing (NLP) and artificial intelligence (AI). Here are some reasons why fast language models are important:

1. **Speed and Efficiency**: Fast language models can process large amounts of text data quickly, making them ideal for applications that require rapid analysis and generation of text. This is particularly important in industries such as customer service, where timely responses are crucial.

2. **Scalability**: Fast language models can handle large volumes of data, making them suitable for applications that involve processing massive amounts of text data, such as social media monitoring or sentiment analysis.

3. **Improved Accuracy**: Fast language models can be fine-tuned for specific tasks, leading to improved accuracy in tasks such as language translation, text classification, and sentiment analysis.

4. **Enhanced User Exper

## Groq free API

In [5]:
import logging
import pandas as pd
import glob
from dotenv import load_dotenv
from langchain_core.prompts import ChatPromptTemplate
from langchain_groq import ChatGroq
from langchain.schema.output_parser import StrOutputParser

# Load environment variables from .env file
load_dotenv()

def classify_news_categories(text: str, model_name: str):
    """Classify a news text into a fixed set of categories with a ChatGroq model.

    The prompt asks the model to choose among tech, politics, entertainment,
    business, sport and others, and to return only the final prediction.

    Args:
        text: The news text to classify.
        model_name: Model identifier forwarded to ``ChatGroq``.

    Returns:
        The model's reply, case-folded. The literal string ``"error"`` is
        returned when ``text`` is not a string or when any exception is
        raised while building or invoking the chain.
    """
    if not isinstance(text, str):
        # Non-string cells (e.g. the float NaN pandas produces for an empty
        # CSV field) cannot be sliced for the log preview; report them through
        # the same "error" sentinel instead of raising to the caller.
        logging.error(f"Error during classification: expected text to be str, got {type(text).__name__}")
        return "error"

    logging.info(f"Starting classification for news text {text[:20]}...")
    try:
        model = ChatGroq(model=model_name, temperature=0.1, max_tokens=20)

        system_message = "You are a helpful document classifier."
        # Deliberately a plain string, not an f-string: the placeholder is
        # filled in by the prompt template at invoke time, so curly braces
        # inside the article text are never parsed as template variables.
        human_message = (
            "Classify the following news text into these categories "
            "tech,politics,entertainment,business,sport,others "
            "and return only the final prediction : {text_to_be_classified}"
        )
        prompt = ChatPromptTemplate.from_messages([("system", system_message), ("human", human_message)])

        chain = prompt | model | StrOutputParser()
        result = chain.invoke({"text_to_be_classified": text})

        # Case-fold the reply so labels compare case-insensitively.
        result = result.casefold()
        logging.info(f"Classification for news text {text[:20]} completed successfully. Result: {result}")
        return result

    except Exception as e:
        # Best-effort: one failed row must not abort the surrounding batch.
        logging.error(f"Error during classification: {e}")
        return "error"

# Root logger: INFO and above, timestamped.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Batch run: every model is applied to every matching CSV file.
logging.info("Starting classification...")
models = ['Llama3-8b-8192', 'Llama3-70b-8192', 'Mixtral-8x7b-32768', 'Gemma-7b-It']
examples_pattern = "data/news_category_classification/test-examples_*.csv"

for model_name in models:
    for file in glob.glob(examples_pattern):
        try:
            # The file is re-read on each pass and written back in place
            # below, so prediction columns from earlier models are kept.
            df = pd.read_csv(file)
            logging.info(f"Classifying {file} using model: {model_name}")

            # Extra keyword arguments to Series.apply are forwarded to the
            # function, so each cell value arrives as `text`.
            prediction_column = f'predicted_category_{model_name}'
            df[prediction_column] = df['text'].apply(
                classify_news_categories, model_name=model_name
            )

            print(df)  # Optionally print the dataframe to see the results
            df.to_csv(file, index=False)
            logging.info(f"Done classifying {file}.")
        except Exception as exc:
            # A failure on one file is logged and the loop moves on.
            logging.error(f"Failed to classify {file} with model {model_name}. Error: {exc}")

logging.info("Classification completed.")


         category                                               text  \
0        politics  nhs waiting time target is cut hospital waitin...   
1        politics  crisis  ahead in social sciences  a national b...   
2            tech  football manager scores big time for the past ...   
3        politics  uk will stand firm on eu rebate  britain s £3b...   
4   entertainment  greer attacks  bully  big brother germaine gre...   
5        politics  pm apology over jailings tony blair has apolog...   
6        politics  parties  plans for council tax anger at counci...   
7        politics  lib dems unveil women s manifesto the liberal ...   
8   entertainment  uk tv channel rapped for csi ad tv channel fiv...   
9            tech  broadband in the uk gathers pace one person in...   
10  entertainment  u2 stars enter rock hall of fame singer bruce ...   
11       business  macy s owner buys rival for $11bn us retail gi...   
12  entertainment  tarantino to direct csi episode film director

## For the OpenAI API

In [8]:
import logging
import pandas as pd
import glob
from openai import OpenAI
from dotenv import load_dotenv

load_dotenv()

def classify_news_categories(text: str, model_name: str):
    """Classify a news text into a fixed set of categories with an OpenAI chat model.

    The prompt asks the model to choose among tech, politics, entertainment,
    business, sport and others, and to return only the final prediction.

    Args:
        text: The news text to classify.
        model_name: Model identifier sent with the chat completion request.

    Returns:
        The model's reply, case-folded. The literal string ``"error"`` is
        returned when ``text`` is not a string or when any exception is
        raised while calling the API or processing its response.
    """
    if not isinstance(text, str):
        # Non-string cells (e.g. the float NaN pandas produces for an empty
        # CSV field) cannot be sliced for the log preview; report them through
        # the same "error" sentinel instead of raising to the caller.
        logging.error(f"Error during classification: expected text to be str, got {type(text).__name__}")
        return "error"

    logging.info(f"Starting classification for news text {text[:20]}...")
    try:
        client = OpenAI()
        messages = [
            # Stray trailing "G" removed from the system prompt.
            {"role": "system", "content": "You are a helpful document classifier."},
            {"role": "user", "content": f"Classify the following news text into these categories tech, politics, entertainment, business, sport, others and return only the final prediction: {text}"},
        ]

        # Honour the caller's model choice instead of a hard-coded model id.
        completion = client.chat.completions.create(
            model=model_name,
            messages=messages,
            temperature=0.1,
        )
        result = completion.choices[0].message.content
        # Case-fold the reply so labels compare case-insensitively.
        result = result.casefold()
        logging.info(f"Classification for news text {text[:20]} completed successfully. Result: {result}")
        return result

    except Exception as e:
        # Best-effort: one failed row must not abort the surrounding batch.
        logging.error(f"Error during classification: {e}")
        return "error"


#####################################################################################################
# Root logger: INFO and above, timestamped.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Batch run: every model is applied to every matching CSV file.
logging.info("Starting classification...")
models = ['gpt-3.5-turbo']
examples_pattern = "data/news_category_classification/test-examples_*.csv"

for model_name in models:
    for file in glob.glob(examples_pattern):
        try:
            # The file is read, extended with one prediction column, and
            # written back to the same path below.
            df = pd.read_csv(file)
            logging.info(f"Classifying {file} using model: {model_name}")

            # Extra keyword arguments to Series.apply are forwarded to the
            # function, so each cell value arrives as `text`.
            prediction_column = f'predicted_category_{model_name}'
            df[prediction_column] = df['text'].apply(
                classify_news_categories, model_name=model_name
            )

            print(df)  # Optionally print the dataframe to see the results
            df.to_csv(file, index=False)
            logging.info(f"Done classifying {file}.")
        except Exception as exc:
            # A failure on one file is logged and the loop moves on.
            logging.error(f"Failed to classify {file} with model {model_name}. Error: {exc}")

logging.info("Classification completed.")


         category                                               text  \
0        politics  nhs waiting time target is cut hospital waitin...   
1        politics  crisis  ahead in social sciences  a national b...   
2            tech  football manager scores big time for the past ...   
3        politics  uk will stand firm on eu rebate  britain s £3b...   
4   entertainment  greer attacks  bully  big brother germaine gre...   
5        politics  pm apology over jailings tony blair has apolog...   
6        politics  parties  plans for council tax anger at counci...   
7        politics  lib dems unveil women s manifesto the liberal ...   
8   entertainment  uk tv channel rapped for csi ad tv channel fiv...   
9            tech  broadband in the uk gathers pace one person in...   
10  entertainment  u2 stars enter rock hall of fame singer bruce ...   
11       business  macy s owner buys rival for $11bn us retail gi...   
12  entertainment  tarantino to direct csi episode film director