# Zero shot using an API (LLAMA)

In [41]:
import pandas as pd
import os
from groq import Groq
from tqdm import tqdm
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score, classification_report

This notebook requires credentials to be loaded from a `.env` file, which should contain the following line:
```
GROQ_API_KEY=<your groq api key>
```

- [Groq API Key](https://console.groq.com/playground) can be generated and used free of charge

### Load df

In [7]:
# Load the labelled dataset; the path is relative to the notebook's working directory.
df = pd.read_csv('../data/data_small.csv')

In [8]:
# Preview the first rows to check the available columns (e.g. `text`, `logical_fallacies`).
df.head()

Unnamed: 0.1,Unnamed: 0,dataset,text,logical_fallacies,source
0,18384,8,Testing on animals could save the life of you ...,appeal_to_emotion,
1,11271,3,"I remember when China took over Hong Kong, I r...",none,
2,15702,4,": The only ""Light at the End of the Tunnel"", i...",appeal_to_emotion,
3,7148,3,So you only believe there are two ways to run ...,none,
4,8147,3,Keep things the way they are or change them co...,false_dilemma,


### Load dotenv with API Key

In [12]:
# Load environment variables from the .env file two directories above the notebook;
# it is expected to define GROQ_API_KEY (checked in the next cell).
from dotenv import load_dotenv
load_dotenv('../../.env')

True

In [14]:
# Fail fast if GROQ_API_KEY is missing or empty in the environment, before any API call is made.
assert os.environ.get("GROQ_API_KEY"), "GROQ_API_KEY not found in .env file"

### Define LLM

In [32]:
import warnings
# NOTE(review): this filter silences every warning for the rest of the session,
# not only the ones raised by LangChain — consider narrowing it.
warnings.filterwarnings("ignore")
from langchain_groq import ChatGroq

# Create the chat client used by all classification calls below (a Llama 3 model served by Groq).
# Temperature controls sampling randomness: at 0 the model favours the most likely token,
# so repeated runs are comparable; higher values let it pick less likely tokens.

llm = ChatGroq(
    model="llama3-8b-8192",  # Replace with your desired Llama model version
    temperature=0,           # Minimise randomness so runs are comparable
    max_tokens=None,         # No explicit cap set here; presumably the provider default applies — TODO confirm
    # timeout=None,            # No timeout (can be adjusted)
    max_retries=2            # Retry twice on transient failures
)

### Define a prompt

In [33]:
def classify_fallacy(text: str) -> str:
    """Classify ``text`` into exactly one logical-fallacy category via the LLM.

    A single user-style prompt (instructions plus the text) is sent to the
    module-level ``llm``; the free-form reply is then mapped onto one of the
    known labels.

    Args:
        text: The passage to classify.

    Returns:
        One of ``faulty_generalization``, ``ad_hominem``, ``false_dilemma``,
        ``appeal_to_authority``, ``appeal_to_emotion`` or ``none``.
        ``"none"`` is also the fallback when the reply names no known
        category. If the request fails, the error is printed (not raised)
        and the sentinel string ``"Error"`` is returned.
    """
    valid_categories = ["faulty_generalization", "ad_hominem", "false_dilemma",
                        "appeal_to_authority", "appeal_to_emotion", "none"]

    try:
        # Instructions and content are combined in one message to avoid
        # system-message compatibility issues.
        prompt = """Classify the following text into exactly one logical fallacy category:
- faulty_generalization
- ad_hominem
- false_dilemma 
- appeal_to_authority
- appeal_to_emotion  
- none

Text to classify: {0}

Respond ONLY with the category name and nothing else.""".format(text)

        response = llm.invoke(prompt)

        # Models frequently answer "Ad Hominem" or "faulty-generalization";
        # fold case and turn runs of whitespace/hyphens into underscores so
        # such replies still match the underscore-separated labels.
        prediction = "_".join(response.content.lower().replace("-", " ").split())

        # Substring match tolerates extra words or punctuation around the label.
        for category in valid_categories:
            if category in prediction:
                return category

        # No known label found in the reply.
        return "none"

    except Exception as e:
        # str(text) keeps the handler itself from failing on non-string input
        # (e.g. None/NaN cells), which would otherwise escape the best-effort path.
        print(f"Error processing text: {str(text)[:50]}... | Error: {str(e)}")
        return "Error"

### Process dataframe

In [34]:
def process_dataframe(df: pd.DataFrame, batch_size=10) -> pd.DataFrame:
    """Classify every row's ``text`` and return a copy with the predictions.

    Args:
        df: Input frame; must contain a ``text`` column. It is not modified.
        batch_size: Unused. Kept so existing calls keep working; one request
            is made per text regardless, so chunking never reduced the number
            of API calls.

    Returns:
        A copy of ``df`` with an added ``predicted_fallacy`` column holding the
        result of ``classify_fallacy`` for each row, in row order. The column
        is present even when ``df`` is empty.

    Raises:
        KeyError: If ``df`` has no ``text`` column.
    """
    result_df = df.copy()
    predictions = []

    with tqdm(total=len(df), desc="Classifying Logical Fallacies") as pbar:
        for text in df['text']:
            predictions.append(classify_fallacy(text))
            pbar.update(1)

    # Assign positionally (plain array, no index alignment) so frames with
    # duplicate or non-default index labels are filled row by row.
    result_df['predicted_fallacy'] = pd.Series(predictions, dtype=object).to_numpy()

    return result_df

### Make predictions

In [39]:
# Restrict the run to the first 1000 rows to keep API usage and runtime manageable.
df_small = df.head(1000)

In [40]:
# Classify every row of the subset. This issues one LLM request per row and is slow:
# the recorded run below took about 38 minutes for 1000 rows.
processed_df = process_dataframe(df_small)

Classifying Logical Fallacies: 100%|██████████| 1000/1000 [38:18<00:00,  2.30s/it]


### Evaluation

In [43]:
# Spot-check the new `predicted_fallacy` column next to the gold `logical_fallacies` labels.
processed_df.head()

Unnamed: 0.1,Unnamed: 0,dataset,text,logical_fallacies,source,predicted_fallacy
0,18384,8,Testing on animals could save the life of you ...,appeal_to_emotion,,faulty_generalization
1,11271,3,"I remember when China took over Hong Kong, I r...",none,,faulty_generalization
2,15702,4,": The only ""Light at the End of the Tunnel"", i...",appeal_to_emotion,,faulty_generalization
3,7148,3,So you only believe there are two ways to run ...,none,,false_dilemma
4,8147,3,Keep things the way they are or change them co...,false_dilemma,,false_dilemma


In [42]:
# Per-class precision, recall and F1 of the predictions against the gold labels.
# Rows where classification failed carry the sentinel "Error" and would appear as an extra class.
print(classification_report(processed_df["logical_fallacies"], processed_df["predicted_fallacy"]))

                       precision    recall  f1-score   support

           ad_hominem       0.72      0.56      0.63       101
  appeal_to_authority       0.81      0.46      0.59        56
    appeal_to_emotion       0.24      0.08      0.12       155
        false_dilemma       0.82      0.52      0.64        86
faulty_generalization       0.16      0.92      0.27       133
                 none       0.63      0.03      0.05       469

             accuracy                           0.27      1000
            macro avg       0.56      0.43      0.38      1000
         weighted avg       0.54      0.27      0.23      1000

